#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "uds_frame.h" #include "uds_server.h" #include "uds_det.h" #include "ctrl_flags.h" #include "v4l2_interface.h" #include "nc_utils.h" #include "nc_ts_fsync_flipflop_buffers.h" #include "nc_app_config_parser.h" #include "nc_cnn_aiware_runtime.h" #include "nc_cnn_communicator.h" #include "nc_cnn_worker_for_postprocess.h" #include "nc_neon.h" #ifdef AIWARE_DEVICE_SUPPORTED #include "aiware/runtime/c/aiwaredevice.h" #endif #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #pragma GCC diagnostic ignored "-Wsign-conversion" #endif #include "stb_image.h" #ifdef __GNUC__ #pragma GCC diagnostic pop #endif #ifdef USE_BYTETRACK #include "nc_cnn_tracker.h" #endif #ifdef USE_8MP_VI #include "nc_dsr_helper.h" #include "nc_dsr_set.h" #include "nc_dmabuf_ctrl_helper.h" #endif /* don't modify */ #define MAX_WIDTH_FOR_VDMA_CNN_DS (1280) #define MAX_HEIGHT_FOR_VDMA_CNN_DS (720) #ifdef USE_8MP_VI #define VIDEO_WIDTH MAX_WIDTH_FOR_VDMA_CNN_DS #define VIDEO_HEIGHT MAX_HEIGHT_FOR_VDMA_CNN_DS #else #define VIDEO_WIDTH (1280) #define VIDEO_HEIGHT (720) #endif #define CAP_FPS (30) #define MQ_NAME_CNN_BUF "/cnn_data" #define DEV_FILE_DSR "/dev/dsr" #ifdef SHOW_FACE_DETECT #define NETWORK_FILE_FACE_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_yolov8n-face.aiwbin" #endif #ifdef SHOW_CUUVA_DETECT #define NETWORK_FILE_CUUVA_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_6class_vis_best_v8m.aiwbin" #endif #ifdef SHOW_FIRE_DETECT #define NETWORK_FILE_FIRE_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_best_fire_detect.aiwbin" #endif #ifdef SHOW_LPR_DETECT #define NETWORK_FILE_LPR_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_best_lp_detect.aiwbin" #endif #define ENC_WORKER_NUM (4) #define ENC_QUEUE_SIZE (16) static volatile int g_running = 1; st_npu_input_info npu_input_info; static uint32_t ch_seq[1] = {0}; static uint64_t g_frame_count = 0; static uint64_t g_fps_last_ts_us = 0; static uint64_t g_fps_last_cnt2 = 0; #define FOLLOWUP_PERSON_ID 0 #define FOLLOWUP_CAR_ID 1 #define NETWORK_OBJDET 0 #define NETWORK_ABNORM 1 #define NETWORK_FIRE 2 #define NETWORK_LPR 3 #define NETWORK_FACE 4 static char** g_img_list = NULL; static int g_img_count = 0; static int g_img_idx = 0; // NPU 입력용 interleaved RGB 버퍼 (640x384x3) static unsigned char* g_src_rgb_1080p = NULL; static unsigned char* g_src_rgb_640p = NULL; typedef struct { unsigned char* jpeg; // JPEG data unsigned long jpeg_size; // JPEG bytes int w, h; } RamFrame; #define TARGET_FPS 30 #define FRAME_INTERVAL_US (1000000 / TARGET_FPS) static RamFrame* g_ram_frames = NULL; static int g_ram_frame_count = 0; static int g_ram_cur_idx = 0; typedef struct { RamFrame* frames; int count; } RamSet; static RamSet ram_sets[7]; static inline void mem_fence(void) { __sync_synchronize(); } int uds_server_start(const char* sock_path); void uds_server_stop(const char* sock_path); int uds_send_frame(int ch, const void* frame_ptr, uint32_t w, uint32_t h, uint32_t stride, uint32_t pixfmt, uint64_t ts_ns); static inline uint64_t now_ns(void){ struct timespec ts; clock_gettime(CLOCK_MONOTONIC_RAW, &ts); return (uint64_t)ts.tv_sec*1000000000ull + ts.tv_nsec; } static inline uint64_t now_us(void){ struct timespec ts; clock_gettime(CLOCK_MONOTONIC_RAW, &ts); return (uint64_t)ts.tv_sec*1000000ull + ts.tv_nsec/1000ull; } typedef struct { int valid; int ch; int w, h; int stride; size_t buf_size; uint8_t *data; // RGB24 복사 버퍼 uint64_t ts_us; } enc_job_t; static enc_job_t g_enc_queue[ENC_QUEUE_SIZE]; static int g_enc_head = 0; static int g_enc_tail = 0; static pthread_mutex_t g_enc_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t g_enc_cond = PTHREAD_COND_INITIALIZER; static unsigned long g_enc_frame_count = 0; static pthread_mutex_t g_stat_mutex = PTHREAD_MUTEX_INITIALIZER; static struct timespec g_fps_last_ts = {0, 0}; static unsigned long g_fps_last_cnt = 0; #ifdef USE_8MP_VI size_t BUFF_SIZE; int dsr_fd; DSR_Data_t dsr_info; dma_alloc_info dma_info; st_nc_dsr_config dsr_config; img_input_config dsr_input_config; img_output_config dsr_output_config; #endif #ifdef USE_8MP_VI static int dsr_init(void) { int input_fd = 0, output_fd = 0; long page_size = sysconf(_SC_PAGESIZE); dsr_input_config.format = IMG_FORMAT_RGB888; dsr_input_config.width = MAX_WIDTH_FOR_VDMA_CNN_DS; dsr_input_config.height = MAX_HEIGHT_FOR_VDMA_CNN_DS; dsr_output_config.format = IMG_FORMAT_RGB888; BUFF_SIZE = dsr_input_config.width * dsr_input_config.height * 3; BUFF_SIZE = ((BUFF_SIZE + page_size - 1) / page_size) * page_size; if(dsr_config_downscale(&dsr_config, 1, NPU_INPUT_WIDTH, NPU_INPUT_HEIGHT) < 0){ perror("Error: DSR config fail"); return -1; } if(open_device_and_dma_buffers(DEV_FILE_DSR, &dsr_fd, &input_fd, &output_fd, BUFF_SIZE) < 0){ perror("Error: DSR open fail"); return -1; } if(dsr_setup_buffer(&dsr_info, dsr_fd, &dma_info, input_fd, output_fd, BUFF_SIZE) < 0){ perror("Error: DSR setup fail"); return -1; } return 0; } static int dsr_deinit(void) { nc_dmabuf_ctrl_end_cpu_access(dma_info.dmabuf_fd_in); nc_dmabuf_ctrl_end_cpu_access(dma_info.dmabuf_fd_out); nc_dmabuf_ctrl_free_dma_fd(dma_info.dmabuf_fd_in); nc_dmabuf_ctrl_free_dma_fd(dma_info.dmabuf_fd_out); nc_dmabuf_ctrl_close(); for (int i = 0; i < BUFF_NUM; i++) { if (dsr_info.dsr_in_buf[i] != MAP_FAILED) { munmap(dsr_info.dsr_in_buf[i], BUFF_SIZE); } if (dsr_info.dsr_out_buf[i] != MAP_FAILED) { munmap(dsr_info.dsr_out_buf[i], BUFF_SIZE); } } dsr_device_deinit(&dsr_fd); return 0; } #endif static void enc_enqueue(int ch, int w, int h, int stride, const void *src, uint64_t ts_us) { size_t bytes = (size_t)stride * (size_t)h; pthread_mutex_lock(&g_enc_mutex); int next_tail = (g_enc_tail + 1) % ENC_QUEUE_SIZE; if (next_tail == g_enc_head) { g_enc_head = (g_enc_head + 1) % ENC_QUEUE_SIZE; } enc_job_t *job = &g_enc_queue[g_enc_tail]; if (!job->data || job->buf_size < bytes) { free(job->data); job->data = (uint8_t*)malloc(bytes); job->buf_size = bytes; } memcpy(job->data, src, bytes); job->ch = ch; job->w = w; job->h = h; job->stride = stride; job->ts_us = ts_us; job->valid = 1; g_enc_tail = next_tail; pthread_cond_signal(&g_enc_cond); pthread_mutex_unlock(&g_enc_mutex); } static void* enc_worker_thread(void *arg) { intptr_t wid = (intptr_t)arg; tjhandle tj = tjInitCompress(); if (!tj) { fprintf(stderr, "[ENC%ld] tjInitCompress failed: %s\n", (long)wid, tjGetErrorStr()); return NULL; } while (g_running) { enc_job_t job; memset(&job, 0, sizeof(job)); // 큐에서 job 하나 가져오기 pthread_mutex_lock(&g_enc_mutex); while (g_enc_head == g_enc_tail && g_running) { pthread_cond_wait(&g_enc_cond, &g_enc_mutex); } if (!g_running) { pthread_mutex_unlock(&g_enc_mutex); break; } job = g_enc_queue[g_enc_head]; // 구조체 복사 (data 포인터 공유) g_enc_head = (g_enc_head + 1) % ENC_QUEUE_SIZE; pthread_mutex_unlock(&g_enc_mutex); if (!job.valid || !job.data) continue; unsigned char *jpegBuf = NULL; unsigned long jpegSize = 0; struct timespec t0, t1; clock_gettime(CLOCK_MONOTONIC, &t0); int rc = tjCompress2( tj, job.data, job.w, job.stride, job.h, TJPF_RGB, // V4L2_PIX_FMT_RGB24 기준 &jpegBuf, &jpegSize, TJSAMP_420, 45, // quality TJFLAG_FASTDCT ); clock_gettime(CLOCK_MONOTONIC, &t1); if (rc != 0) { fprintf(stderr, "[ENC%ld] tjCompress2 error: %s\n", (long)wid, tjGetErrorStr()); if (jpegBuf) tjFree(jpegBuf); continue; } // double sec_diff = (double)(t1.tv_sec - t0.tv_sec); // double nsec_diff = (double)(t1.tv_nsec - t0.tv_nsec); // double ms = sec_diff * 1000.0 + // nsec_diff / 1e6; pthread_mutex_lock(&g_stat_mutex); g_enc_frame_count++; unsigned long n = g_enc_frame_count; // ---- FPS 계산 ---- struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); if (g_fps_last_ts.tv_sec == 0 && g_fps_last_ts.tv_nsec == 0) { // 첫 호출 초기화 g_fps_last_ts = now; g_fps_last_cnt2 = n; } else { double sec = (double)(now.tv_sec - g_fps_last_ts.tv_sec) + (double)(now.tv_nsec - g_fps_last_ts.tv_nsec) / 1e9; if (sec >= 1.0) { unsigned long diff = n - g_fps_last_cnt2; double fps = (double)diff / sec; printf("[ENC_STAT] total=%lu fps=%.2f\n", n, fps); g_fps_last_ts = now; g_fps_last_cnt2 = n; } } pthread_mutex_unlock(&g_stat_mutex); uint32_t jpeg_sz = (uint32_t)jpegSize; uint64_t ts_us = job.ts_us; // 프로토콜 설계에 따라: // - pixfmt: WS_PIXFMT_JPEG // - stride: JPEG 바이트 수로 사용 int sret = uds_send_frame( job.ch, jpegBuf, // JPEG 바이트 (uint32_t)job.w, (uint32_t)job.h, jpeg_sz, // stride 대신 "size"로 사용 WS_PIXFMT_JPEG, // ← uds_frame.h에 정의 ts_us ); if (sret != 0) { fprintf(stderr, "[ENC%ld] uds_send_frame JPEG failed (ret=%d)\n", (long)wid, sret); } tjFree(jpegBuf); } tjDestroy(tj); return NULL; } static int has_img_ext(const char* name) { const char* dot = strrchr(name, '.'); char ext[8]; int i; if (!dot) return 0; dot++; for (i = 0; i < 7 && dot[i]; i++) { ext[i] = (char)tolower((unsigned char)dot[i]); } ext[i] = '\0'; return (!strcmp(ext, "jpg") || !strcmp(ext, "jpeg") || !strcmp(ext, "png")); } int init_rgb_buffer(void) { size_t sz_1080p = (size_t)1280 * 720 * 3; size_t sz_640p = (size_t)NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 3; // 640*384*3 g_src_rgb_1080p = (unsigned char*)malloc(sz_1080p); if (!g_src_rgb_1080p) { fprintf(stderr, "[IMG] malloc failed (g_src_rgb_1080p)\n"); return -1; } g_src_rgb_640p = (unsigned char*)malloc(sz_640p); if (!g_src_rgb_640p) { fprintf(stderr, "[IMG] malloc failed (g_src_rgb_640p)\n"); return -1; } return 0; } void free_rgb_buffer(void) { if (g_src_rgb_1080p) { free(g_src_rgb_1080p); g_src_rgb_1080p = NULL; } if (g_src_rgb_640p) { free(g_src_rgb_640p); g_src_rgb_640p = NULL; } } static int extract_tail5(const char* path) { const char* base = strrchr(path, '/'); if (!base) base = path; else base++; // '/' 다음부터 파일명 const char* dot = strrchr(base, '.'); if (!dot) return 0; if (dot - base < 5) return 0; // 5자리 안 되면 const char* p = dot - 5; int v = 0; for (int i = 0; i < 5; i++) { if (!isdigit((unsigned char)p[i])) return 0; v = v * 10 + (p[i] - '0'); } return v; } static int cmp_img_tail5(const void* a, const void* b) { const char* sa = *(const char* const*)a; const char* sb = *(const char* const*)b; int na = extract_tail5(sa); int nb = extract_tail5(sb); if (na < nb) return -1; if (na > nb) return 1; return strcmp(sa, sb); } int load_image_list(const char* dirpath, int limit) { DIR* d; struct dirent* ent; int i = 0; const int IMG_LIMIT = limit; // ★ limit 인자 사용 if (g_img_list) { for (int k = 0; k < g_img_count; ++k) { free(g_img_list[k]); } free(g_img_list); } g_img_list = NULL; g_img_count = 0; g_img_idx = 0; d = opendir(dirpath); if (!d) { perror("[IMG] opendir"); return -1; } int count = 0; while ((ent = readdir(d)) != NULL) { if (ent->d_name[0] == '.') continue; if (!has_img_ext(ent->d_name)) continue; count++; } rewinddir(d); if (count == 0) { closedir(d); fprintf(stderr, "[IMG] no images in %s\n", dirpath); return -1; } if (count > IMG_LIMIT) count = IMG_LIMIT; g_img_list = (char**)calloc(count, sizeof(char*)); if (!g_img_list) { closedir(d); return -1; } while ((ent = readdir(d)) != NULL) { if (ent->d_name[0] == '.') continue; if (!has_img_ext(ent->d_name)) continue; if (i >= IMG_LIMIT) break; char full[PATH_MAX]; snprintf(full, sizeof(full), "%s/%s", dirpath, ent->d_name); g_img_list[i] = strdup(full); if (!g_img_list[i]) { closedir(d); return -1; } i++; } g_img_count = i; closedir(d); qsort(g_img_list, g_img_count, sizeof(char*), cmp_img_tail5); printf("[IMG] %d images loaded (limit=%d)\n", g_img_count, IMG_LIMIT); return 0; } int load_images_to_ram(void) { if (g_img_count <= 0) return -1; g_ram_frames = (RamFrame*)calloc(g_img_count, sizeof(RamFrame)); if (!g_ram_frames) return -1; tjhandle tj = tjInitCompress(); if (!tj) { fprintf(stderr, "tjInitCompress failed\n"); return -1; } for (int i = 0; i < g_img_count; i++) { const char* path = g_img_list[i]; int w, h, ch; unsigned char* img = stbi_load(path, &w, &h, &ch, 3); if (!img) return -1; if (w != VIDEO_WIDTH || h != VIDEO_HEIGHT) { stbi_image_free(img); return -1; } unsigned char* jpegBuf = NULL; unsigned long jpegSize = 0; int rc = tjCompress2( tj, img, w, w * 3, h, TJPF_RGB, &jpegBuf, &jpegSize, TJSAMP_420, 45, TJFLAG_FASTDCT ); stbi_image_free(img); if (rc != 0) { fprintf(stderr, "[RAM] Compress failed: %s\n", tjGetErrorStr()); continue; } g_ram_frames[i].jpeg = jpegBuf; g_ram_frames[i].jpeg_size = jpegSize; g_ram_frames[i].w = w; g_ram_frames[i].h = h; if (i % 10 == 0) fprintf(stderr, "[RAM][LOAD] %d/%d loaded\n", i, g_img_count); } tjDestroy(tj); g_ram_frame_count = g_img_count; g_ram_cur_idx = 0; fprintf(stderr, "[RAM] loaded %d JPEG images\n", g_ram_frame_count); return 0; } void preload_all_images(void) { struct { int feat; const char* path; int limit; } cfg[] = { { FEAT_OBJDET, "/mnt/nvme/test_images/1", 200 }, { FEAT_FIRE, "/mnt/nvme/test_images/2", 50 }, { FEAT_ABNORM, "/mnt/nvme/test_images/6", 150 } }; for (int i = 0; i < (int)(sizeof(cfg)/sizeof(cfg[0])); i++) { load_image_list(cfg[i].path, cfg[i].limit); // ★ limit 전달 load_images_to_ram(); ram_sets[cfg[i].feat].frames = g_ram_frames; ram_sets[cfg[i].feat].count = g_ram_frame_count; g_ram_frames = NULL; g_ram_frame_count = 0; } } static void update_image_source_for_feat(void) { uint32_t fm = ctrl_feat_mask_snapshot(); printf("[IMG] update_image_source_for_feat: feat_mask=0x%08X\n", fm); const uint32_t BIT_OBJDET = (1u << FEAT_OBJDET); const uint32_t BIT_ABNORM = (1u << FEAT_ABNORM); const uint32_t BIT_FIRE = (1u << FEAT_FIRE); int feat = -1; // 비트마스크로 판정 if (fm & BIT_OBJDET) feat = FEAT_OBJDET; else if (fm & BIT_FIRE) feat = FEAT_FIRE; else if (fm & BIT_ABNORM) feat = FEAT_ABNORM; printf("[IMG] selected feat=%d\n", feat); // 1) 어떤 것도 선택 안 됐으면 OBJDET로 기본값 if (feat < 0) feat = FEAT_OBJDET; // 2) 선택된 feat의 영상 세트가 없으면 OBJDET로 fallback if (ram_sets[feat].count == 0) feat = FEAT_OBJDET; // 3) 실제로 적용 g_ram_frames = ram_sets[feat].frames; g_ram_frame_count = ram_sets[feat].count; } static void on_sigint(int sig) { (void)sig; g_running = 0; } int send_cnn_buf (uint8_t *ptr_cnn_buf, uint64_t time_stamp_us, uint32_t cam_ch, E_NETWORK_UID net_id) { int ret = 0; stCnnData *cnn_data; struct mq_attr attr; attr.mq_maxmsg = MAX_MQ_MSG_CNT; attr.mq_msgsize = sizeof(stCnnData*); int oflag = O_WRONLY | O_CREAT; mqd_t mfd = mq_open(MQ_NAME_CNN_BUF, oflag, 0666, &attr); if (mfd == -1) { perror("mq open error"); return -1; } cnn_data = (stCnnData*)malloc(sizeof(stCnnData)); cnn_data->cam_ch = cam_ch; cnn_data->ptr_cnn_buf = ptr_cnn_buf; cnn_data->time_stamp_us = time_stamp_us; cnn_data->net_id = net_id; if ((ret = mq_send(mfd, (const char *)&cnn_data, attr.mq_msgsize, 1)) == -1) { printf("errno of mq_send = %d\n", errno); } mq_close(mfd); return ret; } int receive_cnn_buf (stCnnData **out_cnn_buf) { int ret = 0; struct mq_attr attr; attr.mq_maxmsg = MAX_MQ_MSG_CNT; attr.mq_msgsize = sizeof(stCnnData *); int oflag = O_RDONLY | O_CREAT; mqd_t mfd = mq_open(MQ_NAME_CNN_BUF, oflag, 0666, &attr); if (mfd == -1) { perror("mq open error"); return -1; } if ((ret = (int32_t)mq_receive(mfd, (char*)out_cnn_buf, attr.mq_msgsize, NULL)) == -1) { printf("errno of mq_receive = %d\n", errno); } mq_close(mfd); return ret; } int npu_init(st_npu_input_info *npu_input_info) { /* Initialize CNN */ if (nc_aiw_init_cnn() < 0 ) { fprintf(stderr, "nc_aiw_init_cnn() failure!!\n"); return -1; } #ifdef SHOW_FACE_DETECT if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_FACE_DET), NETWORK_FACE_DET, nc_postprocess_yolov8_inference_result) < 0) { fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n"); return -1; } #endif #ifdef SHOW_CUUVA_DETECT if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_CUUVA_DET), NETWORK_CUUVA_DET, nc_postprocess_yolov8_inference_result) < 0) { fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n"); return -1; } #endif #ifdef SHOW_FIRE_DETECT if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_FIRE_DET), NETWORK_FIRE_DET, nc_postprocess_yolov8_inference_result) < 0) { fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n"); return -1; } #endif #ifdef SHOW_LPR_DETECT if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_LPR_DET), NETWORK_LPR_DET, nc_postprocess_yolov8_inference_result) < 0) { fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n"); return -1; } #endif if(nc_aiw_finish_network_builder() < 0 ) { fprintf(stderr, "nc_aiw_finish_network_builder() failure!!\n"); return -1; } // obtain the input resoltution for the CNN network // get information of the input tensor aiwTensorInfo in_tinfo; #ifdef SHOW_FACE_DETECT if(nc_get_cnn_network_input_resol(NETWORK_FACE_DET, &in_tinfo) < 0) { printf("failed to get the input resolution for the CNN network\n"); } #endif #ifdef SHOW_CUUVA_DETECT if(nc_get_cnn_network_input_resol(NETWORK_CUUVA_DET, &in_tinfo) < 0) { printf("failed to get the input resolution for the CNN network\n"); } #endif #ifdef SHOW_FIRE_DETECT if(nc_get_cnn_network_input_resol(NETWORK_FIRE_DET, &in_tinfo) < 0) { printf("failed to get the input resolution for the CNN network\n"); } #endif #ifdef SHOW_LPR_DETECT if(nc_get_cnn_network_input_resol(NETWORK_LPR_DET, &in_tinfo) < 0) { printf("failed to get the input resolution for the CNN network\n"); } #endif npu_input_info->w = in_tinfo.dim.w;// network input width npu_input_info->h = in_tinfo.dim.h;// network input height npu_input_info->rgb_size = in_tinfo.dim.w * in_tinfo.dim.h * RGB_CNT; #ifdef USE_BYTETRACK E_NETWORK_UID net_id = NETWORK_CUUVA_DET; if (nc_init_bytetrackers(CAP_FPS, 0, net_id) != 0) { perror("nc_init_bytetrackers() error"); return -1; } #endif printf("aiw finish\n"); return 0; } void *cnn_task(void *arg) { (void) arg; printf("CNN TASK RUN!!\n"); while (g_running) { stCnnData *cnn_data = NULL; if(receive_cnn_buf(&cnn_data) != -1){ nc_aiw_run_cnn(cnn_data->ptr_cnn_buf, cnn_data->time_stamp_us, cnn_data->cam_ch, cnn_data->net_id); if(cnn_data) { free(cnn_data->ptr_cnn_buf); free(cnn_data); } } } printf("EXIT CNN_TASK!!\n"); return NULL; } static inline void dup_and_send(unsigned char* src, size_t sz, uint64_t ts_us, uint32_t ch, E_NETWORK_UID net_uid){ unsigned char* p = (unsigned char*)malloc(sz); if (!p) return; memcpy(p, src, sz); send_cnn_buf(p, ts_us, ch, net_uid); } static void dispatch_stage1_and_plan_followups( uint32_t m, uint32_t ch, unsigned char* rgb_planar, size_t rgb_size, uint64_t ts_us) { const int UID_DET = nc_get_cnn_networks_id_by_uid(NETWORK_CUUVA_DET); const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET); if (CTRL_MODEL_ON(m, NETWORK_OBJDET) && UID_DET >= 0) dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_DET); if (CTRL_MODEL_ON(m, NETWORK_FIRE) && UID_FIRE >= 0) dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FIRE); } static void dispatch_stage2_followups( uint32_t m, // ctrl_active_models_acquire() 결과(모델 비트) uint32_t ch, unsigned char* rgb_planar, size_t rgb_size, uint64_t ts_us, bool need_face, bool need_lpr ){ const int UID_FACE = nc_get_cnn_networks_id_by_uid(NETWORK_FACE_DET); const int UID_LPR = nc_get_cnn_networks_id_by_uid(NETWORK_LPR_DET); // const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET); // const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET); if (need_face && CTRL_MODEL_ON(m, NETWORK_FACE) && UID_FACE >= 0){ dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FACE); } if (need_lpr && CTRL_MODEL_ON(m, NETWORK_LPR) && UID_LPR >= 0){ dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_LPR); } // dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FACE); // if (need_abnorm && CTRL_MODEL_ON(m, NETWORK_ABNORM) && UID_ABNORM >= 0) // dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_ABNORM); } static inline void send_by_ctrl_mask( uint32_t m, uint32_t ch, unsigned char* rgb_planar, size_t rgb_size, uint64_t ts_us) { dispatch_stage1_and_plan_followups(m, ch, rgb_planar, rgb_size, ts_us); } #ifndef ARRAY_LEN #define ARRAY_LEN(a) ((uint32_t)(sizeof(a)/sizeof((a)[0]))) #endif static void collect_and_append( uint32_t ch, const stCnnPostprocessingResults *R, const char *tag, bool *need_face_followup, bool *need_lpr_followup, uds_det_entry_t **total_items, uint32_t *total_items_cnt ){ const uint32_t CLASS_CAP = ARRAY_LEN(R->class_objs); uint32_t total = 0; for (uint32_t ci = 0; ci < CLASS_CAP; ++ci) { int cnt = R->class_objs[ci].obj_cnt; if (cnt > 0) total += (uint32_t)cnt; } if (total == 0) return; uds_det_entry_t *items = (uds_det_entry_t*)malloc((size_t)total * sizeof(uds_det_entry_t)); if (!items) return; uint16_t tag_id = 0; if (!strcmp(tag, "DET")) tag_id = UDS_TAG_DET; else if (!strcmp(tag, "FIRE")) tag_id = UDS_TAG_FIRE; else if (!strcmp(tag, "FACE")) tag_id = UDS_TAG_FACE; else if (!strcmp(tag, "LPR")) tag_id = UDS_TAG_LPR; uint32_t k = 0; for (uint32_t ci = 0; ci < CLASS_CAP; ++ci) { int n = R->class_objs[ci].obj_cnt; if (n <= 0) continue; const stObjInfo *objs = R->class_objs[ci].objs; for (int j = 0; j < n; ++j) { const stObjInfo *o = &objs[j]; uds_det_entry_t *dst = &items[k++]; // uint32_t fm = ctrl_feat_mask_snapshot(); // uint32_t md = ctrl_active_models_acquire(); dst->prob = (float)o->prob; dst->x = (float)o->bbox.x; dst->y = (float)o->bbox.y; dst->w = (float)o->bbox.w; dst->h = (float)o->bbox.h; dst->cls = UDS_ENC_CLS(tag_id, (uint16_t)ci); dst->tid = (uint16_t)o->track_id; dst->reserved = (uint16_t)(UDS_RESERVED_PACK(ctrl_feat_mask_snapshot(), ctrl_active_models_acquire()) & 0xFFFF); // uint16_t p = dst->reserved; // fprintf(stderr, // "[UDS][RESV] fm8=0x%02X md8=0x%02X pack=0x%04X GET_FEAT=0x%02X GET_MODEL=0x%02X\n", // (unsigned)(fm & 0xFF), (unsigned)(md & 0xFF), // (unsigned)p, // (unsigned)UDS_RESERVED_GET_FEAT(p), // 기대: fm8 // (unsigned)UDS_RESERVED_GET_MODEL(p) // 기대: md8 // ); // if (ch >= 256) ch = 255; printf("[%s] ch=%u ci=%u j=%d k=%u prob=%.3f bbox=%.1f,%.1f,%.1f,%.1f\n", tag, ch, ci, j, k, (float)o->prob, (float)o->bbox.x, (float)o->bbox.y, (float)o->bbox.w, (float)o->bbox.h); if ((uint32_t)ci == FOLLOWUP_PERSON_ID && tag_id == UDS_TAG_DET) { *need_face_followup = true; } if ((uint32_t)ci == FOLLOWUP_CAR_ID && tag_id == UDS_TAG_DET) { *need_lpr_followup = true; } } } uds_det_entry_t *tmp = (uds_det_entry_t*)realloc(*total_items, (size_t)(*total_items_cnt + total) * sizeof(uds_det_entry_t)); if (tmp) { *total_items = tmp; memcpy(&(*total_items)[*total_items_cnt], items, (size_t)total * sizeof(uds_det_entry_t)); *total_items_cnt += total; } free(items); } static void read_and_collect_for( uint32_t ch, uint32_t network_offset, const char *tag, uint64_t *ts_ns_latest, uds_det_entry_t **total_items, uint32_t *total_items_cnt, bool *need_face_followup, bool *need_lpr_followup ){ uint64_t ts = 0; pp_result_buf *det_buf = (pp_result_buf*)nc_tsfs_ff_get_readable_buffer_and_timestamp(ch + network_offset, &ts); if (det_buf) { stCnnPostprocessingResults *R = &det_buf->cnn_result; collect_and_append(ch, R, tag, need_face_followup, need_lpr_followup, total_items, total_items_cnt); uint64_t ts_ns = ts * 1000ULL; if (ts_ns > *ts_ns_latest) *ts_ns_latest = ts_ns; } nc_tsfs_ff_finish_read_buf(ch + network_offset); } // ----------------------------- // 메인 // ----------------------------- int main(int argc, char** argv) { (void)argc; (void)argv; // SIGINT 핸들링 struct sigaction sa; sa.sa_handler = on_sigint; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_RESETHAND; sigaction(SIGINT, &sa, NULL); pthread_t p_thread[MAX_TASK_CNT]; int task_cnt = 0; int thr_id; int status; ctrl_uds_start(); ctrl_feature_on(FEAT_OBJDET); if (uds_server_start(UDS_SOCK_PATH) != 0) { fprintf(stderr, "uds_server_start failed\n"); return -1; } #ifdef USE_8MP_VI dsr_init(); #endif #ifdef DETECT_NETWORK int det_buf_size = sizeof(pp_result_buf); if(nc_tsfs_ff_create_buffers(0+DETECT_NETWORK, det_buf_size) < 0) { exit(1); } #endif #ifdef FACE_NETWORK int face_buf_size = sizeof(pp_result_buf); if(nc_tsfs_ff_create_buffers(0+FACE_NETWORK, face_buf_size) < 0) { exit(1); } #endif #ifdef FIRE_NETWORK int fire_buf_size = sizeof(pp_result_buf); if(nc_tsfs_ff_create_buffers(0+FIRE_NETWORK, fire_buf_size) < 0) { exit(1); } #endif #ifdef LPR_NETWORK int lpr_buf_size = sizeof(pp_result_buf); if(nc_tsfs_ff_create_buffers(0+LPR_NETWORK, lpr_buf_size) < 0) { exit(1); } #endif mq_unlink(MQ_NAME_CNN_BUF); if(npu_init(&npu_input_info) < 0) { printf("failed to init NPU\n"); return -1; } printf("create tasks\n"); thr_id = pthread_create(&p_thread[task_cnt++], NULL, cnn_task, (void *)NULL); if (thr_id < 0) { perror("thread create error : cnn_task"); exit(1); } cnn_postprocess_arg cnn_post_param; cnn_post_param.target_width = VIDEO_WIDTH; cnn_post_param.target_height = VIDEO_HEIGHT; thr_id = pthread_create(&p_thread[task_cnt++], NULL, nc_cnn_postprocess_task, (void *)&cnn_post_param); if (thr_id < 0) { perror("thread create error : nc_cnn_postprocess_task"); exit(1); } pthread_t enc_threads[ENC_WORKER_NUM]; for (int i = 0; i < ENC_WORKER_NUM; i++) { if (pthread_create(&enc_threads[i], NULL, enc_worker_thread, (void*)(intptr_t)i) != 0) { perror("pthread_create enc_worker_thread"); } } unsigned char* rgbdata_for_cnn = (unsigned char *)malloc(npu_input_info.rgb_size); bool need_face_followup = false; bool need_lpr_followup = false; int ch = 0; preload_all_images(); if (init_rgb_buffer() < 0) return -1; uint64_t next_ts = now_us(); tjhandle tjd_global = tjInitDecompress(); if (!tjd_global) { fprintf(stderr, "tjInitDecompress failed\n"); return -1; } while (g_running) { // uint64_t loop_start = now_us(); uint32_t m = ctrl_active_models_acquire(); need_face_followup = false; need_lpr_followup = false; update_image_source_for_feat(); if (g_ram_frame_count == 0) { fprintf(stderr, "[IMG] no ram frames\n"); break; } uint64_t time_stamp_us = now_us(); RamFrame* f = &g_ram_frames[g_ram_cur_idx]; int w = f->w; int h = f->h; tjDecompress2( tjd_global, f->jpeg, f->jpeg_size, g_src_rgb_1080p, w, 0, h, TJPF_RGB, TJFLAG_FASTDCT ); g_ram_cur_idx = (g_ram_cur_idx + 1) % g_ram_frame_count; memcpy(dsr_info.dsr_in_buf[0], g_src_rgb_1080p, VIDEO_WIDTH * VIDEO_HEIGHT * 3); dsr_downscale( dsr_fd, &dsr_info, dsr_input_config, dsr_output_config, dsr_config, 0 ); g_img_idx = (g_img_idx + 1) % g_img_count; memcpy(g_src_rgb_640p, dsr_info.dsr_out_buf[0], NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 3); nc_rgb_interleaved_to_planar_neon( g_src_rgb_640p, (unsigned char*)rgbdata_for_cnn, (unsigned char*)rgbdata_for_cnn + (NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT), (unsigned char*)rgbdata_for_cnn + (NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 2), NPU_INPUT_WIDTH, NPU_INPUT_HEIGHT ); enc_enqueue( ch, VIDEO_WIDTH, VIDEO_HEIGHT, VIDEO_WIDTH * 3, g_src_rgb_1080p, time_stamp_us ); dispatch_stage1_and_plan_followups(m, ch, rgbdata_for_cnn, npu_input_info.rgb_size, time_stamp_us); uint64_t ts_ns_latest = 0; uds_det_entry_t *total_items = NULL; uint32_t total_items_cnt = 0; if (CTRL_MODEL_ON(m, NETWORK_OBJDET)){ read_and_collect_for( ch, DETECT_NETWORK, "DET", &ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup ); } if (CTRL_MODEL_ON(m, NETWORK_FIRE)){ read_and_collect_for( ch, FIRE_NETWORK, "FIRE", &ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup ); } bool need_face = need_face_followup; bool need_lpr = need_lpr_followup; dispatch_stage2_followups(m, ch, rgbdata_for_cnn, npu_input_info.rgb_size, time_stamp_us, need_face, need_lpr); if (CTRL_MODEL_ON(m, NETWORK_FACE) && need_face){ read_and_collect_for( ch, FACE_NETWORK, "FACE", &ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup ); } if (CTRL_MODEL_ON(m, NETWORK_LPR) && need_lpr){ read_and_collect_for( ch, LPR_NETWORK, "LPR", &ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup ); } uds_det_entry_t dummy; uint64_t det_ts_us = time_stamp_us; if (ts_ns_latest > 0) { det_ts_us = ts_ns_latest / 1000ULL; } uds_det_entry_t *send_items = (total_items_cnt > 0) ? total_items : &dummy; uint32_t send_cnt = total_items_cnt; uint32_t seq = ch_seq[ch]++; uds_send_dets_min(ch, seq, det_ts_us, send_items, send_cnt); if (total_items) free(total_items); g_frame_count++; uint64_t now = now_us(); if (g_fps_last_ts_us == 0) { g_fps_last_ts_us = now; g_fps_last_cnt = g_frame_count; } else { uint64_t diff_us = now - g_fps_last_ts_us; if (diff_us >= 1000000ULL) { uint64_t diff_cnt = g_frame_count - g_fps_last_cnt; double fps = (double)diff_cnt * 1000000.0 / (double)diff_us; printf("[IMG_FPS] total=%llu fps=%.2f\n", (unsigned long long)g_frame_count, fps); g_fps_last_ts_us = now; g_fps_last_cnt = g_frame_count; } } // uint64_t t1 = now_us(); // uint64_t elapsed_us = t1 - time_stamp_us; // fprintf(stderr, "[LOOP] time=%llu us (%.3f ms)\n", // (unsigned long long)elapsed_us, // (double)elapsed_us / 1000.0); next_ts += FRAME_INTERVAL_US; uint64_t now2 = now_us(); if (next_ts > now2) { usleep((useconds_t)(next_ts - now2)); } else { next_ts = now2; } // uint64_t loop_end = now_us(); // uint64_t loop_time = loop_end - loop_start; // printf("[LOOP] %llu us (%.3f ms)\n", // (unsigned long long)loop_time, // (double)loop_time / 1000.0); } for(int i =0; i< task_cnt; i++) { pthread_join(p_thread[i], (void **)&status); } #ifdef USE_8MP_VI dsr_deinit(); #endif uds_server_stop(UDS_SOCK_PATH); free_rgb_buffer(); for (int i = 0; i < ENC_WORKER_NUM; i++) { pthread_join(enc_threads[i], NULL); } #ifdef USE_BYTETRACK nc_deInit_bytetrackers(0); #endif if (g_img_list) { for (int i = 0; i < g_img_count; ++i) { free(g_img_list[i]); } free(g_img_list); } return 0; }