You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1335 lines
36 KiB

7 months ago
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <signal.h>
#include <errno.h>
#include <pthread.h>
#include <linux/videodev2.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <limits.h>
#include <ctype.h>
#include <arm_neon.h>
#include <omp.h>
#include <turbojpeg.h>
#include "uds_frame.h"
#include "uds_server.h"
#include "uds_det.h"
#include "ctrl_flags.h"
#include "v4l2_interface.h"
#include "nc_utils.h"
#include "nc_ts_fsync_flipflop_buffers.h"
#include "nc_app_config_parser.h"
#include "nc_cnn_aiware_runtime.h"
#include "nc_cnn_communicator.h"
#include "nc_cnn_worker_for_postprocess.h"
#include "nc_neon.h"
#ifdef AIWARE_DEVICE_SUPPORTED
#include "aiware/runtime/c/aiwaredevice.h"
#endif
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#endif
#include "stb_image.h"
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#ifdef USE_BYTETRACK
#include "nc_cnn_tracker.h"
#endif
#ifdef USE_8MP_VI
#include "nc_dsr_helper.h"
#include "nc_dsr_set.h"
#include "nc_dmabuf_ctrl_helper.h"
#endif
/* don't modify */
/* dsr_init() uses these two values as the DSR input width and height. */
#define MAX_WIDTH_FOR_VDMA_CNN_DS (1280)
#define MAX_HEIGHT_FOR_VDMA_CNN_DS (720)
/* Source frame geometry; both build variants resolve to 1280x720. */
#ifdef USE_8MP_VI
#define VIDEO_WIDTH MAX_WIDTH_FOR_VDMA_CNN_DS
#define VIDEO_HEIGHT MAX_HEIGHT_FOR_VDMA_CNN_DS
#else
#define VIDEO_WIDTH (1280)
#define VIDEO_HEIGHT (720)
#endif
/* Frame rate handed to nc_init_bytetrackers() in npu_init(). */
#define CAP_FPS (30)
/* POSIX message queue name shared by send_cnn_buf() and receive_cnn_buf(). */
#define MQ_NAME_CNN_BUF "/cnn_data"
/* Device node opened by dsr_init(). */
#define DEV_FILE_DSR "/dev/dsr"
/* Network binaries registered in npu_init(); each path exists only when its SHOW_* switch is defined. */
#ifdef SHOW_FACE_DETECT
#define NETWORK_FILE_FACE_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_yolov8n-face.aiwbin"
#endif
#ifdef SHOW_CUUVA_DETECT
#define NETWORK_FILE_CUUVA_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_6class_vis_best_v8m.aiwbin"
#endif
#ifdef SHOW_FIRE_DETECT
#define NETWORK_FILE_FIRE_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_best_fire_detect.aiwbin"
#endif
#ifdef SHOW_LPR_DETECT
#define NETWORK_FILE_LPR_DET "/mnt/user_data/applications/misc/networks/cuuva/modified_best_lp_detect.aiwbin"
#endif
/* Number of enc_worker_thread() instances started by main(). */
#define ENC_WORKER_NUM (4)
/* Slot count of the g_enc_queue ring. enc_enqueue() treats (tail + 1) == head as full, so at most ENC_QUEUE_SIZE - 1 jobs are pending. */
#define ENC_QUEUE_SIZE (16)
/* Run flag: cleared by on_sigint(); polled by main(), cnn_task() and enc_worker_thread(). */
static volatile int g_running = 1;
/* Network input geometry filled in by npu_init(). */
st_npu_input_info npu_input_info;
/* Per-channel sequence number passed to uds_send_dets_min(); only one channel exists. */
static uint32_t ch_seq[1] = {0};
/* Number of main-loop iterations completed, reported as [IMG_FPS]. */
static uint64_t g_frame_count = 0;
/* Start of main()'s current FPS window in microseconds (0 = not started yet). */
static uint64_t g_fps_last_ts_us = 0;
/* NOTE: despite sitting next to the main-loop counters, this one is used by enc_worker_thread() together with g_fps_last_ts; main() pairs g_fps_last_ts_us with g_fps_last_cnt instead. */
static uint64_t g_fps_last_cnt2 = 0;
/* Class indices that collect_and_append() treats as triggers for the face / LPR follow-up when the tag is "DET". */
#define FOLLOWUP_PERSON_ID 0
#define FOLLOWUP_CAR_ID 1
/* Model selectors passed to CTRL_MODEL_ON(); NETWORK_ABNORM is only referenced from commented-out code in this file. */
#define NETWORK_OBJDET 0
#define NETWORK_ABNORM 1
#define NETWORK_FIRE 2
#define NETWORK_LPR 3
#define NETWORK_FACE 4
/* Heap array of heap-allocated image paths built by load_image_list(), plus its length and a cursor. */
static char** g_img_list = NULL;
static int g_img_count = 0;
static int g_img_idx = 0;
// Interleaved RGB buffers for NPU input (640x384x3)
// NOTE: init_rgb_buffer() allocates g_src_rgb_1080p as 1280*720*3 bytes, so the "1080p" in the name is historical.
static unsigned char* g_src_rgb_1080p = NULL;
static unsigned char* g_src_rgb_640p = NULL;
/* One pre-encoded test frame kept in memory; filled by load_images_to_ram(). */
typedef struct {
unsigned char* jpeg; // JPEG data (buffer returned by tjCompress2)
unsigned long jpeg_size; // JPEG bytes
int w, h; // decoded image width and height in pixels
} RamFrame;
/* Pacing of the main loop: one iteration every FRAME_INTERVAL_US microseconds. */
#define TARGET_FPS 30
#define FRAME_INTERVAL_US (1000000 / TARGET_FPS)
/* Frame set currently being played back, its length, and the playback cursor used by main(). */
static RamFrame* g_ram_frames = NULL;
static int g_ram_frame_count = 0;
static int g_ram_cur_idx = 0;
/* A preloaded frame set; preload_all_images() stores one per feature id. */
typedef struct {
RamFrame* frames;
int count;
} RamSet;
/* Indexed by FEAT_* id in preload_all_images() and update_image_source_for_feat(). */
static RamSet ram_sets[7];
/* Issues a full memory barrier via the compiler builtin. */
static inline void mem_fence(void)
{
    __sync_synchronize();
}
/* Prototypes of the UDS server entry points used by this file (definitions live outside this file). */
/* main() treats a non-zero return from uds_server_start() as failure. */
int uds_server_start(const char* sock_path);
void uds_server_stop(const char* sock_path);
/* enc_worker_thread() sends JPEG frames through this call, passing the JPEG byte count in `stride`
 * and WS_PIXFMT_JPEG in `pixfmt`; it treats a non-zero return as failure. */
int uds_send_frame(int ch, const void* frame_ptr,
uint32_t w, uint32_t h, uint32_t stride,
uint32_t pixfmt, uint64_t ts_ns);
/* Returns the CLOCK_MONOTONIC_RAW reading converted to nanoseconds. */
static inline uint64_t now_ns(void)
{
    struct timespec mono;

    clock_gettime(CLOCK_MONOTONIC_RAW, &mono);

    const uint64_t whole_seconds_ns = (uint64_t)mono.tv_sec * 1000000000ull;
    return whole_seconds_ns + mono.tv_nsec;
}
/* Returns the CLOCK_MONOTONIC_RAW reading converted to microseconds (sub-microsecond part truncated). */
static inline uint64_t now_us(void)
{
    struct timespec mono;

    clock_gettime(CLOCK_MONOTONIC_RAW, &mono);

    const uint64_t whole_seconds_us = (uint64_t)mono.tv_sec * 1000000ull;
    const uint64_t fraction_us = mono.tv_nsec / 1000ull;
    return whole_seconds_us + fraction_us;
}
/* One slot of the encoder ring: a private copy of a raw frame plus its metadata. */
typedef struct {
int valid; // set to 1 by enc_enqueue() once the slot is filled
int ch; // channel id forwarded to uds_send_frame()
int w, h; // frame size in pixels
int stride; // bytes per row of `data`
size_t buf_size; // allocated size of `data` in bytes
uint8_t *data; // RGB24 copy buffer
uint64_t ts_us; // capture timestamp in microseconds
} enc_job_t;
/* Ring buffer of encode jobs: enc_enqueue() writes at g_enc_tail, enc_worker_thread() reads at g_enc_head. */
static enc_job_t g_enc_queue[ENC_QUEUE_SIZE];
static int g_enc_head = 0;
static int g_enc_tail = 0;
/* g_enc_mutex guards the ring indices and slots; g_enc_cond is signalled when a job is queued. */
static pthread_mutex_t g_enc_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t g_enc_cond = PTHREAD_COND_INITIALIZER;
/* Encoder statistics, updated under g_stat_mutex by enc_worker_thread(). */
static unsigned long g_enc_frame_count = 0;
static pthread_mutex_t g_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct timespec g_fps_last_ts = {0, 0};
/* NOTE: used by main() for its [IMG_FPS] window, not by the encoder (which uses g_fps_last_cnt2). */
static unsigned long g_fps_last_cnt = 0;
#ifdef USE_8MP_VI
/* State of the DSR downscaler; initialised by dsr_init() and released by dsr_deinit(). */
size_t BUFF_SIZE; /* page-rounded byte size of one DSR buffer, computed in dsr_init() */
int dsr_fd; /* descriptor for DEV_FILE_DSR */
DSR_Data_t dsr_info; /* holds the dsr_in_buf / dsr_out_buf mappings used by main() */
dma_alloc_info dma_info; /* holds the dmabuf descriptors released in dsr_deinit() */
st_nc_dsr_config dsr_config; /* filled by dsr_config_downscale() */
img_input_config dsr_input_config;
img_output_config dsr_output_config;
#endif
#ifdef USE_8MP_VI
/*
 * Configures the DSR downscaler: RGB888 in/out, input size
 * MAX_WIDTH_FOR_VDMA_CNN_DS x MAX_HEIGHT_FOR_VDMA_CNN_DS, output size
 * NPU_INPUT_WIDTH x NPU_INPUT_HEIGHT. Opens the device with its DMA buffers
 * and sets the buffers up.
 *
 * Returns 0 on success, -1 as soon as one of the three setup steps fails.
 */
static int dsr_init(void)
{
    int in_fd = 0;
    int out_fd = 0;
    long page_size = sysconf(_SC_PAGESIZE);
    const char *failure = NULL;

    dsr_input_config.format = IMG_FORMAT_RGB888;
    dsr_input_config.width = MAX_WIDTH_FOR_VDMA_CNN_DS;
    dsr_input_config.height = MAX_HEIGHT_FOR_VDMA_CNN_DS;
    dsr_output_config.format = IMG_FORMAT_RGB888;

    /* One RGB888 input frame, rounded up to a whole number of pages. */
    BUFF_SIZE = dsr_input_config.width * dsr_input_config.height * 3;
    BUFF_SIZE = ((BUFF_SIZE + page_size - 1) / page_size) * page_size;

    if (dsr_config_downscale(&dsr_config, 1, NPU_INPUT_WIDTH, NPU_INPUT_HEIGHT) < 0) {
        failure = "Error: DSR config fail";
    } else if (open_device_and_dma_buffers(DEV_FILE_DSR, &dsr_fd, &in_fd, &out_fd, BUFF_SIZE) < 0) {
        failure = "Error: DSR open fail";
    } else if (dsr_setup_buffer(&dsr_info, dsr_fd, &dma_info, in_fd, out_fd, BUFF_SIZE) < 0) {
        failure = "Error: DSR setup fail";
    }

    if (failure != NULL) {
        perror(failure);
        return -1;
    }
    return 0;
}
/*
 * Releases what dsr_init() acquired: ends CPU access on both dmabufs, frees
 * them, closes the dmabuf controller, unmaps every mapped in/out buffer and
 * finally deinitialises the DSR device. Always returns 0.
 */
static int dsr_deinit(void)
{
    nc_dmabuf_ctrl_end_cpu_access(dma_info.dmabuf_fd_in);
    nc_dmabuf_ctrl_end_cpu_access(dma_info.dmabuf_fd_out);

    nc_dmabuf_ctrl_free_dma_fd(dma_info.dmabuf_fd_in);
    nc_dmabuf_ctrl_free_dma_fd(dma_info.dmabuf_fd_out);

    nc_dmabuf_ctrl_close();

    int idx = 0;
    while (idx < BUFF_NUM) {
        /* Entries equal to MAP_FAILED are skipped. */
        if (dsr_info.dsr_in_buf[idx] != MAP_FAILED) {
            munmap(dsr_info.dsr_in_buf[idx], BUFF_SIZE);
        }
        if (dsr_info.dsr_out_buf[idx] != MAP_FAILED) {
            munmap(dsr_info.dsr_out_buf[idx], BUFF_SIZE);
        }
        idx++;
    }

    dsr_device_deinit(&dsr_fd);
    return 0;
}
#endif
/*
 * Copies one raw frame into the encoder ring and wakes a worker.
 *
 * ch      channel id stored with the job
 * w, h    frame size in pixels
 * stride  bytes per row of `src`; stride * h bytes are copied
 * src     frame data (not retained; a private copy is made)
 * ts_us   timestamp stored with the job
 *
 * When the ring is full the oldest pending job is discarded to make room.
 * The frame is silently dropped on invalid arguments, and dropped with a log
 * line when the copy buffer cannot be allocated.
 */
static void enc_enqueue(int ch, int w, int h, int stride, const void *src, uint64_t ts_us)
{
    if (!src || stride <= 0 || h <= 0) {
        return;
    }
    const size_t bytes = (size_t)stride * (size_t)h;

    pthread_mutex_lock(&g_enc_mutex);

    enc_job_t *job = &g_enc_queue[g_enc_tail];

    /* Make sure the slot has a large enough buffer BEFORE touching the ring
     * indices, so an allocation failure leaves the queue exactly as it was. */
    if (!job->data || job->buf_size < bytes) {
        uint8_t *fresh = (uint8_t*)malloc(bytes);
        if (!fresh) {
            pthread_mutex_unlock(&g_enc_mutex);
            fprintf(stderr, "[ENC] out of memory, dropping frame (%zu bytes)\n", bytes);
            return;
        }
        free(job->data);
        job->data = fresh;
        job->buf_size = bytes;
    }

    memcpy(job->data, src, bytes);
    job->ch = ch;
    job->w = w;
    job->h = h;
    job->stride = stride;
    job->ts_us = ts_us;
    job->valid = 1;

    /* Ring full: advance head so the oldest job is overwritten next time. */
    const int next_tail = (g_enc_tail + 1) % ENC_QUEUE_SIZE;
    if (next_tail == g_enc_head) {
        g_enc_head = (g_enc_head + 1) % ENC_QUEUE_SIZE;
    }
    g_enc_tail = next_tail;

    pthread_cond_signal(&g_enc_cond);
    pthread_mutex_unlock(&g_enc_mutex);
}
static void* enc_worker_thread(void *arg)
{
intptr_t wid = (intptr_t)arg;
tjhandle tj = tjInitCompress();
if (!tj) {
fprintf(stderr, "[ENC%ld] tjInitCompress failed: %s\n",
(long)wid, tjGetErrorStr());
return NULL;
}
while (g_running) {
enc_job_t job;
memset(&job, 0, sizeof(job));
// 큐에서 job 하나 가져오기
pthread_mutex_lock(&g_enc_mutex);
while (g_enc_head == g_enc_tail && g_running) {
pthread_cond_wait(&g_enc_cond, &g_enc_mutex);
}
if (!g_running) {
pthread_mutex_unlock(&g_enc_mutex);
break;
}
job = g_enc_queue[g_enc_head]; // 구조체 복사 (data 포인터 공유)
g_enc_head = (g_enc_head + 1) % ENC_QUEUE_SIZE;
pthread_mutex_unlock(&g_enc_mutex);
if (!job.valid || !job.data) continue;
unsigned char *jpegBuf = NULL;
unsigned long jpegSize = 0;
struct timespec t0, t1;
clock_gettime(CLOCK_MONOTONIC, &t0);
int rc = tjCompress2(
tj,
job.data,
job.w,
job.stride,
job.h,
TJPF_RGB, // V4L2_PIX_FMT_RGB24 기준
&jpegBuf,
&jpegSize,
TJSAMP_420,
45, // quality
TJFLAG_FASTDCT
);
clock_gettime(CLOCK_MONOTONIC, &t1);
if (rc != 0) {
fprintf(stderr, "[ENC%ld] tjCompress2 error: %s\n",
(long)wid, tjGetErrorStr());
if (jpegBuf) tjFree(jpegBuf);
continue;
}
// double sec_diff = (double)(t1.tv_sec - t0.tv_sec);
// double nsec_diff = (double)(t1.tv_nsec - t0.tv_nsec);
// double ms = sec_diff * 1000.0 +
// nsec_diff / 1e6;
pthread_mutex_lock(&g_stat_mutex);
g_enc_frame_count++;
unsigned long n = g_enc_frame_count;
// ---- FPS 계산 ----
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
if (g_fps_last_ts.tv_sec == 0 && g_fps_last_ts.tv_nsec == 0) {
// 첫 호출 초기화
g_fps_last_ts = now;
g_fps_last_cnt2 = n;
} else {
double sec = (double)(now.tv_sec - g_fps_last_ts.tv_sec) +
(double)(now.tv_nsec - g_fps_last_ts.tv_nsec) / 1e9;
if (sec >= 1.0) {
unsigned long diff = n - g_fps_last_cnt2;
double fps = (double)diff / sec;
printf("[ENC_STAT] total=%lu fps=%.2f\n", n, fps);
g_fps_last_ts = now;
g_fps_last_cnt2 = n;
}
}
pthread_mutex_unlock(&g_stat_mutex);
uint32_t jpeg_sz = (uint32_t)jpegSize;
uint64_t ts_us = job.ts_us;
// 프로토콜 설계에 따라:
// - pixfmt: WS_PIXFMT_JPEG
// - stride: JPEG 바이트 수로 사용
int sret = uds_send_frame(
job.ch,
jpegBuf, // JPEG 바이트
(uint32_t)job.w,
(uint32_t)job.h,
jpeg_sz, // stride 대신 "size"로 사용
WS_PIXFMT_JPEG, // ← uds_frame.h에 정의
ts_us
);
if (sret != 0) {
fprintf(stderr, "[ENC%ld] uds_send_frame JPEG failed (ret=%d)\n",
(long)wid, sret);
}
tjFree(jpegBuf);
}
tjDestroy(tj);
return NULL;
}
/*
 * Tells whether `name` ends in a supported image extension.
 *
 * The text after the last '.' is lower-cased (at most 7 characters are
 * considered) and compared against "jpg", "jpeg" and "png".
 *
 * Returns 1 on a match, 0 otherwise (including when there is no '.').
 */
static int has_img_ext(const char* name)
{
    static const char *const known[] = { "jpg", "jpeg", "png" };
    char lowered[8];
    size_t len = 0;

    const char *suffix = strrchr(name, '.');
    if (suffix == NULL) {
        return 0;
    }
    suffix++;

    while (len < 7 && suffix[len] != '\0') {
        lowered[len] = (char)tolower((unsigned char)suffix[len]);
        len++;
    }
    lowered[len] = '\0';

    for (size_t k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (strcmp(lowered, known[k]) == 0) {
            return 1;
        }
    }
    return 0;
}
int init_rgb_buffer(void)
{
size_t sz_1080p = (size_t)1280 * 720 * 3;
size_t sz_640p = (size_t)NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 3; // 640*384*3
g_src_rgb_1080p = (unsigned char*)malloc(sz_1080p);
if (!g_src_rgb_1080p) {
fprintf(stderr, "[IMG] malloc failed (g_src_rgb_1080p)\n");
return -1;
}
g_src_rgb_640p = (unsigned char*)malloc(sz_640p);
if (!g_src_rgb_640p) {
fprintf(stderr, "[IMG] malloc failed (g_src_rgb_640p)\n");
return -1;
}
return 0;
}
/* Releases both RGB work buffers and resets their pointers to NULL. */
void free_rgb_buffer(void)
{
    /* free(NULL) is a no-op, so no guard is needed. */
    free(g_src_rgb_1080p);
    g_src_rgb_1080p = NULL;

    free(g_src_rgb_640p);
    g_src_rgb_640p = NULL;
}
/*
 * Parses the five characters directly in front of the file extension as a
 * decimal number, e.g. ".../img_00123.jpg" -> 123.
 *
 * Returns 0 when the file name has no '.', has fewer than five characters
 * before the last '.', or when any of those five characters is not a digit.
 */
static int extract_tail5(const char* path)
{
    const char *slash = strrchr(path, '/');
    const char *name = (slash != NULL) ? slash + 1 : path; /* file name part */

    const char *ext = strrchr(name, '.');
    if (ext == NULL || ext - name < 5) {
        return 0;
    }

    int value = 0;
    for (const char *c = ext - 5; c < ext; ++c) {
        if (!isdigit((unsigned char)*c)) {
            return 0;
        }
        value = value * 10 + (*c - '0');
    }
    return value;
}
/*
 * qsort() comparator for an array of path strings: orders by the number
 * extract_tail5() finds in each path, falling back to strcmp() on ties.
 */
static int cmp_img_tail5(const void* a, const void* b)
{
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    const int lhs_num = extract_tail5(lhs);
    const int rhs_num = extract_tail5(rhs);

    if (lhs_num != rhs_num) {
        return (lhs_num > rhs_num) ? 1 : -1;
    }
    return strcmp(lhs, rhs);
}
/*
 * Rebuilds g_img_list with the image files found in `dirpath`.
 *
 * dirpath  directory to scan (hidden entries and non-image names are skipped)
 * limit    maximum number of paths to keep; must be > 0
 *
 * All matching entries are collected and sorted with cmp_img_tail5() first,
 * and only then cut down to `limit`. The kept paths are therefore the lowest
 * in sort order, not whichever ones readdir() happened to return first.
 * The directory is read in a single pass.
 *
 * Any previous list is freed. On failure the list is left empty.
 *
 * Returns 0 on success; -1 on invalid arguments, when the directory cannot be
 * opened, when it contains no images, or when memory runs out.
 */
int load_image_list(const char* dirpath, int limit)
{
    /* Drop the previous list. */
    if (g_img_list) {
        for (int k = 0; k < g_img_count; ++k) {
            free(g_img_list[k]);
        }
        free(g_img_list);
    }
    g_img_list = NULL;
    g_img_count = 0;
    g_img_idx = 0;

    if (!dirpath || limit <= 0) {
        fprintf(stderr, "[IMG] invalid arguments to load_image_list\n");
        return -1;
    }

    DIR *d = opendir(dirpath);
    if (!d) {
        perror("[IMG] opendir");
        return -1;
    }

    char **paths = NULL;
    size_t used = 0;
    size_t cap = 0;
    struct dirent *ent;

    while ((ent = readdir(d)) != NULL) {
        if (ent->d_name[0] == '.') continue;
        if (!has_img_ext(ent->d_name)) continue;

        char full[PATH_MAX];
        int n = snprintf(full, sizeof(full), "%s/%s", dirpath, ent->d_name);
        if (n < 0 || (size_t)n >= sizeof(full)) {
            /* A truncated path would point at the wrong file. */
            fprintf(stderr, "[IMG] path too long, skipped: %s/%s\n", dirpath, ent->d_name);
            continue;
        }

        if (used == cap) {
            size_t new_cap = (cap == 0) ? 64 : cap * 2;
            char **grown = (char**)realloc(paths, new_cap * sizeof(*grown));
            if (!grown) goto fail;
            paths = grown;
            cap = new_cap;
        }

        paths[used] = strdup(full);
        if (!paths[used]) goto fail;
        used++;
    }
    closedir(d);
    d = NULL;

    if (used == 0) {
        free(paths);
        fprintf(stderr, "[IMG] no images in %s\n", dirpath);
        return -1;
    }

    qsort(paths, used, sizeof(char*), cmp_img_tail5);

    /* Keep the first `limit` entries of the sorted sequence. */
    size_t keep = (used < (size_t)limit) ? used : (size_t)limit;
    for (size_t k = keep; k < used; ++k) {
        free(paths[k]);
    }

    g_img_list = paths;
    g_img_count = (int)keep;
    printf("[IMG] %d images loaded (limit=%d)\n", g_img_count, limit);
    return 0;

fail:
    for (size_t k = 0; k < used; ++k) {
        free(paths[k]);
    }
    free(paths);
    closedir(d);
    return -1;
}
int load_images_to_ram(void)
{
if (g_img_count <= 0) return -1;
g_ram_frames = (RamFrame*)calloc(g_img_count, sizeof(RamFrame));
if (!g_ram_frames) return -1;
tjhandle tj = tjInitCompress();
if (!tj) {
fprintf(stderr, "tjInitCompress failed\n");
return -1;
}
for (int i = 0; i < g_img_count; i++) {
const char* path = g_img_list[i];
int w, h, ch;
unsigned char* img = stbi_load(path, &w, &h, &ch, 3);
if (!img) return -1;
if (w != VIDEO_WIDTH || h != VIDEO_HEIGHT) {
stbi_image_free(img);
return -1;
}
unsigned char* jpegBuf = NULL;
unsigned long jpegSize = 0;
int rc = tjCompress2(
tj,
img,
w,
w * 3,
h,
TJPF_RGB,
&jpegBuf,
&jpegSize,
TJSAMP_420,
45,
TJFLAG_FASTDCT
);
stbi_image_free(img);
if (rc != 0) {
fprintf(stderr, "[RAM] Compress failed: %s\n", tjGetErrorStr());
continue;
}
g_ram_frames[i].jpeg = jpegBuf;
g_ram_frames[i].jpeg_size = jpegSize;
g_ram_frames[i].w = w;
g_ram_frames[i].h = h;
if (i % 10 == 0)
fprintf(stderr, "[RAM][LOAD] %d/%d loaded\n", i, g_img_count);
}
tjDestroy(tj);
g_ram_frame_count = g_img_count;
g_ram_cur_idx = 0;
fprintf(stderr, "[RAM] loaded %d JPEG images\n", g_ram_frame_count);
return 0;
}
void preload_all_images(void)
{
struct {
int feat;
const char* path;
int limit;
} cfg[] = {
{ FEAT_OBJDET, "/mnt/nvme/test_images/1", 200 },
{ FEAT_FIRE, "/mnt/nvme/test_images/2", 50 },
{ FEAT_ABNORM, "/mnt/nvme/test_images/6", 150 }
};
for (int i = 0; i < (int)(sizeof(cfg)/sizeof(cfg[0])); i++) {
load_image_list(cfg[i].path, cfg[i].limit); // ★ limit 전달
load_images_to_ram();
ram_sets[cfg[i].feat].frames = g_ram_frames;
ram_sets[cfg[i].feat].count = g_ram_frame_count;
g_ram_frames = NULL;
g_ram_frame_count = 0;
}
}
static void update_image_source_for_feat(void)
{
uint32_t fm = ctrl_feat_mask_snapshot();
printf("[IMG] update_image_source_for_feat: feat_mask=0x%08X\n", fm);
const uint32_t BIT_OBJDET = (1u << FEAT_OBJDET);
const uint32_t BIT_ABNORM = (1u << FEAT_ABNORM);
const uint32_t BIT_FIRE = (1u << FEAT_FIRE);
int feat = -1;
// 비트마스크로 판정
if (fm & BIT_OBJDET) feat = FEAT_OBJDET;
else if (fm & BIT_FIRE) feat = FEAT_FIRE;
else if (fm & BIT_ABNORM) feat = FEAT_ABNORM;
printf("[IMG] selected feat=%d\n", feat);
// 1) 어떤 것도 선택 안 됐으면 OBJDET로 기본값
if (feat < 0)
feat = FEAT_OBJDET;
// 2) 선택된 feat의 영상 세트가 없으면 OBJDET로 fallback
if (ram_sets[feat].count == 0)
feat = FEAT_OBJDET;
// 3) 실제로 적용
g_ram_frames = ram_sets[feat].frames;
g_ram_frame_count = ram_sets[feat].count;
}
/* SIGINT handler: clears the run flag; the signal number itself is ignored. */
static void on_sigint(int sig)
{
    g_running = 0;
    (void)sig;
}
/*
 * Queues one inference request on the MQ_NAME_CNN_BUF message queue.
 *
 * ptr_cnn_buf    input buffer for the network
 * time_stamp_us  timestamp attached to the request
 * cam_ch         camera channel
 * net_id         network the request is for
 *
 * A heap-allocated stCnnData envelope is created and a POINTER to it is sent
 * as the message. On success the receiver owns both the envelope and
 * ptr_cnn_buf (cnn_task() frees them). On failure the envelope is released
 * here, and ptr_cnn_buf stays with the caller.
 *
 * Returns 0 on success, -1 on failure.
 */
int send_cnn_buf (uint8_t *ptr_cnn_buf, uint64_t time_stamp_us, uint32_t cam_ch, E_NETWORK_UID net_id)
{
    struct mq_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.mq_maxmsg = MAX_MQ_MSG_CNT;
    attr.mq_msgsize = sizeof(stCnnData*);

    mqd_t mfd = mq_open(MQ_NAME_CNN_BUF, O_WRONLY | O_CREAT, 0666, &attr);
    if (mfd == (mqd_t)-1) {
        perror("mq open error");
        return -1;
    }

    stCnnData *cnn_data = (stCnnData*)malloc(sizeof(stCnnData));
    if (!cnn_data) {
        fprintf(stderr, "send_cnn_buf: out of memory\n");
        mq_close(mfd);
        return -1;
    }
    cnn_data->cam_ch = cam_ch;
    cnn_data->ptr_cnn_buf = ptr_cnn_buf;
    cnn_data->time_stamp_us = time_stamp_us;
    cnn_data->net_id = net_id;

    int ret = mq_send(mfd, (const char *)&cnn_data, attr.mq_msgsize, 1);
    if (ret == -1) {
        printf("errno of mq_send = %d\n", errno);
        /* The message never reached the queue, so nobody else will free it. */
        free(cnn_data);
    }
    mq_close(mfd);
    return ret;
}
/*
 * Receives one message from the MQ_NAME_CNN_BUF queue and stores its payload,
 * a stCnnData pointer, in *out_cnn_buf.
 *
 * The queue is opened (created if missing) and closed on every call.
 *
 * Returns the mq_receive() result narrowed to int, or -1 when the queue
 * cannot be opened or the receive fails.
 */
int receive_cnn_buf (stCnnData **out_cnn_buf)
{
    struct mq_attr attr;
    attr.mq_maxmsg = MAX_MQ_MSG_CNT;
    attr.mq_msgsize = sizeof(stCnnData *);

    const mqd_t queue = mq_open(MQ_NAME_CNN_BUF, O_RDONLY | O_CREAT, 0666, &attr);
    if (queue == -1) {
        perror("mq open error");
        return -1;
    }

    int received = (int32_t)mq_receive(queue, (char*)out_cnn_buf, attr.mq_msgsize, NULL);
    if (received == -1) {
        printf("errno of mq_receive = %d\n", errno);
    }

    mq_close(queue);
    return received;
}
/*
 * Initialises the CNN runtime, registers every network enabled by a SHOW_*
 * build switch, finishes the builder and records the network input size.
 *
 * npu_input_info  receives w, h and rgb_size (w * h * RGB_CNT) of the input
 *                 tensor. When several networks are enabled, the last one
 *                 whose query succeeds wins.
 *
 * Each resolution query writes into a scratch struct that is copied only on
 * success, so a failed query cannot leave garbage in the result. If no query
 * succeeds at all, including the case where no SHOW_* switch is defined, the
 * function fails rather than reporting an indeterminate size.
 *
 * Returns 0 on success, -1 on failure.
 */
int npu_init(st_npu_input_info *npu_input_info)
{
    if (!npu_input_info) {
        fprintf(stderr, "npu_init(): NULL output argument\n");
        return -1;
    }

    /* Initialize CNN */
    if (nc_aiw_init_cnn() < 0 ) {
        fprintf(stderr, "nc_aiw_init_cnn() failure!!\n");
        return -1;
    }
#ifdef SHOW_FACE_DETECT
    if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_FACE_DET), NETWORK_FACE_DET, nc_postprocess_yolov8_inference_result) < 0) {
        fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n");
        return -1;
    }
#endif
#ifdef SHOW_CUUVA_DETECT
    if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_CUUVA_DET), NETWORK_CUUVA_DET, nc_postprocess_yolov8_inference_result) < 0) {
        fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n");
        return -1;
    }
#endif
#ifdef SHOW_FIRE_DETECT
    if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_FIRE_DET), NETWORK_FIRE_DET, nc_postprocess_yolov8_inference_result) < 0) {
        fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n");
        return -1;
    }
#endif
#ifdef SHOW_LPR_DETECT
    if (nc_aiw_add_network_to_builder(nc_localize_path((const char *)NETWORK_FILE_LPR_DET), NETWORK_LPR_DET, nc_postprocess_yolov8_inference_result) < 0) {
        fprintf(stderr, "nc_aiw_add_network_to_builder() failure!!\n");
        return -1;
    }
#endif
    if(nc_aiw_finish_network_builder() < 0 ) {
        fprintf(stderr, "nc_aiw_finish_network_builder() failure!!\n");
        return -1;
    }

    /* Obtain the input resolution of the CNN network (input tensor info). */
    aiwTensorInfo in_tinfo;
    aiwTensorInfo probe;
    bool have_resol = false;
    memset(&in_tinfo, 0, sizeof(in_tinfo));
    memset(&probe, 0, sizeof(probe));
#ifdef SHOW_FACE_DETECT
    if(nc_get_cnn_network_input_resol(NETWORK_FACE_DET, &probe) < 0)
    {
        printf("failed to get the input resolution for the CNN network\n");
    } else {
        in_tinfo = probe;
        have_resol = true;
    }
#endif
#ifdef SHOW_CUUVA_DETECT
    if(nc_get_cnn_network_input_resol(NETWORK_CUUVA_DET, &probe) < 0)
    {
        printf("failed to get the input resolution for the CNN network\n");
    } else {
        in_tinfo = probe;
        have_resol = true;
    }
#endif
#ifdef SHOW_FIRE_DETECT
    if(nc_get_cnn_network_input_resol(NETWORK_FIRE_DET, &probe) < 0)
    {
        printf("failed to get the input resolution for the CNN network\n");
    } else {
        in_tinfo = probe;
        have_resol = true;
    }
#endif
#ifdef SHOW_LPR_DETECT
    if(nc_get_cnn_network_input_resol(NETWORK_LPR_DET, &probe) < 0)
    {
        printf("failed to get the input resolution for the CNN network\n");
    } else {
        in_tinfo = probe;
        have_resol = true;
    }
#endif
    if (!have_resol) {
        fprintf(stderr, "npu_init(): no network input resolution available\n");
        return -1;
    }

    npu_input_info->w = in_tinfo.dim.w; /* network input width */
    npu_input_info->h = in_tinfo.dim.h; /* network input height */
    npu_input_info->rgb_size = in_tinfo.dim.w * in_tinfo.dim.h * RGB_CNT;
#ifdef USE_BYTETRACK
    E_NETWORK_UID net_id = NETWORK_CUUVA_DET;
    if (nc_init_bytetrackers(CAP_FPS, 0, net_id) != 0) {
        perror("nc_init_bytetrackers() error");
        return -1;
    }
#endif
    printf("aiw finish\n");
    return 0;
}
/*
 * Inference thread: receives requests from the CNN message queue and runs the
 * network on each one until g_running is cleared.
 *
 * Every received request is owned by this thread; its input buffer and its
 * envelope are freed after the network has run. A receive that reports
 * success but delivers a NULL envelope is ignored. The envelope is checked
 * BEFORE it is dereferenced.
 *
 * arg is unused. Returns NULL.
 */
void *cnn_task(void *arg)
{
    (void) arg;
    printf("CNN TASK RUN!!\n");
    while (g_running) {
        stCnnData *cnn_data = NULL;

        if (receive_cnn_buf(&cnn_data) == -1) {
            continue;
        }
        if (!cnn_data) {
            continue;
        }

        nc_aiw_run_cnn(cnn_data->ptr_cnn_buf, cnn_data->time_stamp_us, cnn_data->cam_ch, cnn_data->net_id);
        free(cnn_data->ptr_cnn_buf);
        free(cnn_data);
    }
    printf("EXIT CNN_TASK!!\n");
    return NULL;
}
/*
 * Sends a private copy of `src` (sz bytes) to the CNN queue for network
 * `net_uid`.
 *
 * The copy is handed over to the queue on success. If the send fails, the
 * copy is freed here so that it does not leak. Nothing is sent when `src` is
 * NULL, `sz` is 0 or the copy cannot be allocated.
 */
static inline void dup_and_send(unsigned char* src, size_t sz,
                                uint64_t ts_us, uint32_t ch, E_NETWORK_UID net_uid){
    if (!src || sz == 0) return;

    unsigned char* p = (unsigned char*)malloc(sz);
    if (!p) return;
    memcpy(p, src, sz);

    if (send_cnn_buf(p, ts_us, ch, net_uid) != 0) {
        /* Not queued: ownership was never transferred. */
        free(p);
    }
}
static void dispatch_stage1_and_plan_followups(
uint32_t m,
uint32_t ch,
unsigned char* rgb_planar, size_t rgb_size,
uint64_t ts_us)
{
const int UID_DET = nc_get_cnn_networks_id_by_uid(NETWORK_CUUVA_DET);
const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET);
if (CTRL_MODEL_ON(m, NETWORK_OBJDET) && UID_DET >= 0)
dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_DET);
if (CTRL_MODEL_ON(m, NETWORK_FIRE) && UID_FIRE >= 0)
dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FIRE);
}
static void dispatch_stage2_followups(
uint32_t m, // ctrl_active_models_acquire() 결과(모델 비트)
uint32_t ch,
unsigned char* rgb_planar, size_t rgb_size,
uint64_t ts_us,
bool need_face,
bool need_lpr
){
const int UID_FACE = nc_get_cnn_networks_id_by_uid(NETWORK_FACE_DET);
const int UID_LPR = nc_get_cnn_networks_id_by_uid(NETWORK_LPR_DET);
// const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET);
// const int UID_FIRE = nc_get_cnn_networks_id_by_uid(NETWORK_FIRE_DET);
if (need_face && CTRL_MODEL_ON(m, NETWORK_FACE) && UID_FACE >= 0){
dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FACE);
}
if (need_lpr && CTRL_MODEL_ON(m, NETWORK_LPR) && UID_LPR >= 0){
dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_LPR);
}
// dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_FACE);
// if (need_abnorm && CTRL_MODEL_ON(m, NETWORK_ABNORM) && UID_ABNORM >= 0)
// dup_and_send(rgb_planar, rgb_size, ts_us, ch, (E_NETWORK_UID)UID_ABNORM);
}
/* Thin wrapper that forwards all arguments to dispatch_stage1_and_plan_followups(). */
static inline void send_by_ctrl_mask(
    uint32_t m,
    uint32_t ch,
    unsigned char* rgb_planar, size_t rgb_size,
    uint64_t ts_us)
{
    dispatch_stage1_and_plan_followups(
        m,
        ch,
        rgb_planar,
        rgb_size,
        ts_us);
}
/* Element count of a true array as uint32_t. Gives a wrong result if `a` is a pointer rather than an array. */
#ifndef ARRAY_LEN
#define ARRAY_LEN(a) ((uint32_t)(sizeof(a)/sizeof((a)[0])))
#endif
/*
 * Converts every detection in `R` into a uds_det_entry_t and appends the
 * batch to the growing *total_items array.
 *
 * ch                  channel id (used for logging only)
 * R                   post-processing results, one object list per class
 * tag                 "DET", "FIRE", "FACE" or "LPR"; any other value maps to tag id 0
 * need_face_followup  set to true when a "DET" result contains class FOLLOWUP_PERSON_ID
 * need_lpr_followup   set to true when a "DET" result contains class FOLLOWUP_CAR_ID
 * total_items         in/out: heap array that is grown with realloc()
 * total_items_cnt     in/out: number of entries in *total_items
 *
 * Does nothing when there are no detections or the scratch buffer cannot be
 * allocated. If growing *total_items fails, the batch is dropped and the
 * array is left unchanged.
 */
static void collect_and_append(
    uint32_t ch,
    const stCnnPostprocessingResults *R,
    const char *tag,
    bool *need_face_followup,
    bool *need_lpr_followup,
    uds_det_entry_t **total_items,
    uint32_t *total_items_cnt
){
    const uint32_t class_slots = ARRAY_LEN(R->class_objs);

    /* Pass 1: count the detections so the scratch buffer can be sized. */
    uint32_t det_total = 0;
    for (uint32_t c = 0; c < class_slots; ++c) {
        const int cnt = R->class_objs[c].obj_cnt;
        if (cnt > 0) {
            det_total += (uint32_t)cnt;
        }
    }
    if (det_total == 0) {
        return;
    }

    uds_det_entry_t *batch = (uds_det_entry_t*)malloc((size_t)det_total * sizeof(uds_det_entry_t));
    if (batch == NULL) {
        return;
    }

    uint16_t tag_id;
    if (strcmp(tag, "DET") == 0) {
        tag_id = UDS_TAG_DET;
    } else if (strcmp(tag, "FIRE") == 0) {
        tag_id = UDS_TAG_FIRE;
    } else if (strcmp(tag, "FACE") == 0) {
        tag_id = UDS_TAG_FACE;
    } else if (strcmp(tag, "LPR") == 0) {
        tag_id = UDS_TAG_LPR;
    } else {
        tag_id = 0;
    }

    /* Pass 2: fill the scratch buffer class by class. */
    uint32_t filled = 0;
    for (uint32_t c = 0; c < class_slots; ++c) {
        const int cnt = R->class_objs[c].obj_cnt;
        if (cnt <= 0) {
            continue;
        }
        const stObjInfo *class_objs = R->class_objs[c].objs;

        for (int j = 0; j < cnt; ++j) {
            const stObjInfo *obj = &class_objs[j];
            uds_det_entry_t *entry = &batch[filled];
            filled++;

            entry->prob = (float)obj->prob;
            entry->x = (float)obj->bbox.x;
            entry->y = (float)obj->bbox.y;
            entry->w = (float)obj->bbox.w;
            entry->h = (float)obj->bbox.h;
            entry->cls = UDS_ENC_CLS(tag_id, (uint16_t)c);
            entry->tid = (uint16_t)obj->track_id;
            entry->reserved = (uint16_t)(UDS_RESERVED_PACK(ctrl_feat_mask_snapshot(), ctrl_active_models_acquire()) & 0xFFFF);

            printf("[%s] ch=%u ci=%u j=%d k=%u prob=%.3f bbox=%.1f,%.1f,%.1f,%.1f\n",
                   tag, ch, c, j, filled, (float)obj->prob,
                   (float)obj->bbox.x, (float)obj->bbox.y, (float)obj->bbox.w, (float)obj->bbox.h);

            /* Only plain detections can request follow-up networks. */
            if (tag_id == UDS_TAG_DET) {
                if ((uint32_t)c == FOLLOWUP_PERSON_ID) {
                    *need_face_followup = true;
                }
                if ((uint32_t)c == FOLLOWUP_CAR_ID) {
                    *need_lpr_followup = true;
                }
            }
        }
    }

    /* Grow the caller's array and copy the batch to its end. */
    const size_t grown_bytes = (size_t)(*total_items_cnt + det_total) * sizeof(uds_det_entry_t);
    uds_det_entry_t *grown = (uds_det_entry_t*)realloc(*total_items, grown_bytes);
    if (grown != NULL) {
        *total_items = grown;
        memcpy(&(*total_items)[*total_items_cnt], batch, (size_t)det_total * sizeof(uds_det_entry_t));
        *total_items_cnt += det_total;
    }
    free(batch);
}
/*
 * Reads the latest post-processing result of one network and appends its
 * detections to *total_items.
 *
 * ch              channel id
 * network_offset  added to `ch` to select the result buffer
 * tag             tag string forwarded to collect_and_append()
 * ts_ns_latest    in/out: raised to this result's timestamp (converted from
 *                 microseconds to nanoseconds) when that is newer
 *
 * The read is always finished with nc_tsfs_ff_finish_read_buf(), whether or
 * not a buffer was available.
 */
static void read_and_collect_for(
    uint32_t ch,
    uint32_t network_offset,
    const char *tag,
    uint64_t *ts_ns_latest,
    uds_det_entry_t **total_items,
    uint32_t *total_items_cnt,
    bool *need_face_followup,
    bool *need_lpr_followup
){
    uint64_t result_ts_us = 0;
    pp_result_buf *result = (pp_result_buf*)nc_tsfs_ff_get_readable_buffer_and_timestamp(ch + network_offset, &result_ts_us);

    if (result != NULL) {
        collect_and_append(ch, &result->cnn_result, tag,
                           need_face_followup, need_lpr_followup,
                           total_items, total_items_cnt);

        const uint64_t result_ts_ns = result_ts_us * 1000ULL;
        if (*ts_ns_latest < result_ts_ns) {
            *ts_ns_latest = result_ts_ns;
        }
    }

    nc_tsfs_ff_finish_read_buf(ch + network_offset);
}
// -----------------------------
// 메인
// -----------------------------
int main(int argc, char** argv)
{
(void)argc; (void)argv;
// SIGINT 핸들링
struct sigaction sa;
sa.sa_handler = on_sigint;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_RESETHAND;
sigaction(SIGINT, &sa, NULL);
pthread_t p_thread[MAX_TASK_CNT];
int task_cnt = 0;
int thr_id;
int status;
ctrl_uds_start();
ctrl_feature_on(FEAT_OBJDET);
if (uds_server_start(UDS_SOCK_PATH) != 0) {
fprintf(stderr, "uds_server_start failed\n"); return -1;
}
#ifdef USE_8MP_VI
dsr_init();
#endif
#ifdef DETECT_NETWORK
int det_buf_size = sizeof(pp_result_buf);
if(nc_tsfs_ff_create_buffers(0+DETECT_NETWORK, det_buf_size) < 0) {
exit(1);
}
#endif
#ifdef FACE_NETWORK
int face_buf_size = sizeof(pp_result_buf);
if(nc_tsfs_ff_create_buffers(0+FACE_NETWORK, face_buf_size) < 0) {
exit(1);
}
#endif
#ifdef FIRE_NETWORK
int fire_buf_size = sizeof(pp_result_buf);
if(nc_tsfs_ff_create_buffers(0+FIRE_NETWORK, fire_buf_size) < 0) {
exit(1);
}
#endif
#ifdef LPR_NETWORK
int lpr_buf_size = sizeof(pp_result_buf);
if(nc_tsfs_ff_create_buffers(0+LPR_NETWORK, lpr_buf_size) < 0) {
exit(1);
}
#endif
mq_unlink(MQ_NAME_CNN_BUF);
if(npu_init(&npu_input_info) < 0) {
printf("failed to init NPU\n");
return -1;
}
printf("create tasks\n");
thr_id = pthread_create(&p_thread[task_cnt++], NULL, cnn_task, (void *)NULL);
if (thr_id < 0) {
perror("thread create error : cnn_task");
exit(1);
}
cnn_postprocess_arg cnn_post_param;
cnn_post_param.target_width = VIDEO_WIDTH;
cnn_post_param.target_height = VIDEO_HEIGHT;
thr_id = pthread_create(&p_thread[task_cnt++], NULL, nc_cnn_postprocess_task, (void *)&cnn_post_param);
if (thr_id < 0) {
perror("thread create error : nc_cnn_postprocess_task");
exit(1);
}
pthread_t enc_threads[ENC_WORKER_NUM];
for (int i = 0; i < ENC_WORKER_NUM; i++) {
if (pthread_create(&enc_threads[i], NULL, enc_worker_thread, (void*)(intptr_t)i) != 0) {
perror("pthread_create enc_worker_thread");
}
}
unsigned char* rgbdata_for_cnn = (unsigned char *)malloc(npu_input_info.rgb_size);
bool need_face_followup = false;
bool need_lpr_followup = false;
int ch = 0;
preload_all_images();
if (init_rgb_buffer() < 0) return -1;
uint64_t next_ts = now_us();
tjhandle tjd_global = tjInitDecompress();
if (!tjd_global) {
fprintf(stderr, "tjInitDecompress failed\n");
return -1;
}
while (g_running) {
// uint64_t loop_start = now_us();
uint32_t m = ctrl_active_models_acquire();
need_face_followup = false;
need_lpr_followup = false;
update_image_source_for_feat();
if (g_ram_frame_count == 0) {
fprintf(stderr, "[IMG] no ram frames\n");
break;
}
uint64_t time_stamp_us = now_us();
RamFrame* f = &g_ram_frames[g_ram_cur_idx];
int w = f->w;
int h = f->h;
tjDecompress2(
tjd_global,
f->jpeg,
f->jpeg_size,
g_src_rgb_1080p,
w,
0,
h,
TJPF_RGB,
TJFLAG_FASTDCT
);
g_ram_cur_idx = (g_ram_cur_idx + 1) % g_ram_frame_count;
memcpy(dsr_info.dsr_in_buf[0], g_src_rgb_1080p, VIDEO_WIDTH * VIDEO_HEIGHT * 3);
dsr_downscale(
dsr_fd, &dsr_info,
dsr_input_config,
dsr_output_config,
dsr_config,
0
);
g_img_idx = (g_img_idx + 1) % g_img_count;
memcpy(g_src_rgb_640p, dsr_info.dsr_out_buf[0], NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 3);
nc_rgb_interleaved_to_planar_neon(
g_src_rgb_640p,
(unsigned char*)rgbdata_for_cnn,
(unsigned char*)rgbdata_for_cnn + (NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT),
(unsigned char*)rgbdata_for_cnn + (NPU_INPUT_WIDTH * NPU_INPUT_HEIGHT * 2),
NPU_INPUT_WIDTH,
NPU_INPUT_HEIGHT
);
enc_enqueue(
ch,
VIDEO_WIDTH,
VIDEO_HEIGHT,
VIDEO_WIDTH * 3,
g_src_rgb_1080p,
time_stamp_us
);
dispatch_stage1_and_plan_followups(m, ch, rgbdata_for_cnn, npu_input_info.rgb_size, time_stamp_us);
uint64_t ts_ns_latest = 0;
uds_det_entry_t *total_items = NULL;
uint32_t total_items_cnt = 0;
if (CTRL_MODEL_ON(m, NETWORK_OBJDET)){
read_and_collect_for(
ch, DETECT_NETWORK, "DET",
&ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup
);
}
if (CTRL_MODEL_ON(m, NETWORK_FIRE)){
read_and_collect_for(
ch, FIRE_NETWORK, "FIRE",
&ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup
);
}
bool need_face = need_face_followup;
bool need_lpr = need_lpr_followup;
dispatch_stage2_followups(m, ch, rgbdata_for_cnn, npu_input_info.rgb_size, time_stamp_us, need_face, need_lpr);
if (CTRL_MODEL_ON(m, NETWORK_FACE) && need_face){
read_and_collect_for(
ch, FACE_NETWORK, "FACE",
&ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup
);
}
if (CTRL_MODEL_ON(m, NETWORK_LPR) && need_lpr){
read_and_collect_for(
ch, LPR_NETWORK, "LPR",
&ts_ns_latest, &total_items, &total_items_cnt, &need_face_followup, &need_lpr_followup
);
}
uds_det_entry_t dummy;
uint64_t det_ts_us = time_stamp_us;
if (ts_ns_latest > 0) {
det_ts_us = ts_ns_latest / 1000ULL;
}
uds_det_entry_t *send_items =
(total_items_cnt > 0) ? total_items : &dummy;
uint32_t send_cnt = total_items_cnt;
uint32_t seq = ch_seq[ch]++;
uds_send_dets_min(ch, seq, det_ts_us, send_items, send_cnt);
if (total_items) free(total_items);
g_frame_count++;
uint64_t now = now_us();
if (g_fps_last_ts_us == 0) {
g_fps_last_ts_us = now;
g_fps_last_cnt = g_frame_count;
} else {
uint64_t diff_us = now - g_fps_last_ts_us;
if (diff_us >= 1000000ULL) {
uint64_t diff_cnt = g_frame_count - g_fps_last_cnt;
double fps = (double)diff_cnt * 1000000.0 / (double)diff_us;
printf("[IMG_FPS] total=%llu fps=%.2f\n",
(unsigned long long)g_frame_count, fps);
g_fps_last_ts_us = now;
g_fps_last_cnt = g_frame_count;
}
}
// uint64_t t1 = now_us();
// uint64_t elapsed_us = t1 - time_stamp_us;
// fprintf(stderr, "[LOOP] time=%llu us (%.3f ms)\n",
// (unsigned long long)elapsed_us,
// (double)elapsed_us / 1000.0);
next_ts += FRAME_INTERVAL_US;
uint64_t now2 = now_us();
if (next_ts > now2) {
usleep((useconds_t)(next_ts - now2));
} else {
next_ts = now2;
}
// uint64_t loop_end = now_us();
// uint64_t loop_time = loop_end - loop_start;
// printf("[LOOP] %llu us (%.3f ms)\n",
// (unsigned long long)loop_time,
// (double)loop_time / 1000.0);
}
for(int i =0; i< task_cnt; i++) {
pthread_join(p_thread[i], (void **)&status);
}
#ifdef USE_8MP_VI
dsr_deinit();
#endif
uds_server_stop(UDS_SOCK_PATH);
free_rgb_buffer();
for (int i = 0; i < ENC_WORKER_NUM; i++) {
pthread_join(enc_threads[i], NULL);
}
#ifdef USE_BYTETRACK
nc_deInit_bytetrackers(0);
#endif
if (g_img_list) {
for (int i = 0; i < g_img_count; ++i) {
free(g_img_list[i]);
}
free(g_img_list);
}
return 0;
}