From 35c9f9cb5d19eeb9b8781a3f2cbb61edc367fd2e Mon Sep 17 00:00:00 2001 From: songhyeonsu Date: Thu, 7 May 2026 14:25:34 +0900 Subject: [PATCH] Initial scaffold for kr_lp_pgnet - README outlines local-server split workflow - setup_server.sh installs paddlepaddle-gpu, clones PaddleOCR, fetches PGNet step1 pretrain - empty configs/dict/data_gen/tools dirs for next steps Co-Authored-By: Claude Opus 4.7 --- .gitignore | 34 ++++++++++++++++++++++++++ README.md | 54 ++++++++++++++++++++++++++++++++++++++++- configs/.gitkeep | 0 data_gen/.gitkeep | 0 dict/.gitkeep | 0 scripts/setup_server.sh | 39 +++++++++++++++++++++++++++++ tools/.gitkeep | 0 7 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 configs/.gitkeep create mode 100644 data_gen/.gitkeep create mode 100644 dict/.gitkeep create mode 100755 scripts/setup_server.sh create mode 100644 tools/.gitkeep diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0a65c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# 학습 산출물 +output/ +inference/ +pretrain_models/ +*.pdparams +*.pdopt +*.states +*.tar +*.pdmodel +*.pdiparams +*.pdiparams.info + +# 데이터셋 (서버에서 생성, 로컬엔 샘플만) +train_data/ +data_gen/plate_templates/*.png +data_gen/plate_templates/*.jpg +!data_gen/plate_templates/.gitkeep + +# 파이썬 +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ +.ipynb_checkpoints/ + +# OS / IDE +.DS_Store +.vscode/ +.idea/ + +# 로그 +*.log +log/ diff --git a/README.md b/README.md index 1e38c85..0f788d0 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,54 @@ -# PGNET +# kr_lp_pgnet +PaddleOCR PGNet 기반 한국 번호판(LP) end-to-end 검출 + OCR 학습 프로젝트. + +대상 번호판: 승용(흰), 영업용(노란), 전기차(파란 8자리), 화물·특수. 
+
+## 디렉토리 구조
+
+```
+kr_lp_pgnet/
+├── configs/      # PGNet 학습 config (.yml)
+├── dict/         # 문자 사전 (kr_lp_dict.txt)
+├── data_gen/     # 합성 LP 이미지 생성기
+├── scripts/      # 서버 셋업·학습 실행 셸 스크립트
+└── tools/        # 라벨 검증·시각화 등 보조 스크립트
+```
+
+## 작업 분업
+
+- **로컬 (Mac)**: config·dict·생성기·run script 작성 및 디버깅
+- **원격 GPU 서버 (NVIDIA + CUDA)**: 합성 데이터 생성, 학습 실행
+- 동기화: 이 repo는 git push/pull, 데이터·체크포인트는 git에 올리지 않음
+
+## 서버 측 실행 순서
+
+```bash
+# 1. 최초 1회: 환경 셋업 (Paddle 설치 + PaddleOCR clone + pretrain weight 다운로드)
+bash scripts/setup_server.sh
+
+# 2. 합성 데이터 생성 (수십만장)
+python data_gen/generate_synthetic.py --out_dir ../train_data/kr_lp_synth --num 200000
+
+# 3. Step1: 합성 데이터로 pretrain
+bash scripts/run_step1.sh
+
+# 4. Step2: 실제 LP 데이터로 fine-tune
+bash scripts/run_step2.sh
+
+# 5. 추론 모델로 export
+bash scripts/export_inference.sh
+```
+
+## 디렉토리 가정
+
+서버에서는 다음 레이아웃을 가정:
+
+```
+~/workspace/
+├── PaddleOCR/        # git clone PaddlePaddle/PaddleOCR
+├── kr_lp_pgnet/      # 이 repo
+└── train_data/
+    ├── kr_lp_synth/  # 합성 데이터 (생성기 출력)
+    └── kr_lp_real/   # 실제 촬영 LP 데이터
+```
diff --git a/configs/.gitkeep b/configs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/data_gen/.gitkeep b/data_gen/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/dict/.gitkeep b/dict/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/setup_server.sh b/scripts/setup_server.sh
new file mode 100755
index 0000000..d31924d
--- /dev/null
+++ b/scripts/setup_server.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# One-time setup for the remote GPU server.
+# Assumes: NVIDIA GPU + CUDA 11.8 (for a different CUDA version, change the
+# paddle wheel version/URL below).
+# Run from: ~/workspace/kr_lp_pgnet/
+#
+# Environment:
+#   WORKSPACE  directory under which PaddleOCR is cloned (default: $HOME/workspace)
+
+set -euo pipefail
+
+WORKSPACE="${WORKSPACE:-$HOME/workspace}"
+PADDLEOCR_DIR="$WORKSPACE/PaddleOCR"
+PRETRAIN_DIR="$PADDLEOCR_DIR/pretrain_models"
+
+echo "[1/4] PaddlePaddle GPU 설치 (CUDA 11.8 기준)"
+python3 -m pip install --upgrade pip
+python3 -m pip install paddlepaddle-gpu==2.6.1.post118 \
+  -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
+
+echo "[2/4] PaddleOCR clone"
+# Clone only when the checkout is missing, so the script can be re-run.
+if [ ! -d "$PADDLEOCR_DIR" ]; then
+  git clone --depth 1 -b release/2.7 https://github.com/PaddlePaddle/PaddleOCR.git "$PADDLEOCR_DIR"
+fi
+cd "$PADDLEOCR_DIR"
+python3 -m pip install -r requirements.txt
+
+echo "[3/4] PGNet Step1 pretrain weight 다운로드"
+mkdir -p "$PRETRAIN_DIR"
+cd "$PRETRAIN_DIR"
+# The guard assumes the archive unpacks into train_step1/.
+if [ ! -d "train_step1" ]; then
+  # -O pins the output name. Without it, a partial train_step1.tar left by an
+  # interrupted run makes wget save the new download as train_step1.tar.1,
+  # and tar would then unpack the stale, truncated archive.
+  wget -q -O train_step1.tar https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/train_step1.tar
+  # Drop a half-extracted directory on failure; otherwise the guard above
+  # would skip this step on the next run and leave incomplete weights.
+  tar xf train_step1.tar || { rm -rf train_step1; exit 1; }
+  rm train_step1.tar
+fi
+
+echo "[4/4] GPU/Paddle 동작 확인"
+python3 -c "import paddle; print('paddle:', paddle.__version__); print('CUDA available:', paddle.is_compiled_with_cuda()); print('GPU count:', paddle.device.cuda.device_count())"
+
+echo
+echo "셋업 완료. 다음 단계:"
+echo " python data_gen/generate_synthetic.py ..."
+echo " bash scripts/run_step1.sh"
diff --git a/tools/.gitkeep b/tools/.gitkeep
new file mode 100644
index 0000000..e69de29