From 77c566a10a3e82134d32f464b3600d359d7863d4 Mon Sep 17 00:00:00 2001 From: songhyeonsoo Date: Fri, 15 May 2026 18:05:30 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20tight=20polygon=20=EB=9D=BC=EB=B2=A8=20+?= =?UTF-8?q?=20eval=20GT=20mat=20=EC=83=9D=EC=84=B1=20=ED=8C=8C=EC=9D=B4?= =?UTF-8?q?=ED=94=84=EB=9D=BC=EC=9D=B8=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - generate_synthetic.py: plate 전체 box → 글자 실제 좌표 기반 tight polygon으로 변경 (글자 반복 출력·over-segmentation의 근본 원인 해결) gen_type1/2/_gen_two_line 모두 (plate, label_list) 통일 반환 - tools/make_gt_mat.py: test.txt → ICDAR wordBB 포맷 gt_img_N.mat 생성 스크립트 신규 (E2EMetric seqerr=0.99 고착 문제 해결) - scripts/run_step1.sh: 데이터 생성 → GT mat 생성 → 학습 3단계로 재구성 NUM_SAMPLES 환경변수로 데이터 수 제어 가능 Co-Authored-By: Claude Sonnet 4.6 --- data_gen/generate_synthetic.py | 74 +++++++++++++----------------- scripts/run_step1.sh | 32 ++++++++++++- tools/make_gt_mat.py | 84 ++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 43 deletions(-) create mode 100644 tools/make_gt_mat.py diff --git a/data_gen/generate_synthetic.py b/data_gen/generate_synthetic.py index aca7028..45a52fd 100644 --- a/data_gen/generate_synthetic.py +++ b/data_gen/generate_synthetic.py @@ -89,6 +89,7 @@ class LPGenerator: return {k: cv2.resize(v, (w, h)) for k, v in d.items()} def gen_type1(self): + """신형 승용 가로 1줄 (520x110). 글자 실제 위치로 tight polygon 생성.""" plate = cv2.resize(self.plate_w, (520, 110)) num = self._resize_dict(self.num_w, 56, 83) char = self._resize_dict(self.char_w, 60, 83) @@ -98,16 +99,20 @@ class LPGenerator: e = [random.choice('0123456789') for _ in range(4)] row, col = 13, 35 + x0 = col for x in d: plate[row:row+83, col:col+56] = num[x]; col += 56 plate[row:row+83, col:col+60] = char[ch]; col += 60 + 36 for x in e: plate[row:row+83, col:col+56] = num[x]; col += 56 + x1 = col text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e) - return plate, text + poly = [[x0, row], [x1, row], [x1, row + 83], [x0, row + 83]] + return plate, [{"transcription": text, "points": poly}] def gen_type2(self): + """구형 승용 가로 1줄 (355x155). 글자 실제 위치로 tight polygon 생성.""" plate = cv2.resize(self.plate_w, (355, 155)) num = self._resize_dict(self.num_w, 45, 83) char = self._resize_dict(self.char_w, 49, 70) @@ -117,17 +122,21 @@ class LPGenerator: e = [random.choice('0123456789') for _ in range(4)] row, col = 46, 10 + x0 = col plate[row:row+83, col:col+45] = num[d[0]]; col += 45 plate[row:row+83, col:col+45] = num[d[1]]; col += 45 plate[row+12:row+82, col+2:col+51] = char[ch]; col += 51 plate[row:row+83, col+2:col+47] = num[e[0]]; col += 47 for x in e[1:]: plate[row:row+83, col:col+45] = num[x]; col += 45 + x1 = col text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e) - return plate, text + poly = [[x0, row], [x1, row], [x1, row + 83], [x0, row + 83]] + return plate, [{"transcription": text, "points": poly}] def _gen_two_line(self, plate_bg, num_src, char_src, region_src): + """두 줄 LP (336x170). 위·아래 줄 각각 tight polygon 생성.""" plate = cv2.resize(plate_bg, (336, 170)) num1 = self._resize_dict(num_src, 44, 60) num2 = self._resize_dict(num_src, 64, 90) @@ -139,48 +148,36 @@ class LPGenerator: ch = random.choice(list(HANGUL_CHAR_MAP)) e = [random.choice('0123456789') for _ in range(4)] + # 위 줄: region + 숫자2 row, col = 8, 76 + tx0 = col plate[row:row+60, col:col+88] = region[rkey]; col += 88 + 8 for x in d: plate[row:row+60, col:col+44] = num1[x]; col += 44 + tx1 = col + top_poly = [[tx0, row], [tx1, row], [tx1, row + 60], [tx0, row + 60]] + # 아래 줄: 한글 + 숫자4 row, col = 72, 8 + bx0 = col plate[row:row+62, col:col+64] = char[ch]; col += 64 for x in e: plate[row:row+90, col:col+64] = num2[x]; col += 64 + bx1 = col + bot_poly = [[bx0, row], [bx1, row], [bx1, row + 90], [bx0, row + 90]] - # transcription: 줄 단위로 분리 (PGNet은 두 polygon으로 라벨링하는 게 정석) top = REGION_MAP.get(rkey, '?') + ''.join(d) bot = HANGUL_CHAR_MAP[ch] + ''.join(e) - return plate, top, bot + return plate, [ + {"transcription": top, "points": top_poly}, + {"transcription": bot, "points": bot_poly}, + ] def gen_type3(self): - plate, top, bot = self._gen_two_line(self.plate_y, self.num_y, self.char_y, self.region_y_imgs) - return plate, top, bot + return self._gen_two_line(self.plate_y, self.num_y, self.char_y, self.region_y_imgs) def gen_type4(self): - plate, top, bot = self._gen_two_line(self.plate_g, self.num_g, self.char_g, self.region_g_imgs) - return plate, top, bot - - -def make_label_one_line(plate, text): - """가로 한 줄 LP — polygon은 plate 전체.""" - h, w = plate.shape[:2] - poly = [[0, 0], [w, 0], [w, h], [0, h]] - return [{"transcription": text, "points": poly}] - - -def make_label_two_line(plate, top, bot): - """두 줄 LP — 위·아래 두 polygon 으로 분리. - 위 줄 0~50% (region+num), 아래 줄 50~100% (char+num*4) — 단순 분할.""" - h, w = plate.shape[:2] - mid = h // 2 - up_poly = [[0, 0], [w, 0], [w, mid], [0, mid]] - dn_poly = [[0, mid], [w, mid], [w, h], [0, h]] - return [ - {"transcription": top, "points": up_poly}, - {"transcription": bot, "points": dn_poly}, - ] + return self._gen_two_line(self.plate_g, self.num_g, self.char_g, self.region_g_imgs) # 한국 도로 LP 분포 추정 (자가용 92% + 영업용 7.5%, 신형 가로 ~98% 등) @@ -207,10 +204,10 @@ def main(): gen = LPGenerator(Path(args.asset_dir)) type_funcs = { - '1': ('one', gen.gen_type1), - '2': ('one', gen.gen_type2), - '3': ('two', gen.gen_type3), - '4': ('two', gen.gen_type4), + '1': gen.gen_type1, + '2': gen.gen_type2, + '3': gen.gen_type3, + '4': gen.gen_type4, } selected_keys = [t.strip() for t in args.types.split(',') if t.strip() in type_funcs] if not selected_keys: @@ -239,16 +236,11 @@ def main(): records = [] for i in range(count): idx = random.choices(range(len(chosen)), weights=weights, k=1)[0] - kind, fn = chosen[idx] + fn = chosen[idx] type_count[selected_keys[idx]] += 1 - if kind == 'one': - plate, text = fn() - label = make_label_one_line(plate, text) - seen_chars.update(text) - else: - plate, top, bot = fn() - label = make_label_two_line(plate, top, bot) - seen_chars.update(top); seen_chars.update(bot) + plate, label = fn() + for entry in label: + seen_chars.update(entry["transcription"]) if not args.no_bright: plate = random_bright(plate) fname = f"{i:06d}.jpg" diff --git a/scripts/run_step1.sh b/scripts/run_step1.sh index 7f8939c..ce3813b 100755 --- a/scripts/run_step1.sh +++ b/scripts/run_step1.sh @@ -14,8 +14,36 @@ set -euo pipefail PADDLEOCR_DIR=/workspace/PaddleOCR KR_LP_DIR=/workspace/kr_lp_pgnet TRAIN_DATA=/workspace/train_data +SYNTH_DIR="$TRAIN_DATA/kr_lp_synth" +ASSET_DIR="$KR_LP_DIR/data_gen/Korean-license-plate-Generator" LOG="${LOG:-$PADDLEOCR_DIR/output/kr_lp_pgnet/train.log}" +NUM_SAMPLES="${NUM_SAMPLES:-10000}" +# ── 1. 합성 데이터 생성 ────────────────────────────────────────────────────── +echo "===========================" +echo "[1/3] 합성 데이터 생성 (${NUM_SAMPLES}장)" +echo " asset: $ASSET_DIR" +echo " out: $SYNTH_DIR" +echo "===========================" + +python3.10 "$KR_LP_DIR/data_gen/generate_synthetic.py" \ + --asset_dir "$ASSET_DIR" \ + --out_dir "$SYNTH_DIR" \ + --num "$NUM_SAMPLES" \ + --dict "$KR_LP_DIR/dict/kr_lp_dict.txt" + +# ── 2. eval GT mat 생성 ───────────────────────────────────────────────────── +echo "===========================" +echo "[2/3] eval GT mat 생성" +echo " label: $SYNTH_DIR/test/test.txt" +echo " out: $SYNTH_DIR/gt/" +echo "===========================" + +python3.10 "$KR_LP_DIR/tools/make_gt_mat.py" \ + --label "$SYNTH_DIR/test/test.txt" \ + --out_dir "$SYNTH_DIR/gt" + +# ── 3. 학습 ───────────────────────────────────────────────────────────────── cd "$PADDLEOCR_DIR" # train_data symlink (config는 ./train_data/kr_lp_synth 사용) @@ -38,9 +66,9 @@ if [ "${DRY_RUN:-0}" = "1" ]; then fi echo "===========================" -echo "Step1 학습 시작" +echo "[3/3] Step1 학습 시작" echo " config: configs/e2e/kr_lp_pgnet.yml" -echo " data: $TRAIN_DATA/kr_lp_synth/" +echo " data: $SYNTH_DIR/" echo " pretrain: pretrain_models/train_step1/best_accuracy" echo " log: $LOG" echo " override: ${OVERRIDE[@]}" diff --git a/tools/make_gt_mat.py b/tools/make_gt_mat.py new file mode 100644 index 0000000..8cb53a2 --- /dev/null +++ b/tools/make_gt_mat.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +""" +PaddleOCR E2EMetric (mode: A)이 요구하는 gt_img_{N}.mat 파일을 생성. + +입력: PGDataSet 포맷의 label txt 파일 + images/000000.jpg\t[{"transcription":"37도1563","points":[[x1,y1],...]}] + +출력: gt_mat_dir/gt_img_{1-indexed}.mat (ICDAR2015 wordBB 포맷) + +사용법 (서버에서): + python3.10 /workspace/kr_lp_pgnet/tools/make_gt_mat.py \ + --label /workspace/train_data/kr_lp_synth/test/test.txt \ + --out_dir /workspace/train_data/kr_lp_synth/gt +""" + +import argparse +import json +import os + +import numpy as np +import scipy.io as sio + + +def points_to_wordbb(points): + """[[x0,y0],[x1,y1],[x2,y2],[x3,y3]] → (2, 4) array (ICDAR wordBB 포맷). + axis0: x or y, axis1: corner index (TL, TR, BR, BL).""" + pts = np.array(points, dtype=np.float64) # (4, 2) + # (2, 4): row0=x coords, row1=y coords + return pts.T # [[x0..x3], [y0..y3]] + + +def make_txt_cell(text): + """ICDAR txt 셀: numpy array of individual chars (MATLAB cell array 호환).""" + return np.array([list(text)], dtype=object) + + +def main(): + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--label", required=True, help="PGDataSet label txt 경로") + p.add_argument("--out_dir", required=True, help="gt mat 출력 디렉토리") + args = p.parse_args() + + os.makedirs(args.out_dir, exist_ok=True) + + with open(args.label, encoding="utf-8") as f: + lines = [ln.rstrip("\n") for ln in f if ln.strip()] + + for img_idx, line in enumerate(lines, start=1): + parts = line.split("\t", 1) + if len(parts) != 2: + print(f" skip (no tab): {line[:60]}") + continue + + try: + entries = json.loads(parts[1]) + except json.JSONDecodeError as ex: + print(f" skip (json err @ {img_idx}): {ex}") + continue + + n = len(entries) + if n == 0: + print(f" skip (empty label @ {img_idx})") + continue + + # wordBB: (2, 4, N) — ICDAR 포맷 + word_bb = np.zeros((2, 4, n), dtype=np.float64) + txt = np.empty((1, n), dtype=object) + + for j, entry in enumerate(entries): + word_bb[:, :, j] = points_to_wordbb(entry["points"]) + txt[0, j] = make_txt_cell(entry["transcription"]) + + if n == 1: + # scipy.io는 (2,4,1) → (2,4)로 squeeze하는 경우가 있어 명시적 유지 + word_bb = word_bb.reshape(2, 4, 1) + + mat_path = os.path.join(args.out_dir, f"gt_img_{img_idx}.mat") + sio.savemat(mat_path, {"wordBB": word_bb, "txt": txt}, do_compression=True) + + print(f" gt mat 생성 완료: {len(lines)}개 → {args.out_dir}/gt_img_{{1..{len(lines)}}}.mat") + + +if __name__ == "__main__": + main()