fix: tight polygon 라벨 + eval GT mat 생성 파이프라인 추가

- generate_synthetic.py: plate 전체 box → 글자 실제 좌표 기반 tight polygon으로 변경
  (글자 반복 출력·over-segmentation의 근본 원인 해결)
  gen_type1/2/_gen_two_line 모두 (plate, label_list) 통일 반환
- tools/make_gt_mat.py: test.txt → ICDAR wordBB 포맷 gt_img_N.mat 생성 스크립트 신규
  (E2EMetric seqerr=0.99 고착 문제 해결)
- scripts/run_step1.sh: 데이터 생성 → GT mat 생성 → 학습 3단계로 재구성
  NUM_SAMPLES 환경변수로 데이터 수 제어 가능

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
main
songhyeonsoo 1 month ago
parent 0db2bd14b5
commit 77c566a10a

@ -89,6 +89,7 @@ class LPGenerator:
return {k: cv2.resize(v, (w, h)) for k, v in d.items()} return {k: cv2.resize(v, (w, h)) for k, v in d.items()}
def gen_type1(self): def gen_type1(self):
"""신형 승용 가로 1줄 (520x110). 글자 실제 위치로 tight polygon 생성."""
plate = cv2.resize(self.plate_w, (520, 110)) plate = cv2.resize(self.plate_w, (520, 110))
num = self._resize_dict(self.num_w, 56, 83) num = self._resize_dict(self.num_w, 56, 83)
char = self._resize_dict(self.char_w, 60, 83) char = self._resize_dict(self.char_w, 60, 83)
@ -98,16 +99,20 @@ class LPGenerator:
e = [random.choice('0123456789') for _ in range(4)] e = [random.choice('0123456789') for _ in range(4)]
row, col = 13, 35 row, col = 13, 35
x0 = col
for x in d: for x in d:
plate[row:row+83, col:col+56] = num[x]; col += 56 plate[row:row+83, col:col+56] = num[x]; col += 56
plate[row:row+83, col:col+60] = char[ch]; col += 60 + 36 plate[row:row+83, col:col+60] = char[ch]; col += 60 + 36
for x in e: for x in e:
plate[row:row+83, col:col+56] = num[x]; col += 56 plate[row:row+83, col:col+56] = num[x]; col += 56
x1 = col
text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e) text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e)
return plate, text poly = [[x0, row], [x1, row], [x1, row + 83], [x0, row + 83]]
return plate, [{"transcription": text, "points": poly}]
def gen_type2(self): def gen_type2(self):
"""구형 승용 가로 1줄 (355x155). 글자 실제 위치로 tight polygon 생성."""
plate = cv2.resize(self.plate_w, (355, 155)) plate = cv2.resize(self.plate_w, (355, 155))
num = self._resize_dict(self.num_w, 45, 83) num = self._resize_dict(self.num_w, 45, 83)
char = self._resize_dict(self.char_w, 49, 70) char = self._resize_dict(self.char_w, 49, 70)
@ -117,17 +122,21 @@ class LPGenerator:
e = [random.choice('0123456789') for _ in range(4)] e = [random.choice('0123456789') for _ in range(4)]
row, col = 46, 10 row, col = 46, 10
x0 = col
plate[row:row+83, col:col+45] = num[d[0]]; col += 45 plate[row:row+83, col:col+45] = num[d[0]]; col += 45
plate[row:row+83, col:col+45] = num[d[1]]; col += 45 plate[row:row+83, col:col+45] = num[d[1]]; col += 45
plate[row+12:row+82, col+2:col+51] = char[ch]; col += 51 plate[row+12:row+82, col+2:col+51] = char[ch]; col += 51
plate[row:row+83, col+2:col+47] = num[e[0]]; col += 47 plate[row:row+83, col+2:col+47] = num[e[0]]; col += 47
for x in e[1:]: for x in e[1:]:
plate[row:row+83, col:col+45] = num[x]; col += 45 plate[row:row+83, col:col+45] = num[x]; col += 45
x1 = col
text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e) text = ''.join(d) + HANGUL_CHAR_MAP[ch] + ''.join(e)
return plate, text poly = [[x0, row], [x1, row], [x1, row + 83], [x0, row + 83]]
return plate, [{"transcription": text, "points": poly}]
def _gen_two_line(self, plate_bg, num_src, char_src, region_src): def _gen_two_line(self, plate_bg, num_src, char_src, region_src):
"""두 줄 LP (336x170). 위·아래 줄 각각 tight polygon 생성."""
plate = cv2.resize(plate_bg, (336, 170)) plate = cv2.resize(plate_bg, (336, 170))
num1 = self._resize_dict(num_src, 44, 60) num1 = self._resize_dict(num_src, 44, 60)
num2 = self._resize_dict(num_src, 64, 90) num2 = self._resize_dict(num_src, 64, 90)
@ -139,48 +148,36 @@ class LPGenerator:
ch = random.choice(list(HANGUL_CHAR_MAP)) ch = random.choice(list(HANGUL_CHAR_MAP))
e = [random.choice('0123456789') for _ in range(4)] e = [random.choice('0123456789') for _ in range(4)]
# 위 줄: region + 숫자2
row, col = 8, 76 row, col = 8, 76
tx0 = col
plate[row:row+60, col:col+88] = region[rkey]; col += 88 + 8 plate[row:row+60, col:col+88] = region[rkey]; col += 88 + 8
for x in d: for x in d:
plate[row:row+60, col:col+44] = num1[x]; col += 44 plate[row:row+60, col:col+44] = num1[x]; col += 44
tx1 = col
top_poly = [[tx0, row], [tx1, row], [tx1, row + 60], [tx0, row + 60]]
# 아래 줄: 한글 + 숫자4
row, col = 72, 8 row, col = 72, 8
bx0 = col
plate[row:row+62, col:col+64] = char[ch]; col += 64 plate[row:row+62, col:col+64] = char[ch]; col += 64
for x in e: for x in e:
plate[row:row+90, col:col+64] = num2[x]; col += 64 plate[row:row+90, col:col+64] = num2[x]; col += 64
bx1 = col
bot_poly = [[bx0, row], [bx1, row], [bx1, row + 90], [bx0, row + 90]]
# transcription: 줄 단위로 분리 (PGNet은 두 polygon으로 라벨링하는 게 정석)
top = REGION_MAP.get(rkey, '?') + ''.join(d) top = REGION_MAP.get(rkey, '?') + ''.join(d)
bot = HANGUL_CHAR_MAP[ch] + ''.join(e) bot = HANGUL_CHAR_MAP[ch] + ''.join(e)
return plate, top, bot return plate, [
{"transcription": top, "points": top_poly},
{"transcription": bot, "points": bot_poly},
]
def gen_type3(self): def gen_type3(self):
plate, top, bot = self._gen_two_line(self.plate_y, self.num_y, self.char_y, self.region_y_imgs) return self._gen_two_line(self.plate_y, self.num_y, self.char_y, self.region_y_imgs)
return plate, top, bot
def gen_type4(self): def gen_type4(self):
plate, top, bot = self._gen_two_line(self.plate_g, self.num_g, self.char_g, self.region_g_imgs) return self._gen_two_line(self.plate_g, self.num_g, self.char_g, self.region_g_imgs)
return plate, top, bot
def make_label_one_line(plate, text):
"""가로 한 줄 LP — polygon은 plate 전체."""
h, w = plate.shape[:2]
poly = [[0, 0], [w, 0], [w, h], [0, h]]
return [{"transcription": text, "points": poly}]
def make_label_two_line(plate, top, bot):
"""두 줄 LP — 위·아래 두 polygon 으로 분리.
0~50% (region+num), 아래 50~100% (char+num*4) 단순 분할."""
h, w = plate.shape[:2]
mid = h // 2
up_poly = [[0, 0], [w, 0], [w, mid], [0, mid]]
dn_poly = [[0, mid], [w, mid], [w, h], [0, h]]
return [
{"transcription": top, "points": up_poly},
{"transcription": bot, "points": dn_poly},
]
# 한국 도로 LP 분포 추정 (자가용 92% + 영업용 7.5%, 신형 가로 ~98% 등) # 한국 도로 LP 분포 추정 (자가용 92% + 영업용 7.5%, 신형 가로 ~98% 등)
@ -207,10 +204,10 @@ def main():
gen = LPGenerator(Path(args.asset_dir)) gen = LPGenerator(Path(args.asset_dir))
type_funcs = { type_funcs = {
'1': ('one', gen.gen_type1), '1': gen.gen_type1,
'2': ('one', gen.gen_type2), '2': gen.gen_type2,
'3': ('two', gen.gen_type3), '3': gen.gen_type3,
'4': ('two', gen.gen_type4), '4': gen.gen_type4,
} }
selected_keys = [t.strip() for t in args.types.split(',') if t.strip() in type_funcs] selected_keys = [t.strip() for t in args.types.split(',') if t.strip() in type_funcs]
if not selected_keys: if not selected_keys:
@ -239,16 +236,11 @@ def main():
records = [] records = []
for i in range(count): for i in range(count):
idx = random.choices(range(len(chosen)), weights=weights, k=1)[0] idx = random.choices(range(len(chosen)), weights=weights, k=1)[0]
kind, fn = chosen[idx] fn = chosen[idx]
type_count[selected_keys[idx]] += 1 type_count[selected_keys[idx]] += 1
if kind == 'one': plate, label = fn()
plate, text = fn() for entry in label:
label = make_label_one_line(plate, text) seen_chars.update(entry["transcription"])
seen_chars.update(text)
else:
plate, top, bot = fn()
label = make_label_two_line(plate, top, bot)
seen_chars.update(top); seen_chars.update(bot)
if not args.no_bright: if not args.no_bright:
plate = random_bright(plate) plate = random_bright(plate)
fname = f"{i:06d}.jpg" fname = f"{i:06d}.jpg"

@ -14,8 +14,36 @@ set -euo pipefail
PADDLEOCR_DIR=/workspace/PaddleOCR PADDLEOCR_DIR=/workspace/PaddleOCR
KR_LP_DIR=/workspace/kr_lp_pgnet KR_LP_DIR=/workspace/kr_lp_pgnet
TRAIN_DATA=/workspace/train_data TRAIN_DATA=/workspace/train_data
SYNTH_DIR="$TRAIN_DATA/kr_lp_synth"
ASSET_DIR="$KR_LP_DIR/data_gen/Korean-license-plate-Generator"
LOG="${LOG:-$PADDLEOCR_DIR/output/kr_lp_pgnet/train.log}" LOG="${LOG:-$PADDLEOCR_DIR/output/kr_lp_pgnet/train.log}"
NUM_SAMPLES="${NUM_SAMPLES:-10000}"
# ── 1. 합성 데이터 생성 ──────────────────────────────────────────────────────
echo "==========================="
echo "[1/3] 합성 데이터 생성 (${NUM_SAMPLES}장)"
echo " asset: $ASSET_DIR"
echo " out: $SYNTH_DIR"
echo "==========================="
python3.10 "$KR_LP_DIR/data_gen/generate_synthetic.py" \
--asset_dir "$ASSET_DIR" \
--out_dir "$SYNTH_DIR" \
--num "$NUM_SAMPLES" \
--dict "$KR_LP_DIR/dict/kr_lp_dict.txt"
# ── 2. eval GT mat 생성 ─────────────────────────────────────────────────────
echo "==========================="
echo "[2/3] eval GT mat 생성"
echo " label: $SYNTH_DIR/test/test.txt"
echo " out: $SYNTH_DIR/gt/"
echo "==========================="
python3.10 "$KR_LP_DIR/tools/make_gt_mat.py" \
--label "$SYNTH_DIR/test/test.txt" \
--out_dir "$SYNTH_DIR/gt"
# ── 3. 학습 ─────────────────────────────────────────────────────────────────
cd "$PADDLEOCR_DIR" cd "$PADDLEOCR_DIR"
# train_data symlink (config는 ./train_data/kr_lp_synth 사용) # train_data symlink (config는 ./train_data/kr_lp_synth 사용)
@ -38,9 +66,9 @@ if [ "${DRY_RUN:-0}" = "1" ]; then
fi fi
echo "===========================" echo "==========================="
echo "Step1 학습 시작" echo "[3/3] Step1 학습 시작"
echo " config: configs/e2e/kr_lp_pgnet.yml" echo " config: configs/e2e/kr_lp_pgnet.yml"
echo " data: $TRAIN_DATA/kr_lp_synth/" echo " data: $SYNTH_DIR/"
echo " pretrain: pretrain_models/train_step1/best_accuracy" echo " pretrain: pretrain_models/train_step1/best_accuracy"
echo " log: $LOG" echo " log: $LOG"
echo " override: ${OVERRIDE[@]}" echo " override: ${OVERRIDE[@]}"

@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
PaddleOCR E2EMetric (mode: A) 요구하는 gt_img_{N}.mat 파일을 생성.
입력: PGDataSet 포맷의 label txt 파일
images/000000.jpg\t[{"transcription":"37도1563","points":[[x1,y1],...]}]
출력: gt_mat_dir/gt_img_{1-indexed}.mat (ICDAR2015 wordBB 포맷)
사용법 (서버에서):
python3.10 /workspace/kr_lp_pgnet/tools/make_gt_mat.py \
--label /workspace/train_data/kr_lp_synth/test/test.txt \
--out_dir /workspace/train_data/kr_lp_synth/gt
"""
import argparse
import json
import os
import numpy as np
import scipy.io as sio
def points_to_wordbb(points):
    """Convert a list of ``[x, y]`` corner points into wordBB layout.

    The input is read as a float64 array with one point per row and then
    transposed, so row 0 of the result holds every x coordinate and row 1
    holds every y coordinate. For a 4-point polygon the result is ``(2, 4)``.
    Corner order is kept exactly as given (callers are expected to pass
    TL, TR, BR, BL).
    """
    corners = np.array(points, dtype=np.float64)  # one (x, y) pair per row
    return np.transpose(corners)  # rows become [xs], [ys]
def make_txt_cell(text):
    """Split *text* into single characters stored in a ``(1, len(text))``
    object array, one character per column."""
    chars = list(text)
    cell = np.empty((1, len(chars)), dtype=object)
    cell[0, :] = chars
    return cell
def main():
    """Convert a PGDataSet label file into per-image ``gt_img_{N}.mat`` files.

    Command-line arguments:
        --label    path to the label txt file; each non-blank line is an
                   image path, a TAB, and a JSON list of entries that each
                   carry "transcription" and "points".
        --out_dir  directory that receives the .mat files (created if needed).

    For the N-th non-blank line (1-indexed) a file ``gt_img_N.mat`` is
    written containing ``wordBB`` with shape (2, 4, n) and ``txt`` with
    shape (1, n), where n is the number of entries on that line.

    Lines that cannot be converted (no TAB, invalid JSON, empty or non-list
    label, entry with a missing key or a polygon that is not 4 points) are
    skipped with a message instead of aborting the run. A skipped line keeps
    its index, so file numbering always follows the line position. The final
    summary reports how many files were actually written.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--label", required=True, help="PGDataSet label txt 경로")
    p.add_argument("--out_dir", required=True, help="gt mat 출력 디렉토리")
    args = p.parse_args()

    os.makedirs(args.out_dir, exist_ok=True)

    with open(args.label, encoding="utf-8") as f:
        lines = [ln.rstrip("\n") for ln in f if ln.strip()]

    written = 0
    for img_idx, line in enumerate(lines, start=1):
        parts = line.split("\t", 1)
        if len(parts) != 2:
            print(f" skip (no tab): {line[:60]}")
            continue

        try:
            entries = json.loads(parts[1])
        except json.JSONDecodeError as ex:
            print(f" skip (json err @ {img_idx}): {ex}")
            continue

        if not isinstance(entries, list):
            print(f" skip (label is not a list @ {img_idx})")
            continue
        n = len(entries)
        if n == 0:
            print(f" skip (empty label @ {img_idx})")
            continue

        # wordBB is allocated as (2, 4, n) up front, so a single-entry label
        # is already three-dimensional and needs no extra reshape.
        word_bb = np.zeros((2, 4, n), dtype=np.float64)
        txt = np.empty((1, n), dtype=object)
        try:
            for j, entry in enumerate(entries):
                word_bb[:, :, j] = points_to_wordbb(entry["points"])
                txt[0, j] = make_txt_cell(entry["transcription"])
        except (KeyError, TypeError, ValueError) as ex:
            # Missing key, non-dict entry, or a polygon that does not fit
            # the (2, 4) slot: skip this image like other malformed lines.
            print(f" skip (bad entry @ {img_idx}): {ex!r}")
            continue

        mat_path = os.path.join(args.out_dir, f"gt_img_{img_idx}.mat")
        sio.savemat(mat_path, {"wordBB": word_bb, "txt": txt}, do_compression=True)
        written += 1

    # Report the number of files really written; skipped lines leave gaps
    # in the 1..len(lines) index range.
    skipped = len(lines) - written
    print(
        f" gt mat 생성 완료: {written}개 (skip {skipped}개) → "
        f"{args.out_dir}/gt_img_{{1..{len(lines)}}}.mat"
    )
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Loading…
Cancel
Save