# kr_lp_pgnet/tools/make_gt_mat.py

#!/usr/bin/env python3
"""
PaddleOCR E2EMetric (mode: A)이 요구하는 gt_img_{N}.mat 파일을 생성.

입력: PGDataSet 포맷의 label txt 파일
  images/000000.jpg\t[{"transcription":"37도1563","points":[[x1,y1],...]}]

출력: gt_mat_dir/gt_img_{1-indexed}.mat (ICDAR2015 wordBB 포맷)

사용법 (서버에서):
  python3.10 /workspace/kr_lp_pgnet/tools/make_gt_mat.py \
      --label /workspace/train_data/kr_lp_synth/test/test.txt \
      --out_dir /workspace/train_data/kr_lp_synth/gt
"""

import argparse
import json
import os

import numpy as np
import scipy.io as sio


def points_to_wordbb(points):
    """Convert four ``[x, y]`` corners into a ``(2, 4)`` wordBB array.

    Args:
        points: Sequence of exactly four ``[x, y]`` pairs, e.g.
            ``[[x0, y0], [x1, y1], [x2, y2], [x3, y3]]``. The corner order
            is preserved as given (the label convention is presumably
            TL, TR, BR, BL -- confirm against the dataset).

    Returns:
        A float64 array of shape ``(2, 4)``: row 0 holds the x coordinates
        and row 1 the y coordinates, columns in input corner order.

    Raises:
        ValueError: If ``points`` cannot be read as exactly four numeric
            ``(x, y)`` pairs.
    """
    pts = np.array(points, dtype=np.float64)
    # Reject anything that is not (4, 2) up front. Otherwise a degenerate
    # input such as a single [x, y] pair transposes to (2, 1) and would be
    # silently broadcast into four identical corners when stored in a
    # (2, 4) slot by the caller.
    if pts.shape != (4, 2):
        raise ValueError(
            f"expected 4 [x, y] points with shape (4, 2), got shape {pts.shape}"
        )
    # Transpose so axis 0 selects x/y and axis 1 selects the corner.
    return pts.T


def make_txt_cell(text):
    """Build the per-word text cell: a ``(1, len(text))`` object array.

    Each element is one character of ``text``, which gives the saved value
    a cell-array-like layout (one cell per character).
    """
    chars = np.array(list(text), dtype=object)
    # Prepend a length-1 axis so the result is a single row of characters.
    return chars[np.newaxis, :]


def _build_gt_arrays(entries):
    """Convert one image's label entries into the ``wordBB`` / ``txt`` arrays.

    Args:
        entries: Non-empty list of dicts, each holding a ``"points"`` list of
            four ``[x, y]`` corners and a ``"transcription"`` value.

    Returns:
        Tuple ``(word_bb, txt)``: a float64 array of shape ``(2, 4, N)`` and
        an object array of shape ``(1, N)`` with one text cell per entry.

    Raises:
        KeyError: An entry lacks ``"points"`` or ``"transcription"``.
        TypeError: An entry is not subscriptable by key, or its
            transcription is not iterable.
        ValueError: An entry's points do not form a ``(2, 4)`` box.
    """
    n = len(entries)
    # Allocated with an explicit trailing N axis, so a single-word image
    # still produces a 3-D (2, 4, 1) array.
    word_bb = np.zeros((2, 4, n), dtype=np.float64)
    txt = np.empty((1, n), dtype=object)

    for j, entry in enumerate(entries):
        box = points_to_wordbb(entry["points"])
        # Check the shape here as well: assigning e.g. a (2, 1) array into
        # the (2, 4) slot would broadcast silently instead of failing.
        if np.shape(box) != (2, 4):
            raise ValueError(
                f"entry {j}: expected 4 points, got box shape {np.shape(box)}"
            )
        word_bb[:, :, j] = box
        txt[0, j] = make_txt_cell(entry["transcription"])

    return word_bb, txt


def main() -> None:
    """Generate one ``gt_img_{N}.mat`` file per labelled line of ``--label``.

    Each non-blank line of the label file must look like
    ``<image path>\\t<JSON list of entries>``. ``N`` is the 1-based position
    of the line among the non-blank lines, so skipped lines leave gaps in
    the numbering instead of shifting later indices.

    Lines that cannot be converted (no tab, invalid JSON, JSON that is not a
    list, empty list, malformed entry) are reported on stdout and skipped;
    the run continues with the next line. The final summary reports the
    number of files actually written.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--label", required=True, help="PGDataSet label txt 경로")
    p.add_argument("--out_dir", required=True, help="gt mat 출력 디렉토리")
    args = p.parse_args()

    os.makedirs(args.out_dir, exist_ok=True)

    with open(args.label, encoding="utf-8") as f:
        lines = [ln.rstrip("\n") for ln in f if ln.strip()]

    written = 0
    for img_idx, line in enumerate(lines, start=1):
        parts = line.split("\t", 1)
        if len(parts) != 2:
            print(f"  skip (no tab): {line[:60]}")
            continue

        try:
            entries = json.loads(parts[1])
        except json.JSONDecodeError as ex:
            print(f"  skip (json err @ {img_idx}): {ex}")
            continue

        # Valid JSON can still be a dict/number/string; only a list of
        # entries is meaningful here.
        if not isinstance(entries, list):
            print(f"  skip (label is not a list @ {img_idx})")
            continue

        if not entries:
            print(f"  skip (empty label @ {img_idx})")
            continue

        try:
            word_bb, txt = _build_gt_arrays(entries)
        except (KeyError, TypeError, ValueError) as ex:
            # One malformed entry should not abort the whole conversion;
            # handle it like the other per-line skip cases.
            print(f"  skip (bad entry @ {img_idx}): {ex!r}")
            continue

        mat_path = os.path.join(args.out_dir, f"gt_img_{img_idx}.mat")
        sio.savemat(mat_path, {"wordBB": word_bb, "txt": txt}, do_compression=True)
        written += 1

    # Report the number of files actually written, not the number of input
    # lines; the index range still refers to line positions.
    skipped = len(lines) - written
    summary = f"  gt mat 생성 완료: {written}개 → {args.out_dir}/gt_img_{{1..{len(lines)}}}.mat"
    if skipped:
        summary += f" (skip: {skipped}개)"
    print(summary)


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()