@ -58,6 +58,19 @@ def random_bright(img: np.ndarray, scale_range=(0.55, 1.45)) -> np.ndarray:
return cv2 . cvtColor ( hsv . astype ( np . uint8 ) , cv2 . COLOR_HSV2BGR )
return cv2 . cvtColor ( hsv . astype ( np . uint8 ) , cv2 . COLOR_HSV2BGR )
class StratifiedSampler:
    """Perfectly uniform sampler: each key appears exactly once per cycle.

    All keys are shuffled into a pool and handed out one at a time. When the
    pool runs dry it is refilled with a fresh shuffle of the full key list, so
    over any complete cycle every key is returned exactly once.
    """

    def __init__(self, keys):
        """Store a private list copy of *keys* (any iterable, e.g. a dict)."""
        # Materialise once so later mutation of the caller's container
        # cannot change what this sampler hands out.
        self.keys = list(keys)
        self._pool = []

    def next(self):
        """Return the next key, starting a newly shuffled cycle when needed.

        Raises:
            IndexError: if the sampler was constructed with no keys.
        """
        if not self._pool:
            if not self.keys:
                # Same exception type as the bare list.pop() would raise,
                # but with a message that names the real cause.
                raise IndexError("StratifiedSampler has no keys to sample from")
            self._pool = self.keys.copy()
            random.shuffle(self._pool)
        return self._pool.pop()
class LPGenerator :
class LPGenerator :
def __init__ ( self , asset_dir : Path ) :
def __init__ ( self , asset_dir : Path ) :
self . asset = Path ( asset_dir )
self . asset = Path ( asset_dir )
@ -77,6 +90,11 @@ class LPGenerator:
self . region_y_imgs = self . _load ( " region_y " )
self . region_y_imgs = self . _load ( " region_y " )
self . region_g_imgs = self . _load ( " region_g " )
self . region_g_imgs = self . _load ( " region_g " )
# 한글/지역명은 클래스 수가 적고 plate당 1개만 등장 → 균등 샘플링 필수
self . hangul_sampler = StratifiedSampler ( HANGUL_CHAR_MAP )
self . region_y_sampler = StratifiedSampler ( self . region_y_imgs )
self . region_g_sampler = StratifiedSampler ( self . region_g_imgs )
def _load ( self , sub : str ) - > dict :
def _load ( self , sub : str ) - > dict :
out = { }
out = { }
for fp in sorted ( ( self . asset / sub ) . iterdir ( ) ) :
for fp in sorted ( ( self . asset / sub ) . iterdir ( ) ) :
@ -95,7 +113,7 @@ class LPGenerator:
char = self . _resize_dict ( self . char_w , 60 , 83 )
char = self . _resize_dict ( self . char_w , 60 , 83 )
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
ch = random . choice ( list ( HANGUL_CHAR_MAP ) )
ch = self . hangul_sampler . next ( )
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
row , col = 13 , 35
row , col = 13 , 35
@ -118,7 +136,7 @@ class LPGenerator:
char = self . _resize_dict ( self . char_w , 49 , 70 )
char = self . _resize_dict ( self . char_w , 49 , 70 )
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
ch = random . choice ( list ( HANGUL_CHAR_MAP ) )
ch = self . hangul_sampler . next ( )
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
row , col = 46 , 10
row , col = 46 , 10
@ -135,7 +153,7 @@ class LPGenerator:
poly = [ [ x0 , row ] , [ x1 , row ] , [ x1 , row + 83 ] , [ x0 , row + 83 ] ]
poly = [ [ x0 , row ] , [ x1 , row ] , [ x1 , row + 83 ] , [ x0 , row + 83 ] ]
return plate , [ { " transcription " : text , " points " : poly } ]
return plate , [ { " transcription " : text , " points " : poly } ]
def _gen_two_line ( self , plate_bg , num_src , char_src , region_src ):
def _gen_two_line ( self , plate_bg , num_src , char_src , region_src , region_sampler ):
""" 두 줄 LP (336x170). 위·아래 줄 각각 tight polygon 생성. """
""" 두 줄 LP (336x170). 위·아래 줄 각각 tight polygon 생성. """
plate = cv2 . resize ( plate_bg , ( 336 , 170 ) )
plate = cv2 . resize ( plate_bg , ( 336 , 170 ) )
num1 = self . _resize_dict ( num_src , 44 , 60 )
num1 = self . _resize_dict ( num_src , 44 , 60 )
@ -143,9 +161,9 @@ class LPGenerator:
region = self . _resize_dict ( region_src , 88 , 60 )
region = self . _resize_dict ( region_src , 88 , 60 )
char = self . _resize_dict ( char_src , 64 , 62 )
char = self . _resize_dict ( char_src , 64 , 62 )
rkey = r andom. choice ( list ( region ) )
rkey = r egion_sampler. next ( )
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
d = [ random . choice ( ' 0123456789 ' ) for _ in range ( 2 ) ]
ch = random . choice ( list ( HANGUL_CHAR_MAP ) )
ch = self . hangul_sampler . next ( )
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
e = [ random . choice ( ' 0123456789 ' ) for _ in range ( 4 ) ]
# 위 줄: region + 숫자2
# 위 줄: region + 숫자2
@ -174,15 +192,15 @@ class LPGenerator:
]
]
# Build a two-line plate from the "y" asset set (plate_y, num_y, char_y,
# region_y_imgs) by delegating to _gen_two_line and returning its result.
# NOTE(review): the def and the return each appear twice in this listing; only
# the second return passes self.region_y_sampler — confirm which is intended.
def gen_type3 ( self ) :
def gen_type3 ( self ) :
return self . _gen_two_line ( self . plate_y , self . num_y , self . char_y , self . region_y_imgs )
return self . _gen_two_line ( self . plate_y , self . num_y , self . char_y , self . region_y_imgs , self . region_y_sampler )
# Build a two-line plate from the "g" asset set (plate_g, num_g, char_g,
# region_g_imgs) by delegating to _gen_two_line and returning its result.
# NOTE(review): the def and the return each appear twice in this listing; only
# the second return passes self.region_g_sampler — confirm which is intended.
def gen_type4 ( self ) :
def gen_type4 ( self ) :
return self . _gen_two_line ( self . plate_g , self . num_g , self . char_g , self . region_g_imgs )
return self . _gen_two_line ( self . plate_g , self . num_g , self . char_g , self . region_g_imgs , self . region_g_sampler )
# Estimated distribution of Korean road LPs (private 92% + commercial 7.5%, new-style horizontal ~98%, etc.)
# Training weights — at the validation stage, two-line plates (type3+4) are deliberately over-represented relative to the road distribution
# Synthetic weights chosen with asset limitations in mind — can be overridden with --type_weights when calling generate_synthetic.
# so that the model sufficiently learns to detect the upper and lower lines together. At inference time, evaluation uses the road distribution.
TYPE_DEFAULT_WEIGHTS = { ' 1 ' : 0. 8 0, ' 2 ' : 0.05 , ' 3 ' : 0. 10, ' 4 ' : 0.0 5}
TYPE_DEFAULT_WEIGHTS = { ' 1 ' : 0. 5 0, ' 2 ' : 0.05 , ' 3 ' : 0. 30, ' 4 ' : 0.1 5}
def main ( ) :
def main ( ) :