#!/usr/bin/env bash
# Environment setup script, run inside the dedicated kr_lp_pgnet container.
#
# Container assumptions:
#   - Ubuntu 24.04 + Python 3.10 + paddlepaddle-gpu sm_120 wheel installed
#   - host /home/cuuva/workspace <-> container /workspace bind mount
#   - this repo lives at /workspace/kr_lp_pgnet/, paddle wheels at /workspace/wheels/
#
# Usage (from the host):
#   docker exec kr_lp_pgnet bash /workspace/kr_lp_pgnet/scripts/setup_server.sh

set -euo pipefail

# Directory holding this script; the repo root is its parent directory.
SCRIPT_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
KR_LP_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# WORKSPACE may be overridden from the environment; it defaults to the
# directory that contains the repo checkout.
WORKSPACE="${WORKSPACE:-$(cd "$KR_LP_DIR/.." && pwd)}"
PADDLEOCR_DIR="$WORKSPACE/PaddleOCR"
PRETRAIN_DIR="$PADDLEOCR_DIR/pretrain_models"
WHEEL_DIR="$WORKSPACE/wheels"

# Interpreter and pip invocation used by every step.
# PIP and PIP_OPTS are plain strings on purpose: the steps below expand them
# unquoted so that they word-split into separate arguments.
PY=python3.10
PIP="$PY -m pip"
PIP_OPTS="--quiet --root-user-action=ignore"

# Declared read-only after assignment so a failing $(...) above is not masked.
readonly SCRIPT_DIR KR_LP_DIR WORKSPACE PADDLEOCR_DIR PRETRAIN_DIR WHEEL_DIR
readonly PY PIP PIP_OPTS

# Step 1: if the python3.10 interpreter is not on PATH, install it from the
# deadsnakes PPA and bootstrap pip for it. Skipped when it already exists.
echo "[1/8] python3.10 + pip (deadsnakes PPA)"
if ! command -v "$PY" >/dev/null 2>&1; then
  export DEBIAN_FRONTEND=noninteractive
  apt-get update -qq
  apt-get install -y -q software-properties-common ca-certificates curl
  add-apt-repository -y ppa:deadsnakes/ppa
  apt-get update -qq
  apt-get install -y -q python3.10 python3.10-venv python3.10-dev libgomp1
  # -f makes curl fail on HTTP errors, so an error page is never piped into
  # the interpreter as if it were get-pip.py.
  curl -fsS https://bootstrap.pypa.io/get-pip.py | "$PY"
fi
"$PY" --version

# Step 2: verify that paddle imports and was compiled with CUDA; otherwise
# install the first matching cp310 wheel found in $WHEEL_DIR.
echo "[2/8] paddle 확인"
if ! "$PY" -c 'import paddle; assert paddle.is_compiled_with_cuda()' 2>/dev/null; then
  echo " paddle 미설치 또는 CUDA 비호환. sm_120 wheel 설치..."
  # Expand the glob into an array instead of parsing `ls | head -1`: under
  # `set -e` + `pipefail` a failing `ls` inside $(...) aborts the script
  # before the error message below can be printed.
  wheel_candidates=("$WHEEL_DIR"/paddlepaddle_gpu-*-cp310-*linux_x86_64.whl)
  WHL="${wheel_candidates[0]:-}"
  # An unmatched glob stays as the literal pattern, which does not exist.
  if [[ ! -e "$WHL" ]]; then
    echo " ERROR: $WHEEL_DIR/paddlepaddle_gpu-*-cp310-*.whl 없음" >&2
    exit 1
  fi
  # PIP / PIP_OPTS are multi-word strings; word splitting is intended.
  # shellcheck disable=SC2086
  $PIP install $PIP_OPTS "$WHL"
fi
"$PY" -c 'import paddle; print(" paddle:", paddle.__version__, "cuda:", paddle.is_compiled_with_cuda())'

# Step 3: system libraries needed by OpenCV, plus wget and git which the
# following steps call.
echo "[3/8] OpenCV 시스템 의존성 (libgl 등)"
export DEBIAN_FRONTEND=noninteractive
# Refresh the package index here as well: step 1 only runs `apt-get update`
# when python3.10 is missing, and install fails if the index is empty.
apt-get update -qq
apt-get install -y -q libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 wget git

# Step 4: shallow-clone PaddleOCR release/2.7 unless the directory already
# exists, then make it the working directory (step 5 reads requirements.txt
# relative to it).
echo "[4/8] PaddleOCR clone (release/2.7)"
if [[ ! -d "$PADDLEOCR_DIR" ]]; then
  git clone --depth 1 -b release/2.7 https://github.com/PaddlePaddle/PaddleOCR.git "$PADDLEOCR_DIR"
fi
cd "$PADDLEOCR_DIR"
# Informational only: a failing `git rev-parse` inside the echo argument does
# not abort the script.
echo " PaddleOCR @$(git rev-parse --short HEAD)"

# Step 5: install PaddleOCR's Python requirements one line at a time so that a
# single conflicting package is skipped instead of failing the whole install.
echo "[5/8] PaddleOCR Python 의존성 (한 줄씩, 충돌 패키지는 skip)"

#######################################
# Print the lines of a requirements file that this step should install.
# paddle is already installed from the wheel, blinker is a system package and
# the pinned opencv is numpy2-incompatible, so those are dropped (opencv is
# installed separately in step 6). Matching is case-insensitive.
# Arguments: $1 - path to the requirements file
# Outputs:   remaining lines on stdout
# Returns:   always 0 (grep exits 1 when every line is filtered out)
#######################################
filter_requirements() {
  local req_file=$1
  grep -viE '^(paddlepaddle|paddleocr|blinker|opencv-)' -- "$req_file" || true
}

# The list is read on fd 3 so pip cannot consume it through stdin.
while IFS= read -r line <&3; do
  # Skip blank lines and comment lines.
  [[ -z "$line" || "$line" =~ ^# ]] && continue
  # Best effort by design: pip's stderr is hidden and a failure only logs
  # "skip". PIP / PIP_OPTS are multi-word strings; word splitting is intended.
  # shellcheck disable=SC2086
  $PIP install $PIP_OPTS --ignore-installed "$line" 2>/dev/null || echo " skip: $line"
done 3< <(filter_requirements requirements.txt)

# Step 6: Python packages that need explicit versions.
# PIP / PIP_OPTS are multi-word strings; word splitting is intended below.
echo "[6/8] OpenCV (numpy2 호환) + numpy<2 (PaddleOCR release/2.7 호환성) + wandb + cuDNN 9.17"
# shellcheck disable=SC2086
$PIP install $PIP_OPTS 'opencv-python>=4.10' 'opencv-contrib-python>=4.10' wandb
# The paddle sm_120 wheel is built against cuDNN 9.17, so the 9.13.0.50 pulled
# in by paddle's dependencies must be upgraded to 9.17.
# (Otherwise conv2d fails to load the cublasLtCreate symbol -> process abort.)
# shellcheck disable=SC2086
$PIP install $PIP_OPTS --upgrade 'nvidia-cudnn-cu13>=9.17,<9.18'
# imgaug and others depend on the numpy 1.x API (np.sctypes), so pin numpy 1.x.
# paddle 3.3.0.dev is compatible with numpy 1.x as well.
# Kept last so the pin is applied after the installs above.
# shellcheck disable=SC2086
$PIP install $PIP_OPTS 'numpy<2' --force-reinstall

# Step 7: fetch the PGNet step1 pretrain weights (once) and check that the
# PGNet-related PaddleOCR modules import.
echo "[7/8] PGNet step1 pretrain weight 다운로드 + import smoke test"
mkdir -p "$PRETRAIN_DIR"
cd "$PRETRAIN_DIR"
if [[ ! -d train_step1 ]]; then
  # -O overwrites a partial archive left by an interrupted run; without it
  # wget would save the new download under a different name (.1 suffix).
  wget -q -O train_step1.tar https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/train_step1.tar
  # Checked explicitly: in `tar ... && rm ...` a tar failure would not trip
  # `set -e`, and a half-extracted directory would make later runs skip this.
  if ! tar xf train_step1.tar; then
    rm -rf -- train_step1
    echo " ERROR: failed to extract train_step1.tar" >&2
    exit 1
  fi
  rm -f -- train_step1.tar
fi
ls train_step1/
cd "$PADDLEOCR_DIR"
# Import smoke test, run from the PaddleOCR tree so that `ppocr` resolves.
"$PY" -c "
import sys; sys.path.insert(0, '.')
from ppocr.modeling.architectures import build_model
from ppocr.postprocess.pg_postprocess import PGPostProcess
from ppocr.losses.e2e_pg_loss import PGLoss
from ppocr.modeling.heads.e2e_pg_head import PGHead
from ppocr.data.imaug.pg_process import PGProcessTrain
print(' PGNet modules import OK')
"

# Step 8: symlink this repo's character dict and training config into the
# PaddleOCR tree.
echo "[8/8] dict / config symlink → PaddleOCR 트리"
# `ln -sf` creates a dangling link without complaint when the source is
# missing; warn on stderr but keep going.
for link_src in "$KR_LP_DIR/dict/kr_lp_dict.txt" "$KR_LP_DIR/configs/kr_lp_pgnet.yml"; do
  [[ -e "$link_src" ]] || echo " WARNING: $link_src not found; symlink will dangle" >&2
done
ln -sf "$KR_LP_DIR/dict/kr_lp_dict.txt" "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt"
mkdir -p "$PADDLEOCR_DIR/configs/e2e"
ln -sf "$KR_LP_DIR/configs/kr_lp_pgnet.yml" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"
ls -l "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"

# Closing summary: print the follow-up commands to run after setup.
echo
printf '%s\n' \
  "===========================" \
  "셋업 완료. 다음 단계:" \
  " bash $KR_LP_DIR/data_gen/setup_assets.sh # 합성 자산 다운로드" \
  " python3.10 $KR_LP_DIR/data_gen/generate_synthetic.py ... # 합성 데이터 생성" \
  "==========================="