From cd776cfceb19bff30d39e136aa02f25d9fe65c01 Mon Sep 17 00:00:00 2001
From: songhyeonsu
Date: Thu, 7 May 2026 16:32:42 +0900
Subject: [PATCH] Rewrite setup_server.sh for paddle-only container

- Targets the dedicated kr_lp_pgnet container (Ubuntu 24.04 + py3.10 +
  paddle 3.3.0.dev sm_120 wheel)
- Reuses pre-staged wheel under /workspace/wheels/
- Pins PaddleOCR release/2.7 (PGNet algorithm baseline)
- Adds ppocr import smoke test before downloading pretrain
- Symlinks our dict/config into PaddleOCR tree
- Looks up the wheel with a glob so that a missing wheel reports the
  intended error instead of dying silently under `set -e -o pipefail`
- Filters requirements into a mktemp file and warns when pip fails
- Downloads with `wget -O` and keeps tar/rm as separate statements so
  that a failed extraction aborts the script

Co-Authored-By: Claude Opus 4.7
---
 scripts/setup_server.sh | 87 ++++++++++++++++++++++++++++++++---------
 1 file changed, 69 insertions(+), 18 deletions(-)

diff --git a/scripts/setup_server.sh b/scripts/setup_server.sh
index f831689..76e9887 100755
--- a/scripts/setup_server.sh
+++ b/scripts/setup_server.sh
@@ -1,7 +1,14 @@
 #!/usr/bin/env bash
-# 외부 GPU 서버 최초 1회 셋업
-# 가정: NVIDIA GPU + CUDA 11.8 (다른 CUDA면 paddle wheel URL 수정)
-# 실행 위치: ~/workspace/kr_lp_pgnet/
+# Environment setup script, run inside the dedicated kr_lp_pgnet container.
+#
+# Assumptions (prepared outside the container):
+#   - Ubuntu 24.04 based container `kr_lp_pgnet` (--gpus all, --restart unless-stopped)
+#   - host /home/cuuva/workspace ↔ container /workspace (bind mount)
+#   - this repo is cloned at host ~/workspace/kr_lp_pgnet/ (= container /workspace/kr_lp_pgnet/)
+#   - the paddle sm_120 wheel is placed in ~/workspace/wheels/
+#
+# Usage (from the host):
+#   docker exec kr_lp_pgnet bash /workspace/kr_lp_pgnet/scripts/setup_server.sh
 
 set -euo pipefail
 
@@ -10,37 +17,81 @@ KR_LP_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 WORKSPACE="${WORKSPACE:-$(cd "$KR_LP_DIR/.." && pwd)}"
 PADDLEOCR_DIR="$WORKSPACE/PaddleOCR"
 PRETRAIN_DIR="$PADDLEOCR_DIR/pretrain_models"
+WHEEL_DIR="$WORKSPACE/wheels"
 
-echo "[1/4] PaddlePaddle GPU 설치 (CUDA 11.8 기준)"
-python3 -m pip install --upgrade pip
-python3 -m pip install paddlepaddle-gpu==2.6.1.post118 \
-    -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
+PY=python3.10
 
-echo "[2/4] PaddleOCR clone"
+echo "[1/6] python / paddle 확인"
+$PY --version
+# Reinstall only when paddle cannot be imported or was built without CUDA.
+if ! $PY -c 'import paddle; assert paddle.is_compiled_with_cuda()' 2>/dev/null; then
+  echo " paddle 미설치 또는 CUDA 비호환. sm_120 wheel 재설치..."
+  # Collect candidates with a glob instead of parsing `ls | head`: under
+  # `set -e -o pipefail` a failing `ls` aborted the script before the
+  # "wheel missing" message below could be printed.
+  shopt -s nullglob
+  WHEELS=("$WHEEL_DIR"/paddlepaddle_gpu-*-cp310-*linux_x86_64.whl)
+  shopt -u nullglob
+  if [ "${#WHEELS[@]}" -eq 0 ]; then
+    echo " wheel 파일을 $WHEEL_DIR/ 에 두고 다시 실행하세요." >&2
+    exit 1
+  fi
+  WHL="${WHEELS[0]}"
+  $PY -m pip install --quiet "$WHL"
+fi
+$PY -c 'import paddle; print(" paddle:", paddle.__version__, "cuda:", paddle.is_compiled_with_cuda())'
+
+echo "[2/6] PaddleOCR clone (release/2.7)"
 if [ ! -d "$PADDLEOCR_DIR" ]; then
   git clone --depth 1 -b release/2.7 https://github.com/PaddlePaddle/PaddleOCR.git "$PADDLEOCR_DIR"
 fi
 cd "$PADDLEOCR_DIR"
-python3 -m pip install -r requirements.txt
+echo " PaddleOCR @$(git rev-parse --short HEAD)"
+
+echo "[3/6] PaddleOCR requirements 설치 (paddlepaddle 라인은 제외)"
+if [ -f requirements.txt ]; then
+  # Filter into a mktemp file rather than a predictable path under /tmp.
+  REQ_FILTERED="$(mktemp)"
+  # grep exits 1 when no line survives the filter; that is not an error here.
+  grep -viE '^(paddlepaddle|paddleocr)' requirements.txt > "$REQ_FILTERED" || true
+  # Best effort (kept from the original `|| true`): warn instead of aborting;
+  # the import smoke test in the next step decides whether the env is usable.
+  $PY -m pip install --quiet -r "$REQ_FILTERED" \
+    || echo " 경고: 일부 requirements 설치 실패 - 아래 import smoke test 결과를 확인하세요." >&2
+  rm -f -- "$REQ_FILTERED"
+fi
 
-echo "[3/4] PGNet Step1 pretrain weight 다운로드"
+echo "[4/6] PaddleOCR import smoke test"
+cd "$PADDLEOCR_DIR"
+$PY -c "
+import sys; sys.path.insert(0, '.')
+import ppocr
+from ppocr.modeling.architectures import build_model
+print(' ppocr import OK')
+" || { echo " PaddleOCR import 실패 (paddle 3.3 호환성 이슈일 수 있음)" >&2; exit 1; }
+
+echo "[5/6] PGNet step1 pretrain weight 다운로드"
 mkdir -p "$PRETRAIN_DIR"
 cd "$PRETRAIN_DIR"
-if [ ! -d "train_step1" ]; then
-  wget -q https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/train_step1.tar
+if [ ! -d train_step1 ]; then
+  # -O: overwrite a partial archive left by an interrupted run; without it
+  # wget saves the retry as train_step1.tar.1 and tar unpacks the stale file.
+  wget -q -O train_step1.tar https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/train_step1.tar
+  # Separate statements so that `set -e` aborts when extraction fails.
   tar xf train_step1.tar
   rm train_step1.tar
 fi
+ls train_step1/
 
-echo "[4/5] kr_lp_dict 및 config를 PaddleOCR 트리에 symlink"
+echo "[6/6] dict / config symlink → PaddleOCR 트리"
 ln -sf "$KR_LP_DIR/dict/kr_lp_dict.txt" "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt"
 mkdir -p "$PADDLEOCR_DIR/configs/e2e"
 ln -sf "$KR_LP_DIR/configs/kr_lp_pgnet.yml" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"
-
-echo "[5/5] GPU/Paddle 동작 확인"
-python3 -c "import paddle; print('paddle:', paddle.__version__); print('CUDA available:', paddle.is_compiled_with_cuda()); print('GPU count:', paddle.device.cuda.device_count())"
+ls -l "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"
 
 echo
+echo "==========================="
 echo "셋업 완료. 다음 단계:"
-echo " python data_gen/generate_synthetic.py ..."
-echo " bash scripts/run_step1.sh"
+echo " bash $KR_LP_DIR/data_gen/setup_assets.sh # 합성 자산 다운로드"
+echo " python data_gen/generate_synthetic.py ... # 합성 데이터 생성 (다음 단계)"
+echo "==========================="