Rewrite setup_server.sh for paddle-only container

- Targets the dedicated kr_lp_pgnet container (Ubuntu 24.04 + py3.10 + paddle 3.3.0.dev sm_120 wheel)
- Reuses pre-staged wheel under /workspace/wheels/
- Pins PaddleOCR release/2.7 (PGNet algorithm baseline)
- Adds ppocr import smoke test before downloading pretrain
- Symlinks our dict/config into PaddleOCR tree

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
main
songhyeonsu 1 month ago
parent a9495e1387
commit cd776cfceb

@ -1,7 +1,14 @@
#!/usr/bin/env bash
# 외부 GPU 서버 최초 1회 셋업
# 가정: NVIDIA GPU + CUDA 11.8 (다른 CUDA면 paddle wheel URL 수정)
# 실행 위치: ~/workspace/kr_lp_pgnet/
# kr_lp_pgnet 전용 컨테이너 안에서 실행되는 환경 셋업 스크립트.
#
# 가정 (컨테이너 외부):
# - Ubuntu 24.04 베이스 컨테이너 `kr_lp_pgnet` (--gpus all, --restart unless-stopped)
# - 호스트 /home/cuuva/workspace ↔ 컨테이너 /workspace (bind mount)
# - 이 repo는 호스트 ~/workspace/kr_lp_pgnet/ 에 clone (= 컨테이너 /workspace/kr_lp_pgnet/)
# - paddle sm_120 wheel은 ~/workspace/wheels/ 안에 둠
#
# 실행 (호스트에서):
# docker exec kr_lp_pgnet bash /workspace/kr_lp_pgnet/scripts/setup_server.sh
set -euo pipefail
@ -10,37 +17,63 @@ KR_LP_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
WORKSPACE="${WORKSPACE:-$(cd "$KR_LP_DIR/.." && pwd)}"
PADDLEOCR_DIR="$WORKSPACE/PaddleOCR"
PRETRAIN_DIR="$PADDLEOCR_DIR/pretrain_models"
WHEEL_DIR="$WORKSPACE/wheels"
echo "[1/4] PaddlePaddle GPU 설치 (CUDA 11.8 기준)"
python3 -m pip install --upgrade pip
python3 -m pip install paddlepaddle-gpu==2.6.1.post118 \
-f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
PY=python3.10
echo "[2/4] PaddleOCR clone"
echo "[1/6] python / paddle 확인"
$PY --version
# Verify that paddle imports and was compiled with CUDA; if not, reinstall it
# from the pre-staged wheel in $WHEEL_DIR.
if ! $PY -c 'import paddle; assert paddle.is_compiled_with_cuda()' 2>/dev/null; then
  echo " paddle 미설치 또는 CUDA 비호환. sm_120 wheel 재설치..."
  # Glob directly instead of parsing `ls | head -1`: under `set -euo pipefail`
  # a non-matching `ls` fails the whole assignment, so the script would abort
  # before the hint below is ever printed. Globs expand in sorted order, so the
  # first existing match is the same file `ls | head -1` would have picked.
  WHL=""
  for candidate in "$WHEEL_DIR"/paddlepaddle_gpu-*-cp310-*linux_x86_64.whl; do
    # Without nullglob an unmatched pattern stays literal; -e filters it out.
    if [ -e "$candidate" ]; then
      WHL="$candidate"
      break
    fi
  done
  if [ -z "$WHL" ]; then
    echo " wheel 파일을 $WHEEL_DIR/ 에 두고 다시 실행하세요." >&2
    exit 1
  fi
  $PY -m pip install --quiet "$WHL"
fi
$PY -c 'import paddle; print(" paddle:", paddle.__version__, "cuda:", paddle.is_compiled_with_cuda())'
echo "[2/6] PaddleOCR clone (release/2.7)"
if [ ! -d "$PADDLEOCR_DIR" ]; then
git clone --depth 1 -b release/2.7 https://github.com/PaddlePaddle/PaddleOCR.git "$PADDLEOCR_DIR"
fi
cd "$PADDLEOCR_DIR"
python3 -m pip install -r requirements.txt
echo " PaddleOCR @$(git rev-parse --short HEAD)"
echo "[3/6] PaddleOCR requirements 설치 (paddlepaddle 라인은 제외)"
# Install PaddleOCR's requirements from the current directory, minus any
# paddlepaddle/paddleocr lines so the already-installed paddle is not replaced.
if [ -f requirements.txt ]; then
  # mktemp avoids a predictable, shared file name under /tmp.
  REQ_FILE=$(mktemp)
  # grep exits 1 when no line survives the filter; that is not an error here.
  grep -viE '^(paddlepaddle|paddleocr)' requirements.txt > "$REQ_FILE" || true
  # Installation stays best-effort (the script continues on failure), but the
  # failure is reported on stderr instead of being silently discarded.
  $PY -m pip install --quiet -r "$REQ_FILE" \
    || echo " warning: some PaddleOCR requirements failed to install; continuing" >&2
  rm -f -- "$REQ_FILE"
fi
echo "[3/4] PGNet Step1 pretrain weight 다운로드"
echo "[4/6] PaddleOCR import smoke test"
cd "$PADDLEOCR_DIR"
# Smoke test: import ppocr and its model builder from the current directory
# ('.' is prepended to sys.path, so this must run inside the PaddleOCR tree).
# On failure, report on stderr and stop.
if ! $PY -c "
import sys; sys.path.insert(0, '.')
import ppocr
from ppocr.modeling.architectures import build_model
print(' ppocr import OK')
"; then
  echo " PaddleOCR import 실패 (paddle 3.3 호환성 이슈일 수 있음)" >&2
  exit 1
fi
echo "[5/6] PGNet step1 pretrain weight 다운로드"
mkdir -p "$PRETRAIN_DIR"
cd "$PRETRAIN_DIR"
if [ ! -d "train_step1" ]; then
if [ ! -d train_step1 ]; then
wget -q https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/train_step1.tar
tar xf train_step1.tar
rm train_step1.tar
tar xf train_step1.tar && rm train_step1.tar
fi
ls train_step1/
echo "[4/5] kr_lp_dict 및 config를 PaddleOCR 트리에 symlink"
echo "[6/6] dict / config symlink → PaddleOCR 트리"
# Link the project's dictionary and config into the PaddleOCR tree.
# `ln -sf` succeeds even when its source is missing and leaves a dangling link,
# so check both sources first and stop with a clear message if one is absent.
for link_src in "$KR_LP_DIR/dict/kr_lp_dict.txt" "$KR_LP_DIR/configs/kr_lp_pgnet.yml"; do
  if [ ! -e "$link_src" ]; then
    echo " symlink source not found: $link_src" >&2
    exit 1
  fi
done
ln -sf "$KR_LP_DIR/dict/kr_lp_dict.txt" "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt"
# configs/e2e is created if absent before the config link is placed there.
mkdir -p "$PADDLEOCR_DIR/configs/e2e"
ln -sf "$KR_LP_DIR/configs/kr_lp_pgnet.yml" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"
echo "[5/5] GPU/Paddle 동작 확인"
python3 -c "import paddle; print('paddle:', paddle.__version__); print('CUDA available:', paddle.is_compiled_with_cuda()); print('GPU count:', paddle.device.cuda.device_count())"
ls -l "$PADDLEOCR_DIR/ppocr/utils/kr_lp_dict.txt" "$PADDLEOCR_DIR/configs/e2e/kr_lp_pgnet.yml"
echo
echo "==========================="
echo "셋업 완료. 다음 단계:"
echo " python data_gen/generate_synthetic.py ..."
echo " bash scripts/run_step1.sh"
echo " bash $KR_LP_DIR/data_gen/setup_assets.sh # 합성 자산 다운로드"
echo " python data_gen/generate_synthetic.py ... # 합성 데이터 생성 (다음 단계)"
echo "==========================="

Loading…
Cancel
Save