{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "6b410dab", "metadata": {}, "outputs": [], "source": [ "from ultralytics import YOLO\n", "\n", "import torch" ] }, { "cell_type": "code", "execution_count": 2, "id": "f66929b6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.cuda.is_available()" ] }, { "cell_type": "code", "execution_count": 4, "id": "e5e2ab7b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "πŸ” Checking split: train (files: 117266)\n", "\n", "πŸ” Checking split: val (files: 4952)\n", "\n", "πŸ“Œ 클래슀 톡계 κ²°κ³Ό:\n", " - Class 0: 268029 개\n", " - Class 1: 45449 개\n", " - Class 2: 10384 개\n", " - Class 3: 6344 개\n", " - Class 4: 9021 개\n", "\n", "총 클래슀 μ’…λ₯˜: 5\n" ] } ], "source": [ "import os\n", "from collections import Counter\n", "\n", "label_root = \"/home/cuuva/experiment/datasets/coco5class/labels\"\n", "splits = [\"train\", \"val\"]\n", "\n", "class_counter = Counter()\n", "\n", "for split in splits:\n", " split_path = os.path.join(label_root, split)\n", " \n", " # 라벨 txt 파일 탐색\n", " label_files = [f for f in os.listdir(split_path) if f.endswith(\".txt\")]\n", " \n", " print(f\"\\nπŸ” Checking split: {split} (files: {len(label_files)})\")\n", "\n", " for file in label_files:\n", " file_path = os.path.join(split_path, file)\n", " with open(file_path, \"r\") as f:\n", " for line in f.readlines():\n", " if line.strip(): # 빈 쀄 제거\n", " class_id = line.split()[0] # 첫번째 κ°’ = 클래슀\n", " class_counter[class_id] += 1\n", "\n", "# μ΅œμ’… κ²°κ³Ό 좜λ ₯\n", "print(\"\\nπŸ“Œ 클래슀 톡계 κ²°κ³Ό:\")\n", "for cls, count in sorted(class_counter.items(), key=lambda x: int(x[0])):\n", " print(f\" - Class {cls}: {count} 개\")\n", "\n", "print(f\"\\n총 클래슀 μ’…λ₯˜: {len(class_counter)}\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "6bb38c03", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "New https://pypi.org/project/ultralytics/8.3.235 available πŸ˜ƒ Update with 'pip install -U ultralytics'\n", "Ultralytics 8.3.225 πŸš€ Python-3.10.18 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 5090, 32087MiB)\n", "\u001b[34m\u001b[1mengine/trainer: \u001b[0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/cuuva/experiment/coco5class_exp/coco5class.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=300, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=5class, nbs=64, nms=False, opset=None, optimize=False, optimizer=AdamW, overlap_mask=True, patience=40, perspective=0.0, plots=True, pose=12.0, pretrained=True, profile=False, project=coco5class_v8m, rect=False, resume=False, retina_masks=False, save=True, save_conf=False, save_crop=False, save_dir=/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class, save_frames=False, save_json=False, save_period=-1, save_txt=False, scale=0.5, seed=0, shear=0.0, show=False, show_boxes=True, show_conf=True, show_labels=True, simplify=True, single_cls=False, source=None, split=val, stream_buffer=False, task=detect, time=None, tracker=botsort.yaml, translate=0.1, val=True, verbose=True, vid_stride=1, visualize=False, warmup_bias_lr=0.1, warmup_epochs=3.0, warmup_momentum=0.8, weight_decay=0.0005, workers=8, workspace=None\n", "Overriding model.yaml nc=80 with nc=5\n", "\n", " from n params module arguments \n", " 0 -1 1 1392 ultralytics.nn.modules.conv.Conv [3, 48, 3, 2] \n", " 1 -1 1 41664 ultralytics.nn.modules.conv.Conv [48, 96, 3, 2] \n", " 2 -1 2 111360 ultralytics.nn.modules.block.C2f [96, 96, 2, True] \n", " 3 -1 1 166272 ultralytics.nn.modules.conv.Conv [96, 192, 3, 2] \n", " 4 -1 4 813312 ultralytics.nn.modules.block.C2f [192, 192, 4, True] \n", " 5 -1 1 664320 ultralytics.nn.modules.conv.Conv [192, 384, 3, 2] \n", " 6 -1 4 3248640 ultralytics.nn.modules.block.C2f [384, 384, 4, True] \n", " 7 -1 1 1991808 ultralytics.nn.modules.conv.Conv [384, 576, 3, 2] \n", " 8 -1 2 3985920 ultralytics.nn.modules.block.C2f [576, 576, 2, True] \n", " 9 -1 1 831168 ultralytics.nn.modules.block.SPPF [576, 576, 5] \n", " 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 12 -1 2 1993728 ultralytics.nn.modules.block.C2f [960, 384, 2] \n", " 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", " 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 15 -1 2 517632 ultralytics.nn.modules.block.C2f [576, 192, 2] \n", " 16 -1 1 332160 ultralytics.nn.modules.conv.Conv [192, 192, 3, 2] \n", " 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 18 -1 2 1846272 ultralytics.nn.modules.block.C2f [576, 384, 2] \n", " 19 -1 1 1327872 ultralytics.nn.modules.conv.Conv [384, 384, 3, 2] \n", " 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", " 21 -1 2 4207104 ultralytics.nn.modules.block.C2f [960, 576, 2] \n", " 22 [15, 18, 21] 1 3778591 ultralytics.nn.modules.head.Detect [5, [192, 384, 576]] \n", "Model summary: 169 layers, 25,859,215 parameters, 25,859,199 gradients, 79.1 GFLOPs\n", "\n", "Transferred 469/475 items from pretrained weights\n", "Freezing layer 'model.22.dfl.conv.weight'\n", "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks...\n", "\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed βœ…\n", "\u001b[34m\u001b[1mtrain: \u001b[0mFast image access βœ… (ping: 0.0Β±0.0 ms, read: 8101.9Β±2264.5 MB/s, size: 195.4 KB)\n", "\u001b[K\u001b[34m\u001b[1mtrain: \u001b[0mScanning /home/cuuva/experiment/datasets/coco5class/labels/train.cache... 117266 images, 48605 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 118287/118287 192.2Mit/s 0.0s\n", "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))\n", "\u001b[34m\u001b[1mAutoBatch: \u001b[0mComputing optimal batch size for imgsz=640 at 60.0% CUDA memory utilization.\n", "\u001b[34m\u001b[1mAutoBatch: \u001b[0mCUDA:0 (NVIDIA GeForce RTX 5090) 31.33G total, 0.24G reserved, 0.23G allocated, 30.86G free\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/cuuva/anaconda3/envs/1stagedetect/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Params GFLOPs GPU_mem (GB) forward (ms) backward (ms) input output\n", " 25859215 79.08 2.261 30.79 146.6 (1, 3, 640, 640) list\n", " 25859215 158.2 3.569 9.957 33.28 (2, 3, 640, 640) list\n", " 25859215 316.3 4.731 13.44 46.58 (4, 3, 640, 640) list\n", " 25859215 632.6 7.785 25.68 70.42 (8, 3, 640, 640) list\n", " 25859215 1265 10.414 27.14 117.5 (16, 3, 640, 640) list\n", " 25859215 2531 20.416 55.02 239.2 (32, 3, 640, 640) list\n", " 25859215 5061 36.937 116.3 435.7 (64, 3, 640, 640) list\n", "\u001b[34m\u001b[1mAutoBatch: \u001b[0mUsing batch-size 29 for CUDA:0 19.03G/31.33G (61%) βœ…\n", "\u001b[34m\u001b[1mtrain: \u001b[0mFast image access βœ… (ping: 0.0Β±0.0 ms, read: 6524.6Β±3206.6 MB/s, size: 190.9 KB)\n", "\u001b[K\u001b[34m\u001b[1mtrain: \u001b[0mScanning /home/cuuva/experiment/datasets/coco5class/labels/train.cache... 117266 images, 48605 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 118287/118287 217.1Mit/s 0.0s\n", "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))\n", "\u001b[34m\u001b[1mval: \u001b[0mFast image access βœ… (ping: 0.0Β±0.0 ms, read: 4736.1Β±2206.1 MB/s, size: 185.8 KB)\n", "\u001b[K\u001b[34m\u001b[1mval: \u001b[0mScanning /home/cuuva/experiment/datasets/coco5class/labels/val.cache... 4952 images, 2049 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 5000/5000 6.6Mit/s 0.0s0s\n", "Plotting labels to /home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/labels.jpg... \n", "\u001b[34m\u001b[1moptimizer:\u001b[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.000453125), 83 bias(decay=0.0)\n", "Image sizes 640 train, 640 val\n", "Using 8 dataloader workers\n", "Logging results to \u001b[1m/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class\u001b[0m\n", "Starting training for 300 epochs...\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 1/300 14.1G 1.156 1.187 1.242 133 640: 100% ━━━━━━━━━━━━ 4079/4079 3.3it/s 20:20<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 8.9it/s 9.8s0.1s\n", " all 5000 13759 0.471 0.374 0.387 0.237\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 2/300 14.1G 1.204 1.229 1.282 98 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:06<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 8.6it/s 10.1s0.1s\n", " all 5000 13759 0.615 0.494 0.54 0.358\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 3/300 14.1G 1.172 1.165 1.258 120 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:08<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 8.9it/s 9.8s<0.2s\n", " all 5000 13759 0.644 0.526 0.58 0.395\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 4/300 14.1G 1.127 1.088 1.231 86 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:06<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.2it/s 9.4s<0.2s\n", " all 5000 13759 0.671 0.559 0.616 0.424\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 5/300 14.1G 1.088 1.025 1.204 107 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:01<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.3it/s 9.3s<0.1s\n", " all 5000 13759 0.675 0.58 0.641 0.453\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 6/300 14.1G 1.057 0.9789 1.185 162 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:01<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.4it/s 9.3s<0.1s\n", " all 5000 13759 0.71 0.575 0.656 0.467\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 7/300 14.1G 1.036 0.9461 1.172 121 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 20:01<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.4it/s 9.3s<0.1s\n", " all 5000 13759 0.725 0.588 0.672 0.48\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 8/300 14.1G 1.023 0.9197 1.161 105 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 19:60<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.4it/s 9.2s0.1ss\n", " all 5000 13759 0.735 0.587 0.681 0.488\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 9/300 14.1G 1.009 0.9032 1.15 136 640: 100% ━━━━━━━━━━━━ 4079/4079 3.4it/s 19:60<0.3s\n", "\u001b[K Class Images Instances Box(P R mAP50 mAP50-95): 100% ━━━━━━━━━━━━ 87/87 9.2it/s 9.5s<0.1s\n", " all 5000 13759 0.719 0.603 0.683 0.493\n", "\n", " Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n", "\u001b[K 10/300 14.1G 0.9986 0.8829 1.145 136 640: 82% ━━━━━━━━━╸── 3329/4079 3.4it/s 16:20<3:401" ] } ], "source": [ "# Load a pretrained YOLO11n model\n", "model = YOLO('yolov8m.pt')\n", "\n", "train_results = model.train(\n", " data=\"/home/cuuva/experiment/coco5class_exp/coco5class.yaml\", #['person','car', 'truck', 'bus', 'motor']\n", " epochs=300,\n", " imgsz=640,\n", " batch=-1,\n", " device=\"cuda\",\n", " optimizer = 'AdamW',\n", " lr0 = 0.001,\n", " patience = 40,\n", " project = 'coco5class_v8m',\n", " name = '5class',\n", ")" ] }, { "cell_type": "markdown", "id": "c87a7d82", "metadata": {}, "source": [ "'''\n", "\n", "all : 0.559\n", "\n", "person : 0.615\n", "\n", "car : 0.496\n", "\n", "truck : 0.438\n", "\n", "bus : 0.739\n", "\n", "motor : 0.509\n", "\n", "'''" ] }, { "cell_type": "code", "execution_count": 3, "id": "681c71ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Ultralytics 8.3.225 πŸš€ Python-3.10.18 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 5090, 32087MiB)\n", "Model summary (fused): 92 layers, 25,842,655 parameters, 0 gradients, 78.7 GFLOPs\n", "\n", "\u001b[34m\u001b[1mPyTorch:\u001b[0m starting from '/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 9, 8400) (49.6 MB)\n", "\n", "\u001b[34m\u001b[1mONNX:\u001b[0m starting export with onnx 1.19.1 opset 20...\n", "\u001b[34m\u001b[1mONNX:\u001b[0m slimming with onnxslim 0.1.71...\n", "\u001b[34m\u001b[1mONNX:\u001b[0m export success βœ… 1.3s, saved as '/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.onnx' (98.8 MB)\n", "\n", "Export complete (1.6s)\n", "Results saved to \u001b[1m/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights\u001b[0m\n", "Predict: yolo predict task=detect model=/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.onnx imgsz=640 \n", "Validate: yolo val task=detect model=/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.onnx imgsz=640 data=/home/cuuva/experiment/coco5class_exp/coco5class.yaml \n", "Visualize: https://netron.app\n" ] }, { "data": { "text/plain": [ "'/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.onnx'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = YOLO(\"/home/cuuva/experiment/coco5class_exp/coco5class_v8m/5class/weights/best_coco5class.pt\")\n", "model.export(format=\"onnx\", imgsz=640, device=0)" ] }, { "cell_type": "code", "execution_count": null, "id": "48b48641", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "1stagedetect", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.18" } }, "nbformat": 4, "nbformat_minor": 5 }