Detection_Experiment/fashion_yolo/aihub_dataset_eda.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fc523237",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "================= CLASS STATISTICS =================\n",
      "\n",
      "📁 Training Dataset\n",
      "   [0] BLOUSE      👉  9014 개\n",
      "   [1] COAT        👉  9949 개\n",
      "   [2] DRESS       👉  10173 개\n",
      "   [3] JACKET      👉  10466 개\n",
      "   [4] JUMPER      👉  21893 개\n",
      "   [5] PANTS       👉  69937 개\n",
      "   [6] SHIRTS      👉  13438 개\n",
      "   [7] SKIRT       👉  18156 개\n",
      "   [8] TSHIRTS     👉  45025 개\n",
      "\n",
      "\n",
      "📁 Validation Dataset\n",
      "   [0] BLOUSE      👉  1127 개\n",
      "   [1] COAT        👉  1242 개\n",
      "   [2] DRESS       👉  1270 개\n",
      "   [3] JACKET      👉  1309 개\n",
      "   [4] JUMPER      👉  2737 개\n",
      "   [5] PANTS       👉  8741 개\n",
      "   [6] SHIRTS      👉  1681 개\n",
      "   [7] SKIRT       👉  2269 개\n",
      "   [8] TSHIRTS     👉  5624 개\n",
      "\n",
      "\n",
      "=============== TOTAL SUMMARY ===============\n",
      "\n",
      "   [0] BLOUSE      👉  10141 total\n",
      "   [1] COAT        👉  11191 total\n",
      "   [2] DRESS       👉  11443 total\n",
      "   [3] JACKET      👉  11775 total\n",
      "   [4] JUMPER      👉  24630 total\n",
      "   [5] PANTS       👉  78678 total\n",
      "   [6] SHIRTS      👉  15119 total\n",
      "   [7] SKIRT       👉  20425 total\n",
      "   [8] TSHIRTS     👉  50649 total\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from collections import defaultdict\n",
    "\n",
    "# ---- CLASS MAP (사용자가 정의한 클래스) ----\n",
    "CLASS_MAP = {\n",
    "    \"0\": \"BLOUSE\",\n",
    "    \"1\": \"COAT\",\n",
    "    \"2\": \"DRESS\",\n",
    "    \"3\": \"JACKET\",\n",
    "    \"4\": \"JUMPER\",\n",
    "    \"5\": \"PANTS\",\n",
    "    \"6\": \"SHIRTS\",\n",
    "    \"7\": \"SKIRT\",\n",
    "    \"8\": \"TSHIRTS\"\n",
    "}\n",
    "\n",
    "# ---- 데이터 경로 ----\n",
    "DATASETS = {\n",
    "    \"Training\": \"/home/cuuva/aihub_car/clothes_dataset/Training/labels_txt\",\n",
    "    \"Validation\": \"/home/cuuva/aihub_car/clothes_dataset/Validation/labels_txt\"\n",
    "}\n",
    "\n",
    "\n",
    "def count_classes(path):\n",
    "    class_count = defaultdict(int)\n",
    "\n",
    "    for root, _, files in os.walk(path):\n",
    "        for file in files:\n",
    "            if file.endswith(\".txt\"):\n",
    "                file_path = os.path.join(root, file)\n",
    "\n",
    "                with open(file_path, \"r\") as f:\n",
    "                    lines = f.readlines()\n",
    "\n",
    "                for line in lines:\n",
    "                    if line.strip():\n",
    "                        class_id = line.split()[0]\n",
    "                        class_count[class_id] += 1\n",
    "\n",
    "    # 정렬\n",
    "    return dict(sorted(class_count.items(), key=lambda x: int(x[0])))\n",
    "\n",
    "\n",
    "# ---- 실행 ----\n",
    "results = {}\n",
    "for name, path in DATASETS.items():\n",
    "    results[name] = count_classes(path)\n",
    "\n",
    "print(\"\\n================= CLASS STATISTICS =================\\n\")\n",
    "\n",
    "# ---- 상세 출력 ----\n",
    "for dataset_name, counts in results.items():\n",
    "    print(f\"📁 {dataset_name} Dataset\")\n",
    "    for cls_id, count in counts.items():\n",
    "        cls_name = CLASS_MAP.get(cls_id, \"UNKNOWN\")\n",
    "        print(f\"   [{cls_id}] {cls_name:<10}  👉  {count} 개\")\n",
    "    print(\"\\n\")\n",
    "\n",
    "\n",
    "# ---- Summary 합산 ----\n",
    "print(\"=============== TOTAL SUMMARY ===============\\n\")\n",
    "total = defaultdict(int)\n",
    "\n",
    "for r in results.values():\n",
    "    for cls_id, cnt in r.items():\n",
    "        total[cls_id] += cnt\n",
    "\n",
    "for cls_id, cnt in sorted(total.items(), key=lambda x: int(x[0])):\n",
    "    print(f\"   [{cls_id}] {CLASS_MAP[cls_id]:<10}  👉  {cnt} total\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62ada642",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "1stagedetect",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
Organize my project 6 months ago			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"id": "fc523237",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"\n",`
			`"================= CLASS STATISTICS =================\n",`
			`"\n",`
			`"📁 Training Dataset\n",`
			`" [0] BLOUSE 👉 9014 개\n",`
			`" [1] COAT 👉 9949 개\n",`
			`" [2] DRESS 👉 10173 개\n",`
			`" [3] JACKET 👉 10466 개\n",`
			`" [4] JUMPER 👉 21893 개\n",`
			`" [5] PANTS 👉 69937 개\n",`
			`" [6] SHIRTS 👉 13438 개\n",`
			`" [7] SKIRT 👉 18156 개\n",`
			`" [8] TSHIRTS 👉 45025 개\n",`
			`"\n",`
			`"\n",`
			`"📁 Validation Dataset\n",`
			`" [0] BLOUSE 👉 1127 개\n",`
			`" [1] COAT 👉 1242 개\n",`
			`" [2] DRESS 👉 1270 개\n",`
			`" [3] JACKET 👉 1309 개\n",`
			`" [4] JUMPER 👉 2737 개\n",`
			`" [5] PANTS 👉 8741 개\n",`
			`" [6] SHIRTS 👉 1681 개\n",`
			`" [7] SKIRT 👉 2269 개\n",`
			`" [8] TSHIRTS 👉 5624 개\n",`
			`"\n",`
			`"\n",`
			`"=============== TOTAL SUMMARY ===============\n",`
			`"\n",`
			`" [0] BLOUSE 👉 10141 total\n",`
			`" [1] COAT 👉 11191 total\n",`
			`" [2] DRESS 👉 11443 total\n",`
			`" [3] JACKET 👉 11775 total\n",`
			`" [4] JUMPER 👉 24630 total\n",`
			`" [5] PANTS 👉 78678 total\n",`
			`" [6] SHIRTS 👉 15119 total\n",`
			`" [7] SKIRT 👉 20425 total\n",`
			`" [8] TSHIRTS 👉 50649 total\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"import os\n",`
			`"from collections import defaultdict\n",`
			`"\n",`
			`"# ---- CLASS MAP (사용자가 정의한 클래스) ----\n",`
			`"CLASS_MAP = {\n",`
			`" \"0\": \"BLOUSE\",\n",`
			`" \"1\": \"COAT\",\n",`
			`" \"2\": \"DRESS\",\n",`
			`" \"3\": \"JACKET\",\n",`
			`" \"4\": \"JUMPER\",\n",`
			`" \"5\": \"PANTS\",\n",`
			`" \"6\": \"SHIRTS\",\n",`
			`" \"7\": \"SKIRT\",\n",`
			`" \"8\": \"TSHIRTS\"\n",`
			`"}\n",`
			`"\n",`
			`"# ---- 데이터 경로 ----\n",`
			`"DATASETS = {\n",`
			`" \"Training\": \"/home/cuuva/aihub_car/clothes_dataset/Training/labels_txt\",\n",`
			`" \"Validation\": \"/home/cuuva/aihub_car/clothes_dataset/Validation/labels_txt\"\n",`
			`"}\n",`
			`"\n",`
			`"\n",`
			`"def count_classes(path):\n",`
			`" class_count = defaultdict(int)\n",`
			`"\n",`
			`" for root, _, files in os.walk(path):\n",`
			`" for file in files:\n",`
			`" if file.endswith(\".txt\"):\n",`
			`" file_path = os.path.join(root, file)\n",`
			`"\n",`
			`" with open(file_path, \"r\") as f:\n",`
			`" lines = f.readlines()\n",`
			`"\n",`
			`" for line in lines:\n",`
			`" if line.strip():\n",`
			`" class_id = line.split()[0]\n",`
			`" class_count[class_id] += 1\n",`
			`"\n",`
			`" # 정렬\n",`
			`" return dict(sorted(class_count.items(), key=lambda x: int(x[0])))\n",`
			`"\n",`
			`"\n",`
			`"# ---- 실행 ----\n",`
			`"results = {}\n",`
			`"for name, path in DATASETS.items():\n",`
			`" results[name] = count_classes(path)\n",`
			`"\n",`
			`"print(\"\\n================= CLASS STATISTICS =================\\n\")\n",`
			`"\n",`
			`"# ---- 상세 출력 ----\n",`
			`"for dataset_name, counts in results.items():\n",`
			`" print(f\"📁 {dataset_name} Dataset\")\n",`
			`" for cls_id, count in counts.items():\n",`
			`" cls_name = CLASS_MAP.get(cls_id, \"UNKNOWN\")\n",`
			`" print(f\" [{cls_id}] {cls_name:<10} 👉 {count} 개\")\n",`
			`" print(\"\\n\")\n",`
			`"\n",`
			`"\n",`
			`"# ---- Summary 합산 ----\n",`
			`"print(\"=============== TOTAL SUMMARY ===============\\n\")\n",`
			`"total = defaultdict(int)\n",`
			`"\n",`
			`"for r in results.values():\n",`
			`" for cls_id, cnt in r.items():\n",`
			`" total[cls_id] += cnt\n",`
			`"\n",`
			`"for cls_id, cnt in sorted(total.items(), key=lambda x: int(x[0])):\n",`
			`" print(f\" [{cls_id}] {CLASS_MAP[cls_id]:<10} 👉 {cnt} total\")\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"id": "62ada642",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "1stagedetect",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.10.18"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`