You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

157 lines
4.8 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "fc523237",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"================= CLASS STATISTICS =================\n",
"\n",
"📁 Training Dataset\n",
" [0] BLOUSE 👉 9014 개\n",
" [1] COAT 👉 9949 개\n",
" [2] DRESS 👉 10173 개\n",
" [3] JACKET 👉 10466 개\n",
" [4] JUMPER 👉 21893 개\n",
" [5] PANTS 👉 69937 개\n",
" [6] SHIRTS 👉 13438 개\n",
" [7] SKIRT 👉 18156 개\n",
" [8] TSHIRTS 👉 45025 개\n",
"\n",
"\n",
"📁 Validation Dataset\n",
" [0] BLOUSE 👉 1127 개\n",
" [1] COAT 👉 1242 개\n",
" [2] DRESS 👉 1270 개\n",
" [3] JACKET 👉 1309 개\n",
" [4] JUMPER 👉 2737 개\n",
" [5] PANTS 👉 8741 개\n",
" [6] SHIRTS 👉 1681 개\n",
" [7] SKIRT 👉 2269 개\n",
" [8] TSHIRTS 👉 5624 개\n",
"\n",
"\n",
"=============== TOTAL SUMMARY ===============\n",
"\n",
" [0] BLOUSE 👉 10141 total\n",
" [1] COAT 👉 11191 total\n",
" [2] DRESS 👉 11443 total\n",
" [3] JACKET 👉 11775 total\n",
" [4] JUMPER 👉 24630 total\n",
" [5] PANTS 👉 78678 total\n",
" [6] SHIRTS 👉 15119 total\n",
" [7] SKIRT 👉 20425 total\n",
" [8] TSHIRTS 👉 50649 total\n"
]
}
],
"source": [
"import os\n",
"from collections import defaultdict\n",
"\n",
"# ---- CLASS MAP (user-defined classes) ----\n",
"# Maps the class-id token read from the label files to a readable class name.\n",
"CLASS_MAP = {\n",
"    \"0\": \"BLOUSE\",\n",
"    \"1\": \"COAT\",\n",
"    \"2\": \"DRESS\",\n",
"    \"3\": \"JACKET\",\n",
"    \"4\": \"JUMPER\",\n",
"    \"5\": \"PANTS\",\n",
"    \"6\": \"SHIRTS\",\n",
"    \"7\": \"SKIRT\",\n",
"    \"8\": \"TSHIRTS\",\n",
"}\n",
"\n",
"# ---- Data paths ----\n",
"# The dataset root defaults to the original location but can be overridden\n",
"# with the CLOTHES_DATASET_ROOT environment variable, so the notebook can run\n",
"# on machines where the data lives elsewhere without editing this cell.\n",
"DATASET_ROOT = (\n",
"    os.environ.get(\"CLOTHES_DATASET_ROOT\")\n",
"    or \"/home/cuuva/aihub_car/clothes_dataset\"\n",
")\n",
"\n",
"# Dataset split name -> directory that is scanned for label .txt files.\n",
"DATASETS = {\n",
"    \"Training\": f\"{DATASET_ROOT}/Training/labels_txt\",\n",
"    \"Validation\": f\"{DATASET_ROOT}/Validation/labels_txt\",\n",
"}\n",
"\n",
"\n",
"def count_classes(path):\n",
"    \"\"\"Count label lines per class id in every ``.txt`` file under ``path``.\n",
"\n",
"    The directory tree is walked recursively. In each ``.txt`` file, every\n",
"    non-blank line adds one to the count of the class id given by the line's\n",
"    first whitespace-separated token.\n",
"\n",
"    Args:\n",
"        path: Root directory to search for label files.\n",
"\n",
"    Returns:\n",
"        dict mapping class id (str) to its line count (int). Integer ids come\n",
"        first in numeric order; ids that are not integers follow in\n",
"        lexicographic order instead of raising ``ValueError`` while sorting.\n",
"        A missing or empty directory yields an empty dict.\n",
"    \"\"\"\n",
"\n",
"    def sort_key(item):\n",
"        # Tolerate malformed ids (e.g. \"3.0\" or a stray header token) so a\n",
"        # single bad label line cannot abort the whole count.\n",
"        class_id = item[0]\n",
"        try:\n",
"            return (0, int(class_id), \"\")\n",
"        except ValueError:\n",
"            return (1, 0, class_id)\n",
"\n",
"    class_count = defaultdict(int)\n",
"\n",
"    for root, _, files in os.walk(path):\n",
"        for file in files:\n",
"            if not file.endswith(\".txt\"):\n",
"                continue\n",
"\n",
"            file_path = os.path.join(root, file)\n",
"\n",
"            # Explicit encoding keeps results independent of the system locale;\n",
"            # undecodable bytes are replaced rather than aborting the scan.\n",
"            with open(file_path, \"r\", encoding=\"utf-8\", errors=\"replace\") as f:\n",
"                # Stream the file instead of loading every line into memory.\n",
"                for line in f:\n",
"                    tokens = line.split()\n",
"                    if tokens:  # skip blank / whitespace-only lines\n",
"                        class_count[tokens[0]] += 1\n",
"\n",
"    # Sort by class id\n",
"    return dict(sorted(class_count.items(), key=sort_key))\n",
"\n",
"\n",
"# ---- Run ----\n",
"# Count label lines per class for every configured dataset split.\n",
"results = {name: count_classes(path) for name, path in DATASETS.items()}\n",
"\n",
"print(\"\\n================= CLASS STATISTICS =================\\n\")\n",
"\n",
"# ---- Detailed output ----\n",
"for dataset_name, counts in results.items():\n",
"    print(f\"📁 {dataset_name} Dataset\")\n",
"    for cls_id, count in counts.items():\n",
"        cls_name = CLASS_MAP.get(cls_id, \"UNKNOWN\")\n",
"        print(f\" [{cls_id}] {cls_name:<10} 👉 {count} 개\")\n",
"    print(\"\\n\")\n",
"\n",
"\n",
"# ---- Summary totals ----\n",
"print(\"=============== TOTAL SUMMARY ===============\\n\")\n",
"total = defaultdict(int)\n",
"\n",
"for r in results.values():\n",
"    for cls_id, cnt in r.items():\n",
"        total[cls_id] += cnt\n",
"\n",
"\n",
"def _class_id_order(item):\n",
"    \"\"\"Sort key: integer class ids numerically, any other id afterwards.\"\"\"\n",
"    try:\n",
"        return (0, int(item[0]), \"\")\n",
"    except ValueError:\n",
"        return (1, 0, item[0])\n",
"\n",
"\n",
"for cls_id, cnt in sorted(total.items(), key=_class_id_order):\n",
"    # Use the same UNKNOWN fallback as the per-dataset listing so a class id\n",
"    # missing from CLASS_MAP is reported instead of raising KeyError.\n",
"    cls_name = CLASS_MAP.get(cls_id, \"UNKNOWN\")\n",
"    print(f\" [{cls_id}] {cls_name:<10} 👉 {cnt} total\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62ada642",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "1stagedetect",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}