diff --git a/.gitignore b/.gitignore
index 5d381cc..b6c8679 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,162 +1,106 @@
-# ---> Python
-# Byte-compiled / optimized / DLL files
+# -----------------------------
+# OS-generated files
+# -----------------------------
+.DS_Store
+Thumbs.db
+
+# -----------------------------
+# Python environment
+# -----------------------------
 __pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
+*.pyc
+*.pyo
+*.pyd
 *.so
 
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
+# Virtual environments
+env/
+venv/
+.mipenv/
 *.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
+.eggs/
 
-# Translations
-*.mo
-*.pot
+# Poetry / pipenv
+.cache/
+.venv/
+
+# -----------------------------
+# IDE / editor files
+# -----------------------------
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# -----------------------------
+# Data, models, checkpoints
+# -----------------------------
+# Model files
+*.pt
+*.pth
+*.ckpt
+*.onnx
+*.trt
+*.pb
+*.h5
+
+# Training outputs
+runs/
+logs/
+tensorboard/
+lightning_logs/
+checkpoint/
+checkpoints/
+
+# Datasets (ignored by default; drop these patterns to track them)
+data/
+dataset/
+datasets/
+
+# Outputs (images / videos / inference results)
+outputs/
+results/
+inference/
+*.jpg
+*.png
+*.jpeg
+*.bmp
+*.mp4
+*.avi
+
+# -----------------------------
+# PyTorch & HuggingFace caches (a leading "/" anchors the pattern to the repository root)
+# -----------------------------
+/root/.cache/torch/
+/cache/
+/.torch/
+huggingface/
+transformers/
+
+
+# -----------------------------
+# Jupyter
+# -----------------------------
+.ipynb_checkpoints/
+*.ipynb~
+
+# -----------------------------
+# Compiled / build artifacts
+# -----------------------------
+build/
+dist/
+*.bin
 
-# Django stuff:
+# -----------------------------
+# Temporary files
+# -----------------------------
+tmp/
+temp/
 *.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
 
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/#use-with-ide
-.pdm.toml
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
+# -----------------------------
+# Environment variable files
+# -----------------------------
 .env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.env.*
 
diff --git a/config.py b/config.py
new file mode 100755
index 0000000..4957d5d
--- /dev/null
+++ b/config.py
@@ -0,0 +1,15 @@
+BATCH_SIZE = 256  # presumably the training mini-batch size — TODO confirm against the training script
+SAVE_FREQ = 1  # presumably epochs between checkpoint saves — TODO confirm
+TEST_FREQ = 1  # presumably epochs between evaluation runs — TODO confirm
+TOTAL_EPOCH = 500  # presumably the total number of training epochs — TODO confirm
+
+RESUME = ''  # presumably a checkpoint path to resume from; empty meaning none — TODO confirm
+SAVE_DIR = './model'  # presumably the directory checkpoints are written to — TODO confirm
+MODEL_PRE = 'CASIA_B512_'  # presumably a filename prefix for saved models — TODO confirm
+
+
+CASIA_DATA_DIR = '/home/xiaocc/Documents/caffe_project/sphereface/train/data'  # NOTE(review): machine-specific absolute path
+LFW_DATA_DIR = '/home/xiaocc/Documents/caffe_project/sphereface/test/data'  # imported by lfw_eval.py; machine-specific absolute path
+
+GPU = 0  # presumably the CUDA device index — TODO confirm
+
diff --git a/core/model.py b/core/model.py
new file mode 100755
index 0000000..4c69560
--- /dev/null
+++ b/core/model.py
@@ -0,0 +1,195 @@
+from torch import nn
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+import math
+from torch.nn import Parameter
+
+class Bottleneck(nn.Module):
+    """Inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection."""
+
+    def __init__(self, inp, oup, stride, expansion):
+        super(Bottleneck, self).__init__()
+        hidden = inp * expansion  # channel width inside the block
+        # The identity shortcut is only used when output and input shapes match.
+        self.connect = stride == 1 and inp == oup
+        self.conv = nn.Sequential(
+            # pointwise expansion
+            nn.Conv2d(inp, hidden, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # depthwise 3x3 (one group per channel)
+            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # pointwise projection, no activation
+            nn.Conv2d(hidden, oup, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(oup),
+        )
+
+    def forward(self, x):
+        out = self.conv(x)
+        return x + out if self.connect else out
+
+# class ConvBlock(nn.Module): # PReLU version
+#     def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+#         super(ConvBlock, self).__init__()
+#         self.linear = linear
+#         if dw:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, groups=inp, bias=False)
+#         else:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, bias=False)
+#         self.bn = nn.BatchNorm2d(oup)
+#         if not linear:
+#             self.prelu = nn.PReLU(oup)
+#     def forward(self, x):
+#         x = self.conv(x)
+#         x = self.bn(x)
+#         if self.linear:
+#             return x
+#         else:
+#             return self.prelu(x)
+
+class ConvBlock(nn.Module):
+    """Conv2d -> BatchNorm2d, followed by ReLU unless ``linear`` is set.
+
+    ``dw=True`` makes the convolution depthwise (groups equal to ``inp``).
+    """
+
+    def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+        super(ConvBlock, self).__init__()
+        self.linear = linear
+        groups = inp if dw else 1
+        self.conv = nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False)
+        self.bn = nn.BatchNorm2d(oup)
+        if not linear:
+            self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        out = self.bn(self.conv(x))
+        # Linear blocks skip the activation (they have no self.relu).
+        return out if self.linear else self.relu(out)
+        
+class ConvBlockAvgPool(nn.Module):  # NOTE(review): the original author was unsure this design is right
+    """Average-pool with a ``kernel``-sized window, then BatchNorm2d over ``channels`` maps."""
+
+    def __init__(self, kernel, channels=512):
+        super().__init__()
+        self.pool = nn.AvgPool2d(kernel)
+        self.bn = nn.BatchNorm2d(channels)  # previously hard-coded to 512
+
+    def forward(self, x):
+        return self.bn(self.pool(x))
+
+
+Mobilefacenet_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [2, 64, 5, 2],
+    [4, 128, 1, 2],
+    [2, 128, 6, 1],
+    [4, 128, 1, 2],
+    [2, 128, 2, 1]
+]
+
+Mobilenetv2_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [1, 16, 1, 1],
+    [6, 24, 2, 2],
+    [6, 32, 3, 2],
+    [6, 64, 4, 2],
+    [6, 96, 3, 1],
+    [6, 160, 3, 2],
+    [6, 320, 1, 1],
+]
+
+class MobileFacenet(nn.Module):
+    """MobileFaceNet backbone; for 128x128 inputs it yields a 128-dimensional embedding.
+
+    The head average-pools with a fixed 8x8 window, which matches the feature
+    map of a 128x128 input (total stride 16 with the default setting).
+    """
+
+    def __init__(self, bottleneck_setting=Mobilefacenet_bottleneck_setting):
+        super(MobileFacenet, self).__init__()
+
+        self.conv1 = ConvBlock(3, 64, 3, 2, 1)
+        self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)
+
+        self.inplanes = 64
+        self.blocks = self._make_layer(Bottleneck, bottleneck_setting)
+
+        # Use the channel count the stack actually ends with (128 by default)
+        # so a custom bottleneck_setting does not break the head.
+        self.conv2 = ConvBlock(self.inplanes, 512, 1, 1, 0)
+
+        # Earlier heads were depthwise (7, 6) / (8, 8) convolutions; now pooling + BN.
+        self.linear7 = ConvBlockAvgPool(kernel=8)
+        self.linear1 = ConvBlock(512, 128, 1, 1, 0, linear=True)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                # Normal init with std sqrt(2 / (kernel area * output channels)).
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, setting):
+        """Stack ``block`` per (t, c, n, s) row; only the first of the n repeats uses stride s."""
+        layers = []
+        for t, c, n, s in setting:
+            for i in range(n):
+                layers.append(block(self.inplanes, c, s if i == 0 else 1, t))
+                self.inplanes = c
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.blocks(self.dw_conv1(self.conv1(x)))
+        x = self.linear7(self.conv2(x))
+        x = self.linear1(x)  # shape here is [batch, 128, 1, 1]
+        return x.view(x.size(0), -1)  # flatten to [batch, 128]
+
+
+class ArcMarginProduct(nn.Module):
+    """Additive angular margin head: target-class logit is s*cos(theta+m), others s*cos(theta)."""
+
+    def __init__(self, in_features=128, out_features=200, s=32.0, m=0.50, easy_margin=False):
+        super(ArcMarginProduct, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.s = s
+        self.m = m
+        self.weight = Parameter(torch.Tensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+        self.easy_margin = easy_margin
+        self.cos_m = math.cos(m)
+        self.sin_m = math.sin(m)
+        # make the function cos(theta+m) monotonic decreasing while theta in [0°,180°]
+        self.th = math.cos(math.pi - m)
+        self.mm = math.sin(math.pi - m) * m
+
+    def forward(self, x, label):
+        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
+        # Clamp before sqrt: rounding can push cosine**2 above 1 and produce NaN.
+        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
+        phi = cosine * self.cos_m - sine * self.sin_m
+        if self.easy_margin:
+            phi = torch.where(cosine > 0, phi, cosine)
+        else:
+            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
+        # Build the mask on the logits' own device instead of hard-coding 'cuda'.
+        one_hot = torch.zeros_like(cosine)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
+        return output * self.s
+
+
+if __name__ == "__main__":
+    # Smoke test: push a random batch through the network and print the output shape.
+    # randn replaces the uninitialised FloatTensor; resolution was changed from 112x96 to 128x128.
+    dummy = torch.randn(2, 3, 128, 128)
+    net = MobileFacenet()
+    print(net)
+    print(net(dummy).shape)
diff --git a/core/model2.py b/core/model2.py
new file mode 100755
index 0000000..109ccfa
--- /dev/null
+++ b/core/model2.py
@@ -0,0 +1,187 @@
+from torch import nn
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+import math
+from torch.nn import Parameter
+
+class Bottleneck(nn.Module):
+    """Inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection."""
+
+    def __init__(self, inp, oup, stride, expansion):
+        super(Bottleneck, self).__init__()
+        hidden = inp * expansion  # channel width inside the block
+        # The identity shortcut is only used when output and input shapes match.
+        self.connect = stride == 1 and inp == oup
+        self.conv = nn.Sequential(
+            # pointwise expansion
+            nn.Conv2d(inp, hidden, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # depthwise 3x3 (one group per channel)
+            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # pointwise projection, no activation
+            nn.Conv2d(hidden, oup, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(oup),
+        )
+
+    def forward(self, x):
+        out = self.conv(x)
+        return x + out if self.connect else out
+
+# class ConvBlock(nn.Module): # PReLU version
+#     def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+#         super(ConvBlock, self).__init__()
+#         self.linear = linear
+#         if dw:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, groups=inp, bias=False)
+#         else:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, bias=False)
+#         self.bn = nn.BatchNorm2d(oup)
+#         if not linear:
+#             self.prelu = nn.PReLU(oup)
+#     def forward(self, x):
+#         x = self.conv(x)
+#         x = self.bn(x)
+#         if self.linear:
+#             return x
+#         else:
+#             return self.prelu(x)
+
+class ConvBlock(nn.Module):
+    """Conv2d -> BatchNorm2d, followed by ReLU unless ``linear`` is set.
+
+    ``dw=True`` makes the convolution depthwise (groups equal to ``inp``).
+    """
+
+    def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+        super(ConvBlock, self).__init__()
+        self.linear = linear
+        groups = inp if dw else 1
+        self.conv = nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False)
+        self.bn = nn.BatchNorm2d(oup)
+        if not linear:
+            self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        out = self.bn(self.conv(x))
+        # Linear blocks skip the activation (they have no self.relu).
+        return out if self.linear else self.relu(out)
+
+Mobilefacenet_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [2, 64, 5, 2],
+    [4, 128, 1, 2],
+    [2, 128, 6, 1],
+    [4, 128, 1, 2],
+    [2, 128, 2, 1]
+]
+
+Mobilenetv2_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [1, 16, 1, 1],
+    [6, 24, 2, 2],
+    [6, 32, 3, 2],
+    [6, 64, 4, 2],
+    [6, 96, 3, 1],
+    [6, 160, 3, 2],
+    [6, 320, 1, 1],
+]
+
+class MobileFacenet(nn.Module):
+    """MobileFaceNet variant whose head is global average pooling plus a 1x1 convolution.
+
+    AdaptiveAvgPool2d(1) collapses the spatial dimensions, so the head does not
+    depend on a fixed input resolution.
+    """
+
+    def __init__(self, bottleneck_setting=Mobilefacenet_bottleneck_setting):
+        super(MobileFacenet, self).__init__()
+
+        self.conv1 = ConvBlock(3, 64, 3, 2, 1)
+        self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)
+
+        self.inplanes = 64
+        self.blocks = self._make_layer(Bottleneck, bottleneck_setting)
+
+        # Use the channel count the stack actually ends with (128 by default)
+        # so a custom bottleneck_setting does not break the head.
+        self.conv2 = ConvBlock(self.inplanes, 512, 1, 1, 0)
+
+        # Replaces the earlier fixed-size depthwise "linear7" convolution.
+        self.pool = nn.AdaptiveAvgPool2d(1)
+        self.pw_conv = nn.Conv2d(512, 512, 1, 1, 0, bias=False)
+        self.bn7 = nn.BatchNorm2d(512)
+        self.linear1 = ConvBlock(512, 128, 1, 1, 0, linear=True)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                # Normal init with std sqrt(2 / (kernel area * output channels)).
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, setting):
+        """Stack ``block`` per (t, c, n, s) row; only the first of the n repeats uses stride s."""
+        layers = []
+        for t, c, n, s in setting:
+            for i in range(n):
+                layers.append(block(self.inplanes, c, s if i == 0 else 1, t))
+                self.inplanes = c
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        # stem -> bottleneck stack -> 1x1 expand -> pooled head -> 128-channel projection
+        x = self.blocks(self.dw_conv1(self.conv1(x)))
+        x = self.pool(self.conv2(x))
+        x = self.bn7(self.pw_conv(x))
+        x = self.linear1(x)  # shape here is [batch, 128, 1, 1]
+        return x.view(x.size(0), -1)  # flatten to [batch, 128]
+
+
+class ArcMarginProduct(nn.Module):
+    """Additive angular margin head: target-class logit is s*cos(theta+m), others s*cos(theta)."""
+
+    def __init__(self, in_features=128, out_features=200, s=32.0, m=0.50, easy_margin=False):
+        super(ArcMarginProduct, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.s = s
+        self.m = m
+        self.weight = Parameter(torch.Tensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+        self.easy_margin = easy_margin
+        self.cos_m = math.cos(m)
+        self.sin_m = math.sin(m)
+        # make the function cos(theta+m) monotonic decreasing while theta in [0°,180°]
+        self.th = math.cos(math.pi - m)
+        self.mm = math.sin(math.pi - m) * m
+
+    def forward(self, x, label):
+        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
+        # Clamp before sqrt: rounding can push cosine**2 above 1 and produce NaN.
+        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
+        phi = cosine * self.cos_m - sine * self.sin_m
+        if self.easy_margin:
+            phi = torch.where(cosine > 0, phi, cosine)
+        else:
+            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
+        # Build the mask on the logits' own device instead of hard-coding 'cuda'.
+        one_hot = torch.zeros_like(cosine)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
+        return output * self.s
+
+
+if __name__ == "__main__":
+    # Smoke test: push a random batch through the network and print the output shape.
+    # randn replaces the uninitialised FloatTensor; resolution was changed from 112x96 to 128x128.
+    dummy = torch.randn(2, 3, 128, 128)
+    net = MobileFacenet()
+    print(net)
+    print(net(dummy).shape)
diff --git a/core/model_bak.py b/core/model_bak.py
new file mode 100755
index 0000000..1772ffe
--- /dev/null
+++ b/core/model_bak.py
@@ -0,0 +1,182 @@
+from torch import nn
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+import math
+from torch.nn import Parameter
+
+class Bottleneck(nn.Module):
+    """Inverted-residual block: 1x1 expand -> 3x3 depthwise -> 1x1 linear projection."""
+
+    def __init__(self, inp, oup, stride, expansion):
+        super(Bottleneck, self).__init__()
+        hidden = inp * expansion  # channel width inside the block
+        # The identity shortcut is only used when output and input shapes match.
+        self.connect = stride == 1 and inp == oup
+        self.conv = nn.Sequential(
+            # pointwise expansion
+            nn.Conv2d(inp, hidden, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # depthwise 3x3 (one group per channel)
+            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
+            nn.BatchNorm2d(hidden),
+            nn.ReLU(inplace=True),
+            # pointwise projection, no activation
+            nn.Conv2d(hidden, oup, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(oup),
+        )
+
+    def forward(self, x):
+        out = self.conv(x)
+        return x + out if self.connect else out
+
+# class ConvBlock(nn.Module): # PReLU version
+#     def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+#         super(ConvBlock, self).__init__()
+#         self.linear = linear
+#         if dw:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, groups=inp, bias=False)
+#         else:
+#             self.conv = nn.Conv2d(inp, oup, k, s, p, bias=False)
+#         self.bn = nn.BatchNorm2d(oup)
+#         if not linear:
+#             self.prelu = nn.PReLU(oup)
+#     def forward(self, x):
+#         x = self.conv(x)
+#         x = self.bn(x)
+#         if self.linear:
+#             return x
+#         else:
+#             return self.prelu(x)
+
+class ConvBlock(nn.Module):
+    """Conv2d -> BatchNorm2d, followed by ReLU unless ``linear`` is set.
+
+    ``dw=True`` makes the convolution depthwise (groups equal to ``inp``).
+    """
+
+    def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
+        super(ConvBlock, self).__init__()
+        self.linear = linear
+        groups = inp if dw else 1
+        self.conv = nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False)
+        self.bn = nn.BatchNorm2d(oup)
+        if not linear:
+            self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        out = self.bn(self.conv(x))
+        # Linear blocks skip the activation (they have no self.relu).
+        return out if self.linear else self.relu(out)
+
+Mobilefacenet_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [2, 64, 5, 2],
+    [4, 128, 1, 2],
+    [2, 128, 6, 1],
+    [4, 128, 1, 2],
+    [2, 128, 2, 1]
+]
+
+Mobilenetv2_bottleneck_setting = [
+    # t (expansion factor), c (output channels), n (repeat count), s (stride of the first repeat)
+    [1, 16, 1, 1],
+    [6, 24, 2, 2],
+    [6, 32, 3, 2],
+    [6, 64, 4, 2],
+    [6, 96, 3, 1],
+    [6, 160, 3, 2],
+    [6, 320, 1, 1],
+]
+
+class MobileFacenet(nn.Module):
+    """MobileFaceNet whose head is a depthwise 8x8 "linear7" convolution.
+
+    The 8x8 kernel matches the feature map of a 128x128 input (total stride 16
+    with the default setting).
+    """
+
+    def __init__(self, bottleneck_setting=Mobilefacenet_bottleneck_setting):
+        super(MobileFacenet, self).__init__()
+
+        self.conv1 = ConvBlock(3, 64, 3, 2, 1)
+        self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)
+
+        self.inplanes = 64
+        self.blocks = self._make_layer(Bottleneck, bottleneck_setting)
+
+        # Follow the stack's real output width (128 by default) instead of hard-coding it.
+        self.conv2 = ConvBlock(self.inplanes, 512, 1, 1, 0)
+
+        # The kernel grew from (7, 6) to 8 when the input was enlarged to 128x128.
+        self.linear7 = ConvBlock(512, 512, 8, 1, 0, dw=True, linear=True)
+        self.linear1 = ConvBlock(512, 128, 1, 1, 0, linear=True)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                # Normal init with std sqrt(2 / (kernel area * output channels)).
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, setting):
+        """Stack ``block`` per (t, c, n, s) row; only the first of the n repeats uses stride s."""
+        layers = []
+        for t, c, n, s in setting:
+            for i in range(n):
+                layers.append(block(self.inplanes, c, s if i == 0 else 1, t))
+                self.inplanes = c
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.blocks(self.dw_conv1(self.conv1(x)))
+        x = self.linear7(self.conv2(x))
+        x = self.linear1(x)  # shape here is [batch, 128, 1, 1] for a 128x128 input
+        return x.view(x.size(0), -1)  # flatten to [batch, 128]
+
+
+class ArcMarginProduct(nn.Module):
+    """Additive angular margin head: target-class logit is s*cos(theta+m), others s*cos(theta)."""
+
+    def __init__(self, in_features=128, out_features=200, s=32.0, m=0.50, easy_margin=False):
+        super(ArcMarginProduct, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.s = s
+        self.m = m
+        self.weight = Parameter(torch.Tensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+        self.easy_margin = easy_margin
+        self.cos_m = math.cos(m)
+        self.sin_m = math.sin(m)
+        # make the function cos(theta+m) monotonic decreasing while theta in [0°,180°]
+        self.th = math.cos(math.pi - m)
+        self.mm = math.sin(math.pi - m) * m
+
+    def forward(self, x, label):
+        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
+        # Clamp before sqrt: rounding can push cosine**2 above 1 and produce NaN.
+        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
+        phi = cosine * self.cos_m - sine * self.sin_m
+        if self.easy_margin:
+            phi = torch.where(cosine > 0, phi, cosine)
+        else:
+            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
+        # Build the mask on the logits' own device instead of hard-coding 'cuda'.
+        one_hot = torch.zeros_like(cosine)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
+        return output * self.s
+
+
+if __name__ == "__main__":
+    # Smoke test: push a random batch through the network and print the output shape.
+    # randn replaces the uninitialised FloatTensor; resolution was changed from 112x96 to 128x128.
+    dummy = torch.randn(2, 3, 128, 128)
+    net = MobileFacenet()
+    print(net)
+    print(net(dummy).shape)
diff --git a/core/utils.py b/core/utils.py
new file mode 100755
index 0000000..fc889eb
--- /dev/null
+++ b/core/utils.py
@@ -0,0 +1,19 @@
+from __future__ import print_function
+import os
+import logging
+
+
+def init_log(output_dir):
+    """Set up root logging (DEBUG to <output_dir>/log.log, INFO to console); return ``logging``."""
+    log_path = os.path.join(output_dir, 'log.log')
+    # filemode='w' truncates an existing log; basicConfig does nothing if root already has handlers.
+    logging.basicConfig(filename=log_path, filemode='w', level=logging.DEBUG,
+                        format='%(asctime)s %(message)s', datefmt='%Y%m%d-%H:%M:%S')
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(logging.INFO)
+    logging.getLogger('').addHandler(stream_handler)
+    return logging
+
+
+if __name__ == '__main__':
+    pass
diff --git a/dataloader/CASIA_Face_loader.py b/dataloader/CASIA_Face_loader.py
new file mode 100755
index 0000000..49c16de
--- /dev/null
+++ b/dataloader/CASIA_Face_loader.py
@@ -0,0 +1,50 @@
+import numpy as np
+import scipy.misc
+import os
+import torch
+
+class CASIA_Face(object):
+    """CASIA-WebFace 112x96 training set read from a '<relative path> <label>' list file."""
+
+    def __init__(self, root):
+        self.root = root
+        self.image_list = []
+        self.label_list = []
+        with open(os.path.join(root, 'CASIA-WebFace-112X96.txt')) as f:
+            for info in f.read().splitlines():
+                if not info.strip():
+                    continue  # tolerate blank lines instead of failing to unpack
+                # Split on the last space so image paths containing spaces still parse.
+                image_dir, label_name = info.rsplit(' ', 1)
+                self.image_list.append(os.path.join(root, 'CASIA-WebFace-112X96', image_dir))
+                self.label_list.append(int(label_name))
+        self.class_nums = len(np.unique(self.label_list))
+
+    def __getitem__(self, index):
+        """Return (float CHW tensor scaled by (v - 127.5) / 128, label) with a random horizontal flip."""
+        img_path = self.image_list[index]
+        target = self.label_list[index]
+        # NOTE(review): scipy.misc.imread was removed in SciPy 1.2 — this needs an older SciPy.
+        img = scipy.misc.imread(img_path)
+
+        if len(img.shape) == 2:
+            img = np.stack([img] * 3, 2)  # grayscale -> 3 identical channels
+        flip = np.random.choice(2)*2-1  # -1 mirrors horizontally, +1 keeps the image
+        img = img[:, ::flip, :]
+        img = (img - 127.5) / 128.0
+        img = img.transpose(2, 0, 1)
+        img = torch.from_numpy(img).float()
+        return img, target
+
+    def __len__(self):
+        return len(self.image_list)
+
+
+
+if __name__ == '__main__':
+    data_dir = '/home/brl/USER/fzc/dataset/CASIA'  # NOTE(review): machine-specific absolute path
+    dataset = CASIA_Face(root=data_dir)
+    trainloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, num_workers=8, drop_last=False)
+    print(len(dataset))
+    for data in trainloader:
+        print(data[0].shape)  # data[0] is the batched image tensor
diff --git a/dataloader/LFW_loader.py b/dataloader/LFW_loader.py
new file mode 100755
index 0000000..56bb2f4
--- /dev/null
+++ b/dataloader/LFW_loader.py
@@ -0,0 +1,33 @@
+import numpy as np
+import scipy.misc
+
+import torch
+class LFW(object):
+    """Pairs of LFW image paths; each item is [left, left-flipped, right, right-flipped]."""
+
+    def __init__(self, imgl, imgr):
+        self.imgl_list = imgl
+        self.imgr_list = imgr
+
+    def __getitem__(self, index):
+        pair = []
+        for path in (self.imgl_list[index], self.imgr_list[index]):
+            img = scipy.misc.imread(path)
+            if img.ndim == 2:
+                # replicate a grayscale image into three channels
+                img = np.stack([img] * 3, 2)
+            # keep the image and its horizontal mirror
+            pair.extend([img, img[:, ::-1, :]])
+        tensors = []
+        for arr in pair:
+            # apply (v - 127.5) / 128, then reorder HWC -> CHW
+            arr = ((arr - 127.5) / 128.0).transpose(2, 0, 1)
+            tensors.append(torch.from_numpy(arr).float())
+        return tensors
+
+    def __len__(self):
+        return len(self.imgl_list)
+
+
+if __name__ == '__main__':
+    pass
\ No newline at end of file
diff --git a/dataloader/MyHF_loader.py b/dataloader/MyHF_loader.py
new file mode 100644
index 0000000..f0dc087
--- /dev/null
+++ b/dataloader/MyHF_loader.py
@@ -0,0 +1,95 @@
+import torch
+from torch.utils.data import Dataset
+from PIL import Image
+import numpy as np
+from datasets import load_dataset
+
+# ----------------------------
+# Train Dataset: CASIA Web Face
+# ----------------------------
+class CASIA_HF(Dataset):  # training set backed by the "SaffalPoosh/casia_web_face" Hugging Face dataset
+    def __init__(self):
+        self.dataset = load_dataset("SaffalPoosh/casia_web_face", split="train")  # Hugging Face train split
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        item = self.dataset[idx]
+        img = np.array(item['image'])  # the 'image' column of the Hugging Face dataset
+        img = Image.fromarray(img).convert("RGB").resize((128,128))  # force 3 channels, 128x128
+        img = np.array(img)
+        img = (img - 127.5) / 128.0
+        img = img.transpose(2,0,1)  # HWC -> CHW
+        img = torch.from_numpy(img).float()
+        label = torch.tensor(int(item['label']))  # TODO: the 'label' column name still needs to be verified
+        return img, label
+
+
+# ----------------------------
+# Test Dataset: LFW Pairs
+# ----------------------------
+# class LFW_Pairs(Dataset):
+#     def __init__(self):
+#         self.dataset = load_dataset("logasja/lfw", "pairs", split="test")
+
+#     def __len__(self):
+#         return len(self.dataset)
+
+#     def __getitem__(self, idx):
+#         item = self.dataset[idx]
+#         imgl = np.array(item['image1'])
+#         imgr = np.array(item['image2'])
+
+#         imgl = Image.fromarray(imgl).convert("RGB").resize((128,128))
+#         imgr = Image.fromarray(imgr).convert("RGB").resize((128,128))
+
+#         imglist = [imgl, imgl[:, ::-1, :], imgr, imgr[:, ::-1, :]]  # original + flip
+#         for i in range(len(imglist)):
+#             imglist[i] = (imglist[i] - 127.5) / 128.0
+#             imglist[i] = imglist[i].transpose(2,0,1)
+#         imgs = [torch.from_numpy(i).float() for i in imglist]
+
+#         label = torch.tensor(item['label'])
+#         return imgs, label
+class LFW_Pairs(Dataset):  # LFW verification pairs from the "logasja/lfw" Hugging Face dataset
+    def __init__(self):
+        self.dataset = load_dataset("logasja/lfw", "pairs", split="test")
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, idx):
+        item = self.dataset[idx]
+        # print(idx,item) # to be removed
+        # print(type(item)) # to be removed
+        
+        # imgl = np.array(item['img_0'])
+        # imgr = np.array(item['img_1'])
+        
+        # Fetch the PIL images
+        imgl = item['img_0']
+        imgr = item['img_1']
+
+        imgl = imgl.resize((128,128)).convert("RGB")
+        imgr = imgr.resize((128,128)).convert("RGB")
+        # print('imgl shape:', imgl.shape, 'type:', type(imgl))
+        # print('imgr shape:', imgr.shape, 'type:', type(imgr))
+        
+        # Convert to numpy arrays
+        imgl = np.array(imgl)
+        imgr = np.array(imgr)
+        # print('after numpy conversion, imgl shape:', imgl.shape, 'type:', type(imgl))
+        # print('after numpy conversion, imgr shape:', imgr.shape, 'type:', type(imgr))
+        
+        
+        # imglist = [imgl, imgl[:, ::-1, :], imgr, imgr[:, ::-1, :]]  # original + flip
+         # Build the image list (original + horizontal flip of each side)
+        imglist = [imgl, imgl[:, ::-1, :], imgr, imgr[:, ::-1, :]]
+        for i in range(len(imglist)):
+            imglist[i] = (imglist[i] - 127.5) / 128.0
+            imglist[i] = imglist[i].transpose(2, 0, 1)
+        imgs = [torch.from_numpy(i).float() for i in imglist]
+
+        label = torch.tensor(item['pair'])
+        return imgs, label
\ No newline at end of file
diff --git a/lfw_eval.py b/lfw_eval.py
new file mode 100755
index 0000000..044fca0
--- /dev/null
+++ b/lfw_eval.py
@@ -0,0 +1,187 @@
+import sys
+# import caffe
+import os
+import numpy as np
+import cv2
+import scipy.io
+import copy
+import core.model
+import os
+import torch.utils.data
+from core import model
+from dataloader.LFW_loader import LFW
+from config import LFW_DATA_DIR
+import argparse
+
+def parseList(root):
+    """Parse ``<root>/pairs.txt`` into image paths, fold indices and pair labels.
+
+    Returns ``[nameLs, nameRs, folds, flags]``. A 3-field line (one name, two image
+    numbers) gets flag 1, a 4-field line gets flag -1; fold is ``line index // 600``.
+    Raises ValueError for a line that has neither 3 nor 4 tab-separated fields.
+    """
+    with open(os.path.join(root, 'pairs.txt')) as f:
+        pairs = f.read().splitlines()[1:]  # the first line of the file is skipped
+    img_root = os.path.join(root, 'lfw-112X96')
+    def img_path(name, num):
+        return os.path.join(img_root, name, name + '_' + '{:04}.jpg'.format(int(num)))
+    nameLs, nameRs, folds, flags = [], [], [], []
+    for i, p in enumerate(pairs):
+        p = p.split('\t')
+        if len(p) == 3:
+            nameL, nameR, flag = img_path(p[0], p[1]), img_path(p[0], p[2]), 1
+        elif len(p) == 4:
+            nameL, nameR, flag = img_path(p[0], p[1]), img_path(p[2], p[3]), -1
+        else:  # previously fell through and re-used the previous line's values
+            raise ValueError('malformed pairs.txt line {}: {!r}'.format(i + 2, p))
+        nameLs.append(nameL)
+        nameRs.append(nameR)
+        folds.append(i // 600)
+        flags.append(flag)
+    return [nameLs, nameRs, folds, flags]
+
+
+
+def getAccuracy(scores, flags, threshold):
+    """Fraction of pairs classified correctly: flag 1 needs score > threshold, flag -1 needs score < threshold."""
+    correct = np.sum(scores[flags == 1] > threshold) + np.sum(scores[flags == -1] < threshold)
+    return 1.0 * correct / len(scores)
+
+
+def getThreshold(scores, flags, thrNum):
+    """Pick the decision threshold that maximises getAccuracy on the given pairs.
+
+    Candidates are the 2 * thrNum + 1 evenly spaced values from -1 to 1; when
+    several candidates tie for the best accuracy their mean is returned.
+    """
+    candidates = np.arange(-thrNum, thrNum + 1) * 1.0 / thrNum
+    accs = np.array([getAccuracy(scores, flags, t) for t in candidates])
+    return np.mean(candidates[accs == np.max(accs)])
+
+
+def evaluation_10_fold(root='./result/pytorch_result.mat'):
+    """Return the 10 per-fold accuracies for the features saved in the .mat file ``root``.
+
+    Fold i is scored with getAccuracy; the remaining folds supply the feature
+    mean that is subtracted and the threshold chosen by getThreshold.
+    """
+    ACCs = np.zeros(10)
+    result = scipy.io.loadmat(root)
+    for i in range(10):
+        fold = result['fold']
+        flags = result['flag']
+        featureLs = result['fl']
+        featureRs = result['fr']
+        valFold = fold != i
+        testFold = fold == i
+        flags = np.squeeze(flags)
+        # Centre on the mean of the non-test folds, then L2-normalise every row.
+        mu = np.mean(np.concatenate((featureLs[valFold[0], :], featureRs[valFold[0], :]), 0), 0)
+        mu = np.expand_dims(mu, 0)
+        featureLs = featureLs - mu
+        featureRs = featureRs - mu
+        featureLs = featureLs / np.expand_dims(np.sqrt(np.sum(np.power(featureLs, 2), 1)), 1)
+        featureRs = featureRs / np.expand_dims(np.sqrt(np.sum(np.power(featureRs, 2), 1)), 1)
+        scores = np.sum(np.multiply(featureLs, featureRs), 1)
+        threshold = getThreshold(scores[valFold[0]], flags[valFold[0]], 10000)
+        ACCs[i] = getAccuracy(scores[testFold[0]], flags[testFold[0]], threshold)
+    return ACCs
+
+
+
+def getFeatureFromTorch(lfw_dir, feature_save_dir, resume=None, gpu=True):
+    """Extract MobileFacenet features for every LFW pair and save them as a .mat file.
+
+    Args:
+        lfw_dir: directory handed to parseList (must contain pairs.txt).
+        feature_save_dir: output path passed to scipy.io.savemat.
+        resume: optional checkpoint path; its 'net_state_dict' entry is loaded.
+        gpu: run the network and the batches on CUDA when True.
+
+    The saved dict has keys 'fl' (network outputs for batch tensors 0 and 1,
+    concatenated), 'fr' (same for tensors 2 and 3), 'fold' and 'flag'.
+    """
+    net = model.MobileFacenet()
+    if gpu:
+        net = net.cuda()
+    if resume:
+        # map_location lets a checkpoint saved on GPU be loaded when gpu=False.
+        ckpt = torch.load(resume, map_location=None if gpu else 'cpu')
+        net.load_state_dict(ckpt['net_state_dict'])
+    net.eval()
+    nl, nr, flods, flags = parseList(lfw_dir)
+    lfw_loader = torch.utils.data.DataLoader(LFW(nl, nr), batch_size=32,
+                                             shuffle=False, num_workers=8, drop_last=False)
+    featureLs, featureRs = [], []
+    count = 0
+    with torch.no_grad():  # inference only: do not record autograd graphs
+        for data in lfw_loader:
+            if gpu:
+                data = [d.cuda() for d in data]
+            count += data[0].size(0)
+            print('extracing deep features from the face pair {}...'.format(count))
+            res = [net(d).cpu().numpy() for d in data]
+            featureLs.append(np.concatenate((res[0], res[1]), 1))
+            featureRs.append(np.concatenate((res[2], res[3]), 1))
+
+    # One concatenation at the end instead of re-copying the arrays every batch.
+    result = {'fl': np.concatenate(featureLs, 0), 'fr': np.concatenate(featureRs, 0),
+              'fold': flods, 'flag': flags}
+    scipy.io.savemat(feature_save_dir, result)
+
+
+
+
+# def getFeatureFromCaffe(gpu=True):
+#     if gpu:
+#         caffe.set_mode_gpu()
+#         caffe.set_device(0)
+#     else:
+#         caffe.set_mode_cpu()
+#     # caffe.reset_all()
+#     model = '/home/xiaocc/Documents/caffe_project/sphereface/train/code/sphereface_deploy.prototxt'
+#     weights = '/home/xiaocc/Documents/caffe_project/sphereface/train/result/sphereface_model.caffemodel'
+#     net = caffe.Net(model, weights, caffe.TEST)
+#
+#     nl, nr, flods, flags = parseList()
+#
+#     featureLs = []
+#     featureRs = []
+#     for i in range(len(nl)):
+#         print('extracing deep features from the {}th face pair ...'.format(i))
+#         featureL = extractDeepFeature(nl[i], net)[0]
+#         featureR = extractDeepFeature(nr[i], net)[0]
+#         featureLs.append(featureL)
+#         featureRs.append(featureR)
+#     result = {'fl': featureLs, 'fr': featureRs, 'fold': flods, 'flag': flags}
+#     scipy.io.savemat('caffe_result.mat', result)
+#
+# def extractDeepFeature(f, net, h=112, w=96):
+#     img = cv2.imread(f)
+#     img = (img - 127.5) / 128
+#     img = img.transpose((2, 0, 1))
+#     net.blobs['data'].reshape(1, 3, h, w)
+#     net.blobs['data'].data[0, ...] = img
+#     res = copy.deepcopy(net.forward()['fc5'])
+#     net.blobs['data'].data[0, ...] = img[:, :, ::-1]
+#     res_ = copy.deepcopy(net.forward()['fc5'])
+#     r = np.concatenate((res, res_), 1)
+#     return r
+
+if __name__ == '__main__':
+    # Command-line entry point: extract features, then report 10-fold accuracy.
+    parser = argparse.ArgumentParser(description='Testing')
+    parser.add_argument('--lfw_dir', type=str, default=LFW_DATA_DIR, help='The path of lfw data')
+    parser.add_argument('--resume', type=str, default='./model/best/068.ckpt',
+                        help='The path pf save model')
+    parser.add_argument('--feature_save_dir', type=str, default='./result/best_result.mat',
+                        help='The path of the extract features save, must be .mat file')
+    args = parser.parse_args()
+
+    # Features are written to args.feature_save_dir and read back for scoring.
+    getFeatureFromTorch(args.lfw_dir, args.feature_save_dir, args.resume)
+    ACCs = evaluation_10_fold(args.feature_save_dir)
+    for fold_no, acc in enumerate(ACCs, start=1):
+        print('{}    {:.2f}'.format(fold_no, acc * 100))
+    print('--------')
+    print('AVE    {:.2f}'.format(np.mean(ACCs) * 100))
diff --git a/result/best_result.mat b/result/best_result.mat
new file mode 100644
index 0000000..04537cd
Binary files /dev/null and b/result/best_result.mat differ
diff --git a/result/tmp_result.mat b/result/tmp_result.mat
new file mode 100644
index 0000000..ba751be
Binary files /dev/null and b/result/tmp_result.mat differ
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..d519738
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,179 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2c786740",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cuuva/anaconda3/envs/mfn/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "Generating train split: 100%|██████████| 1000/1000 [00:00<00:00, 56341.73 examples/s]\n",
+      "Generating test split: 100%|██████████| 2200/2200 [00:00<00:00, 96950.62 examples/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "ds = load_dataset(\"logasja/lfw\", \"pairs\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cf343f72",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Generating train split: 100%|██████████| 13233/13233 [00:00<00:00, 29211.85 examples/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "ds = load_dataset(\"logasja/lfw\", \"default\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "14ee413a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saving train images...\n"
+     ]
+    },
+    {
+     "ename": "KeyError",
+     "evalue": "'image1'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[5], line 29\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSaving train images...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     28\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, item \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(train_data):\n\u001b[0;32m---> 29\u001b[0m     img1 \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mfromarray(\u001b[43mitem\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage1\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m)\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     30\u001b[0m     img2 \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mfromarray(item[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mimage2\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     31\u001b[0m     label \u001b[38;5;241m=\u001b[39m item[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlabel\u001b[39m\u001b[38;5;124m'\u001b[39m]  \u001b[38;5;66;03m# 0: same, 1: different\u001b[39;00m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'image1'"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "import os\n",
+    "from PIL import Image\n",
+    "import numpy as np\n",
+    "\n",
+    "# ----------------------------\n",
+    "# 경로 설정\n",
+    "# ----------------------------\n",
+    "LOCAL_DATA_DIR = \"/home/cuuva/lfw_images\"  # 저장할 최상위 폴더\n",
+    "TRAIN_DIR = os.path.join(LOCAL_DATA_DIR, \"train\")\n",
+    "TEST_DIR = os.path.join(LOCAL_DATA_DIR, \"test\")\n",
+    "\n",
+    "os.makedirs(TRAIN_DIR, exist_ok=True)\n",
+    "os.makedirs(TEST_DIR, exist_ok=True)\n",
+    "\n",
+    "# ----------------------------\n",
+    "# Hugging Face LFW 불러오기\n",
+    "# ----------------------------\n",
+    "dataset = load_dataset(\"logasja/lfw\", \"pairs\")\n",
+    "\n",
+    "train_data = dataset[\"train\"]\n",
+    "test_data = dataset[\"test\"]\n",
+    "\n",
+    "# ----------------------------\n",
+    "# train 데이터 저장\n",
+    "# ----------------------------\n",
+    "print(\"Saving train images...\")\n",
+    "for i, item in enumerate(train_data):\n",
+    "    img1 = Image.fromarray(item['image1']).convert(\"RGB\")\n",
+    "    img2 = Image.fromarray(item['image2']).convert(\"RGB\")\n",
+    "    label = item['label']  # 0: same, 1: different\n",
+    "\n",
+    "    # 파일 이름 예: train_00001_1.jpg, train_00001_2.jpg\n",
+    "    img1.save(os.path.join(TRAIN_DIR, f\"train_{i}_1.jpg\"))\n",
+    "    img2.save(os.path.join(TRAIN_DIR, f\"train_{i}_2.jpg\"))\n",
+    "\n",
+    "print(f\"Train images saved: {len(train_data)*2}\")\n",
+    "\n",
+    "# ----------------------------\n",
+    "# test 데이터 저장\n",
+    "# ----------------------------\n",
+    "print(\"Saving test images...\")\n",
+    "for i, item in enumerate(test_data):\n",
+    "    img1 = Image.fromarray(item['image1']).convert(\"RGB\")\n",
+    "    img2 = Image.fromarray(item['image2']).convert(\"RGB\")\n",
+    "    label = item['label']\n",
+    "\n",
+    "    # 파일 이름 예: test_00001_1.jpg, test_00001_2.jpg\n",
+    "    img1.save(os.path.join(TEST_DIR, f\"test_{i}_1.jpg\"))\n",
+    "    img2.save(os.path.join(TEST_DIR, f\"test_{i}_2.jpg\"))\n",
+    "\n",
+    "print(f\"Test images saved: {len(test_data)*2}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b5830c3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['pair', 'img_0', 'img_1']\n",
+      "{'pair': 1, 'img_0': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=250x250 at 0x7C0069CF23A0>, 'img_1': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=250x250 at 0x7C0069CF27F0>}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "ds = load_dataset(\"logasja/lfw\", \"pairs\", split=\"test\")\n",
+    "print(ds.column_names)  # 현재 컬럼 이름 확인\n",
+    "print(ds[0])            # 첫 번째 데이터 샘플 확인\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e53a5b1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mfn",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/toonnx.py b/toonnx.py
new file mode 100644
index 0000000..4000e9b
--- /dev/null
+++ b/toonnx.py
@@ -0,0 +1,87 @@
+import torch
+import os
+from core import model  # 학습할 때 썼던 model 파일을 불러와야 합니다.
+
+# ----------------------------
+# 1. Settings (paths and input size)
+# ----------------------------
+# Checkpoint to convert.
+# NOTE(review): the output below is named best_104 but the checkpoint is best_063 - confirm.
+ckpt_path = '/home/cuuva/face_exp/MobileFaceNet_Pytorch/model/CASIA_B512_v2_20251126_173236/best_model/best_063.ckpt'
+onnx_path = 'best_104.onnx' # file name the ONNX model is written to
+
+# [Important] Must match the image resolution used during training.
+# Set to 128 because the training code was changed to 128x128.
+input_size = (1, 3, 128, 128)
+
+def convert():
+    """Load the checkpoint at ``ckpt_path`` into MobileFacenet and export it to ONNX.
+
+    Reads the module-level ``ckpt_path``, ``onnx_path`` and ``input_size``
+    settings and writes the exported model to ``onnx_path``.
+    """
+    print(f"Loading checkpoint from: {ckpt_path}")
+
+    # ----------------------------
+    # 2. Build the model
+    # ----------------------------
+    # Instantiate the same model class that the training code uses.
+    net = model.MobileFacenet()
+
+    # ----------------------------
+    # 3. Load the weights
+    # ----------------------------
+    # Load onto the CPU so the conversion also works without a GPU.
+    # NOTE(review): weights_only=False unpickles arbitrary objects; only use
+    # this with checkpoints from a trusted source.
+    checkpoint = torch.load(ckpt_path, map_location='cpu', weights_only=False)
+
+    # The checkpoint is either a dict holding 'net_state_dict' or the bare state_dict.
+    if 'net_state_dict' in checkpoint:
+        state_dict = checkpoint['net_state_dict']
+    else:
+        state_dict = checkpoint
+
+    # Weights saved from a DataParallel wrapper carry a leading 'module.' on
+    # every key. Strip it only when it is a prefix: a plain str.replace would
+    # also rewrite keys that merely contain the text, e.g. 'submodule.weight'.
+    prefix = "module."
+    new_state_dict = {
+        (k[len(prefix):] if k.startswith(prefix) else k): v
+        for k, v in state_dict.items()
+    }
+
+    # Copy the weights into the freshly built network.
+    net.load_state_dict(new_state_dict)
+
+    # ----------------------------
+    # 4. Switch to evaluation mode (required)
+    # ----------------------------
+    # Makes layers such as Dropout / BatchNorm run in inference mode.
+    net.eval()
+
+    # ----------------------------
+    # 5. ONNX export
+    # ----------------------------
+    print("Exporting to ONNX...")
+
+    # Dummy input used to trace the model.
+    dummy_input = torch.randn(*input_size)
+
+    torch.onnx.export(
+        net,                      # model to export
+        dummy_input,              # dummy input
+        onnx_path,                # output path
+        verbose=True,             # log the conversion
+        input_names=['input'],    # input node name (used again at inference time)
+        output_names=['output'],  # output node name
+        external_data=False,
+        # opset_version=11,       # optionally pin the ONNX opset
+        # Uncomment for a variable batch size (leave commented for a fixed one):
+        # dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
+    )
+
+    print(f"Success! Model saved to: {os.path.abspath(onnx_path)}")
+
+if __name__ == "__main__":
+    convert()
\ No newline at end of file
diff --git a/toonnx2.py b/toonnx2.py
new file mode 100644
index 0000000..57aa3ee
--- /dev/null
+++ b/toonnx2.py
@@ -0,0 +1,87 @@
+import torch
+import os
+from core import model2  # 학습할 때 썼던 model 파일을 불러와야 합니다.
+
+# ----------------------------
+# 1. Settings (paths and input size)
+# ----------------------------
+# Checkpoint to convert.
+ckpt_path = '/home/cuuva/face_exp/MobileFaceNet_Pytorch/model/MODEL_2_20251127_174006/best_model/best_004.ckpt'
+onnx_path = 'best_104.onnx' # NOTE: convert() assigns its own local onnx_path, so this value is not read there
+
+# [Important] Must match the image resolution used during training.
+# Set to 128 because the training code was changed to 128x128.
+input_size = (1, 3, 128, 128)
+
+def convert():
+    """Load the checkpoint at ``ckpt_path`` into model2.MobileFacenet and export it to ONNX.
+
+    The file is written to <model_root>/ONNX/<experiment folder>/<checkpoint name>.onnx,
+    where the experiment folder is the directory two levels above the checkpoint file.
+    """
+    print(f"Loading checkpoint from: {ckpt_path}")
+
+    # ----------------------------
+    # 2. Build the model
+    # ----------------------------
+    # Instantiate the same model class that the training code uses.
+    net = model2.MobileFacenet()
+
+    # ----------------------------
+    # 3. Load the weights
+    # ----------------------------
+    # Load onto the CPU so the conversion also works without a GPU.
+    # NOTE(review): weights_only=False unpickles arbitrary objects; trusted files only.
+    checkpoint = torch.load(ckpt_path, map_location='cpu', weights_only=False)
+
+    # The checkpoint is either a dict holding 'net_state_dict' or the bare state_dict.
+    if 'net_state_dict' in checkpoint:
+        state_dict = checkpoint['net_state_dict']
+    else:
+        state_dict = checkpoint
+
+    # Drop the 'module.' that DataParallel puts in front of every key - as a prefix
+    # only; str.replace would also rewrite keys such as 'submodule.weight'.
+    prefix = "module."
+    new_state_dict = {
+        (k[len(prefix):] if k.startswith(prefix) else k): v
+        for k, v in state_dict.items()
+    }
+    net.load_state_dict(new_state_dict)
+
+    # ----------------------------
+    # 4. Switch to evaluation mode (required)
+    # ----------------------------
+    net.eval()
+
+    # ----------------------------
+    # 5. Build the ONNX output path
+    # ----------------------------
+    # Name of the experiment folder (two levels above the checkpoint file).
+    experiment_folder_name = os.path.basename(os.path.dirname(os.path.dirname(ckpt_path)))
+    model_root = '/home/cuuva/face_exp/MobileFaceNet_Pytorch/model'
+    onnx_dir = os.path.join(model_root, 'ONNX', experiment_folder_name)
+    os.makedirs(onnx_dir, exist_ok=True)
+    # The ONNX file takes the checkpoint's base name with a .onnx extension.
+    onnx_name = os.path.splitext(os.path.basename(ckpt_path))[0] + '.onnx'
+    onnx_path = os.path.join(onnx_dir, onnx_name)
+
+    # ----------------------------
+    # 6. ONNX export
+    # ----------------------------
+    print("Exporting to ONNX...")
+    # Dummy input used to trace the model.
+    dummy_input = torch.randn(*input_size)
+
+    torch.onnx.export(
+        net, dummy_input, onnx_path,  # model, dummy input, output path
+        verbose=True,                 # log the conversion
+        input_names=['input'],        # input node name (used again at inference time)
+        output_names=['output'],      # output node name
+        external_data=False,
+    )
+
+    print(f"Success! Model saved to: {os.path.abspath(onnx_path)}")
+
+if __name__ == "__main__":
+    convert()
\ No newline at end of file
diff --git a/toonnx_bak.py b/toonnx_bak.py
new file mode 100644
index 0000000..18cd3df
--- /dev/null
+++ b/toonnx_bak.py
@@ -0,0 +1,87 @@
+import torch
+import os
+from core import model_bak  # 학습할 때 썼던 model 파일을 불러와야 합니다.
+
+# ----------------------------
+# 1. Settings (paths and input size)
+# ----------------------------
+# Checkpoint to convert.
+# NOTE(review): the output below is named best_104 but the checkpoint is best_001 - confirm.
+ckpt_path = '/home/cuuva/face_exp/MobileFaceNet_Pytorch/model/MODEL_BAK20251127_171730/best_model/best_001.ckpt'
+onnx_path = 'best_104.onnx' # file name the ONNX model is written to
+
+# [Important] Must match the image resolution used during training.
+# Set to 128 because the training code was changed to 128x128.
+input_size = (1, 3, 128, 128)
+
+def convert():
+    """Load the checkpoint at ``ckpt_path`` into model_bak.MobileFacenet and export it to ONNX.
+
+    Reads the module-level ``ckpt_path``, ``onnx_path`` and ``input_size``
+    settings and writes the exported model to ``onnx_path``.
+    """
+    print(f"Loading checkpoint from: {ckpt_path}")
+
+    # ----------------------------
+    # 2. Build the model
+    # ----------------------------
+    # Instantiate the same model class that the training code uses.
+    net = model_bak.MobileFacenet()
+
+    # ----------------------------
+    # 3. Load the weights
+    # ----------------------------
+    # Load onto the CPU so the conversion also works without a GPU.
+    # NOTE(review): weights_only=False unpickles arbitrary objects; only use
+    # this with checkpoints from a trusted source.
+    checkpoint = torch.load(ckpt_path, map_location='cpu', weights_only=False)
+
+    # The checkpoint is either a dict holding 'net_state_dict' or the bare state_dict.
+    if 'net_state_dict' in checkpoint:
+        state_dict = checkpoint['net_state_dict']
+    else:
+        state_dict = checkpoint
+
+    # Weights saved from a DataParallel wrapper carry a leading 'module.' on
+    # every key. Strip it only when it is a prefix: a plain str.replace would
+    # also rewrite keys that merely contain the text, e.g. 'submodule.weight'.
+    prefix = "module."
+    new_state_dict = {
+        (k[len(prefix):] if k.startswith(prefix) else k): v
+        for k, v in state_dict.items()
+    }
+
+    # Copy the weights into the freshly built network.
+    net.load_state_dict(new_state_dict)
+
+    # ----------------------------
+    # 4. Switch to evaluation mode (required)
+    # ----------------------------
+    # Makes layers such as Dropout / BatchNorm run in inference mode.
+    net.eval()
+
+    # ----------------------------
+    # 5. ONNX export
+    # ----------------------------
+    print("Exporting to ONNX...")
+
+    # Dummy input used to trace the model.
+    dummy_input = torch.randn(*input_size)
+
+    torch.onnx.export(
+        net,                      # model to export
+        dummy_input,              # dummy input
+        onnx_path,                # output path
+        verbose=True,             # log the conversion
+        input_names=['input'],    # input node name (used again at inference time)
+        output_names=['output'],  # output node name
+        external_data=False,
+        # opset_version=11,       # optionally pin the ONNX opset
+        # Uncomment for a variable batch size (leave commented for a fixed one):
+        # dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
+    )
+
+    print(f"Success! Model saved to: {os.path.abspath(onnx_path)}")
+
+if __name__ == "__main__":
+    convert()
\ No newline at end of file
diff --git a/train copy.py b/train copy.py
new file mode 100755
index 0000000..2ad2ae2
--- /dev/null
+++ b/train copy.py	
@@ -0,0 +1,166 @@
+import os
+import torch.utils.data
+from torch import nn
+from torch.nn import DataParallel
+from datetime import datetime
+from config import BATCH_SIZE, SAVE_FREQ, RESUME, SAVE_DIR, TEST_FREQ, TOTAL_EPOCH, MODEL_PRE, GPU
+from config import CASIA_DATA_DIR, LFW_DATA_DIR
+from core import model
+from core.utils import init_log
+from dataloader.CASIA_Face_loader import CASIA_Face
+from dataloader.LFW_loader import LFW
+from torch.optim import lr_scheduler
+import torch.optim as optim
+import time
+from lfw_eval import parseList, evaluation_10_fold
+import numpy as np
+import scipy.io
+
+# gpu init
+# GPU (from config) is either a single device id (int) or a collection of ids.
+# A collection switches on multi-GPU mode and is exported to CUDA as a
+# comma-separated list.
+multi_gpus = not isinstance(GPU, int)
+if multi_gpus:
+    gpu_list = ','.join(str(gpu_id) for gpu_id in GPU)
+else:
+    gpu_list = str(GPU)
+
+# Expose only the selected devices to CUDA.
+os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
+
+# Run setup: a timestamped directory under SAVE_DIR receives the checkpoints and is handed to init_log.
+start_epoch = 1
+save_dir = os.path.join(SAVE_DIR, MODEL_PRE + 'v2_' + datetime.now().strftime('%Y%m%d_%H%M%S'))
+if os.path.exists(save_dir):
+    raise NameError('model dir exists!')
+os.makedirs(save_dir)
+logging = init_log(save_dir)
+_print = logging.info
+
+
+# define trainloader and testloader
+trainset = CASIA_Face(root=CASIA_DATA_DIR)
+trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
+                                          shuffle=True, num_workers=8, drop_last=False)
+
+# nl: left_image_path
+# nr: right_image_path
+nl, nr, folds, flags = parseList(root=LFW_DATA_DIR)
+testdataset = LFW(nl, nr)
+testloader = torch.utils.data.DataLoader(testdataset, batch_size=32,
+                                         shuffle=False, num_workers=8, drop_last=False)
+
+# define model and the ArcMargin head whose output feeds the training loss
+net = model.MobileFacenet()
+ArcMargin = model.ArcMarginProduct(128, trainset.class_nums)
+# Resuming restores only the network weights and the epoch counter.
+if RESUME:
+    ckpt = torch.load(RESUME)
+    net.load_state_dict(ckpt['net_state_dict'])
+    start_epoch = ckpt['epoch'] + 1
+
+
+# define optimizers: separate weight decay for the backbone, linear1, ArcMargin and PReLU
+ignored_params = list(map(id, net.linear1.parameters()))
+ignored_params.append(id(ArcMargin.weight))  # the parameter itself, not the ids of its rows
+prelu_params_id = []
+prelu_params = []
+for m in net.modules():
+    if isinstance(m, nn.PReLU):
+        ignored_params += list(map(id, m.parameters()))
+        prelu_params += m.parameters()
+# Everything in net that was not singled out above uses the base weight decay.
+base_params = filter(lambda p: id(p) not in ignored_params, net.parameters())
+optimizer_ft = optim.SGD([
+    {'params': base_params, 'weight_decay': 4e-5},
+    {'params': net.linear1.parameters(), 'weight_decay': 4e-4},
+    {'params': ArcMargin.weight, 'weight_decay': 4e-4},
+    {'params': prelu_params, 'weight_decay': 0.0}
+], lr=0.1, momentum=0.9, nesterov=True)
+
+exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[36, 52, 58], gamma=0.1)
+
+
+# Move the network and the margin head to the GPU (DataParallel in multi-GPU mode).
+net = net.cuda()
+ArcMargin = ArcMargin.cuda()
+if multi_gpus:
+    net = DataParallel(net)
+    ArcMargin = DataParallel(ArcMargin)
+criterion = torch.nn.CrossEntropyLoss()
+
+# NOTE(review): best_acc / best_epoch are initialised but never updated in this script.
+best_acc = 0.0
+best_epoch = 0
+for epoch in range(start_epoch, TOTAL_EPOCH+1):
+    # NOTE(review): the scheduler steps before the epoch's optimizer steps - confirm
+    # this order is what the installed PyTorch version expects.
+    exp_lr_scheduler.step()
+    # train model
+    _print('Train Epoch: {}/{} ...'.format(epoch, TOTAL_EPOCH))
+    net.train()
+    train_total_loss = 0.0
+    total = 0
+    since = time.time()
+    for data in trainloader:
+        img, label = data[0].cuda(), data[1].cuda()
+        batch_size = img.size(0)
+        optimizer_ft.zero_grad()
+        raw_logits = net(img)
+        output = ArcMargin(raw_logits, label)
+        total_loss = criterion(output, label)
+        total_loss.backward()
+        optimizer_ft.step()
+        train_total_loss += total_loss.item() * batch_size
+        total += batch_size
+
+    train_total_loss = train_total_loss / total
+    time_elapsed = time.time() - since
+    loss_msg = '    total_loss: {:.4f} time: {:.0f}m {:.0f}s'\
+        .format(train_total_loss, time_elapsed // 60, time_elapsed % 60)
+    _print(loss_msg)
+
+    # test model on lfw
+    if epoch % TEST_FREQ == 0:
+        net.eval()
+        featureLs = None
+        featureRs = None
+        _print('Test Epoch: {} ...'.format(epoch))
+        for data in testloader:
+            for i in range(len(data)):
+                data[i] = data[i].cuda()
+            res = [net(d).data.cpu().numpy() for d in data]
+            featureL = np.concatenate((res[0], res[1]), 1)
+            featureR = np.concatenate((res[2], res[3]), 1)
+            if featureLs is None:
+                featureLs = featureL
+            else:
+                featureLs = np.concatenate((featureLs, featureL), 0)
+            if featureRs is None:
+                featureRs = featureR
+            else:
+                featureRs = np.concatenate((featureRs, featureR), 0)
+
+        result = {'fl': featureLs, 'fr': featureRs, 'fold': folds, 'flag': flags}
+        # save tmp_result, then score it with evaluation_10_fold
+        scipy.io.savemat('./result/tmp_result.mat', result)
+        accs = evaluation_10_fold('./result/tmp_result.mat')
+        _print('    ave: {:.4f}'.format(np.mean(accs) * 100))
+
+    # save model
+    if epoch % SAVE_FREQ == 0:
+        msg = 'Saving checkpoint: {}'.format(epoch)
+        _print(msg)
+        # In multi-GPU mode save the wrapped module's own state_dict.
+        if multi_gpus:
+            net_state_dict = net.module.state_dict()
+        else:
+            net_state_dict = net.state_dict()
+        if not os.path.exists(save_dir):
+            os.mkdir(save_dir)
+        torch.save({
+            'epoch': epoch,
+            'net_state_dict': net_state_dict},
+            os.path.join(save_dir, '%03d.ckpt' % epoch))
+print('finishing training')
diff --git a/train.py b/train.py
new file mode 100755
index 0000000..1965e08
--- /dev/null
+++ b/train.py
@@ -0,0 +1,202 @@
+import torch
+import torch.optim as optim
+from torch.optim import lr_scheduler
+from torch.nn import DataParallel, CrossEntropyLoss
+from dataloader.MyHF_loader import CASIA_HF, LFW_Pairs
+from core import model
+from core.utils import init_log
+import os, time, numpy as np, scipy.io
+from datetime import datetime
+from config import BATCH_SIZE, SAVE_FREQ, RESUME, SAVE_DIR, TEST_FREQ, TOTAL_EPOCH, MODEL_PRE, GPU
+from sklearn.metrics.pairwise import cosine_similarity # [추가] 정확도 계산용
+
+def calculate_accuracy(featureLs, featureRs, flags, thresholds=np.arange(0, 1, 0.01)):
+    """Return the best pair-verification accuracy over a sweep of cosine thresholds.
+
+    featureLs / featureRs: (N, D) embeddings of the left / right image of each pair.
+    flags: N ground-truth labels (1 = same, 0 = different); flattened before use.
+    thresholds: candidate cut-offs; a pair is predicted 1 when its cosine > t.
+    Returns 0 when `thresholds` is empty or no threshold scores above zero.
+    """
+    normL = np.linalg.norm(featureLs, axis=1, keepdims=True)
+    normR = np.linalg.norm(featureRs, axis=1, keepdims=True)
+    # Divide all-zero rows by 1 instead of 0 so they score 0 rather than NaN.
+    featureLs = featureLs / np.where(normL > 0, normL, 1)
+    featureRs = featureRs / np.where(normR > 0, normR, 1)
+    scores = np.sum(featureLs * featureRs, axis=1)
+    # Flatten labels: (N, 1) flags would broadcast to an (N, N) comparison below.
+    flags = np.asarray(flags).reshape(-1)
+    best_acc = 0
+    for t in np.asarray(thresholds).reshape(-1):
+        best_acc = max(best_acc, np.mean((scores > t).astype(int) == flags))
+    return best_acc
+
+# ----------------------------
+# GPU selection and run setup
+# ----------------------------
+# GPU from config is either a single int id or an iterable of ids; anything
+# that is not an int switches the script into DataParallel (multi-GPU) mode.
+multi_gpus = not isinstance(GPU, int)
+gpu_ids = GPU if multi_gpus else [GPU]
+gpu_list = ','.join(str(gpu_id) for gpu_id in gpu_ids)
+os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
+
+# Every run writes to its own timestamped directory under SAVE_DIR.
+start_epoch = 1  # replaced further down when RESUME is set
+run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+save_dir = os.path.join(SAVE_DIR, MODEL_PRE + 'v2_' + run_stamp)
+os.makedirs(save_dir, exist_ok=True)
+logging = init_log(save_dir)
+_print = logging.info
+
+# ----------------------------
+# Data loaders
+# ----------------------------
+# Both loaders keep the final partial batch and use 8 worker processes.
+loader_opts = {'num_workers': 8, 'drop_last': False}
+trainset = CASIA_HF()  # training data, reshuffled every epoch
+testset = LFW_Pairs()  # evaluation pairs, read in a fixed order
+make_loader = torch.utils.data.DataLoader
+trainloader = make_loader(trainset, batch_size=BATCH_SIZE, shuffle=True, **loader_opts)
+testloader = make_loader(testset, batch_size=32, shuffle=False, **loader_opts)
+
+# ----------------------------
+# Model & optimizer
+# ----------------------------
+net = model.MobileFacenet()
+ArcMargin = model.ArcMarginProduct(128, trainset.dataset.features['label'].num_classes)
+
+if RESUME:
+    # Load onto the CPU first; the weights move to the GPU with net.cuda() below.
+    ckpt = torch.load(RESUME, map_location='cpu')
+    net.load_state_dict(ckpt['net_state_dict'])
+    start_epoch = ckpt['epoch'] + 1
+    # NOTE(review): only the backbone is restored; the ArcMargin head, optimizer
+    # state and LR schedule start fresh after a resume -- confirm this is intended.
+
+net = net.cuda()
+ArcMargin = ArcMargin.cuda()
+
+# Build the parameter groups from the bare modules, before DataParallel wraps
+# them: the wrapper does not expose `.linear1` / `.weight`, so reading them off
+# the wrapped objects would raise AttributeError in multi-GPU mode.
+head_params = list(net.linear1.parameters())
+# Exclude by the id of the ArcMargin weight itself, not the ids of its rows.
+ignored_params = [id(p) for p in head_params] + [id(ArcMargin.weight)]
+base_params = [p for p in net.parameters() if id(p) not in ignored_params]
+
+# No PReLU group: per the author's note, PReLU was removed from the architecture.
+optimizer_ft = optim.SGD([
+    {'params': base_params, 'weight_decay': 4e-5},
+    {'params': head_params, 'weight_decay': 4e-4},
+    {'params': [ArcMargin.weight], 'weight_decay': 4e-4},
+], lr=0.1, momentum=0.9, nesterov=True)
+
+if multi_gpus:
+    net = DataParallel(net)
+    ArcMargin = DataParallel(ArcMargin)
+
+criterion = CrossEntropyLoss()
+# Keep these milestones in sync with TOTAL_EPOCH whenever the config changes.
+exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[240, 310, 400], gamma=0.1)
+
+# ----------------------------
+# Best LFW accuracy seen so far in this run
+# ----------------------------
+best_lfw_acc = 0.0
+
+# ----------------------------
+# Training loop
+# ----------------------------
+for epoch in range(start_epoch, TOTAL_EPOCH + 1):
+    net.train()
+    train_total_loss, total = 0, 0
+    since = time.time()
+    _print(f"Train Epoch: {epoch}/{TOTAL_EPOCH} ...")
+
+    # One pass over the training set.
+    for data in trainloader:
+        img, label = data[0].cuda(), data[1].cuda()
+        optimizer_ft.zero_grad()
+        raw_logits = net(img)
+        output = ArcMargin(raw_logits, label)
+        loss = criterion(output, label)
+        loss.backward()
+        optimizer_ft.step()
+        # Weight each batch loss by its size so the epoch figure is a per-sample mean.
+        train_total_loss += loss.item() * img.size(0)
+        total += img.size(0)
+
+    train_total_loss /= total
+    time_elapsed = time.time() - since
+    _print(f"    total_loss: {train_total_loss:.4f} time: {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
+
+    # The LR schedule advances once per epoch, after that epoch's optimizer steps.
+    exp_lr_scheduler.step()
+
+    # ----------------------------
+    # LFW evaluation & best-model checkpoint
+    # ----------------------------
+    if epoch % TEST_FREQ == 0:
+        net.eval()
+        # Collect per-batch results in lists and join them once after the loop,
+        # instead of re-concatenating (and re-copying) the running arrays on
+        # every batch.
+        left_chunks, right_chunks, flag_chunks = [], [], []
+
+        _print("    Testing LFW...")
+        with torch.no_grad():  # inference only, so autograd tracking is off
+            for data in testloader:
+                # NOTE(review): assumes each batch is (images, flags), where images
+                # holds four tensors embedded as (0, 1) -> left and (2, 3) -> right,
+                # and flags is 1 for a same-person pair, 0 otherwise -- confirm
+                # against LFW_Pairs.
+                imgs = [d.cuda() for d in data[0]]
+                # Labels never touch the GPU; keep them as numpy for the metric.
+                flag_chunks.append(data[1].numpy())
+
+                res = [net(d).cpu().numpy() for d in imgs]
+                left_chunks.append(np.concatenate((res[0], res[1]), 1))
+                right_chunks.append(np.concatenate((res[2], res[3]), 1))
+
+        featureLs = np.concatenate(left_chunks, 0)
+        featureRs = np.concatenate(right_chunks, 0)
+        flags = np.concatenate(flag_chunks, 0)
+
+        # Accuracy at the best cosine threshold found on these same pairs.
+        current_acc = calculate_accuracy(featureLs, featureRs, flags)
+        # "Best" in this line is the best from earlier evaluations only.
+        _print(f"    LFW Acc: {current_acc*100:.2f}% (Best: {best_lfw_acc*100:.2f}%)")
+
+        # Checkpoint whenever LFW accuracy (not training loss) improves. Each
+        # improvement gets its own best_<epoch>.ckpt; older ones are kept.
+        if current_acc > best_lfw_acc:
+            best_lfw_acc = current_acc
+            # DataParallel keeps the real model under .module; save its weights
+            # so the checkpoint loads into an unwrapped MobileFacenet.
+            state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+
+            best_dir = os.path.join(save_dir, 'best_model')
+            os.makedirs(best_dir, exist_ok=True)
+
+            best_path = os.path.join(best_dir, f'best_{epoch:03d}.ckpt')
+            torch.save(
+                {
+                    'epoch': epoch,
+                    'net_state_dict': state_dict,
+                    'acc': best_lfw_acc
+                },
+                best_path
+            )
+            _print(f"    ==> Best Model Saved! (Acc: {best_lfw_acc*100:.2f}%, Epoch: {epoch})")
+
+    # ----------------------------
+    # Periodic checkpoint (backup)
+    # ----------------------------
+    # Written regardless of accuracy; these files carry no 'acc' entry.
+    if epoch % SAVE_FREQ == 0:
+        state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+        torch.save({'epoch': epoch, 'net_state_dict': state_dict},
+                   os.path.join(save_dir, f'{epoch:03d}.ckpt'))
+
+_print("finishing training")
\ No newline at end of file
diff --git a/train2.py b/train2.py
new file mode 100755
index 0000000..815b82c
--- /dev/null
+++ b/train2.py
@@ -0,0 +1,202 @@
+import torch
+import torch.optim as optim
+from torch.optim import lr_scheduler
+from torch.nn import DataParallel, CrossEntropyLoss
+from dataloader.MyHF_loader import CASIA_HF, LFW_Pairs
+from core import model2
+from core.utils import init_log
+import os, time, numpy as np, scipy.io
+from datetime import datetime
+from config import BATCH_SIZE, SAVE_FREQ, RESUME, SAVE_DIR, TEST_FREQ, TOTAL_EPOCH, MODEL_PRE, GPU
+from sklearn.metrics.pairwise import cosine_similarity # [추가] 정확도 계산용
+
+def calculate_accuracy(featureLs, featureRs, flags, thresholds=np.arange(0, 1, 0.01)):
+    """Return the best pair-verification accuracy over a sweep of cosine thresholds.
+
+    featureLs / featureRs: (N, D) embeddings of the left / right image of each pair.
+    flags: N ground-truth labels (1 = same, 0 = different); flattened before use.
+    thresholds: candidate cut-offs; a pair is predicted 1 when its cosine > t.
+    Returns 0 when `thresholds` is empty or no threshold scores above zero.
+    """
+    normL = np.linalg.norm(featureLs, axis=1, keepdims=True)
+    normR = np.linalg.norm(featureRs, axis=1, keepdims=True)
+    # Divide all-zero rows by 1 instead of 0 so they score 0 rather than NaN.
+    featureLs = featureLs / np.where(normL > 0, normL, 1)
+    featureRs = featureRs / np.where(normR > 0, normR, 1)
+    scores = np.sum(featureLs * featureRs, axis=1)
+    # Flatten labels: (N, 1) flags would broadcast to an (N, N) comparison below.
+    flags = np.asarray(flags).reshape(-1)
+    best_acc = 0
+    for t in np.asarray(thresholds).reshape(-1):
+        best_acc = max(best_acc, np.mean((scores > t).astype(int) == flags))
+    return best_acc
+
+# ----------------------------
+# GPU selection and run setup
+# ----------------------------
+# GPU from config is either a single int id or an iterable of ids; anything
+# that is not an int switches the script into DataParallel (multi-GPU) mode.
+multi_gpus = not isinstance(GPU, int)
+gpu_ids = GPU if multi_gpus else [GPU]
+gpu_list = ','.join(str(gpu_id) for gpu_id in gpu_ids)
+os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
+
+# Every run writes to its own timestamped directory under SAVE_DIR.
+start_epoch = 1  # replaced further down when RESUME is set
+run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+save_dir = os.path.join(SAVE_DIR, 'MODEL_2_' + run_stamp)
+os.makedirs(save_dir, exist_ok=True)
+logging = init_log(save_dir)
+_print = logging.info
+
+# ----------------------------
+# Data loaders
+# ----------------------------
+# Both loaders keep the final partial batch and use 8 worker processes.
+loader_opts = {'num_workers': 8, 'drop_last': False}
+trainset = CASIA_HF()  # training data, reshuffled every epoch
+testset = LFW_Pairs()  # evaluation pairs, read in a fixed order
+make_loader = torch.utils.data.DataLoader
+trainloader = make_loader(trainset, batch_size=BATCH_SIZE, shuffle=True, **loader_opts)
+testloader = make_loader(testset, batch_size=32, shuffle=False, **loader_opts)
+
+# ----------------------------
+# Model & optimizer
+# ----------------------------
+net = model2.MobileFacenet()
+ArcMargin = model2.ArcMarginProduct(128, trainset.dataset.features['label'].num_classes)
+
+if RESUME:
+    # Load onto the CPU first; the weights move to the GPU with net.cuda() below.
+    ckpt = torch.load(RESUME, map_location='cpu')
+    net.load_state_dict(ckpt['net_state_dict'])
+    start_epoch = ckpt['epoch'] + 1
+    # NOTE(review): only the backbone is restored; the ArcMargin head, optimizer
+    # state and LR schedule start fresh after a resume -- confirm this is intended.
+
+net = net.cuda()
+ArcMargin = ArcMargin.cuda()
+
+# Build the parameter groups from the bare modules, before DataParallel wraps
+# them: the wrapper does not expose `.linear1` / `.weight`, so reading them off
+# the wrapped objects would raise AttributeError in multi-GPU mode.
+head_params = list(net.linear1.parameters())
+# Exclude by the id of the ArcMargin weight itself, not the ids of its rows.
+ignored_params = [id(p) for p in head_params] + [id(ArcMargin.weight)]
+base_params = [p for p in net.parameters() if id(p) not in ignored_params]
+
+# No PReLU group: per the author's note, PReLU was removed from the architecture.
+optimizer_ft = optim.SGD([
+    {'params': base_params, 'weight_decay': 4e-5},
+    {'params': head_params, 'weight_decay': 4e-4},
+    {'params': [ArcMargin.weight], 'weight_decay': 4e-4},
+], lr=0.1, momentum=0.9, nesterov=True)
+
+if multi_gpus:
+    net = DataParallel(net)
+    ArcMargin = DataParallel(ArcMargin)
+
+criterion = CrossEntropyLoss()
+# Keep these milestones in sync with TOTAL_EPOCH whenever the config changes.
+exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[240, 310, 400], gamma=0.1)
+
+# ----------------------------
+# Best LFW accuracy seen so far in this run
+# ----------------------------
+best_lfw_acc = 0.0
+
+# ----------------------------
+# Training loop
+# ----------------------------
+for epoch in range(start_epoch, TOTAL_EPOCH + 1):
+    net.train()
+    train_total_loss, total = 0, 0
+    since = time.time()
+    _print(f"Train Epoch: {epoch}/{TOTAL_EPOCH} ...")
+
+    # One pass over the training set.
+    for data in trainloader:
+        img, label = data[0].cuda(), data[1].cuda()
+        optimizer_ft.zero_grad()
+        raw_logits = net(img)
+        output = ArcMargin(raw_logits, label)
+        loss = criterion(output, label)
+        loss.backward()
+        optimizer_ft.step()
+        # Weight each batch loss by its size so the epoch figure is a per-sample mean.
+        train_total_loss += loss.item() * img.size(0)
+        total += img.size(0)
+
+    train_total_loss /= total
+    time_elapsed = time.time() - since
+    _print(f"    total_loss: {train_total_loss:.4f} time: {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
+
+    # The LR schedule advances once per epoch, after that epoch's optimizer steps.
+    exp_lr_scheduler.step()
+
+    # ----------------------------
+    # LFW evaluation & best-model checkpoint
+    # ----------------------------
+    if epoch % TEST_FREQ == 0:
+        net.eval()
+        # Collect per-batch results in lists and join them once after the loop,
+        # instead of re-concatenating (and re-copying) the running arrays on
+        # every batch.
+        left_chunks, right_chunks, flag_chunks = [], [], []
+
+        _print("    Testing LFW...")
+        with torch.no_grad():  # inference only, so autograd tracking is off
+            for data in testloader:
+                # NOTE(review): assumes each batch is (images, flags), where images
+                # holds four tensors embedded as (0, 1) -> left and (2, 3) -> right,
+                # and flags is 1 for a same-person pair, 0 otherwise -- confirm
+                # against LFW_Pairs.
+                imgs = [d.cuda() for d in data[0]]
+                # Labels never touch the GPU; keep them as numpy for the metric.
+                flag_chunks.append(data[1].numpy())
+
+                res = [net(d).cpu().numpy() for d in imgs]
+                left_chunks.append(np.concatenate((res[0], res[1]), 1))
+                right_chunks.append(np.concatenate((res[2], res[3]), 1))
+
+        featureLs = np.concatenate(left_chunks, 0)
+        featureRs = np.concatenate(right_chunks, 0)
+        flags = np.concatenate(flag_chunks, 0)
+
+        # Accuracy at the best cosine threshold found on these same pairs.
+        current_acc = calculate_accuracy(featureLs, featureRs, flags)
+        # "Best" in this line is the best from earlier evaluations only.
+        _print(f"    LFW Acc: {current_acc*100:.2f}% (Best: {best_lfw_acc*100:.2f}%)")
+
+        # Checkpoint whenever LFW accuracy (not training loss) improves. Each
+        # improvement gets its own best_<epoch>.ckpt; older ones are kept.
+        if current_acc > best_lfw_acc:
+            best_lfw_acc = current_acc
+            # DataParallel keeps the real model under .module; save its weights
+            # so the checkpoint loads into an unwrapped MobileFacenet.
+            state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+
+            best_dir = os.path.join(save_dir, 'best_model')
+            os.makedirs(best_dir, exist_ok=True)
+
+            best_path = os.path.join(best_dir, f'best_{epoch:03d}.ckpt')
+            torch.save(
+                {
+                    'epoch': epoch,
+                    'net_state_dict': state_dict,
+                    'acc': best_lfw_acc
+                },
+                best_path
+            )
+            _print(f"    ==> Best Model Saved! (Acc: {best_lfw_acc*100:.2f}%, Epoch: {epoch})")
+
+    # ----------------------------
+    # Periodic checkpoint (backup)
+    # ----------------------------
+    # Written regardless of accuracy; these files carry no 'acc' entry.
+    if epoch % SAVE_FREQ == 0:
+        state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+        torch.save({'epoch': epoch, 'net_state_dict': state_dict},
+                   os.path.join(save_dir, f'{epoch:03d}.ckpt'))
+
+_print("finishing training")
\ No newline at end of file
diff --git a/train_bak.py b/train_bak.py
new file mode 100755
index 0000000..6a46ea5
--- /dev/null
+++ b/train_bak.py
@@ -0,0 +1,202 @@
+import torch
+import torch.optim as optim
+from torch.optim import lr_scheduler
+from torch.nn import DataParallel, CrossEntropyLoss
+from dataloader.MyHF_loader import CASIA_HF, LFW_Pairs
+from core import model_bak
+from core.utils import init_log
+import os, time, numpy as np, scipy.io
+from datetime import datetime
+from config import BATCH_SIZE, SAVE_FREQ, RESUME, SAVE_DIR, TEST_FREQ, TOTAL_EPOCH, MODEL_PRE, GPU
+from sklearn.metrics.pairwise import cosine_similarity # [추가] 정확도 계산용
+
+def calculate_accuracy(featureLs, featureRs, flags, thresholds=np.arange(0, 1, 0.01)):
+    """Return the best pair-verification accuracy over a sweep of cosine thresholds.
+
+    featureLs / featureRs: (N, D) embeddings of the left / right image of each pair.
+    flags: N ground-truth labels (1 = same, 0 = different); flattened before use.
+    thresholds: candidate cut-offs; a pair is predicted 1 when its cosine > t.
+    Returns 0 when `thresholds` is empty or no threshold scores above zero.
+    """
+    normL = np.linalg.norm(featureLs, axis=1, keepdims=True)
+    normR = np.linalg.norm(featureRs, axis=1, keepdims=True)
+    # Divide all-zero rows by 1 instead of 0 so they score 0 rather than NaN.
+    featureLs = featureLs / np.where(normL > 0, normL, 1)
+    featureRs = featureRs / np.where(normR > 0, normR, 1)
+    scores = np.sum(featureLs * featureRs, axis=1)
+    # Flatten labels: (N, 1) flags would broadcast to an (N, N) comparison below.
+    flags = np.asarray(flags).reshape(-1)
+    best_acc = 0
+    for t in np.asarray(thresholds).reshape(-1):
+        best_acc = max(best_acc, np.mean((scores > t).astype(int) == flags))
+    return best_acc
+
+# ----------------------------
+# GPU selection and run setup
+# ----------------------------
+# GPU from config is either a single int id or an iterable of ids; anything
+# that is not an int switches the script into DataParallel (multi-GPU) mode.
+multi_gpus = not isinstance(GPU, int)
+gpu_ids = GPU if multi_gpus else [GPU]
+gpu_list = ','.join(str(gpu_id) for gpu_id in gpu_ids)
+os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
+
+# NOTE(review): no '_' between 'MODEL_BAK' and the timestamp (train2.py uses 'MODEL_2_') -- confirm.
+start_epoch = 1  # replaced further down when RESUME is set
+run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+save_dir = os.path.join(SAVE_DIR, 'MODEL_BAK' + run_stamp)
+os.makedirs(save_dir, exist_ok=True)
+logging = init_log(save_dir)
+_print = logging.info
+
+# ----------------------------
+# Data loaders
+# ----------------------------
+# Both loaders keep the final partial batch and use 8 worker processes.
+loader_opts = {'num_workers': 8, 'drop_last': False}
+trainset = CASIA_HF()  # training data, reshuffled every epoch
+testset = LFW_Pairs()  # evaluation pairs, read in a fixed order
+make_loader = torch.utils.data.DataLoader
+trainloader = make_loader(trainset, batch_size=BATCH_SIZE, shuffle=True, **loader_opts)
+testloader = make_loader(testset, batch_size=32, shuffle=False, **loader_opts)
+
+# ----------------------------
+# Model & optimizer
+# ----------------------------
+net = model_bak.MobileFacenet()
+ArcMargin = model_bak.ArcMarginProduct(128, trainset.dataset.features['label'].num_classes)
+
+if RESUME:
+    # Load onto the CPU first; the weights move to the GPU with net.cuda() below.
+    ckpt = torch.load(RESUME, map_location='cpu')
+    net.load_state_dict(ckpt['net_state_dict'])
+    start_epoch = ckpt['epoch'] + 1
+    # NOTE(review): only the backbone is restored; the ArcMargin head, optimizer
+    # state and LR schedule start fresh after a resume -- confirm this is intended.
+
+net = net.cuda()
+ArcMargin = ArcMargin.cuda()
+
+# Build the parameter groups from the bare modules, before DataParallel wraps
+# them: the wrapper does not expose `.linear1` / `.weight`, so reading them off
+# the wrapped objects would raise AttributeError in multi-GPU mode.
+head_params = list(net.linear1.parameters())
+# Exclude by the id of the ArcMargin weight itself, not the ids of its rows.
+ignored_params = [id(p) for p in head_params] + [id(ArcMargin.weight)]
+base_params = [p for p in net.parameters() if id(p) not in ignored_params]
+
+# No PReLU group: per the author's note, PReLU was removed from the architecture.
+optimizer_ft = optim.SGD([
+    {'params': base_params, 'weight_decay': 4e-5},
+    {'params': head_params, 'weight_decay': 4e-4},
+    {'params': [ArcMargin.weight], 'weight_decay': 4e-4},
+], lr=0.1, momentum=0.9, nesterov=True)
+
+if multi_gpus:
+    net = DataParallel(net)
+    ArcMargin = DataParallel(ArcMargin)
+
+criterion = CrossEntropyLoss()
+# Keep these milestones in sync with TOTAL_EPOCH whenever the config changes.
+exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[240, 310, 400], gamma=0.1)
+
+# ----------------------------
+# Best LFW accuracy seen so far in this run
+# ----------------------------
+best_lfw_acc = 0.0
+
+# ----------------------------
+# Training loop
+# ----------------------------
+for epoch in range(start_epoch, TOTAL_EPOCH + 1):
+    net.train()
+    train_total_loss, total = 0, 0
+    since = time.time()
+    _print(f"Train Epoch: {epoch}/{TOTAL_EPOCH} ...")
+
+    # One pass over the training set.
+    for data in trainloader:
+        img, label = data[0].cuda(), data[1].cuda()
+        optimizer_ft.zero_grad()
+        raw_logits = net(img)
+        output = ArcMargin(raw_logits, label)
+        loss = criterion(output, label)
+        loss.backward()
+        optimizer_ft.step()
+        # Weight each batch loss by its size so the epoch figure is a per-sample mean.
+        train_total_loss += loss.item() * img.size(0)
+        total += img.size(0)
+
+    train_total_loss /= total
+    time_elapsed = time.time() - since
+    _print(f"    total_loss: {train_total_loss:.4f} time: {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
+
+    # The LR schedule advances once per epoch, after that epoch's optimizer steps.
+    exp_lr_scheduler.step()
+
+    # ----------------------------
+    # LFW evaluation & best-model checkpoint
+    # ----------------------------
+    if epoch % TEST_FREQ == 0:
+        net.eval()
+        # Collect per-batch results in lists and join them once after the loop,
+        # instead of re-concatenating (and re-copying) the running arrays on
+        # every batch.
+        left_chunks, right_chunks, flag_chunks = [], [], []
+
+        _print("    Testing LFW...")
+        with torch.no_grad():  # inference only, so autograd tracking is off
+            for data in testloader:
+                # NOTE(review): assumes each batch is (images, flags), where images
+                # holds four tensors embedded as (0, 1) -> left and (2, 3) -> right,
+                # and flags is 1 for a same-person pair, 0 otherwise -- confirm
+                # against LFW_Pairs.
+                imgs = [d.cuda() for d in data[0]]
+                # Labels never touch the GPU; keep them as numpy for the metric.
+                flag_chunks.append(data[1].numpy())
+
+                res = [net(d).cpu().numpy() for d in imgs]
+                left_chunks.append(np.concatenate((res[0], res[1]), 1))
+                right_chunks.append(np.concatenate((res[2], res[3]), 1))
+
+        featureLs = np.concatenate(left_chunks, 0)
+        featureRs = np.concatenate(right_chunks, 0)
+        flags = np.concatenate(flag_chunks, 0)
+
+        # Accuracy at the best cosine threshold found on these same pairs.
+        current_acc = calculate_accuracy(featureLs, featureRs, flags)
+        # "Best" in this line is the best from earlier evaluations only.
+        _print(f"    LFW Acc: {current_acc*100:.2f}% (Best: {best_lfw_acc*100:.2f}%)")
+
+        # Checkpoint whenever LFW accuracy (not training loss) improves. Each
+        # improvement gets its own best_<epoch>.ckpt; older ones are kept.
+        if current_acc > best_lfw_acc:
+            best_lfw_acc = current_acc
+            # DataParallel keeps the real model under .module; save its weights
+            # so the checkpoint loads into an unwrapped MobileFacenet.
+            state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+
+            best_dir = os.path.join(save_dir, 'best_model')
+            os.makedirs(best_dir, exist_ok=True)
+
+            best_path = os.path.join(best_dir, f'best_{epoch:03d}.ckpt')
+            torch.save(
+                {
+                    'epoch': epoch,
+                    'net_state_dict': state_dict,
+                    'acc': best_lfw_acc
+                },
+                best_path
+            )
+            _print(f"    ==> Best Model Saved! (Acc: {best_lfw_acc*100:.2f}%, Epoch: {epoch})")
+
+    # ----------------------------
+    # Periodic checkpoint (backup)
+    # ----------------------------
+    # Written regardless of accuracy; these files carry no 'acc' entry.
+    if epoch % SAVE_FREQ == 0:
+        state_dict = net.module.state_dict() if multi_gpus else net.state_dict()
+        torch.save({'epoch': epoch, 'net_state_dict': state_dict},
+                   os.path.join(save_dir, f'{epoch:03d}.ckpt'))
+
+_print("finishing training")
\ No newline at end of file