From ec8a4c100a1878fbd050341ea305cf21aede2bb7 Mon Sep 17 00:00:00 2001
From: MCG <000914m@gmail.com>
Date: Sat, 23 Nov 2024 02:57:09 +0900
Subject: [PATCH] fix: focal_Loss UNet3+ #25

---
 UNet3+/Code/CropTrainRun.py            |  16 +-
 UNet3+/Code/InfetenceRun.py            |   1 -
 UNet3+/Code/Loss/Loss.py               |  83 +++++---
 UNet3+/Code/Model/FixedModel.py        |   9 +-
 UNet3+/Code/Model/model_shape_check.py |   8 +-
 UNet3+/Code/Train.py                   |  36 ++--
 UNet3+/Code/TrainRun.py                |   2 +-
 UNet3+/Code/Validation.py              |  27 +--
 UNet3+/Code/config.py                  |  16 +-
 UNet3+/DataCreate.ipynb                | 270 +++++++++++++++++++++++++
 10 files changed, 380 insertions(+), 88 deletions(-)
 create mode 100644 UNet3+/DataCreate.ipynb

diff --git a/UNet3+/Code/CropTrainRun.py b/UNet3+/Code/CropTrainRun.py
index c889752..142241a 100644
--- a/UNet3+/Code/CropTrainRun.py
+++ b/UNet3+/Code/CropTrainRun.py
@@ -10,7 +10,7 @@
 
 from Model.FixedModel import UNet_3Plus_DeepSup
 from DataSet.DataLoder import get_image_label_paths
-from config import IMAGE_ROOT, LABEL_ROOT, BATCH_SIZE, IMSIZE, CLASSES, MILESTONES, GAMMA, LR, SAVED_DIR, VISUALIZE_TRAIN_DATA, SAVE_VISUALIZE_TRAIN_DATA_PATH
+from config import IMAGE_ROOT, LABEL_ROOT, BATCH_SIZE, IMSIZE, CLASSES, MILESTONES, GAMMA, LR, SAVED_DIR, VISUALIZE_TRAIN_DATA, SAVE_VISUALIZE_TRAIN_DATA_PATH,NUM_EPOCHS
 from DataSet.LabelBaseCropDataset import XRayDataset
 from Loss.Loss import CombinedLoss
 from Train import train
@@ -57,15 +57,15 @@ def main():
         train_filenames,
         train_labelnames,
         is_train=True,
-        save_dir=SAVE_VISUALIZE_TRAIN_DATA_PATH,
-        draw_enabled=VISUALIZE_TRAIN_DATA,
+        save_dir=None,
+        draw_enabled=False,
     )
     valid_dataset = XRayDataset(
         valid_filenames,
         valid_labelnames,
         is_train=False,
-        save_dir=None,
-        draw_enabled=False,
+        save_dir=SAVE_VISUALIZE_TRAIN_DATA_PATH,
+        draw_enabled=VISUALIZE_TRAIN_DATA,
     )
 
     train_loader = DataLoader(
@@ -87,11 +87,11 @@ def main():
     model = UNet_3Plus_DeepSup(n_classes=len(CLASSES))
 
     # Loss function 정의
-    criterion = CombinedLoss(focal_weight=1, iou_weight=1, ms_ssim_weight=1, dice_weight=1)
+    criterion = CombinedLoss(focal_weight=1, iou_weight=1, ms_ssim_weight=1, dice_weight=0)
 
     # Optimizer 정의
-    optimizer = optim.Adam(params=model.parameters(), lr=LR, weight_decay=1e-6)
-    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA)
+    optimizer = optim.AdamW(params=model.parameters(), lr=LR, weight_decay=1e-4)
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS, eta_min=1e-6)
 
     train(model, train_loader, valid_loader, criterion, optimizer, scheduler)
 
diff --git a/UNet3+/Code/InfetenceRun.py b/UNet3+/Code/InfetenceRun.py
index 61670e9..19035b0 100644
--- a/UNet3+/Code/InfetenceRun.py
+++ b/UNet3+/Code/InfetenceRun.py
@@ -89,7 +89,6 @@ def test(model, data_loader, thr=0.5):
 
             # restore original size
             outputs = F.interpolate(outputs, size=(2048, 2048), mode="bilinear")
-            outputs = torch.sigmoid(outputs)
             outputs = (outputs > thr).detach().cpu().numpy()
 
             for output, image_name in zip(outputs, image_names):
diff --git a/UNet3+/Code/Loss/Loss.py b/UNet3+/Code/Loss/Loss.py
index 2332648..aef8b68 100644
--- a/UNet3+/Code/Loss/Loss.py
+++ b/UNet3+/Code/Loss/Loss.py
@@ -2,7 +2,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from math import exp
-
+import numpy as np
+import torchvision
 
 def gaussian(window_size, sigma):
     gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
@@ -107,27 +108,46 @@ def __init__(self, window_size=11, size_average=True, channel=3):
     def forward(self, img1, img2):
         return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average, normalize=True)
 
-
-
-
 class CombinedLoss(nn.Module):
     def __init__(self, focal_weight=1, iou_weight=1, ms_ssim_weight=1, dice_weight=1, smooth=1e-6, channel=3):
-        """
-        Combined Loss = alpha * Focal Loss + beta * IoU Loss + gamma * MS-SSIM Loss + delta * Dice Loss
-        """
         super(CombinedLoss, self).__init__()
-        self.alpha = focal_weight  # Weight for Focal Loss
-        self.beta = iou_weight    # Weight for IoU Loss
-        self.gamma = ms_ssim_weight  # Weight for MS-SSIM Loss
-        self.delta = dice_weight  # Weight for Dice Loss
+        self.focal_weight = focal_weight  # scales the focal term in forward()
+        self.iou_weight = iou_weight  # scales the IoU term in forward()
+        self.ms_ssim_weight = ms_ssim_weight  # applied to the MS-SSIM term in forward()
+        self.dice_weight = dice_weight  # scales the Dice term in forward()
         self.smooth = smooth
-        self.ms_ssim = MSSSIM(window_size=7, size_average=True, channel=channel)
+        self.ms_ssim = MSSSIM(window_size=11, size_average=True, channel=channel)
+        self.bce_loss_fn = nn.BCEWithLogitsLoss(reduction='mean')  # mean-reduced BCE on logits, used by bce_loss()
+    def adaptive_focal_loss(self, logits, targets, alpha=1, gamma_min=1.5, gamma_max=4.0, reduce=True):
+        """Focal loss on raw logits with a per-element gamma interpolated between gamma_min (pt=1) and gamma_max (pt=0)."""
+        BCE_loss = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')  # element-wise, no averaging
 
-    def focal_loss(self, logits, targets, alpha=0.8, gamma=2):
-        probs = torch.sigmoid(logits)
-        focal_loss = -alpha * (1 - probs) ** gamma * targets * torch.log(probs + 1e-6) \
-                     - (1 - alpha) * probs ** gamma * (1 - targets) * torch.log(1 - probs + 1e-6)
-        return focal_loss.mean()
+        # pt = exp(-BCE); for 0/1 targets this is the probability given to the true label
+        pt = torch.exp(-BCE_loss)
+
+        # gamma grows linearly with (1 - pt), so low-pt elements get the larger exponent
+        gamma = gamma_min + (1 - pt) * (gamma_max - gamma_min)
+        gamma = torch.clamp(gamma, gamma_min, gamma_max)  # Ensure gamma stays within [gamma_min, gamma_max]
+
+        # Focal modulation: each BCE element is scaled by alpha * (1 - pt) ** gamma
+        F_loss = alpha * (1 - pt) ** gamma * BCE_loss
+
+        # reduce=True returns the mean over all elements, otherwise the element-wise tensor
+        if reduce:
+            return torch.mean(F_loss)
+        else:
+            return F_loss
+
+    
+    def focal_loss(self, logits, targets, alpha=1, gamma=1.8, reduce=True):
+        """Return alpha * (1 - pt) ** gamma * BCE computed on raw logits; the mean if reduce, else element-wise."""
+        BCE_loss= F.binary_cross_entropy_with_logits(logits, targets, reduction='none')  # element-wise, no averaging
+        pt = torch.exp(-BCE_loss)  # exp(-BCE); for 0/1 targets this is the probability given to the true label
+        F_loss = alpha * (1-pt)**gamma * BCE_loss
+        if reduce:
+            return torch.mean(F_loss)
+        else:
+            return F_loss
 
     def iou_loss(self, logits, targets):
         probs = torch.sigmoid(logits)
@@ -137,24 +157,27 @@ def iou_loss(self, logits, targets):
         return iou_loss.mean()
 
     def dice_loss(self, logits, targets):
-        """
-        Dice Loss = 1 - (2 * intersection + smooth) / (sum_probs + sum_targets + smooth)
-        """
         probs = torch.sigmoid(logits)
         intersection = (probs * targets).sum(dim=(2, 3))
         sum_probs = probs.sum(dim=(2, 3))
         sum_targets = targets.sum(dim=(2, 3))
         dice = (2 * intersection + self.smooth) / (sum_probs + sum_targets + self.smooth)
         return 1 - dice.mean()
-
+    def bce_loss(self, logits, targets):
+        # Use BCEWithLogitsLoss for numerical stability
+        return self.bce_loss_fn(logits, targets)
     def forward(self, logits, targets):
-        # Calculate individual losses
-        focal = self.focal_loss(logits, targets)
-        #iou = self.iou_loss(logits, targets)
-        ms_ssim_loss = 1 - self.ms_ssim(torch.sigmoid(logits), targets)
-        dice = self.dice_loss(logits, targets)
-
-        # Combine losses with respective weights
-        total_loss = self.alpha * focal  + self.gamma * ms_ssim_loss  + self.delta * dice #self.beta * iou #+ self.delta * dice
-        return total_loss
+        """Return (total, focal, ms_ssim, iou, dice); every component is already multiplied by its weight."""
+        # Each term is computed as a loss first and scaled second, so a weight of 0 removes it from the total.
+        focal = self.focal_loss(logits, targets) * self.focal_weight
+        # Parenthesised on purpose: the weight must scale (1 - ms_ssim), not the similarity before subtraction.
+        ms_ssim_loss = (1 - self.ms_ssim(torch.sigmoid(logits), targets)) * self.ms_ssim_weight
+        dice = self.dice_loss(logits, targets) * self.dice_weight
+        iou = self.iou_loss(logits, targets) * self.iou_weight
+
+        total_loss = focal + ms_ssim_loss + iou + dice
+
+        # The weighted components are returned next to the total so the caller can log them.
+        return total_loss, focal, ms_ssim_loss, iou, dice
+
 
diff --git a/UNet3+/Code/Model/FixedModel.py b/UNet3+/Code/Model/FixedModel.py
index c1194f9..d331ef0 100644
--- a/UNet3+/Code/Model/FixedModel.py
+++ b/UNet3+/Code/Model/FixedModel.py
@@ -42,10 +42,8 @@ def __init__(self, in_channels=3, n_classes=1, feature_scale=4, is_deconv=True,
         self.conv3 = self.convnext[3:5]  # ConvNeXt Stage 2 (Output: 28x28, 384 channels)
         self.conv4 = self.convnext[5:7]
         self.conv5 = nn.Sequential(
-            nn.Conv2d(filters[4], filters[4], kernel_size=3, stride=2, padding=1),  # DownSample
-            nn.BatchNorm2d(filters[4]),
-            nn.GELU(),  # GELU activation function
-            self.convnext[7:])   # ConvNeXt Stage 4 (Output: 7x7, 1536 channels)
+        nn.MaxPool2d(kernel_size=2, stride=2),  # DownSample using MaxPool
+        self.convnext[7:])
 
 
         ## -------------Decoder--------------
@@ -401,8 +399,9 @@ def forward(self, inputs):
         d5 = self.dotProduct(d5, cls_branch_mask)
         
         '''
+        
         if self.training:
-            return d1, d2, d3, d4, d5
+            return torch.cat((d1, d2, d3, d4, d5), dim=0) 
         else:
             #print(d1)
             return d1
\ No newline at end of file
diff --git a/UNet3+/Code/Model/model_shape_check.py b/UNet3+/Code/Model/model_shape_check.py
index f8310ab..ca81c16 100644
--- a/UNet3+/Code/Model/model_shape_check.py
+++ b/UNet3+/Code/Model/model_shape_check.py
@@ -1,7 +1,11 @@
 from torchvision.models import convnext_large
+import torch
 
 # ConvNeXt Large 모델 로드
-model = convnext_large(pretrained=True)
+#model = convnext_large(pretrained=True)
 
 # 모델 구조 출력
-print(model)
\ No newline at end of file
+#print(model)
+# Scratch check: print log(1e-6), which is about -13.8.
+ce_loss = torch.log(torch.tensor(1e-6))
+print(ce_loss)
\ No newline at end of file
diff --git a/UNet3+/Code/Train.py b/UNet3+/Code/Train.py
index 2258bb3..69b8806 100644
--- a/UNet3+/Code/Train.py
+++ b/UNet3+/Code/Train.py
@@ -13,10 +13,11 @@ def save_model(model, file_name=MODELNAME):
     output_path = os.path.join(SAVED_DIR, file_name)
     torch.save(model, output_path)
 
-def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accumulation_steps=ACCUMULATION_STEPS):
+def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accumulation_steps=ACCUMULATION_STEPS, threshold=0.92):
     """
     Args:
         accumulation_steps (int): Number of steps to accumulate gradients before updating.
+        threshold (float): Validation Dice cutoff used to choose between the IoU loss (below) and the Dice loss (at or above).
     """
     print(f'Start training with Gradient Accumulation (accumulation_steps={accumulation_steps})..')
     model.cuda()
@@ -24,9 +25,6 @@ def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accum
     n_class = len(CLASSES)
     best_dice = 0.0
 
-    # 손실 가중치 (Deep Supervision)
-    deep_sup_weights = [0.5, 0.3, 0.2, 0.15, 0.1]  # 각 출력에 대한 가중치
-
     # Mixed Precision Scaler 생성
     scaler = GradScaler()
 
@@ -47,18 +45,12 @@ def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accum
             # Inference 및 Mixed Precision 적용
             with autocast():  # Mixed Precision 모드
                 outputs = model(images)
+                batch_masks = masks.repeat(outputs.shape[0] // masks.shape[0], 1, 1, 1)  # tile masks once per output stacked along the batch dim
 
-                # Deep Supervision 처리: 여러 출력을 가정
-                if isinstance(outputs, (tuple, list)):  # 출력이 리스트/튜플 형태인 경우
-                    total_loss = 0.0
-                    for i, output in enumerate(outputs):
-                        loss = criterion(output, masks)  # 각 출력의 손실 계산
-                        total_loss += loss * deep_sup_weights[i]  # 가중치를 곱해 합산
-                else:  # 출력이 단일 텐서인 경우 (예외 처리)
-                    total_loss = criterion(outputs, masks)
+                loss, focal, ms_ssim_loss, iou, dice = criterion(outputs, batch_masks)  # total loss plus its four weighted components
 
             # Loss Scaling 및 Backpropagation (Gradient Accumulation)
-            scaler.scale(total_loss).backward()
+            scaler.scale(loss).backward()
 
             # Gradient Accumulation Steps 마다 업데이트
             if (step + 1) % accumulation_steps == 0:
@@ -72,7 +64,11 @@ def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accum
                     f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | '
                     f'Epoch [{epoch+1}/{NUM_EPOCHS}], '
                     f'Step [{step+1}/{len(data_loader)}], '
-                    f'Loss: {round(total_loss.item(), 4)}'
+                    f'Loss: {round(loss.item(), 4)} | '
+                    f'Focal: {round(focal.item(), 4)}, '
+                    f'MS-SSIM: {round(ms_ssim_loss.item(), 4)}, '
+                    f'IoU: {round(iou.item(), 4)}, '
+                    f'Dice: {round(dice.item(), 4)}'
                 )
 
         # 마지막 미니배치 처리 후 Gradient 업데이트
@@ -83,7 +79,17 @@ def train(model, data_loader, val_loader, criterion, optimizer, scheduler, accum
 
         # Validation 주기에 따른 Loss 출력 및 Best Model 저장
         if (epoch + 1) % VAL_EVERY == 0:
-            dice = validation(epoch + 1, model, val_loader, criterion)
+            dice = validation(epoch + 1, model, val_loader)
+
+            # Choose the overlap loss from the validation Dice; CombinedLoss.forward reads dice_weight / iou_weight
+            if dice < threshold:
+                print(f"Validation Dice ({dice:.4f}) < Threshold ({threshold}), using IoU Loss.")
+                criterion.dice_weight = 0  # disable the Dice term
+                criterion.iou_weight = 1   # enable the IoU term
+            else:
+                print(f"Validation Dice ({dice:.4f}) >= Threshold ({threshold}), using Dice Loss.")
+                criterion.dice_weight = 1  # enable the Dice term
+                criterion.iou_weight = 0   # disable the IoU term
 
             if best_dice < dice:
                 print(f"Best performance at epoch: {epoch + 1}, {best_dice:.4f} -> {dice:.4f}")
diff --git a/UNet3+/Code/TrainRun.py b/UNet3+/Code/TrainRun.py
index 19b4dcc..5fd3cb7 100644
--- a/UNet3+/Code/TrainRun.py
+++ b/UNet3+/Code/TrainRun.py
@@ -81,7 +81,7 @@
 criterion = CombinedLoss(focal_weight=1, iou_weight=1, ms_ssim_weight=1, dice_weight=1)
 
 # Optimizer 정의
-optimizer = optim.Adam(params=model.parameters(), lr=LR, weight_decay=1e-6)
+optimizer = optim.Adam(params=model.parameters(), lr=LR, weight_decay=1e-5)
 scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA)
 
 
diff --git a/UNet3+/Code/Validation.py b/UNet3+/Code/Validation.py
index 14ed02a..6c1cd30 100644
--- a/UNet3+/Code/Validation.py
+++ b/UNet3+/Code/Validation.py
@@ -3,14 +3,12 @@
 from config import CLASSES
 import torch.nn.functional as F
 
-def validation(epoch, model, data_loader, criterion, thr=0.5):
+def validation(epoch, model, data_loader, thr=0.5):
     print(f'Start validation #{epoch:2d}')
     model.cuda()
     model.eval()
 
     dices = []
-    total_loss = 0
-    cnt = 0
 
     with torch.no_grad():
         total_steps = len(data_loader)  # 데이터 로더 총 스텝 수
@@ -24,27 +22,21 @@ def validation(epoch, model, data_loader, criterion, thr=0.5):
             # 출력 크기 보정 (필요한 경우만)
             if outputs.shape[-2:] != masks.shape[-2:]:
                 outputs = F.interpolate(outputs, size=masks.shape[-2:], mode="bilinear", align_corners=False)
-
-            # 손실 계산
-            loss = criterion(outputs, masks)
-            total_loss += loss.item()
-            cnt += 1
-
+            # NOTE(review): thr is compared with the raw model output below (sigmoid is no longer applied here) -- confirm the model emits probabilities in eval mode.
             # 출력 이진화 및 Dice 계산 (GPU 상에서 처리)
-            outputs = (torch.sigmoid(outputs) > thr).float()
+            outputs = (outputs > thr).float()
             dice = dice_coef(outputs, masks)
             dices.append(dice.detach())  # GPU에서 유지
 
-            # 진행 상황과 손실 출력
-            if (step + 1) % 80 == 0 or (step + 1) == total_steps:  # 매 10 스텝마다 또는 마지막 스텝에서 출력
-                avg_loss = total_loss / cnt
-                print(f"Validation Progress: Step {step + 1}/{total_steps}, Avg Loss: {avg_loss:.4f}")
+            # Progress report
+            if (step + 1) % 80 == 0 or (step + 1) == total_steps:  # every 80 steps and on the final step
+                print(f"Validation Progress: Step {step + 1}/{total_steps}")
 
     # GPU 상에서 Dice 평균 계산
     dices = torch.cat(dices, 0)
     dices_per_class = dices.mean(dim=0)
     
-    # 로그 출력
+    # Build the per-class Dice score lines
     dice_str = [
         f"{c:<12}: {d.item():.4f}"
         for c, d in zip(CLASSES, dices_per_class)
@@ -54,8 +46,7 @@ def validation(epoch, model, data_loader, criterion, thr=0.5):
 
     avg_dice = dices_per_class.mean().item()
 
-    # 최종 평균 손실 출력
-    avg_loss = total_loss / cnt
-    print(f"Validation Completed: Avg Loss: {avg_loss:.4f}, Avg Dice: {avg_dice:.4f}")
+    # Report the final average Dice
+    print(f"Validation Completed: Avg Dice: {avg_dice:.4f}")
 
     return avg_dice
diff --git a/UNet3+/Code/config.py b/UNet3+/Code/config.py
index 57433c0..77a97f4 100644
--- a/UNet3+/Code/config.py
+++ b/UNet3+/Code/config.py
@@ -41,28 +41,28 @@
 RANDOM_SEED = 21
 
 # 적절하게 조절
-NUM_EPOCHS =52
+NUM_EPOCHS =75
 VAL_EVERY = 1
 
-ACCUMULATION_STEPS=32
+ACCUMULATION_STEPS=16
 BATCH_SIZE = 1
 IMSIZE=480
 
-LR = 0.0003
-MILESTONES=[20,30,37]
-GAMMA=0.2
+LR = 0.0008
+MILESTONES=[5,20,32,40,47]
+GAMMA=0.3
 
 
 SAVED_DIR = "/data/ephemeral/home/MCG/UNetRefactored/Creadted_model/"
-MODELNAME="othersCrop_AddBottleNeck_ConvTrans_dice_52.pt"
+MODELNAME="CropOthersChangeLoss.pt"
 if not os.path.isdir(SAVED_DIR):
     os.mkdir(SAVED_DIR)
     
 
 
-INFERENCE_MODEL_NAME="othersCrop_AddBottleNeck_ConvTrans_dice_52.pt"
+INFERENCE_MODEL_NAME="CropOthersChangeLoss.pt"
 
 TEST_IMAGE_ROOT="/data/ephemeral/home/MCG/data/test/DCM"
 
 CSVDIR="/data/ephemeral/home/MCG/UNetRefactored/CSV"
-CSVNAME="othersCrop_AddBottleNeck_ConvTrans_dice_52.csv"
\ No newline at end of file
+CSVNAME="CropOthersChangeLoss.csv"
\ No newline at end of file
diff --git a/UNet3+/DataCreate.ipynb b/UNet3+/DataCreate.ipynb
new file mode 100644
index 0000000..f15a525
--- /dev/null
+++ b/UNet3+/DataCreate.ipynb
@@ -0,0 +1,270 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import shutil\n",
+    "import numpy as np\n",
+    "from sklearn.model_selection import GroupKFold\n",
+    "import random\n",
+    "\n",
+    "# 데이터 경로 설정\n",
+    "IMAGE_ROOT = \"/data/ephemeral/home/MCG/data/train/DCM\"\n",
+    "LABEL_ROOT = \"/data/ephemeral/home/MCG/data/train/outputs_json\"\n",
+    "OUTPUT_DIR = \"/data/ephemeral/home/MCG/data/groupKFold_seed21\"\n",
+    "\n",
+    "# 랜덤 시드 설정\n",
+    "RANDOM_SEED = 21\n",
+    "np.random.seed(RANDOM_SEED)\n",
+    "random.seed(RANDOM_SEED)\n",
+    "\n",
+    "# 데이터 준비\n",
+    "pngs = {\n",
+    "    os.path.relpath(os.path.join(root, fname), start=IMAGE_ROOT)\n",
+    "    for root, _dirs, files in os.walk(IMAGE_ROOT)\n",
+    "    for fname in files\n",
+    "    if os.path.splitext(fname)[1].lower() == \".png\"\n",
+    "}\n",
+    "\n",
+    "jsons = {\n",
+    "    os.path.relpath(os.path.join(root, fname), start=LABEL_ROOT)\n",
+    "    for root, _dirs, files in os.walk(LABEL_ROOT)\n",
+    "    for fname in files\n",
+    "    if os.path.splitext(fname)[1].lower() == \".json\"\n",
+    "}\n",
+    "\n",
+    "assert len(pngs) == len(jsons), \"Mismatch between PNG and JSON files!\"\n",
+    "\n",
+    "pngs = sorted(pngs)\n",
+    "jsons = sorted(jsons)\n",
+    "\n",
+    "# 그룹 설정\n",
+    "filenames = np.array(pngs)\n",
+    "labelnames = np.array(jsons)\n",
+    "groups = [os.path.dirname(fname) for fname in filenames]\n",
+    "\n",
+    "# GroupKFold 생성\n",
+    "gkf = GroupKFold(n_splits=5)\n",
+    "\n",
+    "# 출력 디렉토리 생성\n",
+    "os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
+    "\n",
+    "# GroupKFold를 통해 Fold별 데이터 저장\n",
+    "for fold_idx, (train_idx, val_idx) in enumerate(gkf.split(filenames, np.zeros(len(filenames)), groups)):\n",
+    "    fold_dir = os.path.join(OUTPUT_DIR, f\"fold{fold_idx + 1}\")\n",
+    "    train_image_dir = os.path.join(fold_dir, \"train\", \"Image\")\n",
+    "    train_label_dir = os.path.join(fold_dir, \"train\", \"Label\")\n",
+    "    val_image_dir = os.path.join(fold_dir, \"val\", \"Image\")\n",
+    "    val_label_dir = os.path.join(fold_dir, \"val\", \"Label\")\n",
+    "\n",
+    "    # Fold 디렉토리 및 하위 폴더 생성\n",
+    "    os.makedirs(train_image_dir, exist_ok=True)\n",
+    "    os.makedirs(train_label_dir, exist_ok=True)\n",
+    "    os.makedirs(val_image_dir, exist_ok=True)\n",
+    "    os.makedirs(val_label_dir, exist_ok=True)\n",
+    "\n",
+    "    # Training 데이터 복사\n",
+    "    for idx in train_idx:\n",
+    "        # 이미지 복사\n",
+    "        src_image_path = os.path.join(IMAGE_ROOT, filenames[idx])\n",
+    "        dst_image_path = os.path.join(train_image_dir, os.path.basename(filenames[idx]))\n",
+    "        shutil.copy2(src_image_path, dst_image_path)\n",
+    "\n",
+    "        # 라벨 복사\n",
+    "        src_label_path = os.path.join(LABEL_ROOT, labelnames[idx])\n",
+    "        dst_label_path = os.path.join(train_label_dir, os.path.basename(labelnames[idx]))\n",
+    "        shutil.copy2(src_label_path, dst_label_path)\n",
+    "\n",
+    "    # Validation 데이터 복사\n",
+    "    for idx in val_idx:\n",
+    "        # 이미지 복사\n",
+    "        src_image_path = os.path.join(IMAGE_ROOT, filenames[idx])\n",
+    "        dst_image_path = os.path.join(val_image_dir, os.path.basename(filenames[idx]))\n",
+    "        shutil.copy2(src_image_path, dst_image_path)\n",
+    "\n",
+    "        # 라벨 복사\n",
+    "        src_label_path = os.path.join(LABEL_ROOT, labelnames[idx])\n",
+    "        dst_label_path = os.path.join(val_label_dir, os.path.basename(labelnames[idx]))\n",
+    "        shutil.copy2(src_label_path, dst_label_path)\n",
+    "\n",
+    "    print(f\"Fold {fold_idx + 1} saved: {len(train_idx)} train files, {len(val_idx)} val files.\")\n",
+    "\n",
+    "print(f\"Data split completed and saved in {OUTPUT_DIR}\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "import os\n",
+    "import json\n",
+    "import shutil\n",
+    "import numpy as np\n",
+    "import cv2\n",
+    "from PIL import Image\n",
+    "\n",
+    "# 경로 설정\n",
+    "source_dir = \"/data/ephemeral/home/MCG/data/groupKFold_seed21/fold1\"\n",
+    "target_dir = \"/data/ephemeral/home/MCG/data/UNet3+Data\"\n",
+    "\n",
+    "# 클래스 매핑\n",
+    "CLASSES = [\n",
+    "    'finger-1', 'finger-2', 'finger-3', 'finger-4', 'finger-5',\n",
+    "    'finger-6', 'finger-7', 'finger-8', 'finger-9', 'finger-10',\n",
+    "    'finger-11', 'finger-12', 'finger-13', 'finger-14', 'finger-15',\n",
+    "    'finger-16', 'finger-17', 'finger-18', 'finger-19', 'Trapezium',\n",
+    "    'Trapezoid', 'Capitate', 'Hamate', 'Scaphoid', 'Lunate',\n",
+    "    'Triquetrum', 'Pisiform', 'Radius', 'Ulna',\n",
+    "]\n",
+    "CLASS_MAPPING = {cls_name: idx for idx, cls_name in enumerate(CLASSES, start=1)}\n",
+    "\n",
+    "def create_directories(base_dir):\n",
+    "    \"\"\"UNet3+ 데이터 구조에 맞게 디렉토리 생성\"\"\"\n",
+    "    os.makedirs(os.path.join(base_dir, \"train\", \"images\"), exist_ok=True)\n",
+    "    os.makedirs(os.path.join(base_dir, \"train\", \"mask\"), exist_ok=True)\n",
+    "    os.makedirs(os.path.join(base_dir, \"val\", \"images\"), exist_ok=True)\n",
+    "    os.makedirs(os.path.join(base_dir, \"val\", \"mask\"), exist_ok=True)\n",
+    "\n",
+    "def parse_json_label(json_path, mask_shape):\n",
+    "    \"\"\"\n",
+    "    JSON 파일에서 라벨 정보를 읽어 멀티클래스 마스크 이미지 생성.\n",
+    "    \"\"\"\n",
+    "    with open(json_path, 'r') as f:\n",
+    "        data = json.load(f)\n",
+    "    \n",
+    "    mask = np.zeros(mask_shape, dtype=np.uint8)\n",
+    "    \n",
+    "    for obj in data[\"annotations\"]:\n",
+    "        class_name = obj.get(\"label\", \"\")\n",
+    "        if class_name not in CLASS_MAPPING:\n",
+    "            continue\n",
+    "        class_id = CLASS_MAPPING[class_name]\n",
+    "        \n",
+    "        polygon = np.array(obj[\"points\"], dtype=np.int32)\n",
+    "        cv2.fillPoly(mask, [polygon], color=class_id)\n",
+    "    \n",
+    "    return Image.fromarray(mask)\n",
+    "\n",
+    "def process_data(source_dir, target_dir, mask_shape=(2048, 2048)):\n",
+    "    \"\"\"데이터 복사 및 마스크 생성\"\"\"\n",
+    "    create_directories(target_dir)\n",
+    "    \n",
+    "    for split in [\"train\", \"val\"]:\n",
+    "        image_src_dir = os.path.join(source_dir, split, \"Image\")\n",
+    "        label_src_dir = os.path.join(source_dir, split, \"Label\")\n",
+    "        \n",
+    "        image_dest_dir = os.path.join(target_dir, split, \"images\")\n",
+    "        mask_dest_dir = os.path.join(target_dir, split, \"mask\")\n",
+    "        \n",
+    "        image_files = sorted([f for f in os.listdir(image_src_dir) if f.endswith(('.png', '.jpg', '.jpeg'))])\n",
+    "        label_files = sorted([f for f in os.listdir(label_src_dir) if f.endswith('.json')])\n",
+    "        \n",
+    "        assert len(image_files) == len(label_files), \"이미지와 라벨 파일 수가 맞지 않습니다.\"\n",
+    "        \n",
+    "        for index, (img_file, lbl_file) in enumerate(zip(image_files, label_files)):\n",
+    "            # 새 파일명 설정\n",
+    "            image_filename = f\"image_{index}_0.png\"\n",
+    "            mask_filename = f\"mask_{index}_0.png\"\n",
+    "            \n",
+    "            # 이미지 복사\n",
+    "            shutil.copy(os.path.join(image_src_dir, img_file), os.path.join(image_dest_dir, image_filename))\n",
+    "            \n",
+    "            # 라벨에서 마스크 생성\n",
+    "            json_path = os.path.join(label_src_dir, lbl_file)\n",
+    "            mask = parse_json_label(json_path, mask_shape)\n",
+    "            mask.save(os.path.join(mask_dest_dir, mask_filename))\n",
+    "\n",
+    "# 실행\n",
+    "process_data(source_dir, target_dir, mask_shape=(2048, 2048))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '/data/ephemeral/home/MCG/data/UNet3+Data/train/mask/image1661144206667.png'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 9\u001b[0m\n\u001b[1;32m      6\u001b[0m mask_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/data/ephemeral/home/MCG/data/UNet3+Data/train/mask/image1661144206667.png\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;66;03m# 마스크 이미지 불러오기\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m mask \u001b[38;5;241m=\u001b[39m \u001b[43mImage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmask_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     10\u001b[0m mask_array \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(mask)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;66;03m# 유효 클래스 ID 확인\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/PIL/Image.py:3227\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(fp, mode, formats)\u001b[0m\n\u001b[1;32m   3224\u001b[0m     filename \u001b[38;5;241m=\u001b[39m fp\n\u001b[1;32m   3226\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m filename:\n\u001b[0;32m-> 3227\u001b[0m     fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3228\u001b[0m     exclusive_fp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m   3230\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/data/ephemeral/home/MCG/data/UNet3+Data/train/mask/image1661144206667.png'"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m현재 셀 또는 이전 셀에서 코드를 실행하는 동안 Kernel이 충돌했습니다. \n",
+      "\u001b[1;31m셀의 코드를 검토하여 가능한 오류 원인을 식별하세요. \n",
+      "\u001b[1;31m자세한 내용을 보려면 <a href='https://aka.ms/vscodeJupyterKernelCrash'>여기</a>를 클릭하세요. \n",
+      "\u001b[1;31m자세한 내용은 Jupyter <a href='command:jupyter.viewOutput'>로그</a>를 참조하세요."
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
+    "\n",
+    "# 마스크 이미지 경로\n",
+    "mask_path = \"/data/ephemeral/home/MCG/data/UNet3+Data/train/mask/image1661144206667.png\"\n",
+    "\n",
+    "# 마스크 이미지 불러오기\n",
+    "mask = Image.open(mask_path)\n",
+    "mask_array = np.array(mask)\n",
+    "\n",
+    "# 유효 클래스 ID 확인\n",
+    "unique_values = np.unique(mask_array)\n",
+    "print(\"Unique class IDs in the mask:\", unique_values)\n",
+    "\n",
+    "# 마스크 시각화\n",
+    "plt.figure(figsize=(10, 10))\n",
+    "plt.title(\"Mask Visualization\")\n",
+    "plt.imshow(mask_array, cmap='tab20', interpolation='nearest')  # tab20 색상맵 사용\n",
+    "plt.colorbar(ticks=unique_values, label=\"Class ID\")  # 색상 범례 추가\n",
+    "plt.show()\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}