boostcampaitech7 · refor53 · Sep 27, 2024 · Sep 30, 2024
diff --git a/inference.py b/inference.py
@@ -2,6 +2,7 @@
 from torch.utils.data import DataLoader
 import pandas as pd
 import os
+import numpy as np
 
 from src.dataset import CustomDataset
 from src.transforms import TransformSelector
@@ -28,25 +29,142 @@ def main():
     # Set up test dataset and dataloader
     test_dataset = CustomDataset(root_dir=testdata_dir, info_df=test_info, transform=test_transform, is_inference=True)
     test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=False)
+
+    # # Load model 1
+    # model_selector_1 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_1 = model_selector_1.get_model()
+    # model_1 = layer_modification(model_1)
+    # model_1_path = os.path.join("./train_result", "best_model.pt")
+    # model_1.load_state_dict(torch.load(model_1_path, map_location=device))
+    # model_1.to(device)
+    # # Run inference
+    # predictions_model_1 = inference(model=model_1, device=device, test_loader=test_loader)
+    # np.save('prewitt_data.npy',predictions_model_1)
+    # print("1 save")
+    # predictions_model_1 = torch.tensor(predictions_model_1, dtype=torch.float32, device=device)
 
-    # Load model
-    model_selector = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
-    model = model_selector.get_model()
-    model = layer_modification(model)
+    # # Load model 2
+    # model_selector_2 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_2 = model_selector_2.get_model()
+    # model_2 = layer_modification(model_2)
+    # model_2_path = os.path.join("./train_result", "model_adamw.pt")
+    # model_2.load_state_dict(torch.load(model_2_path, map_location=device))
+    # model_2.to(device)
+    # # Run inference
+    # predictions_model_2 = inference(model=model_2, device=device, test_loader=test_loader)
+    # np.save('adamW.npy',predictions_model_2)
+    # print("2 save")
+    # predictions_model_2 = torch.tensor(predictions_model_2, dtype=torch.float32, device=device)
 
-    # Load the best model from ./train_result/best_model.pt
-    model_path = os.path.join("./train_result", "best_model.pt")
-    model.load_state_dict(torch.load(model_path, map_location=device))
-    model.to(device)
+    # # Load model 3
+    # model_selector_3 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_3 = model_selector_3.get_model()
+    # model_3 = layer_modification(model_3)
+    # model_3_path = os.path.join("./train_result", "2nd_ensemble_model_until_7epoch_best_model.pt")
+    # model_3.load_state_dict(torch.load(model_3_path, map_location=device))
+    # model_3.to(device)
+    # # Run inference
+    # predictions_model_3 = inference(model=model_3, device=device, test_loader=test_loader)
+    # np.save('ensemble_data.npy',predictions_model_3)
+    # predictions_model_3 = torch.tensor(predictions_model_3, dtype=torch.float32, device=device)
+    # print("3 save")
 
-    # Run inference
-    predictions = inference(model=model, device=device, test_loader=test_loader)
+
+    # # Load model 4
+    # model_selector_4 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_4 = model_selector_4.get_model()
+    # model_4 = layer_modification(model_4)
+    # model_4_path = os.path.join("./train_result", "Canny.pt")
+    # model_4.load_state_dict(torch.load(model_4_path, map_location=device))
+    # model_4.to(device)
+    # # Run inference
+    # predictions_model_4 = inference(model=model_4, device=device, test_loader=test_loader)
+    # np.save('canny.npy',predictions_model_4)
+    # print("4 save")
+    # predictions_model_4 = torch.tensor(predictions_model_4, dtype=torch.float32, device=device)
+
+
+    # # Load model 5
+    # model_selector_5 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_5 = model_selector_5.get_model()
+    # model_5 = layer_modification(model_5)
+    # model_5_path = os.path.join("./train_result", "aug5_best_model.pt")
+    # model_5.load_state_dict(torch.load(model_5_path, map_location=device))
+    # model_5.to(device)
+    # # Run inference
+    # predictions_model_5 = inference(model=model_5, device=device, test_loader=test_loader)
+    # np.save('cutmix_data.npy',predictions_model_5)
+    # print("5 save")
+    # predictions_model_5 = torch.tensor(predictions_model_5, dtype=torch.float32, device=device)
+
+    # #Load model 6
+    # model_selector_6 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_6 = model_selector_6.get_model()
+    # model_6 = layer_modification(model_6)
+    # model_6_path = os.path.join("./train_result", "eva02_labelsmoothing_0.1_batchsize_64.pt")
+    # model_6.load_state_dict(torch.load(model_6_path, map_location=device))
+    # model_6.to(device)
+    # # Run inference
+    # predictions_model_6 = inference(model=model_6, device=device, test_loader=test_loader)
+    # np.save('labelsmoothing.npy',predictions_model_6)
+    # print("6 save")
+    # #predictions_model_6 = torch.tensor(predictions_model_6, dtype=torch.float32, device=device)
+
+    # #Load model 7
+    # model_selector_7 =  ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
+    # model_7 = model_selector_7.get_model()
+    # model_7 = layer_modification(model_7)
+    # model_7_path = os.path.join("./train_result", "model_eva02_lr_0.0001.pt")
+    # model_7.load_state_dict(torch.load(model_7_path, map_location=device))
+    # model_7.to(device)
+    # # Run inference
+    # predictions_model_7 = inference(model=model_7, device=device, test_loader=test_loader)
+    # np.save('lr_0.0001.npy',predictions_model_7)
+    # print("7 save")
+    # predictions_model_7 = torch.tensor(predictions_model_7, dtype=torch.float32, device=device)
+
+    predictions_model_1 = np.load('adamW.npy')
+    print(predictions_model_1.shape)
+
+    predictions_model_2 = np.load('cutmix_data.npy')
+    print(predictions_model_2.shape)
+
+    #predictions_model_3 = np.load('ensemble_data.npy')
+    #print(predictions_model_3.shape)
+
+    predictions_model_4 = np.load('labelsmoothing.npy')
+    print(predictions_model_4.shape)
+
+    #predictions_model_5 = np.load('lr_0.0001.npy')
+    #print(predictions_model_5.shape)
+
+    predictions_model_6 = np.load('canny_prewitt_data.npy')
+    print(predictions_model_6.shape)
+
+    predictions_model_7 = np.load('prewitt_data.npy')
+    print(predictions_model_7.shape)
+    #predictions_model_8 = np.load('new_ensemble.npy')
+    #print(predictions_model_8.shape)
+
+    # # Soft Voting 수행
+    soft_voting = (predictions_model_1 + predictions_model_2 + 
+                    predictions_model_4 + predictions_model_6 +
+                    predictions_model_7) / 5
+    print("Soft voting shape:", soft_voting.shape)
+
+    soft_voting = torch.tensor(soft_voting, dtype=torch.float32)
+    voting_result = soft_voting.argmax(dim=1)
+    print("Voting result shape:", voting_result.shape)
 
     # Save results
-    test_info['target'] = predictions
+    test_info['target'] = voting_result.tolist()
     test_info = test_info.reset_index().rename(columns={"index": "ID"})
-    test_info.to_csv("output.csv", index=False)
+    test_info.to_csv("output_soft_voting.csv", index=False)
     print("Inference completed and results saved to output.csv")
 
+    # # # 결과 비교
+    # # matches = (voting_result == model_2_result).sum().item()
+    # # total = voting_result.numel()
+    # # print(f"Matches with model_2: {matches}/{total} ({matches/total*100:.2f}%)")
 if __name__ == "__main__":
     main()
diff --git a/src/compare_csv.py b/src/compare_csv.py
@@ -0,0 +1,53 @@
+import pandas as pd
+import numpy as np
+
+def compare_csv_files(file1_path: str, file2_path: str, id_column: str = 'ID', *, verbose: bool = False) -> int:
+    """
+    Compare two CSV files row by row, matching rows on an ID column.
+
+    Only rows whose ID appears in both files, and only columns present in
+    both files, are compared. Cells that are NaN in both files count as
+    equal. Summary messages are printed to stdout.
+
+    :param file1_path: path to the first CSV file
+    :param file2_path: path to the second CSV file
+    :param id_column: name of the column used to identify a row (default: 'ID')
+    :param verbose: when True, additionally print every differing cell
+    :return: number of differing cells among the common rows and columns
+    :raises ValueError: if ``id_column`` is missing from either file
+    """
+    # Load both files and make sure the key column is available in each.
+    df1 = pd.read_csv(file1_path)
+    df2 = pd.read_csv(file2_path)
+    if id_column not in df1.columns or id_column not in df2.columns:
+        raise ValueError(f"'{id_column}' 열이 두 CSV 파일 모두에 존재하지 않습니다.")
+
+    # Index by ID so rows are matched by key rather than by position.
+    df1 = df1.set_index(id_column)
+    df2 = df2.set_index(id_column)
+
+    # Report columns that exist in only one of the two files.
+    columns_diff = set(df1.columns).symmetric_difference(set(df2.columns))
+    if columns_diff:
+        print(f"두 파일의 열이 다릅니다. 차이: {columns_diff}")
+
+    # Always select the shared rows and columns in df1's order. The `!=`
+    # comparison below requires identically-labeled frames, so two files with
+    # the same columns in a different order must be aligned here as well.
+    common_indices = df1.index.intersection(df2.index)
+    common_columns = [col for col in df1.columns if col in df2.columns]
+    df1_common = df1.loc[common_indices, common_columns]
+    df2_common = df2.loc[common_indices, common_columns]
+
+    # NaN != NaN evaluates to True, so mask out cells that are NaN on both sides.
+    both_nan = df1_common.isna() & df2_common.isna()
+    differences = (df1_common != df2_common) & ~both_nan
+    diff_count = int(differences.to_numpy().sum())
+
+    print(f"공통 행에서 {diff_count}개의 값이 다릅니다.")
+
+    if verbose and diff_count > 0:
+        print("\n값이 다른 셀의 상세 정보:")
+        for col in differences.columns:
+            for idx in differences.index[differences[col].to_numpy()]:
+                print(f"행 {idx}, 열 '{col}': {df1_common.loc[idx, col]} vs {df2_common.loc[idx, col]}")
+
+    return diff_count
+
+# Example usage. Guarded so that importing this module does not trigger a
+# comparison against machine-specific absolute paths.
+if __name__ == "__main__":
+    compare_csv_files("/data/ephemeral/home/deamin/level1-imageclassification-cv-04/output_soft_voting.csv", 
+                      "/data/ephemeral/home/deamin/level1-imageclassification-cv-04/output_wrong_sVoting.csv") #/data/ephemeral/home/deamin/level1-imageclassification-cv-04/pj1_lr_00001_output.csv
diff --git a/src/prewitt_edges.py b/src/prewitt_edges.py
@@ -0,0 +1,21 @@
+import cv2
+import numpy as np
+
+def enhance_edges(image, weight=0.3):
+    """Overlay the Prewitt edge magnitude of an image onto the image itself.
+
+    The image is converted to grayscale, filtered with the horizontal and
+    vertical Prewitt kernels, and the resulting gradient magnitude is blended
+    back as ``image * 1 + edges * weight``.
+
+    Args:
+        image: 3-channel image array. Presumably uint8, since it is blended
+            with a uint8 edge map -- TODO confirm against the caller.
+        weight: Blend factor applied to the edge map (default 0.3).
+
+    Returns:
+        The edge-enhanced image produced by ``cv2.addWeighted``.
+
+    NOTE(review): the grayscale conversion uses ``COLOR_BGR2GRAY``; confirm
+    the caller really passes BGR data rather than RGB.
+    """
+    # Convert to grayscale
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+    # Prewitt edge detection
+    kernel_x = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=np.float32)
+    kernel_y = np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]], dtype=np.float32)
+    # Filter into float32 instead of the source depth: with a uint8 output the
+    # negative responses are saturated to 0 and the squaring below wraps
+    # around modulo 256, which destroys the gradient magnitude.
+    prewitt_x = cv2.filter2D(gray, cv2.CV_32F, kernel_x)
+    prewitt_y = cv2.filter2D(gray, cv2.CV_32F, kernel_y)
+    magnitude = np.hypot(prewitt_x, prewitt_y)
+    # Clip before casting so strong edges saturate at 255 rather than wrapping.
+    edges = np.clip(magnitude, 0, 255).astype(np.uint8)
+
+    # Expand the edge map to 3 channels so it matches the input image
+    edges_3channel = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
+
+    # Blend the original image with the edge map
+    enhanced = cv2.addWeighted(image, 1, edges_3channel, weight, 0)
+
+    return enhanced
diff --git a/src/transforms.py b/src/transforms.py
@@ -1,6 +1,7 @@
 import albumentations as A
 from albumentations.pytorch import ToTensorV2
 from PIL import Image
+from src.prewitt_edges import enhance_edges
 
 class TransformSelector:
     def __init__(self, transform_type: str):
@@ -25,9 +26,12 @@ def __init__(self, is_train: bool = True):
         if is_train:
             self.transform = A.Compose(
                 [
+                   # Albumentations 변환으로 사용
+                    A.Lambda(image=lambda x, **kwargs: enhance_edges(x)),
                     A.HorizontalFlip(p=0.5),
                     A.Rotate(limit=20),
                     A.RandomBrightnessContrast(p=0.25),
+                    A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
                     A.Blur()
                 ] + common_transforms
             )

diff --git a/src/utils.py b/src/utils.py
@@ -1,18 +1,20 @@
 import torch
 import torch.nn.functional as F
 from tqdm.auto import tqdm
+import numpy as np
 
 def inference(model: torch.nn.Module, device: torch.device, test_loader: torch.utils.data.DataLoader):
+    """Run ``model`` over ``test_loader`` and return softmax probabilities.
+
+    The model is moved to ``device`` and switched to eval mode, and every
+    batch is evaluated under ``torch.no_grad()``. Class labels are no longer
+    computed here; the caller applies ``argmax`` itself if it needs them.
+
+    :param model: network to evaluate
+    :param device: device on which the model and each batch are placed
+    :param test_loader: loader whose batches are passed directly to the model
+        (each batch must support ``.to(device)``)
+    :return: NumPy array of the per-batch softmax outputs concatenated along
+        axis 0 -- presumably (num_samples, num_classes); confirm with the model
+    """
     model.to(device)
     model.eval()
 
-    predictions = []
+    all_predictions = []
     with torch.no_grad():
         for images in tqdm(test_loader):
             images = images.to(device)
             logits = model(images)
+            # After this line ``logits`` holds softmax probabilities over dim 1,
+            # not raw logits.
             logits = F.softmax(logits, dim=1)
-            preds = logits.argmax(dim=1)
-            predictions.extend(preds.cpu().detach().numpy())
+            all_predictions.append(logits.cpu().detach().numpy())
 
+    # Join the per-batch arrays along the batch axis.
+    # NOTE(review): np.concatenate raises ValueError on an empty list, i.e. if
+    # the loader yields no batches.
+    predictions = np.concatenate(all_predictions, axis=0)
+
     return predictions
diff --git a/test_num.py b/test_num.py
@@ -0,0 +1,14 @@
+import torch
+import numpy as np
+import pandas as pd
+
+# Create an example tensor (in real use, skip this and use your existing tensor)
+data = torch.randn(10014, 500)
+
+# Convert the tensor to a NumPy array
+numpy_array = data.numpy()
+
+# Save the converted NumPy array to disk in .npy format
+np.save('testSave.npy', numpy_array)
+
+# The output is an .npy file, not a CSV, so the message says so.
+print("텐서가 성공적으로 NPY 파일로 저장되었습니다.")
diff --git a/voting_result/adamW.npy b/voting_result/adamW.npy
diff --git a/voting_result/canny_prewitt_data.npy b/voting_result/canny_prewitt_data.npy
diff --git a/voting_result/cutmix_data.npy b/voting_result/cutmix_data.npy
diff --git a/voting_result/ensemble_data.npy b/voting_result/ensemble_data.npy
diff --git a/voting_result/labelsmoothing.npy b/voting_result/labelsmoothing.npy
diff --git a/voting_result/lr_0.0001.npy b/voting_result/lr_0.0001.npy