Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exp/prewitt and soft voting #12

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 130 additions & 12 deletions inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from torch.utils.data import DataLoader
import pandas as pd
import os
import numpy as np

from src.dataset import CustomDataset
from src.transforms import TransformSelector
Expand All @@ -28,25 +29,142 @@ def main():
# Set up test dataset and dataloader
test_dataset = CustomDataset(root_dir=testdata_dir, info_df=test_info, transform=test_transform, is_inference=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=False)

# # Load model 1
# model_selector_1 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_1 = model_selector_1.get_model()
# model_1 = layer_modification(model_1)
# model_1_path = os.path.join("./train_result", "best_model.pt")
# model_1.load_state_dict(torch.load(model_1_path, map_location=device))
# model_1.to(device)
# # Run inference
# predictions_model_1 = inference(model=model_1, device=device, test_loader=test_loader)
# np.save('prewitt_data.npy',predictions_model_1)
# print("1 save")
# predictions_model_1 = torch.tensor(predictions_model_1, dtype=torch.float32, device=device)

# Load model
model_selector = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
model = model_selector.get_model()
model = layer_modification(model)
# # Load model 2
# model_selector_2 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_2 = model_selector_2.get_model()
# model_2 = layer_modification(model_2)
# model_2_path = os.path.join("./train_result", "model_adamw.pt")
# model_2.load_state_dict(torch.load(model_2_path, map_location=device))
# model_2.to(device)
# # Run inference
# predictions_model_2 = inference(model=model_2, device=device, test_loader=test_loader)
# np.save('adamW.npy',predictions_model_2)
# print("2 save")
# predictions_model_2 = torch.tensor(predictions_model_2, dtype=torch.float32, device=device)

# Load the best model from ./train_result/best_model.pt
model_path = os.path.join("./train_result", "best_model.pt")
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
# # Load model 3
# model_selector_3 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_3 = model_selector_3.get_model()
# model_3 = layer_modification(model_3)
# model_3_path = os.path.join("./train_result", "2nd_ensemble_model_until_7epoch_best_model.pt")
# model_3.load_state_dict(torch.load(model_3_path, map_location=device))
# model_3.to(device)
# # Run inference
# predictions_model_3 = inference(model=model_3, device=device, test_loader=test_loader)
# np.save('ensemble_data.npy',predictions_model_3)
# predictions_model_3 = torch.tensor(predictions_model_3, dtype=torch.float32, device=device)
# print("3 save")

# Run inference
predictions = inference(model=model, device=device, test_loader=test_loader)

# # Load model 4
# model_selector_4 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_4 = model_selector_4.get_model()
# model_4 = layer_modification(model_4)
# model_4_path = os.path.join("./train_result", "Canny.pt")
# model_4.load_state_dict(torch.load(model_4_path, map_location=device))
# model_4.to(device)
# # Run inference
# predictions_model_4 = inference(model=model_4, device=device, test_loader=test_loader)
# np.save('canny.npy',predictions_model_4)
# print("4 save")
# predictions_model_4 = torch.tensor(predictions_model_4, dtype=torch.float32, device=device)


# # Load model 5
# model_selector_5 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_5 = model_selector_5.get_model()
# model_5 = layer_modification(model_5)
# model_5_path = os.path.join("./train_result", "aug5_best_model.pt")
# model_5.load_state_dict(torch.load(model_5_path, map_location=device))
# model_5.to(device)
# # Run inference
# predictions_model_5 = inference(model=model_5, device=device, test_loader=test_loader)
# np.save('cutmix_data.npy',predictions_model_5)
# print("5 save")
# predictions_model_5 = torch.tensor(predictions_model_5, dtype=torch.float32, device=device)

# #Load model 6
# model_selector_6 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_6 = model_selector_6.get_model()
# model_6 = layer_modification(model_6)
# model_6_path = os.path.join("./train_result", "eva02_labelsmoothing_0.1_batchsize_64.pt")
# model_6.load_state_dict(torch.load(model_6_path, map_location=device))
# model_6.to(device)
# # Run inference
# predictions_model_6 = inference(model=model_6, device=device, test_loader=test_loader)
# np.save('labelsmoothing.npy',predictions_model_6)
# print("6 save")
# #predictions_model_6 = torch.tensor(predictions_model_6, dtype=torch.float32, device=device)

# #Load model 7
# model_selector_7 = ModelSelector(model_type='timm', num_classes=num_classes, model_name='eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', pretrained=False)
# model_7 = model_selector_7.get_model()
# model_7 = layer_modification(model_7)
# model_7_path = os.path.join("./train_result", "model_eva02_lr_0.0001.pt")
# model_7.load_state_dict(torch.load(model_7_path, map_location=device))
# model_7.to(device)
# # Run inference
# predictions_model_7 = inference(model=model_7, device=device, test_loader=test_loader)
# np.save('lr_0.0001.npy',predictions_model_7)
# print("7 save")
# predictions_model_7 = torch.tensor(predictions_model_7, dtype=torch.float32, device=device)

predictions_model_1 = np.load('adamW.npy')
print(predictions_model_1.shape)

predictions_model_2 = np.load('cutmix_data.npy')
print(predictions_model_2.shape)

#predictions_model_3 = np.load('ensemble_data.npy')
#print(predictions_model_3.shape)

predictions_model_4 = np.load('labelsmoothing.npy')
print(predictions_model_4.shape)

#predictions_model_5 = np.load('lr_0.0001.npy')
#print(predictions_model_5.shape)

predictions_model_6 = np.load('canny_prewitt_data.npy')
print(predictions_model_6.shape)

predictions_model_7 = np.load('prewitt_data.npy')
print(predictions_model_7.shape)
#predictions_model_8 = np.load('new_ensemble.npy')
#print(predictions_model_8.shape)

# # Soft Voting 수행
soft_voting = (predictions_model_1 + predictions_model_2 +
predictions_model_4 + predictions_model_6 +
predictions_model_7) / 5
print("Soft voting shape:", soft_voting.shape)

soft_voting = torch.tensor(soft_voting, dtype=torch.float32)
voting_result = soft_voting.argmax(dim=1)
print("Voting result shape:", voting_result.shape)

# Save results
test_info['target'] = predictions
test_info['target'] = voting_result.tolist()
test_info = test_info.reset_index().rename(columns={"index": "ID"})
test_info.to_csv("output.csv", index=False)
test_info.to_csv("output_soft_voting.csv", index=False)
print("Inference completed and results saved to output.csv")

# # # 결과 비교
# # matches = (voting_result == model_2_result).sum().item()
# # total = voting_result.numel()
# # print(f"Matches with model_2: {matches}/{total} ({matches/total*100:.2f}%)")
if __name__ == "__main__":
main()
53 changes: 53 additions & 0 deletions src/compare_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pandas as pd
import numpy as np

def compare_csv_files(file1_path: str, file2_path: str, id_column: str = 'ID', *, verbose: bool = False) -> int:
    """Compare two CSV files cell by cell, matching rows on an ID column.

    Only rows whose ID appears in both files and only columns present in
    both files are compared. Cells that are NaN in both files count as equal.
    A summary is printed to stdout.

    :param file1_path: path to the first CSV file
    :param file2_path: path to the second CSV file
    :param id_column: name of the column that identifies a row (default: 'ID')
    :param verbose: when True, also print every differing cell
    :return: number of differing cells among the shared rows and columns
    :raises ValueError: if ``id_column`` is missing from either file
    """
    df1 = pd.read_csv(file1_path)
    df2 = pd.read_csv(file2_path)

    # Both files must carry the ID column, otherwise rows cannot be matched.
    if id_column not in df1.columns or id_column not in df2.columns:
        raise ValueError(f"'{id_column}' 열이 두 CSV 파일 모두에 존재하지 않습니다.")

    df1 = df1.set_index(id_column)
    df2 = df2.set_index(id_column)

    # Restrict the comparison to IDs present in both files.
    common_indices = df1.index.intersection(df2.index)

    # Report columns that exist in only one of the files.
    columns_diff = set(df1.columns).symmetric_difference(df2.columns)
    if columns_diff:
        print(f"두 파일의 열이 다릅니다. 차이: {columns_diff}")

    # Always select the shared columns in one fixed order (that of file 1).
    # DataFrame `!=` requires identically ordered labels, so two files with
    # the same columns in a different order would otherwise raise ValueError.
    common_columns = [col for col in df1.columns if col in df2.columns]
    df1_common = df1.loc[common_indices, common_columns]
    df2_common = df2.loc[common_indices, common_columns]

    # NaN != NaN evaluates to True, so mask out cells missing in both files.
    differences = (df1_common != df2_common) & ~(df1_common.isna() & df2_common.isna())
    diff_count = int(differences.sum().sum())

    print(f"공통 행에서 {diff_count}개의 값이 다릅니다.")

    if verbose and diff_count > 0:
        print("\n값이 다른 셀의 상세 정보:")
        for col in differences.columns:
            for idx in differences.index[differences[col].to_numpy()]:
                print(f"행 {idx}, 열 '{col}': {df1_common.loc[idx, col]} vs {df2_common.loc[idx, col]}")

    return diff_count

# Example usage. Guarded so that importing this module does not immediately
# read the hard-coded CSV paths below; the comparison runs only when this file
# is executed as a script.
if __name__ == "__main__":
    compare_csv_files(
        "/data/ephemeral/home/deamin/level1-imageclassification-cv-04/output_soft_voting.csv",
        "/data/ephemeral/home/deamin/level1-imageclassification-cv-04/output_wrong_sVoting.csv",
    )  # alternative file: /data/ephemeral/home/deamin/level1-imageclassification-cv-04/pj1_lr_00001_output.csv
21 changes: 21 additions & 0 deletions src/prewitt_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import cv2
import numpy as np

def enhance_edges(image, weight=0.3):
    """Overlay Prewitt edge magnitude on an image.

    The image is converted to grayscale, filtered with horizontal and
    vertical Prewitt kernels, and the gradient magnitude is blended back
    onto the original with ``cv2.addWeighted``.

    :param image: 3-channel image; converted with ``COLOR_BGR2GRAY``.
        NOTE(review): assumes uint8 data so it can be blended with the uint8
        edge map; callers that pass RGB get slightly different gray
        weights - confirm the channel order.
    :param weight: blend weight applied to the edge map (the original image
        keeps weight 1)
    :return: the edge-enhanced image as returned by ``cv2.addWeighted``

    NOTE(review): the earlier version filtered in uint8, which saturated
    negative gradients to 0 and let the squared responses wrap around.
    Output therefore differs from that version; models trained on the old
    output may need to be retrained or re-validated.
    """
    # Convert to grayscale.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Prewitt kernels for horizontal and vertical gradients.
    kernel_x = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=np.float32)
    kernel_y = np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]], dtype=np.float32)

    # Filter in float32 so negative responses survive and squaring cannot
    # overflow the way it does on uint8 data.
    prewitt_x = cv2.filter2D(gray, cv2.CV_32F, kernel_x)
    prewitt_y = cv2.filter2D(gray, cv2.CV_32F, kernel_y)
    magnitude = np.sqrt(prewitt_x ** 2 + prewitt_y ** 2)

    # Saturate to the 8-bit range before casting instead of wrapping.
    edges = np.clip(magnitude, 0, 255).astype(np.uint8)

    # Expand the edge map to 3 channels so it can be blended with the image.
    edges_3channel = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

    # Blend the original image with the edge map.
    enhanced = cv2.addWeighted(image, 1, edges_3channel, weight, 0)

    return enhanced
4 changes: 4 additions & 0 deletions src/transforms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from src.prewitt_edges import enhance_edges

class TransformSelector:
def __init__(self, transform_type: str):
Expand All @@ -25,9 +26,12 @@ def __init__(self, is_train: bool = True):
if is_train:
self.transform = A.Compose(
[
# Albumentations 변환으로 사용
A.Lambda(image=lambda x, **kwargs: enhance_edges(x)),
A.HorizontalFlip(p=0.5),
A.Rotate(limit=20),
A.RandomBrightnessContrast(p=0.25),
A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
A.Blur()
] + common_transforms
)
Expand Down
8 changes: 5 additions & 3 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
import numpy as np

def inference(model: torch.nn.Module, device: torch.device, test_loader: torch.utils.data.DataLoader):
    """Run the model over ``test_loader`` and return per-sample softmax outputs.

    The model is moved to ``device`` and put in eval mode; gradients are
    disabled for the whole pass. Each batch's model output is passed through
    a softmax over dim 1 and the per-batch results are concatenated along
    axis 0.

    :param model: network to evaluate
    :param device: device the model and each batch are moved to
    :param test_loader: loader yielding batches of input tensors only
    :return: NumPy array of softmax outputs for all batches, in loader order
        (presumably shaped (num_samples, num_classes) - confirm against the
        model head). Callers that need class ids must apply ``argmax`` over
        axis 1 themselves.
    """
    model.to(device)
    model.eval()

    all_probabilities = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = images.to(device)
            logits = model(images)
            # Keep full probabilities (not argmax) so that several models'
            # outputs can be averaged by the caller.
            probabilities = F.softmax(logits, dim=1)
            all_probabilities.append(probabilities.cpu().detach().numpy())

    predictions = np.concatenate(all_probabilities, axis=0)

    return predictions
14 changes: 14 additions & 0 deletions test_num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import torch
import numpy as np
import pandas as pd

# Create an example tensor (in real use, drop this line and use your existing tensor).
data = torch.randn(10014, 500)

# Convert the tensor to a NumPy array.
numpy_array = data.numpy()

# Save the NumPy array to disk in .npy format.
np.save('testSave.npy', numpy_array)

# The file written above is an .npy file, not a CSV, so the message says so.
print("텐서가 성공적으로 NPY 파일로 저장되었습니다.")
Binary file added voting_result/adamW.npy
Binary file not shown.
Binary file added voting_result/canny_prewitt_data.npy
Binary file not shown.
Binary file added voting_result/cutmix_data.npy
Binary file not shown.
Binary file added voting_result/ensemble_data.npy
Binary file not shown.
Binary file added voting_result/labelsmoothing.npy
Binary file not shown.
Binary file added voting_result/lr_0.0001.npy
Binary file not shown.
Loading