In [1]:
import torch
import numpy as np
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import torch.optim as optim
from tqdm import tqdm
from transformers import ViTForImageClassification, ViTFeatureExtractor
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import timm
from sklearn.metrics import log_loss
import random
from torch.optim.swa_utils import AveragedModel, update_bn, SWALR
from torchvision.transforms.functional import to_pil_image
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt
from torchcam.methods import SmoothGradCAMpp  # or GradCAM

2025-06-12 09:31:25.487705: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-12 09:31:25.496100: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749688285.504876 2368790 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749688285.507514 2368790 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-12 09:31:25.517112: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# seed_everything(42) # Seed 고정

In [3]:
test_path = "/home/aicontest/HAI_car/data/test"
train_path = "/home/aicontest/HAI_car/data/train"
train_class = [i for i in os.listdir(train_path)]

In [4]:
train_list = [f"{k}/{i}" for k in train_class for i in os.listdir(f"{train_path}/{k}")]
test_list = sorted([i for i in os.listdir(test_path)])
test_list

['TEST_00000.jpg',
 'TEST_00001.jpg',
 'TEST_00002.jpg',
 'TEST_00003.jpg',
 'TEST_00004.jpg',
 'TEST_00005.jpg',
 'TEST_00006.jpg',
 'TEST_00007.jpg',
 'TEST_00008.jpg',
 'TEST_00009.jpg',
 'TEST_00010.jpg',
 'TEST_00011.jpg',
 'TEST_00012.jpg',
 'TEST_00013.jpg',
 'TEST_00014.jpg',
 'TEST_00015.jpg',
 'TEST_00016.jpg',
 'TEST_00017.jpg',
 'TEST_00018.jpg',
 'TEST_00019.jpg',
 'TEST_00020.jpg',
 'TEST_00021.jpg',
 'TEST_00022.jpg',
 'TEST_00023.jpg',
 'TEST_00024.jpg',
 'TEST_00025.jpg',
 'TEST_00026.jpg',
 'TEST_00027.jpg',
 'TEST_00028.jpg',
 'TEST_00029.jpg',
 'TEST_00030.jpg',
 'TEST_00031.jpg',
 'TEST_00032.jpg',
 'TEST_00033.jpg',
 'TEST_00034.jpg',
 'TEST_00035.jpg',
 'TEST_00036.jpg',
 'TEST_00037.jpg',
 'TEST_00038.jpg',
 'TEST_00039.jpg',
 'TEST_00040.jpg',
 'TEST_00041.jpg',
 'TEST_00042.jpg',
 'TEST_00043.jpg',
 'TEST_00044.jpg',
 'TEST_00045.jpg',
 'TEST_00046.jpg',
 'TEST_00047.jpg',
 'TEST_00048.jpg',
 'TEST_00049.jpg',
 'TEST_00050.jpg',
 'TEST_00051.jpg',
 'TEST_00052

In [5]:
class CutoutCustom(A.ImageOnlyTransform):
    def __init__(self, size=50, always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.size = size

    def apply(self, image, **params):
        H, W, _ = image.shape
        x = np.random.randint(W)
        y = np.random.randint(H)
        x1 = np.clip(x - self.size // 2, 0, W)
        x2 = np.clip(x + self.size // 2, 0, W)
        y1 = np.clip(y - self.size // 2, 0, H)
        y2 = np.clip(y + self.size // 2, 0, H)
        image[y1:y2, x1:x2, :] = 0
        return image


class BlockMasking(A.ImageOnlyTransform):
    def __init__(self, num_blocks=20, block_size=20, always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.num_blocks = num_blocks
        self.block_size = block_size

    def apply(self, image, **params):
        H, W, _ = image.shape
        for _ in range(self.num_blocks):
            x = np.random.randint(0, W - self.block_size)
            y = np.random.randint(0, H - self.block_size)
            image[y:y + self.block_size, x:x + self.block_size, :] = 0
        return image
    
class HalfCrop(A.ImageOnlyTransform):
    def __init__(self, mode='random', always_apply=False, p=0.5):
        super().__init__(always_apply, p)
        self.mode = mode

    def apply(self, image, **params):
        H, W, C = image.shape

        if self.mode == 'random':
            mode = np.random.choice(['left', 'right', 'top', 'bottom'])
        else:
            mode = self.mode

        if mode == 'left':
            return image[:, :W // 2, :]
        elif mode == 'right':
            return image[:, W // 2:, :]
        elif mode == 'top':
            return image[:H // 2, :, :]
        elif mode == 'bottom':
            return image[H // 2:, :, :]
        else:
            raise ValueError(f"Invalid mode: {mode}")

In [6]:
class CustomDataset(Dataset):
    def __init__(self, base_path, path_list, mode='train', transform=None, label_encoder=None):
        self.path = base_path
        self.data = path_list
        self.mode = mode
        self.transform = transform
        self.label_encoder = label_encoder

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, item):
        img_path = self.data[item]
        image = cv2.imread(f"{self.path}/{img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)['image']

        if self.mode == "train":
            label_str = img_path.split("/")[0]
            label = self.label_encoder.transform([label_str])[0]
            return image, label
        elif self.mode == "test":
            return image


In [7]:

class CamAttentionCrop(A.ImageOnlyTransform):
    def __init__(self, model, target_layer, size=(384, 384), device='cuda', p=1.0):
        super().__init__(always_apply=False, p=p)
        self.model = model.eval()
        self.device = device
        self.size = size
        self.target_layer = target_layer
        self.activation = None

        # Register forward hook
        self.target_layer.register_forward_hook(self._hook)

        # Fallback transform
        self.fallback_transform = A.RandomResizedCrop(
            size=size,  # ✅ 올바른 방식
            scale=(0.5, 1.0), ratio=(0.75, 1.33),
            p=1.0
        )

    def _hook(self, module, input, output):
        # Save activation from LayerNorm (B, C)
        self.activation = output.detach()

    def apply(self, image, **params):
        try:
            # Convert image to tensor and normalize [0, 1]
            image_tensor = torch.from_numpy(image).permute(2, 0, 1).float().unsqueeze(0) / 255.
            image_tensor = image_tensor.to(self.device)

            _ = self.model(image_tensor)
            cam = self.activation.squeeze(0)  # shape: (C,)
            cam_np = cam.cpu().numpy()

            # Map feature vector back to 2D spatial layout
            H, W, _ = image.shape
            side = int(np.sqrt(len(cam_np)))
            if side * side != len(cam_np):
                raise ValueError("Cannot reshape activation to square")

            cam_2d = cam_np.reshape(side, side)
            cam_resized = cv2.resize(cam_2d, (W, H), interpolation=cv2.INTER_LINEAR)

            # Max point (center of attention)
            y, x = np.unravel_index(np.argmax(cam_resized), cam_resized.shape)
            crop_h, crop_w = self.size

            x1 = max(x - crop_w // 2, 0)
            y1 = max(y - crop_h // 2, 0)
            x2 = min(x1 + crop_w, W)
            y2 = min(y1 + crop_h, H)

            if (x2 - x1 < crop_w) or (y2 - y1 < crop_h):
                raise ValueError("CAM crop too small, fallback to RandomResizedCrop")

            cropped = image[y1:y2, x1:x2]
            cropped = cv2.resize(cropped, self.size, interpolation=cv2.INTER_LINEAR)
            return cropped

        except Exception as e:
            # Fallback cropping
            fallback = self.fallback_transform(image=image)
            return fallback["image"]

In [8]:
train_lebel = sorted(list(set([p.split('/')[0] for p in train_list])))
train_lebel

['1시리즈_F20_2013_2015',
 '1시리즈_F20_2016_2019',
 '1시리즈_F40_2020_2024',
 '2008_2015_2017',
 '2시리즈_그란쿠페_F44_2020_2024',
 '2시리즈_액티브_투어러_F45_2019_2021',
 '2시리즈_액티브_투어러_U06_2022_2024',
 '3008_2세대_2018_2023',
 '3시리즈_E90_2005_2012',
 '3시리즈_F30_2013_2018',
 '3시리즈_G20_2019_2022',
 '3시리즈_G20_2023_2025',
 '3시리즈_GT_F34_2014_2021',
 '4시리즈_F32_2014_2020',
 '4시리즈_G22_2021_2023',
 '4시리즈_G22_2024_2025',
 '5008_2세대_2018_2019',
 '5008_2세대_2021_2024',
 '5시리즈_F10_2010_2016',
 '5시리즈_G30_2017_2023',
 '5시리즈_G60_2024_2025',
 '5시리즈_GT_F07_2010_2017',
 '6시리즈_F12_2011_2018',
 '6시리즈_GT_G32_2018_2020',
 '6시리즈_GT_G32_2021_2024',
 '718_박스터_2017_2024',
 '718_카이맨_2017_2024',
 '7시리즈_F01_2009_2015',
 '7시리즈_G11_2016_2018',
 '7시리즈_G11_2019_2022',
 '7시리즈_G70_2023_2025',
 '8시리즈_G15_2020_2024',
 '911_2003_2019',
 '911_992_2020_2024',
 'A4_B9_2016_2019',
 'A4_B9_2020_2024',
 'A5_F5_2019_2024',
 'A6_C8_2019_2025',
 'A7_2012_2016',
 'A7_4K_2020_2024',
 'A8_D5_2018_2023',
 'AMG_GT_2016_2024',
 'A_클래스_W176_2015_2018',
 'A_클래스_W177_2

In [9]:
stratify = [i.split("/")[0] for i in train_list]
train_root, val_root = train_test_split(
    train_list,
    test_size=0.2,
    random_state=42,
    stratify=stratify
)

num_classes = len(train_lebel)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "efficientnet_b3"
premodel = timm.create_model(model_name, pretrained=True, num_classes=num_classes)
premodel.load_state_dict(torch.load(f"cam_pth_b3.pth", map_location=device))

target_layer = premodel.blocks[-1][-1]  # Swin에서는 이 부분이 GAP 후 FC 전, CAM 추출에 적합

cam_crop = CamAttentionCrop(
    model=premodel, 
    target_layer=target_layer, 
    size=(448, 448),
    device=device,
    p=1.0
)

train_transform = A.Compose([
    HalfCrop(mode='random', p=0.3),
    # ✅ 정보 손실을 줄이고, 세부 crop 보존
    A.RandomResizedCrop(
        size=(448, 448),
        scale=(0.5, 1.0),       # 50~100% 면적
        ratio=(0.75, 1.33),     # 더 다양한 aspect ratio
        p=1.0
    ),
    
    # A.Resize(448,448),
    # # ✅ 너무 센 augment 제거하고, 디테일 보존 위주로 선택
    A.SomeOf([
        A.HorizontalFlip(p=1.0),  # 대칭만 유지
        A.RandomBrightnessContrast(p=1.0),  # 전체 명도 대비 조정
        A.HueSaturationValue(p=1.0),  # 색조 변경 (너무 세지 않게)
        A.CLAHE(p=1.0),  # 국소 대비 강화 → fine-grained에 도움
        A.OneOf([
            A.Sharpen(alpha=(0.1, 0.3)),  # Fine edge 강조
            A.GaussianBlur(blur_limit=(3, 5))  # 흐린 이미지 일반화
        ], p=1.0)
    ], n=2, replace=False, p=0.6),  # 2개만 선택

    CutoutCustom(size=100, p=0.3),  # ✅ 사용자 정의 Cutout
    BlockMasking(num_blocks=15, block_size=15, p=0.3),  # ✅ 사용자 정의 블록 마스킹
    
    A.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

test_transform = A.Compose([
    A.RandomResizedCrop(
        size=(448, 448),
        scale=(0.5, 1.0),       # 50~100% 면적
        ratio=(0.75, 1.33),     # 더 다양한 aspect ratio
        p=1.0
    ),
    A.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])


  premodel.load_state_dict(torch.load(f"cam_pth_b3.pth", map_location=device))
  super().__init__(always_apply=False, p=p)


In [10]:
label_encoder = LabelEncoder()
label_encoder.fit(train_lebel)

In [38]:
train_dataset = ConcatDataset([CustomDataset(train_path, train_root, 'train', train_transform, label_encoder)])# , CustomDataset(train_path, train_root, 'train', test_transform, label_encoder)])
boosting_dataset = ConcatDataset([CustomDataset(train_path, train_root, 'train', test_transform, label_encoder), CustomDataset(train_path, val_root, 'train', test_transform, label_encoder)])
val_dataset = CustomDataset(train_path, val_root, 'train', test_transform, label_encoder)
test_dataset = CustomDataset(test_path, test_list, 'test', transform=None, label_encoder=label_encoder)

In [12]:
# # 1. 모델 준비
# premodel.to(device)
# premodel.eval()

# # 2. CAM extractor 준비
# cam_extractor = SmoothGradCAMpp(premodel, target_layer=target_layer)  # Swin에서 'norm' 사용

# # 3. 데이터 하나 가져오기
# image_tensor, label = val_dataset[0]  # image_tensor: (C, H, W)

# # 4. CAM 추출
# input_tensor = image_tensor.unsqueeze(0).to(device)
# output = premodel(input_tensor)
# pred = output.argmax(dim=1).item()
# activation_map = cam_extractor(pred, output)[0].detach()  # (H, W)


# # 5. 이미지 복원 (normalize 해제)
# image_np = image_tensor.permute(1, 2, 0).cpu().numpy()
# image_np = (image_np * [0.229, 0.224, 0.225]) + [0.485, 0.456, 0.406]
# image_np = (image_np * 255).astype('uint8')

# # 6. CAM 색상 맵 만들기
# # 1. 토큰 임베딩에서 spatial map으로 reshape
# # 예: (1, 12, 1024) → (1, 3, 4, 1024) → (3, 4)
# activation_map = activation_map.squeeze(0)  # (12, 1024)

# # 2. 각 위치별로 L2 norm 또는 mean으로 summary
# activation_map = activation_map.norm(dim=-1)  # (12,) 또는 .mean(dim=-1)

# # 3. spatial 형태로 reshape (예: 3 x 4)
# activation_map = activation_map.reshape(3, 4)  # or 적절한 H, W 값 (patch 수 기반)

# # 4. resize to image shape for CAM
# activation_map = cv2.resize(activation_map.cpu().numpy(), (image_np.shape[1], image_np.shape[0]))
# activation_map = np.clip(activation_map, 0, 1)
# activation_map = np.uint8(255 * activation_map)

# # 5. apply colormap
# heatmap = cv2.applyColorMap(activation_map, cv2.COLORMAP_JET)
# heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
# heatmap = cv2.resize(heatmap, (image_np.shape[1], image_np.shape[0]))

# # 6. overlay
# cam_overlay = cv2.addWeighted(image_np, 0.5, heatmap, 0.5, 0)

# # 7. 시각화
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 3, 1)
# plt.imshow(image_np)
# plt.title("Original Image")
# plt.axis('off')

# plt.subplot(1, 3, 2)
# plt.imshow(activation_map, cmap='jet')
# plt.title("CAM Map")
# plt.axis('off')

# plt.subplot(1, 3, 3)
# plt.imshow(cam_overlay)
# plt.title("Overlay")
# plt.axis('off')
# plt.show()

In [39]:
batch_size = 16
num_workers = 8

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers
)
boosting_dataloader = DataLoader(
    boosting_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0
)

In [14]:
class SAM(torch.optim.Optimizer):
    def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, adaptive=adaptive, **kwargs)
        super(SAM, self).__init__(params, defaults)

        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                self.state[p]["old_p"] = p.data.clone()
                e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.data = self.state[p]["old_p"]  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

    def load_state_dict(self, state_dict):
        super().load_state_dict(state_dict)
        self.base_optimizer.param_groups = self.param_groups

In [15]:
class ArcFace(nn.Module):
    def __init__(self, in_features, out_features, s=30.0, m=0.50):
        super().__init__()
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        # Normalize input and weights
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))  # cosine(theta)

        if label is None:
            return self.s * cosine  # inference 시 margin 없이 리턴

        # Clamp to valid range
        theta = torch.acos(torch.clamp(cosine, -1.0 + 1e-7, 1.0 - 1e-7))
        target_logits = torch.cos(theta + self.m)

        # 🔧 안정성을 위한 보강
        label = label.long().view(-1, 1)
        one_hot = torch.zeros_like(cosine, device=cosine.device)
        one_hot.scatter_(1, label, 1.0)

        output = self.s * (one_hot * target_logits + (1.0 - one_hot) * cosine)
        return output


In [16]:
class MultiArcFace(nn.Module):
    def __init__(self, model_name, num_classes, s=30.0, m=0.50):
        super().__init__()
        self.backbone = timm.create_model(model_name, pretrained=True, num_classes=0)  # classifier 제거
        self.backbone.head = nn.Identity()
        self.feature_dim = self.backbone.num_features
        self.arcface = ArcFace(self.feature_dim, num_classes, s=s, m=m)
        self.softmax = nn.Linear(self.feature_dim, num_classes)

    def forward(self, x, label):
        # Swin 계열은 forward_features 사용
        if hasattr(self.backbone, 'forward_features'):
            features = self.backbone.forward_features(x)
        else:
            features = self.backbone(x)

        features = features.mean(dim=(1, 2))
            
        arc = self.arcface(features, label)
        softmax = self.softmax(features)

        return arc, softmax

In [17]:
num_classes = len(train_lebel)  # 라벨 개수
model_name = "eva02_base_patch14_448"
# model = MultiArcFace(model_name, num_classes)
model = timm.create_model(model_name, pretrained=True, num_classes=num_classes)
print(model)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Eva(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (rope): RotaryEmbeddingCat()
  (blocks): ModuleList(
    (0-11): 12 x EvaBlock(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): EvaAttention(
        (q_proj): Linear(in_features=768, out_features=768, bias=True)
        (k_proj): Linear(in_features=768, out_features=768, bias=False)
        (v_proj): Linear(in_features=768, out_features=768, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): SwiGLU(
        (fc1_g): Linear(in_features=768, out_features=2048, bias=True)
        (fc1_x): Linear(in_features=76

In [18]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)

base_optimizer = torch.optim.AdamW
optimizer = SAM(model.parameters(), base_optimizer, weight_decay=2.e-7, lr=5.e-5)
criterion = nn.CrossEntropyLoss()
swa_model = AveragedModel(model)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer.base_optimizer, T_max=150)
# scheduler = SWALR(optimizer, swa_lr=1e-5)

In [19]:
def calibrated_logits(logits, T=2.0):
    return logits / T

In [20]:
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0

    for images, labels in tqdm(dataloader, desc="Training", total=len(dataloader)):
        images, labels = images.to(device), labels.to(device)

        def closure():
            optimizer.zero_grad()
            # outputs, _ = model(images, labels)  # ArcFace requires labels
            outputs = model(images)  
            loss = criterion(outputs, labels)
            loss.backward()
            return loss

        loss = closure()
        optimizer.step(closure)
        total_loss += loss.item()

    return total_loss / len(dataloader)


def val_one_epoch(model, dataloader, device, criterion, num_classes):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    all_probs = []
    all_labels = []

    wrong_answer = []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Validating", total=len(dataloader)):
            images, labels = images.to(device), labels.to(device)

            # outputs, _ = model(images, labels)  # 이제 label 없어도 OK
            outputs = model(images)
            # outputs = calibrated_logits(outputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            
            if preds != labels:
                wrong_answer.append((images, labels, preds))

            probs = F.softmax(outputs, dim=1)
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    average_loss = total_loss / len(dataloader)
    accuracy = correct / total * 100
    val_logloss = log_loss(all_labels, all_probs, labels=list(range(num_classes)))

    return average_loss, accuracy, val_logloss, wrong_answer



In [21]:
best_val_metric = float('inf')
best_logloss = float('inf')
current_patient = 0
patient = 5
epochs = 50
swa_start = int(0.75 * epochs)

for epoch in range(epochs):
    
    print(f"epoch {epoch+1}")
    train_loss = train_one_epoch(model, train_dataloader, optimizer, criterion, device)
    val_loss, val_metric, val_logloss, wrong_answer = val_one_epoch(model, val_dataloader, device, criterion, num_classes)

    print(f"train loss : {train_loss:.4}, val acc : {val_metric:.4}, val loss : {val_loss:.4}, val logloss : {val_logloss:.4}")

    scheduler.step()

    # if epoch > swa_start:
    #     swa_model.update_parameters(model)
    #     scheduler.step()
    # else:
    #     scheduler.step()

    # Best model 저장
    if val_logloss < best_logloss:
        best_logloss = val_logloss
        torch.save(model.state_dict(), f'best_model.pth')
        print(f"📦 Best model saved at epoch {epoch+1} (logloss: {val_logloss:.4f})")
        current_patient = 0
        best_val_metric = val_loss
    else:
        current_patient += 1
        if current_patient > patient:
            print(f"early stopping!! at Epoch {epoch+1}")
            break

# update_bn(train_dataloader, swa_model)
# torch.save(swa_model.module.state_dict(), "swa_best_model.pth")

epoch 1


Training:   0%|          | 0/1657 [00:00<?, ?it/s]

Training: 100%|██████████| 1657/1657 [47:56<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:41<00:00, 41.10it/s]


train loss : 4.299, val acc : 64.94, val loss : 1.55, val logloss : 1.55
📦 Best model saved at epoch 1 (logloss: 1.5498)
epoch 2


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.35it/s]


train loss : 0.7783, val acc : 88.94, val loss : 0.4192, val logloss : 0.4192
📦 Best model saved at epoch 2 (logloss: 0.4192)
epoch 3


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.40it/s]


train loss : 0.2938, val acc : 91.57, val loss : 0.2831, val logloss : 0.2831
📦 Best model saved at epoch 3 (logloss: 0.2831)
epoch 4


Training: 100%|██████████| 1657/1657 [47:58<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.40it/s]


train loss : 0.1936, val acc : 92.97, val loss : 0.2347, val logloss : 0.2347
📦 Best model saved at epoch 4 (logloss: 0.2347)
epoch 5


Training: 100%|██████████| 1657/1657 [47:58<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.41it/s]


train loss : 0.152, val acc : 94.25, val loss : 0.1925, val logloss : 0.1925
📦 Best model saved at epoch 5 (logloss: 0.1925)
epoch 6


Training: 100%|██████████| 1657/1657 [48:01<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.36it/s]


train loss : 0.1248, val acc : 94.79, val loss : 0.1851, val logloss : 0.1851
📦 Best model saved at epoch 6 (logloss: 0.1851)
epoch 7


Training: 100%|██████████| 1657/1657 [48:01<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.35it/s]


train loss : 0.1076, val acc : 94.72, val loss : 0.172, val logloss : 0.172
📦 Best model saved at epoch 7 (logloss: 0.1720)
epoch 8


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.39it/s]


train loss : 0.09674, val acc : 95.22, val loss : 0.1537, val logloss : 0.1537
📦 Best model saved at epoch 8 (logloss: 0.1537)
epoch 9


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.34it/s]


train loss : 0.08491, val acc : 95.5, val loss : 0.1496, val logloss : 0.1496
📦 Best model saved at epoch 9 (logloss: 0.1496)
epoch 10


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.40it/s]


train loss : 0.07759, val acc : 95.75, val loss : 0.1467, val logloss : 0.1467
📦 Best model saved at epoch 10 (logloss: 0.1467)
epoch 11


Training: 100%|██████████| 1657/1657 [48:01<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.42it/s]


train loss : 0.07305, val acc : 95.56, val loss : 0.1552, val logloss : 0.1552
epoch 12


Training: 100%|██████████| 1657/1657 [48:00<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.39it/s]


train loss : 0.06578, val acc : 95.59, val loss : 0.1521, val logloss : 0.1521
epoch 13


Training: 100%|██████████| 1657/1657 [48:01<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.36it/s]


train loss : 0.05929, val acc : 95.78, val loss : 0.1356, val logloss : 0.1356
📦 Best model saved at epoch 13 (logloss: 0.1356)
epoch 14


Training: 100%|██████████| 1657/1657 [48:01<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.41it/s]


train loss : 0.05571, val acc : 96.33, val loss : 0.1299, val logloss : 0.1299
📦 Best model saved at epoch 14 (logloss: 0.1299)
epoch 15


Training: 100%|██████████| 1657/1657 [48:02<00:00,  1.74s/it]
Validating: 100%|██████████| 6628/6628 [02:40<00:00, 41.37it/s]


train loss : 0.05178, val acc : 95.75, val loss : 0.1471, val logloss : 0.1471
epoch 16


Training:  61%|██████    | 1004/1657 [29:05<18:55,  1.74s/it]


KeyboardInterrupt: 

In [21]:
# 1. 모델 불러오기
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.to(device)

# 반복 조건
max_iter = 3
threshold = 50
current_iter = 0

num_epochs_per_boost = 3

# criterion, optimizer 정의 (학습용)
base_optimizer = torch.optim.AdamW
optimizer = SAM(model.parameters(), base_optimizer, weight_decay=2.e-7, lr=1.e-6)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer.base_optimizer, T_max=20)

while current_iter < max_iter:
    print(f"\n🔁 Iteration {current_iter + 1}")

    # 2. 전체 train dataset 에서 잘못 맞춘 샘플 추출
    _, _, _, wrong_answer = val_one_epoch(model, boosting_dataloader, device, criterion, num_classes)

    print(f"❌ Wrong samples: {len(wrong_answer)}")
    if len(wrong_answer) <= threshold:
        print("✅ Enough correction, stopping early.")
        break

    # 3. 틀린 샘플만을 이용한 DataLoader 생성
    images, labels, _ = zip(*wrong_answer)  # image: Tensor (C, H, W), label: int
    images = torch.stack(images)            # (N, C, H, W)
    labels = torch.tensor(labels)

    images = images.squeeze(1)

    wrong_dataset = TensorDataset(images, labels)
    wrong_loader = DataLoader(wrong_dataset, batch_size=16, shuffle=True)

    # 4. 틀린 샘플만을 이용한 재학습
    model.train()
    for epoch in range(num_epochs_per_boost):
        for images, labels in tqdm(wrong_loader, desc="boosting", total=len(wrong_loader)):
            images, labels = images.to(device), labels.to(device)
            # images = train_transform(image=images)["image"]

            def closure():
                optimizer.zero_grad()
                outputs = model(images)  
                loss = criterion(outputs, labels)
                loss.backward()
                return loss

            loss = closure()
            optimizer.step(closure)

        scheduler.step()

    current_iter += 1

    torch.save(model.state_dict(), f'best_model.pth')

  model.load_state_dict(torch.load("best_model.pth", map_location=device))



🔁 Iteration 1


Validating:   0%|          | 0/33137 [00:00<?, ?it/s]

Validating: 100%|██████████| 33137/33137 [13:21<00:00, 41.34it/s]


❌ Wrong samples: 627


boosting: 100%|██████████| 40/40 [01:07<00:00,  1.69s/it]
boosting: 100%|██████████| 40/40 [01:07<00:00,  1.70s/it]
boosting: 100%|██████████| 40/40 [01:07<00:00,  1.70s/it]



🔁 Iteration 2


Validating: 100%|██████████| 33137/33137 [13:27<00:00, 41.04it/s]


❌ Wrong samples: 565


boosting: 100%|██████████| 36/36 [01:00<00:00,  1.69s/it]
boosting: 100%|██████████| 36/36 [01:01<00:00,  1.70s/it]
boosting: 100%|██████████| 36/36 [01:01<00:00,  1.70s/it]



🔁 Iteration 3


Validating: 100%|██████████| 33137/33137 [13:27<00:00, 41.05it/s]


❌ Wrong samples: 511


boosting: 100%|██████████| 32/32 [00:54<00:00,  1.71s/it]
boosting: 100%|██████████| 32/32 [00:55<00:00,  1.73s/it]
boosting: 100%|██████████| 32/32 [00:55<00:00,  1.73s/it]


In [22]:
model.load_state_dict(torch.load(f"best_model.pth", map_location=device))
model.to(device)
val_loss, val_metric, val_logloss, wrong_answer = val_one_epoch(model, val_dataloader, device, criterion, num_classes)

  model.load_state_dict(torch.load(f"best_model.pth", map_location=device))
Validating:   2%|▏         | 120/6628 [00:02<02:37, 41.27it/s]

Validating: 100%|██████████| 6628/6628 [02:41<00:00, 41.06it/s]


In [None]:
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
mean = torch.tensor(mean).view(-1, 1, 1).to(device)
std = torch.tensor(std).view(-1, 1, 1).to(device)

print(f"wrong answer : {len(wrong_answer)}")

for i, l, w in wrong_answer[:20]:
    image = (i * std + mean).squeeze(0).permute(1, 2, 0).cpu().numpy()
    label = label_encoder.inverse_transform([l.cpu()])
    wrong_label = label_encoder.inverse_transform([w.cpu()])

    print(f"{label} - {wrong_label}")
    plt.imshow(image)
    plt.show()

In [75]:
def tta_random_crop_batch(model, image_np_batch, transform, device, tta_times=3):
    model.eval()
    batch_size = len(image_np_batch)
    all_probs = [[] for _ in range(batch_size)]

    with torch.no_grad():
        for _ in range(tta_times):
            aug_batch = []

            # TTA 변형 반복 적용
            for image_np in image_np_batch:
                aug = transform(image=image_np)["image"]  # Tensor (C, H, W)
                aug_batch.append(aug)

            aug_batch = torch.stack(aug_batch).to(device)  # (B, C, H, W)

            logits = model(aug_batch)  # (B, num_classes)
            probs = torch.softmax(logits, dim=1)

            for i in range(batch_size):
                all_probs[i].append(probs[i])

    # 각 이미지에 대해 confidence(softmax max값)가 가장 높은 결과 선택
    max_confidence_probs = []
    for prob_list in all_probs:
        # 각 TTA 결과의 confidence 추출
        confidences = [prob.max().item() for prob in prob_list]
        best_idx = confidences.index(max(confidences))
        max_confidence_probs.append(prob_list[best_idx])

    max_confidence_probs = torch.stack(max_confidence_probs, dim=0)  # (B, num_classes)
    return max_confidence_probs


    # # 평균 계산
    # avg_probs = [torch.stack(p_list, dim=0).mean(dim=0) for p_list in all_probs]
    # avg_probs = torch.stack(avg_probs, dim=0)  # (B, num_classes)
    # return avg_probs


In [76]:
# 저장된 모델 로드
# model = ConvNeXtArcFace(model_name=model_name, num_classes=num_classes)
# model = MultiArcFace(model_name, num_classes)
model = timm.create_model(model_name, pretrained=True, num_classes=num_classes)
model.load_state_dict(torch.load(f"best_model.pth", map_location=device))
model.to(device)

temperature = 1

model.eval()
results = []

with torch.no_grad():
    for images in tqdm(test_dataloader, desc="TTA Test", total=len(test_dataloader)):
        image_np = images.numpy()  # (H, W, C) 형태
        # images = images.to(device)

        probs = tta_random_crop_batch(model, image_np, test_transform, device, tta_times=5)
        # probs = model(images)
        probs = probs / temperature
        probs = F.softmax(probs, dim=1)

        for prob in probs.cpu():
            result = {
                train_lebel[i]: prob[i].item()
                for i in range(len(train_lebel))
            }
            results.append(result)
        

# 결과 정리
pred = pd.DataFrame(results)
sample_submission = pd.read_csv("/home/aicontest/HAI_car/data/sample_submission.csv")
class_columns = sample_submission.columns[1:]
pred = pred[class_columns]
sample_submission[class_columns] = pred.values
sample_submission.to_csv('submission.csv', index=False, encoding='utf-8-sig')

  model.load_state_dict(torch.load(f"best_model.pth", map_location=device))
TTA Test: 100%|██████████| 8258/8258 [14:59<00:00,  9.18it/s]


In [95]:
import pandas as pd
import numpy as np

def compare_rowwise_probability_diff(csv1_path, csv2_path, top_k=10, only_different_preds=False):
    df1 = pd.read_csv(csv1_path)
    df2 = pd.read_csv(csv2_path)

    assert df1.shape == df2.shape, "Shape mismatch between CSVs"

    # ID 제외한 확률 컬럼
    exclude_cols = ['ID'] if 'ID' in df1.columns else []
    class_cols = [col for col in df1.columns if col not in exclude_cols]

    diffs = []

    num_wrong = 0
    for i in range(len(df1)):
        probs1 = df1[class_cols].iloc[i].values.astype(float)
        probs2 = df2[class_cols].iloc[i].values.astype(float)

        pred1 = np.argmax(probs1)
        pred2 = np.argmax(probs2)

        prob_diff = np.abs(probs1 - probs2).mean()
        sample_id = df1['ID'].iloc[i] if 'ID' in df1.columns else i
        if pred1 != pred2:
            num_wrong += 1
            diffs.append((sample_id, pred1, pred2, prob_diff))

    # 차이 큰 순서대로 정렬
    sorted_diffs = sorted(diffs, key=lambda x: x[3], reverse=True)

    print(f"wrong : {num_wrong}")
    print(f"Top {top_k} rows with largest mean probability difference" +
          (" (only different predictions)" if only_different_preds else "") + ":")

    for rank, (idx, p1, p2, d) in enumerate(sorted_diffs[:top_k], 1):
        print(f"{rank:2d}. ID: {idx}, Pred1: {p1}, Pred2: {p2}, MeanAbsDiff: {d:.4f}")

    return pd.DataFrame(sorted_diffs, columns=["ID", "Pred1", "Pred2", "MeanAbsDiff"])


In [96]:
compare_rowwise_probability_diff("submission.csv", "eva_submission.csv", top_k=20)


wrong : 173
Top 20 rows with largest mean probability difference:
 1. ID: TEST_07982, Pred1: 380, Pred2: 381, MeanAbsDiff: 0.0050
 2. ID: TEST_00722, Pred1: 356, Pred2: 283, MeanAbsDiff: 0.0050
 3. ID: TEST_03770, Pred1: 156, Pred2: 155, MeanAbsDiff: 0.0050
 4. ID: TEST_01029, Pred1: 181, Pred2: 368, MeanAbsDiff: 0.0049
 5. ID: TEST_06988, Pred1: 352, Pred2: 71, MeanAbsDiff: 0.0049
 6. ID: TEST_07015, Pred1: 370, Pred2: 371, MeanAbsDiff: 0.0049
 7. ID: TEST_06600, Pred1: 269, Pred2: 270, MeanAbsDiff: 0.0049
 8. ID: TEST_00670, Pred1: 42, Pred2: 77, MeanAbsDiff: 0.0049
 9. ID: TEST_05734, Pred1: 25, Pred2: 26, MeanAbsDiff: 0.0049
10. ID: TEST_00121, Pred1: 93, Pred2: 207, MeanAbsDiff: 0.0049
11. ID: TEST_06347, Pred1: 24, Pred2: 23, MeanAbsDiff: 0.0049
12. ID: TEST_05606, Pred1: 15, Pred2: 14, MeanAbsDiff: 0.0049
13. ID: TEST_03542, Pred1: 366, Pred2: 281, MeanAbsDiff: 0.0049
14. ID: TEST_03624, Pred1: 264, Pred2: 90, MeanAbsDiff: 0.0049
15. ID: TEST_03196, Pred1: 23, Pred2: 24, MeanAbs

Unnamed: 0,ID,Pred1,Pred2,MeanAbsDiff
0,TEST_07982,380,381,0.004993
1,TEST_00722,356,283,0.004969
2,TEST_03770,156,155,0.004955
3,TEST_01029,181,368,0.004945
4,TEST_06988,352,71,0.004941
...,...,...,...,...
168,TEST_00447,169,52,0.002762
169,TEST_02243,149,86,0.002614
170,TEST_00773,266,373,0.002612
171,TEST_05034,227,149,0.002413
