In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn.metrics import f1_score


In [2]:
GPU_NUM = 0 # 원하는 GPU 번호 입력
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device) # change allocation of current GPU
os.environ['CUDA_LAUNCH_BLOCKING']="1"
print(device)
print(f'torch version : {torch.__version__}')

cuda:0
torch version : 1.12.1


In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(41) # Seed 고정

In [4]:
df = pd.read_csv('./Dataset/train.csv')
df.head()

Unnamed: 0,id,img_path,artist
0,0,./train/0000.jpg,Diego Velazquez
1,1,./train/0001.jpg,Vincent van Gogh
2,2,./train/0002.jpg,Claude Monet
3,3,./train/0003.jpg,Edgar Degas
4,4,./train/0004.jpg,Hieronymus Bosch


In [5]:
from collections import OrderedDict
classes = {}
for i in df['artist']:
    if i not in classes:
        # count 데이터 수
        classes[i] = 0
    else:
        classes[i] +=1
# i번째 라벨 = [화가이름, image 수]
convert_labels = sorted(classes.items(), key=lambda x : x[1], reverse=True)

# key= 화가이름 value = [라벨번호, count 수]
for i in range(len(convert_labels)):
    classes[convert_labels[i][0]]=[i,convert_labels[i][1]]

In [6]:
new_df = pd.read_csv('./Dataset/artists_info.csv')
print(len(new_df))
new_df.head()

50


Unnamed: 0,name,years,genre,nationality
0,Amedeo Modigliani,1884 - 1920,Expressionism,Italian
1,Vasiliy Kandinskiy,1866 - 1944,"Expressionism,Abstractionism",Russian
2,Diego Rivera,1886 - 1957,"Social Realism,Muralism",Mexican
3,Claude Monet,1840 - 1926,Impressionism,French
4,Rene Magritte,1898 - 1967,"Surrealism,Impressionism",Belgian


In [7]:
# classes [label, count, years, genre, nationality]
for name in classes.keys():
    for i in range(len(new_df)):
        if new_df.loc[i]['name'] == name:
            classes[name].extend(new_df.loc[i].iloc[1:])
            classes[name] = classes[name]
classes = OrderedDict(sorted(classes.items(), key = lambda t : t[1][1],reverse=True))
for i in range(50):
    print(list(classes.keys())[i])
    print(classes[list(classes.keys())[i]])
    

Vincent van Gogh
[0, 628, '1853 ??1890', 'Post-Impressionism', 'Dutch']
Edgar Degas
[1, 488, '1834 - 1917', 'Impressionism', 'French']
Pablo Picasso
[2, 302, '1881 - 1973', 'Cubism', 'Spanish']
Pierre-Auguste Renoir
[3, 232, '1841 - 1919', 'Impressionism', 'French']
Albrecht Du rer
[4, 219]
Paul Gauguin
[5, 219, '1848 ??1903', 'Symbolism,Post-Impressionism', 'French']
Francisco Goya
[6, 203, '1746 - 1828', 'Romanticism', 'Spanish']
Rembrandt
[7, 180, '1606 - 1669', 'Baroque', 'Dutch']
Titian
[8, 172, '1488 - 1576', 'High Renaissance,Mannerism', 'Italian']
Marc Chagall
[9, 172, '1887 - 1985', 'Primitivism', 'French,Jewish,Belarusian']
Alfred Sisley
[10, 164, '1839 - 1899', 'Impressionism', 'French,British']
Paul Klee
[11, 141, '1879 ??1940', 'Expressionism,Abstractionism,Surrealism', 'German,Swiss']
Rene Magritte
[12, 136, '1898 - 1967', 'Surrealism,Impressionism', 'Belgian']
Andy Warhol
[13, 131, '1928 ??1987', 'Pop Art', 'American']
Amedeo Modigliani
[14, 131, '1884 - 1920', 'Expressi

In [8]:
def cut_data(df,number,class_info,seed):
    print(f'total data : {len(df)}')
    for i in class_info:
        if class_info[i][1] > number:
            a = df[df['artist']==i]
            drop_index = list(a.sample(class_info[i][1]-number,random_state=seed)['id'])
            class_info[i][1]=number
            print(f'{i} delete {len(drop_index)}')
            df.drop(index=drop_index,inplace=True,axis=0)
    print(f'ater data : {len(df)}')
    return df 

In [9]:
df = cut_data(df,200,classes,41)
print(classes)

total data : 5911
Vincent van Gogh delete 428
Edgar Degas delete 288
Pablo Picasso delete 102
Pierre-Auguste Renoir delete 32
Albrecht Du rer delete 19
Paul Gauguin delete 19
Francisco Goya delete 3
ater data : 5020
OrderedDict([('Vincent van Gogh', [0, 200, '1853 ??1890', 'Post-Impressionism', 'Dutch']), ('Edgar Degas', [1, 200, '1834 - 1917', 'Impressionism', 'French']), ('Pablo Picasso', [2, 200, '1881 - 1973', 'Cubism', 'Spanish']), ('Pierre-Auguste Renoir', [3, 200, '1841 - 1919', 'Impressionism', 'French']), ('Albrecht Du rer', [4, 200]), ('Paul Gauguin', [5, 200, '1848 ??1903', 'Symbolism,Post-Impressionism', 'French']), ('Francisco Goya', [6, 200, '1746 - 1828', 'Romanticism', 'Spanish']), ('Rembrandt', [7, 180, '1606 - 1669', 'Baroque', 'Dutch']), ('Titian', [8, 172, '1488 - 1576', 'High Renaissance,Mannerism', 'Italian']), ('Marc Chagall', [9, 172, '1887 - 1985', 'Primitivism', 'French,Jewish,Belarusian']), ('Alfred Sisley', [10, 164, '1839 - 1899', 'Impressionism', 'French,B

In [10]:
def get_data(df, classes , infer=False):
    imgs = []
    labels = []
    if infer:
        return df['img_path'].values

    for i in range(len(df)):
        labels.append(classes[df['artist'].iloc[i]][0])
        imgs.append([df['img_path'].iloc[i]])

    return imgs, labels

In [11]:
from imblearn.over_sampling import RandomOverSampler, SMOTE
from collections import Counter

all_imgs , all_labels = get_data(df,classes= classes)

oversampling = RandomOverSampler(random_state=42)
all_imgs, all_labels = oversampling.fit_resample(all_imgs, all_labels)
print(Counter(all_labels))

Counter({25: 201, 37: 201, 18: 201, 3: 201, 12: 201, 44: 201, 21: 201, 46: 201, 10: 201, 34: 201, 7: 201, 6: 201, 1: 201, 2: 201, 8: 201, 17: 201, 19: 201, 22: 201, 13: 201, 36: 201, 30: 201, 14: 201, 39: 201, 20: 201, 23: 201, 0: 201, 4: 201, 5: 201, 16: 201, 38: 201, 48: 201, 11: 201, 41: 201, 9: 201, 32: 201, 15: 201, 31: 201, 43: 201, 27: 201, 49: 201, 42: 201, 33: 201, 28: 201, 35: 201, 26: 201, 29: 201, 40: 201, 24: 201, 47: 201, 45: 201})


In [12]:
class CustomDataset(Dataset):
    def __init__(self, img_paths, labels, class_info,transforms=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transforms = transforms
        self.classes = class_info
    
    def __getitem__(self, index):
        img_path = './Dataset'+self.img_paths[index][1:]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.labels is not None:
            label = torch.zeros([50], dtype=torch.float32)
            # label[self.classes[self.labels[index]][0]] = 1
            label[self.labels[index]] = 1
            # print(f'artist name {self.labels[index]} , label = {self.classes[self.labels[index]][0]}')
            return image, label
        else:
            return image
    def __len__(self):
        return len(self.img_paths)      
    
    def getclasses(self):
        return self.classes

In [13]:
train_transform = A.Compose([
    A.Resize(256, 256),
    A.RandomCrop(224,224),
    A.HorizontalFlip( p=0.5),
    A.RandomRotate90(p=0.5),
    A.ColorJitter(brightness=0.2, p=0.5),
    # A.CoarseDropout(max_holes=4, max_height=16, max_width=16, 
    #                          min_holes=None, min_height=16, min_width=16,always_apply=False, p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std= (0.229,0.224,0.224), max_pixel_value=255),
    #A.Normalize(max_pixel_value=255),
    # (HxWxC) -> (CxHxW)
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(224, 224),
    #A.Normalize(max_pixel_value=255),
    A.Normalize(mean=(0.485, 0.456, 0.406), std= (0.229,0.224,0.224), max_pixel_value=255),
    ToTensorV2()
])

In [14]:
from torchvision.models import convnext_large,ConvNeXt_Large_Weights
from torchvision import models

import timm
class BaseModel(nn.Module):
    def __init__(self, num_classes=len(classes)):
        super(BaseModel, self).__init__()
        #self.backbone = convnext_large(weight=ConvNeXt_Large_Weights.DEFAULT)
        # self.backbone = models.convnext_base(weights=models.ConvNeXt_Base_Weights.DEFAULT)
        # self.backbone = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)
        #self.backbone= models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.IMAGENET1K_V1)
        #self.backbone = timm.create_model('vit_base_patch16_224', pretrained=True)
        self.backbone = models.swin_s(weights=models.Swin_S_Weights.IMAGENET1K_V1)
        self.classifier = nn.Linear(1000, num_classes)
        self.drop = nn.Dropout(0.5,inplace=True)
    def forward(self, x):
        x = self.backbone(x)
        x = self.drop(x)
        x = self.classifier(x)
        return x

In [15]:
import torchsummary
model = BaseModel()
torchsummary.summary(model, (3,224,224),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 56, 56]           4,704
           Permute-2           [-1, 56, 56, 96]               0
         LayerNorm-3           [-1, 56, 56, 96]             192
         LayerNorm-4           [-1, 56, 56, 96]             192
ShiftedWindowAttention-5           [-1, 56, 56, 96]               0
   StochasticDepth-6           [-1, 56, 56, 96]               0
         LayerNorm-7           [-1, 56, 56, 96]             192
            Linear-8          [-1, 56, 56, 384]          37,248
              GELU-9          [-1, 56, 56, 384]               0
          Dropout-10          [-1, 56, 56, 384]               0
           Linear-11           [-1, 56, 56, 96]          36,960
          Dropout-12           [-1, 56, 56, 96]               0
  StochasticDepth-13           [-1, 56, 56, 96]               0
SwinTransformerBlock-14           [

In [16]:
def sigmoid_focal_loss(
    inputs: torch.Tensor,
    targets: torch.Tensor,
    alpha: float = 0.25,
    gamma: float = 2,
    reduction: str = "mean",
    classes=classes
) -> torch.Tensor:
    """
    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
    Args:
        inputs (Tensor): A float tensor of arbitrary shape.
                The predictions for each example.
        targets (Tensor): A float tensor with the same shape as inputs. Stores the binary
                classification label for each element in inputs
                (0 for the negative class and 1 for the positive class).
        alpha (float): Weighting factor in range (0,1) to balance
                positive vs negative examples or -1 for ignore. Default: ``0.25``.
        gamma (float): Exponent of the modulating factor (1 - p_t) to
                balance easy vs hard examples. Default: ``2``.
        reduction (string): ``'none'`` | ``'mean'`` | ``'sum'``
                ``'none'``: No reduction will be applied to the output.
                ``'mean'``: The output will be averaged.
                ``'sum'``: The output will be summed. Default: ``'none'``.
    Returns:
        Loss tensor with the reduction option applied.
    """
    # Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py
    p = torch.sigmoid(inputs)
    # label smoothing
    targets = targets*(1-0.1)+0.1/50
    ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss
    # # 라벨마다 가중치
    # for i in range(targets.shape[0]):
    #     k = targets[i].argmax(0).item()
    #     more = torch.tensor(classes[list(classes.keys())[k]][1])
    #     loss[i] = loss[i]*630/more
        
    # Check reduction option and return loss accordingly
    if reduction == "none":
        pass
    elif reduction == "mean":
        loss = loss.mean()
    elif reduction == "sum":
        loss = loss.sum()
    else:
        raise ValueError(
            f"Invalid Value for arg 'reduction': '{reduction} \n Supported reduction modes: 'none', 'mean', 'sum'"
        )
    return loss

In [17]:
def competition_metric(true,pred):
    
    return f1_score(true,pred,average='macro')

In [18]:
def validation(model, criterion,test_loader, device):
    model.eval()

    model_preds = []
    true_labels = []

    val_loss = []

    with torch.no_grad():
        for img, label in tqdm(iter(test_loader)):
            img, label = img.float().to(device), label.to(device)

            model_pred = model(img)

            loss = criterion(model_pred, label)

            val_loss.append(loss.item())
            model_preds += model_pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += label.argmax(1).detach().cpu().numpy().tolist()
    val_f1 = competition_metric(true_labels, model_preds)
    return np.mean(val_loss), val_f1

# val_loss, val_score = validation(model,criterion, test_loader, device)

In [19]:
def train(model, optimizer, train_loader, criterion,scheduler, device):
    model.train()
    train_loss = []
    for img , label in tqdm(iter(train_loader)):
        img, label = img.float().to(device), label.to(device)
        optimizer.zero_grad()

        model_pred = model(img)

        loss = criterion(model_pred, label)

        loss.backward()
        optimizer.step()

        if scheduler is not None:
            scheduler.step()
        train_loss.append(loss.item())

    tr_loss  = np.mean(np.array(train_loss))

    return tr_loss
# train_loss = train(model,optimizer, train_loader, criterion,device)


In [20]:
from sklearn.model_selection import StratifiedKFold

def kfold_train(base_model, all_images, all_labels,class_info,train_transform,test_transform=None,k=5):
    print(f'Total Dataset : {len(all_images)}, {type(all_images)}')
    skf = StratifiedKFold(n_splits=k)
    total_history = []
    data_length = len(all_images)//k
    print(data_length)
    all_images = np.array(all_images)
    all_labels = np.array(all_labels)
    c = 0
    for train_index, test_index in skf.split(all_images,all_labels):
        c+=1
        print(f'{c}st Train')
        # train_images = all_images.iloc[train_index].values
        # train_label = all_labels.iloc[train_index].values

        # val_images = all_images.iloc[test_index].values
        # val_label = all_labels.iloc[test_index].values
        train_images = np.take(all_images,train_index)
        train_labels = np.take(all_labels,train_index)

        val_images = np.take(all_images,test_index)
        val_labels = np.take(all_labels,test_index)

        train_dataset = CustomDataset(train_images,train_labels,class_info,train_transform)
        train_loader = DataLoader(train_dataset, batch_size = 16, shuffle=True)

        val_dataset = CustomDataset(val_images,val_labels,class_info,test_transform)
        val_loader = DataLoader(val_dataset, batch_size = 16, shuffle=False)

        model = base_model()
        model.to(device)
        epoch = 100
        best_f1 = 0
        early = 0
        history = {'train_loss':[],'val_loss':[],'f1_score':[]}
        
        optimizer = torch.optim.Adam(params=model.parameters(),lr=5e-3)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=7,gamma=0.5)
        # criterion = sigmoid_focal_loss
        criterion = F.binary_cross_entropy_with_logits
        for e in range(1,epoch):
            train_loss = train(model, optimizer, train_loader, criterion,scheduler, device)
            val_loss, f1_score = validation(model,criterion,val_loader,device)

            history['train_loss'].append(train_loss)
            history['val_loss'].append(val_loss)
            history['f1_score'].append(f1_score)

            if best_f1 < f1_score:
                best_f1 = f1_score
                torch.save({
                    'epoch': e,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                },f'./checkpoint/best_{c}_conv_small_{f1_score:.3f}.pth')
                print('Model Saved')
            else:
                early +=1
                if early > 5:
                    print('Early stopping')
                    break
            print(f'{c} - Epoch [{e}], Train Loss : {train_loss:.5f}, Val Loss : {val_loss:.5f}, Val F1 Score : {f1_score:.3f}')

        total_history.append(history)
    return total_history

In [21]:
result = kfold_train(BaseModel,all_imgs, all_labels,classes,train_transform,test_transform,k=5)

Total Dataset : 10050, <class 'list'>
2010
1st Train


100%|██████████| 503/503 [03:29<00:00,  2.40it/s]
100%|██████████| 126/126 [00:34<00:00,  3.60it/s]


Model Saved
1 - Epoch [1], Train Loss : 0.35291, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:27<00:00,  2.43it/s]
100%|██████████| 126/126 [00:34<00:00,  3.64it/s]


1 - Epoch [2], Train Loss : 0.30752, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:29<00:00,  2.40it/s]
100%|██████████| 126/126 [00:34<00:00,  3.63it/s]


1 - Epoch [3], Train Loss : 0.30523, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:27<00:00,  2.43it/s]
100%|██████████| 126/126 [00:34<00:00,  3.63it/s]


1 - Epoch [4], Train Loss : 0.30466, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:27<00:00,  2.42it/s]
100%|██████████| 126/126 [00:34<00:00,  3.67it/s]


1 - Epoch [5], Train Loss : 0.30950, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:27<00:00,  2.42it/s]
100%|██████████| 126/126 [00:36<00:00,  3.47it/s]


1 - Epoch [6], Train Loss : 0.31317, Val Loss : 0.29539, Val F1 Score : 0.001


100%|██████████| 503/503 [03:26<00:00,  2.44it/s]
100%|██████████| 126/126 [00:34<00:00,  3.66it/s]


Early stopping
2st Train


100%|██████████| 503/503 [03:27<00:00,  2.42it/s]
100%|██████████| 126/126 [00:31<00:00,  4.05it/s]


Model Saved
2 - Epoch [1], Train Loss : 0.16185, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:03<00:00,  2.74it/s]
100%|██████████| 126/126 [00:28<00:00,  4.43it/s]


2 - Epoch [2], Train Loss : 0.12368, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:03<00:00,  2.74it/s]
100%|██████████| 126/126 [00:28<00:00,  4.38it/s]


2 - Epoch [3], Train Loss : 0.12279, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:03<00:00,  2.74it/s]
100%|██████████| 126/126 [00:28<00:00,  4.43it/s]


2 - Epoch [4], Train Loss : 0.12281, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.73it/s]
100%|██████████| 126/126 [00:28<00:00,  4.47it/s]


2 - Epoch [5], Train Loss : 0.12246, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.73it/s]
100%|██████████| 126/126 [00:28<00:00,  4.46it/s]


2 - Epoch [6], Train Loss : 0.12379, Val Loss : 0.11221, Val F1 Score : 0.001


100%|██████████| 503/503 [03:03<00:00,  2.74it/s]
100%|██████████| 126/126 [00:28<00:00,  4.35it/s]


Early stopping
3st Train


100%|██████████| 503/503 [03:04<00:00,  2.73it/s]
100%|██████████| 126/126 [00:28<00:00,  4.45it/s]


Model Saved
3 - Epoch [1], Train Loss : 0.23517, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.72it/s]
100%|██████████| 126/126 [00:28<00:00,  4.46it/s]


3 - Epoch [2], Train Loss : 0.16525, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.73it/s]
100%|██████████| 126/126 [00:27<00:00,  4.54it/s]


3 - Epoch [3], Train Loss : 0.15750, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.72it/s]
100%|██████████| 126/126 [00:27<00:00,  4.53it/s]


3 - Epoch [4], Train Loss : 0.15939, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.72it/s]
100%|██████████| 126/126 [00:27<00:00,  4.53it/s]


3 - Epoch [5], Train Loss : 0.16462, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:04<00:00,  2.73it/s]
100%|██████████| 126/126 [00:27<00:00,  4.51it/s]


3 - Epoch [6], Train Loss : 0.15828, Val Loss : 0.10237, Val F1 Score : 0.001


100%|██████████| 503/503 [03:05<00:00,  2.72it/s]
100%|██████████| 126/126 [00:27<00:00,  4.51it/s]


Early stopping
4st Train


 14%|█▎        | 68/503 [00:25<02:34,  2.81it/s]