In [None]:
import os
import time
import math
import datetime
import warnings
import copy

import torch
import torchvision
import torch.nn.functional as F
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR
from efficientnet_pytorch import EfficientNet
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Silence library warnings and pin the torch / numpy random seeds.
# Runs on a GPU can still differ slightly, so this only narrows the variance.
warnings.simplefilter('ignore')
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(47)

In [None]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class MelanomaDataset(Dataset):
    """Dataset that pairs pre-resized image arrays with the rows of a metadata CSV."""

    def __init__(self, filename: str, data: np.ndarray, train: bool = True, transforms = None):
        """
        Class initialization
        Args:
            filename (str): path to the CSV file that is read into a pandas.DataFrame
            data (np.ndarray): resized images data; each item has a shape of (HxWxC)
            train (bool): flag of whether a training dataset is being initialized or testing one;
                when True the CSV must contain a 'target' column
            transforms: image transformation method applied to the PIL version of each image;
                when None the raw array is converted to a float32 (CxHxW) tensor instead
        """
        self.df = pd.read_csv(filename)
        self.data = data
        self.transforms = transforms
        self.train = train
        if self.train:
            # Labels are cached once; __getitem__ reads them from here.
            self.y = self.df['target'].values

    @staticmethod
    def _array_to_tensor(image):
        """Convert an (HxWxC) array to a float32 (CxHxW) tensor; uint8 input is scaled to [0, 1]."""
        tensor = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1)
        if tensor.dtype == torch.uint8:
            return tensor.to(torch.float32) / 255
        return tensor.to(torch.float32)

    def __getitem__(self, index):
        """Return the image tensor at `index`, together with its label when `train` is True."""
        x = self.data[index]

        if self.transforms:
            x = transforms.ToPILImage()(x)
            x = self.transforms(x)
        else:
            # Without a transform the raw ndarray could never satisfy the checks below.
            x = self._array_to_tensor(x)

        assert x.shape == (3, 224, 224)
        assert x.dtype == torch.float32

        if self.train:
            # Positional lookup in the cached label array (same row as df.iloc[index]).
            y = self.y[index]
            assert y == 0 or y == 1
            return x, y
        return x

    def __len__(self):
        """Number of images held in `data`."""
        return len(self.data)
    
    
class Net(nn.Module):
    """Wraps a backbone and swaps its final linear layer for a single-logit head."""

    # Attribute names under which supported backbones keep their classifier layer:
    # `fc` is what the ResNet used in this notebook exposes, `_fc` is the attribute
    # the EfficientNet variant in this notebook addresses.
    _HEAD_NAMES = ('fc', '_fc')

    def __init__(self, arch):
        """
        Args:
            arch (nn.Module): backbone exposing its last linear layer as `fc` or `_fc`

        Raises:
            AttributeError: if the backbone has neither attribute with `in_features`
        """
        super(Net, self).__init__()
        self.arch = arch
        head_name = next(
            (name for name in self._HEAD_NAMES
             if hasattr(getattr(arch, name, None), 'in_features')),
            None,
        )
        if head_name is None:
            raise AttributeError(
                "backbone must expose its classifier as one of {}".format(self._HEAD_NAMES))
        in_features = getattr(arch, head_name).in_features
        setattr(self.arch, head_name,
                nn.Linear(in_features=in_features, out_features=1, bias=True))

    def forward(self, x):
        """
        No sigmoid in forward because we are going to use BCEWithLogitsLoss
        Which applies sigmoid for us when calculating a loss
        """
        x = self.arch(x)
        return x
    
class ArchFaceNet(nn.Module):
    """Backbone whose single output is the cosine between the embedding and one weight vector.

    Both the embedding and the head weight are L2-normalised inside the forward
    pass, so the output lies in [-1, 1] and can be fed to an angular-margin loss.
    """

    def __init__(self, arch):
        """
        Args:
            arch (nn.Module): backbone exposing its last linear layer as `_fc`
                (used by `forward`) or `fc` (used by `forward_resnet`)

        Raises:
            AttributeError: if the backbone has neither attribute with `in_features`
        """
        super(ArchFaceNet, self).__init__()
        self.arch = arch
        self._head_name = next(
            (name for name in ('_fc', 'fc')
             if hasattr(getattr(arch, name, None), 'in_features')),
            None,
        )
        if self._head_name is None:
            raise AttributeError("backbone must expose its classifier as '_fc' or 'fc'")
        in_features = getattr(arch, self._head_name).in_features
        setattr(self.arch, self._head_name,
                nn.Linear(in_features=in_features, out_features=1, bias=False))

    def _cosine_logits(self, x):
        """Cosine similarity between each row of `x` and the head weight, shape (N, 1)."""
        weight = getattr(self.arch, self._head_name).weight
        # Normalise functionally: the registered Parameter is never replaced, so the
        # optimizer keeps updating the same tensor, and gradients flow through the
        # normalisation. F.normalize divides by the L2 norm (not its square).
        return F.linear(F.normalize(x, dim=1), F.normalize(weight, dim=1))

    def forward_resnet(self, x):
        """Forward pass for backbones with ResNet-style attributes (conv1 ... avgpool)."""
        x = self.arch.conv1(x)
        x = self.arch.bn1(x)
        x = self.arch.relu(x)
        x = self.arch.maxpool(x)

        x = self.arch.layer1(x)
        x = self.arch.layer2(x)
        x = self.arch.layer3(x)
        x = self.arch.layer4(x)

        x = self.arch.avgpool(x)
        x = torch.flatten(x, 1)
        return self._cosine_logits(x)

    def forward(self, inputs):
        """Forward pass for backbones exposing extract_features / _avg_pooling / _dropout."""
        x = self.arch.extract_features(inputs)

        # Pooling and final linear layer
        x = self.arch._avg_pooling(x)
        x = x.flatten(start_dim=1)
        x = self.arch._dropout(x)

        return self._cosine_logits(x)
    
class BinaryArchFaceLoss(nn.Module):
    """Binary cross-entropy on cosine inputs with an additive angular margin for positives.

    The inputs are treated as cosines: the angle is recovered with acos, the
    margin `m` is added for samples whose target is 1, and the resulting cosine
    is scaled by `s` before being used as a logit.
    """

    def __init__(self, m=math.pi / 6, s=2, eps=1e-6):
        """
        Args:
            m (float): angular margin in radians added to positive samples, must be > 0
            s (float): scale applied to the margin-adjusted cosine, must be > 1
            eps (float): inputs are clamped to [-1 + eps, 1 - eps] before acos
        """
        super().__init__()
        assert m > 0
        assert s > 1
        self.m = m
        self.s = s
        self.eps = eps

    def forward(self, inputs, targets):
        """Return the mean loss for cosine `inputs` and 0/1 `targets` of the same shape."""
        targets = targets.to(inputs.dtype)
        # acos is NaN outside [-1, 1] and its gradient is infinite at +/-1, so keep
        # the cosine strictly inside the open interval.
        cosine = inputs.clamp(-1.0 + self.eps, 1.0 - self.eps)
        theta = torch.acos(cosine)
        logits = torch.cos(theta + targets * self.m) * self.s
        return F.binary_cross_entropy_with_logits(logits, targets)

In [None]:
class FocalLoss(nn.Module):
    """Binary focal loss: per-element BCE scaled by alpha * (1 - pt) ** gamma."""

    def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
        """
        Args:
            alpha (float): constant multiplier applied to every element
            gamma (float): exponent of the (1 - pt) modulating factor
            logits (bool): True when inputs are raw logits, False when they are probabilities
            reduce (bool): True to return the mean loss, False to return the per-element loss
        """
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Compute the focal loss for `inputs` against `targets` of the same shape."""
        # reduction='none' keeps the per-element loss; it replaces the deprecated
        # `reduce=False` keyword of the functional BCE API.
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        # exp(-BCE) is the probability the model assigns to the true class.
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        return F_loss

In [None]:
# Per-channel statistics handed to Normalize; these match the ImageNet values
# documented for torchvision's pretrained models.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Random flips as augmentation, then tensor conversion and normalisation.
_pipeline_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform = transforms.Compose(_pipeline_steps)


In [None]:
# Directory holding the competition CSV files. Set the MELANOMA_DATA_DIR
# environment variable to point somewhere else instead of editing this cell.
DATA_DIR = os.environ.get('MELANOMA_DATA_DIR', '/home/kitamura/dataset/Melanoma')

# The image arrays are read from the current working directory.
train = MelanomaDataset(filename=os.path.join(DATA_DIR, 'train.csv'),
                        data=np.load('x_train_224.npy'),
                        transforms=transform)
# NOTE(review): the test set reuses `transform`, which includes random flips, so
# test predictions vary between runs — confirm this is intended.
test = MelanomaDataset(filename=os.path.join(DATA_DIR, 'test.csv'),
                       data=np.load('x_test_224.npy'),
                       train=False,
                       transforms=transform)

In [None]:
def _train_one_epoch(model, optim, criterion, train_loader):
    """Run one optimisation pass over `train_loader`.

    Returns:
        tuple: (sum of the per-batch losses, correctly classified samples, samples seen)
    """
    model.train()
    loss_sum, correct, seen = 0.0, 0, 0
    for x, y in train_loader:
        # as_tensor moves/casts the batch without the extra copy torch.tensor() makes.
        x = torch.as_tensor(x, device=device, dtype=torch.float32)
        y = torch.as_tensor(y, device=device, dtype=torch.float32).unsqueeze(1)
        optim.zero_grad()
        z = model(x)
        loss = criterion(z, y)
        loss.backward()
        optim.step()
        pred = torch.round(torch.sigmoid(z))  # round off sigmoid to obtain predictions
        correct += (pred == y).sum().item()
        loss_sum += loss.item()
        seen += y.shape[0]
    return loss_sum, correct, seen


def _evaluate(model, criterion, val_loader):
    """Predict on `val_loader` without gradients.

    Returns:
        tuple: (mean loss per sample, 1-D int array of labels, 1-D array of probabilities)
    """
    model.eval()
    probs, labels = [], []
    loss_sum = 0.0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val = torch.as_tensor(x_val, device=device, dtype=torch.float32)
            y_val = torch.as_tensor(y_val, device=device, dtype=torch.float32).unsqueeze(1)
            z_val = model(x_val)
            # Weight by batch size so a short last batch does not skew the average
            # (assumes the criterion returns a per-batch mean — TODO confirm for custom losses).
            loss_sum += criterion(z_val, y_val).item() * x_val.shape[0]
            # Collect batches in order instead of slicing by j * batch_size, which
            # misplaces the last batch whenever it is smaller than the others.
            probs.append(torch.sigmoid(z_val).cpu())
            labels.append(y_val.cpu())
    val_prob = torch.cat(probs).numpy().ravel()
    val_true = torch.cat(labels).numpy().ravel().astype(int)
    return loss_sum / max(len(val_true), 1), val_true, val_prob


def run_train(epochs, model, optim, criterion, train_loader, val_loader, es_patience, model_path):
    """Train `model` with early stopping on validation ROC AUC.

    After every epoch the model is evaluated on `val_loader`; whenever the ROC AUC
    is at least as good as the best seen so far, the whole model object is saved
    to `model_path` with torch.save.

    Args:
        epochs (int): maximum number of epochs
        model (nn.Module): model producing one logit per sample
        optim: optimizer updating the model's parameters
        criterion: loss taking (logits, float targets of shape (N, 1))
        train_loader: loader yielding (images, labels) batches for training
        val_loader: loader yielding (images, labels) batches for validation
        es_patience (int): epochs without improvement tolerated before stopping
        model_path (str): where the best model is saved

    Returns:
        float: best validation ROC AUC reached
    """
    best_val = 0.0  # Best validation score within this fold
    patience = es_patience  # Current patience counter

    for epoch in range(epochs):
        start_time = time.time()

        # "Loss" in the log line is the sum of the per-batch training losses.
        epoch_loss, correct, seen = _train_one_epoch(model, optim, criterion, train_loader)
        # Divide by the samples actually seen rather than a notebook-level global.
        train_acc = correct / max(seen, 1)

        val_loss, val_true, val_prob = _evaluate(model, criterion, val_loader)
        val_acc = accuracy_score(val_true, np.round(val_prob))
        val_roc = roc_auc_score(val_true, val_prob)

        print('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val loss: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'.format(
            epoch + 1, epoch_loss, train_acc, val_acc, val_loss, val_roc, str(datetime.timedelta(seconds=time.time() - start_time))))

        if val_roc >= best_val:
            best_val = val_roc
            patience = es_patience  # Resetting patience since we have a new best validation score
            torch.save(model, model_path)  # Saving current best model (whole module, reloaded with torch.load)
        else:
            patience -= 1
            if patience <= 0:
                print('Early stopping. Best Val roc_auc: {:.3f}'.format(best_val))
                break

    return best_val

In [None]:
epochs = 10
model_path = 'model.pth'
es_patience = 3
batch_size = 70
skf = StratifiedKFold(n_splits=5, random_state=47, shuffle=True)
# Running average of the test probabilities over all folds.
preds = torch.zeros((len(test), 1), dtype=torch.float32, device=device)
# The test loader does not depend on the fold, so it is built once.
test_loader = DataLoader(dataset=test, batch_size=50, shuffle=False)

for fold, (train_idx, val_idx) in enumerate(skf.split(X=np.zeros(len(train)), y=train.y), 1):
    print('=' * 20, 'Fold', fold, '=' * 20)

    # Alternative backbones / heads that were tried:
    #     arch = EfficientNet.from_pretrained('efficientnet-b0')
    #     model = Net(arch=arch)
    #     model = ArchFaceNet(arch=arch)
    arch = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)
    model = Net(arch=arch)

    model = model.to(device)
    # Only layer2-4 and the new head are handed to the optimizer, each with its own
    # learning rate; the remaining backbone parameters are not updated.
    optim = torch.optim.Adam([
        {"params": model.arch.layer2.parameters(), "lr": 0.00001},
        {"params": model.arch.layer3.parameters(), "lr": 0.0001},
        {"params": model.arch.layer4.parameters(), "lr": 0.0001},
        {"params": model.arch.fc.parameters(), "lr": 0.001},
    ])
    criterion = nn.BCEWithLogitsLoss()
    #criterion = BinaryArchFaceLoss()
    #criterion =FocalLoss()
    train_loader = DataLoader(dataset=Subset(train, train_idx), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=Subset(train, val_idx), batch_size=50, shuffle=False)

    run_train(epochs, model, optim, criterion, train_loader, val_loader, es_patience, model_path)

    # Loading best model of this fold. run_train saves the whole module object, so
    # this unpickles it; only load files produced by this notebook.
    model = torch.load(model_path)
    model.eval()  # switch model to the evaluation mode
    offset = 0
    with torch.no_grad():
        for x_test in test_loader:
            x_test = torch.as_tensor(x_test, device=device, dtype=torch.float32)
            fold_probs = torch.sigmoid(model(x_test))
            # Advance by the rows actually written: the last batch may be shorter than
            # the others, so i * current_batch_size would land on the wrong rows.
            stop = offset + fold_probs.shape[0]
            preds[offset:stop] += fold_probs / skf.n_splits
            offset = stop

In [None]:
# Fill the sample submission's 'target' column with the fold-averaged
# predictions and write the result next to the notebook.
sample_submission_path = '/home/kitamura/dataset/Melanoma/sample_submission.csv'
submission_path = 'submission_res50_finetune.csv'

sub = pd.read_csv(sample_submission_path)
fold_averaged = preds.cpu().numpy()
sub['target'] = fold_averaged
sub.to_csv(submission_path, index=False)