# Libraries

In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Visualization

import seaborn as sns
import matplotlib.pyplot as plt

# PyTorch

import torch as T
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from torchvision import transforms

Configure GPU

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available, otherwise the CPU
device = T.device('cuda' if T.cuda.is_available() else 'cpu')

# Loading dataset

In [None]:
# Read the labels CSV; later cells use its 'id' (image file stem) and 'breed' columns
train_data = pd.read_csv("/kaggle/input/dog-breed-identification/labels.csv")
# Print the (rows, columns) shape of the label table
print(f"Train dataset shape: {train_data.shape}")
# Display the first rows of the train_data DataFrame as the cell output
train_data.head()

## Distribution of the breed classes

In [None]:
# Count the images per breed. The index and the value column are named
# explicitly so the frame has 'breed' / 'count' columns on every pandas
# version (before pandas 2.0 a bare reset_index() produced 'index' / 'breed').
breed_classes = (
    train_data['breed']
    .value_counts()
    .rename_axis('breed')
    .reset_index(name='count')
)
plt.figure(figsize=(20, 8))
# Pass the frame by keyword: the first positional argument of barplot is not
# `data` in every seaborn release.
sns.barplot(data=breed_classes, x='breed', y='count', palette="flare")
# Draw the breed names vertically so the tick labels do not overlap
plt.xticks(rotation=90)
plt.title("Distribution of the breed classes")
plt.show()

In [None]:
# Summary statistics of the number of images per breed (shown as the cell output)
breed_classes['count'].describe()

Confirm the 120 dog breed classes

In [None]:
# Number of distinct breed names in the label table (shown as the cell output)
breed_classes['breed'].nunique()

# Label Encoding

In [None]:
# Learn the breed vocabulary, then overwrite the 'breed' column in place
# with the integer class id of each row.
le = LabelEncoder().fit(train_data['breed'])
train_data['breed'] = le.transform(train_data['breed'])

In [None]:
# Lookup table from each breed name known to the encoder to its encoded integer id
label_map = dict(zip(le.classes_, le.transform(le.classes_)))

# Dog Breed Dataset

In [None]:
class Dog_Breed_Dataset(Dataset):
    """Map-style dataset that loads dog images listed in a DataFrame.

    Args:
        df: Table with an 'id' column holding the image file stem (the file
            is looked up as ``<img_base_path>/<id>.jpg``). Unless ``split`` is
            'test' it must also have a 'breed' column with the target label.
        img_base_path: Directory containing the ``.jpg`` files; a trailing
            path separator is optional.
        split: Name of the split. Only the value 'test' changes behavior:
            items are then returned without a label.
        transforms: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, df: pd.DataFrame, img_base_path: str, split: str, transforms = None):
        self.df = df
        self.img_base_path = img_base_path
        self.split = split
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``, or just ``image`` for the test split."""
        # Positional lookup, so the frame does not need a freshly reset 0..n-1 index
        img_id = self.df['id'].iloc[index]
        # Let os.path.join insert the separator instead of relying on a trailing '/'
        img_path = os.path.join(self.img_base_path, img_id + '.jpg')
        # Force 3 channels (grayscale / CMYK / RGBA files would otherwise break a
        # 3-channel Normalize) and close the file handle as soon as pixels are read
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        # Perform the transformations
        if self.transforms is not None:
            img = self.transforms(img)

        if self.split == 'test':
            return img
        return img, self.df['breed'].iloc[index]

    def __len__(self):
        """Return the number of rows (images) in the underlying DataFrame."""
        return len(self.df)

# Data Augmentation

In [None]:
# Channel-wise normalization shared by both pipelines
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, random flips (each applied with probability 0.2),
# tensor conversion, normalization
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Evaluation pipeline: same as above without the random flips
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    imagenet_normalize,
])


## Get Data Loaders

### Split the dataset

In [None]:
# Stratified 80/20 split on the encoded breed. Each part gets a fresh 0..n-1
# index so that rows can be addressed by their position.
train, val = (
    part.reset_index(drop=True)
    for part in train_test_split(
        train_data,
        test_size=0.2,
        random_state=42,
        stratify=train_data['breed'],
    )
)

### Get the data loaders

In [None]:
# Training and validation images live in the same folder; only the row
# subset and the transform pipeline differ.
train_img_dir = '/kaggle/input/dog-breed-identification/train/'

train_dataset = Dog_Breed_Dataset(
    df=train, img_base_path=train_img_dir, split='train', transforms=train_transforms
)
validation_dataset = Dog_Breed_Dataset(
    df=val, img_base_path=train_img_dir, split='val', transforms=test_transforms
)

# Settings common to both loaders; only the training loader shuffles
loader_options = dict(batch_size=64, num_workers=4)
train_dl = DataLoader(train_dataset, shuffle=True, **loader_options)
validation_dl = DataLoader(validation_dataset, shuffle=False, **loader_options)

In [None]:
# Report how many samples ended up in the training and validation datasets
print(f"Train data length: {len(train_dl.dataset)}, Validation data length: {len(validation_dl.dataset)}")

In [None]:
# def imshow(axis, inp):
#     """Denormalize and show"""
#     inp = inp.numpy().transpose((1, 2, 0))
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     inp = std * inp + mean
#     axis.imshow(inp)

In [None]:
# from mpl_toolkits.axes_grid1 import ImageGrid

# img, label = next(iter(train_dataset))
# print(img.size(), label)

# fig = plt.figure(1, figsize=(16, 12))
# grid = ImageGrid(fig, 111, nrows_ncols=(3, 4), axes_pad=0.05)    

# for i in range(img.size()[0]):
#     ax = grid[i]
#     imshow(ax, img[i])

# Training process

## Training function

In [None]:
def train_model(train_dl, val_dl, model, epochs=50):
    """Train ``model`` and restore the weights of its best validation epoch.

    Args:
        train_dl: DataLoader yielding ``(inputs, integer_labels)`` training batches.
        val_dl: DataLoader yielding ``(inputs, integer_labels)`` validation batches.
        model: Module exposing ``optim``, ``criterion``, ``get_weights()`` and
            ``load_weights(weights)``. If it also has a ``scheduler`` attribute,
            the scheduler is stepped once per epoch after validation.
        epochs: Number of passes over ``train_dl``.

    Returns:
        ``([train_acc_history, train_loss_history, val_acc_history,
        val_loss_history], model)``. Each history has one entry per epoch;
        the loss entries are the sum of the per-batch losses of that epoch.
    """
    import copy  # local import keeps this cell self-contained

    # Run on whatever device the model already lives on
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = T.device('cpu')
    # Not every model variant defines a learning-rate scheduler
    scheduler = getattr(model, 'scheduler', None)

    train_acc_history = []
    val_acc_history = []
    train_loss_history = []
    val_loss_history = []
    # Lowest validation loss seen so far; inf so that any finite loss improves on it
    best_val_loss = float('inf')
    # A state_dict holds references to the live parameter tensors, so every
    # snapshot must be a deep copy or it silently follows later updates.
    best_weights = copy.deepcopy(model.get_weights())

    for epoch in range(epochs):
        print("="*20, "Epoch: ", str(epoch), "="*20)

        train_correct_pred = 0
        val_correct_pred = 0
        train_loss = 0.0
        val_loss = 0.0

        # Set to training mode
        model.train()

        for x, y in train_dl:
            # Inputs need no gradient of their own; only parameters are optimized
            x = x.to(model_device)
            y = y.to(model_device).long()
            # Reset gradients
            model.optim.zero_grad()
            # Predict
            preds = model(x)
            # Compute the loss
            loss = model.criterion(preds, y)
            # Compute the gradients
            loss.backward()
            # Update weights
            model.optim.step()
            # Count the correct predictions
            train_correct_pred += (T.argmax(preds, dim=1) == y).sum().item()
            train_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty dataset
        train_acc = train_correct_pred / max(len(train_dl.dataset), 1)
        train_acc_history.append(train_acc)
        train_loss_history.append(train_loss)

        # Switch to evaluation mode
        model.eval()

        with T.no_grad():
            for x, y in val_dl:
                x = x.to(model_device)
                y = y.to(model_device).long()
                # Predict
                preds = model(x)
                # Compute the loss
                val_loss += model.criterion(preds, y).item()
                # Count the correct predictions
                val_correct_pred += (T.argmax(preds, dim=1) == y).sum().item()

        if scheduler is not None:
            scheduler.step()

        val_acc = val_correct_pred / max(len(val_dl.dataset), 1)
        val_acc_history.append(val_acc)
        val_loss_history.append(val_loss)

        # Keep an independent copy of the weights of the best epoch
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_weights = copy.deepcopy(model.get_weights())

        print("Train acc: {:.4f} | Train Loss: {:.4f} | Validation acc: {:.4f} | Validation Loss: {:.4f}".format(train_acc, train_loss, val_acc, val_loss))

    # Load best model
    model.load_weights(best_weights)

    return [train_acc_history, train_loss_history, val_acc_history, val_loss_history], model

## Models

Inception model

In [None]:
inception = models.inception_v3(weights='Inception_V3_Weights.DEFAULT')

# Ordered stages of the pretrained network that make up the feature
# extractor; the `fc` classifier is deliberately not part of it.
# NOTE(review): chaining the sub-modules bypasses the network's own forward(),
# including any input pre-processing it performs there, and this architecture
# is usually fed 299x299 images -- confirm 224x224 ImageNet-normalized input
# is intended.
inception_stage_names = (
    'Conv2d_1a_3x3',
    'Conv2d_2a_3x3',
    'Conv2d_2b_3x3',
    'maxpool1',
    'Conv2d_3b_1x1',
    'Conv2d_4a_3x3',
    'maxpool2',
    'Mixed_5b',
    'Mixed_5c',
    'Mixed_5d',
    'Mixed_6a',
    'Mixed_6b',
    'Mixed_6c',
    'Mixed_6d',
    'Mixed_6e',
    'Mixed_7a',
    'Mixed_7b',
    'Mixed_7c',
    'avgpool',
)
inception_model = nn.Sequential(
    *(getattr(inception, stage) for stage in inception_stage_names)
)

ResNet50 model

In [None]:
resnet50 = models.resnet50(weights='ResNet50_Weights.DEFAULT')

# Ordered stages of the pretrained network that make up the feature
# extractor; the `fc` classifier is deliberately not part of it.
resnet_stage_names = (
    'conv1',
    'bn1',
    'relu',
    'maxpool',
    'layer1',
    'layer2',
    'layer3',
    'layer4',
    'avgpool',
)
resnet50_model = nn.Sequential(
    *(getattr(resnet50, stage) for stage in resnet_stage_names)
)


In [None]:
# Freeze parameters of pretrained models
for param in resnet50_model.parameters():    
    param.requires_grad = False
    
for param in inception_model.parameters():    
    param.requires_grad = False

In [None]:
# Freeze training for all "features" layers
for param_res, param_inc in zip(resnet50.parameters(), inception.parameters()):
    param_res.requires_grad, param_inc.requires_grad = False, False
    
# replace the last fully connected layer with a Linnear layer 133 output
in_features_resnet50 = resnet50.fc.in_features
in_features_inception = inception.fc.in_features

resnet50.fc = nn.Linear(in_features_resnet50, 120)
inception.fc = nn.Linear(in_features_inception, 120)

### Model with SGD

In [None]:
class Model(nn.Module):
    """Linear classifier on top of two fixed feature extractors (SGD variant).

    The input batch is passed through both extractors, the two feature maps are
    flattened and concatenated, and a dropout + linear head produces 120 logits.
    Only the head is registered with the optimizer.

    Args:
        inception_model: First feature extractor module.
        resnet50_model: Second feature extractor module.

    NOTE(review): the head expects 4096 concatenated features per sample
    (e.g. 2048 from each extractor) -- confirm if the extractors change.
    """

    def __init__(self, inception_model, resnet50_model):
        super().__init__()

        self.inception_model = inception_model
        self.resnet50_model = resnet50_model

        self.output = nn.Sequential(
            nn.Dropout(0.7),
            nn.Linear(4096,120)
        )

        self.to(device)
        # Optimizer (only the classification head is optimized)
        self.optim = T.optim.SGD(self.output.parameters(), lr=0.005, momentum=0.9)
        # Loss
        self.criterion = T.nn.CrossEntropyLoss()
        # Scheduler: multiply the learning rate by 0.1 every 7 scheduler steps
        self.scheduler = T.optim.lr_scheduler.StepLR(self.optim, step_size=7, gamma=0.1)

    def train(self, mode: bool = True):
        """Set the mode of the head while keeping both extractors in eval mode.

        The extractors are never optimized, so their normalization layers must
        not switch to batch statistics or update running statistics; those
        buffers are also not part of what ``get_weights`` snapshots.
        """
        super().train(mode)
        self.inception_model.eval()
        self.resnet50_model.eval()
        return self

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        # No parameter of the extractors is optimized, so skip building their graph
        with T.no_grad():
            X1 = self.inception_model(x)
            X2 = self.resnet50_model(x)

        # Flatten each feature map to (batch, features) and join them
        X = T.cat([T.flatten(X1, 1), T.flatten(X2, 1)], dim=1)

        return self.output(X)

    def get_weights(self):
        """Return an independent copy of the head's state_dict."""
        import copy  # local import keeps this cell self-contained

        # state_dict() hands out references to the live tensors; copy them so a
        # stored snapshot does not change when training continues
        return copy.deepcopy(self.output.state_dict())

    def load_weights(self, weights):
        """Load a state_dict previously obtained from ``get_weights`` into the head."""
        self.output.load_state_dict(weights)

In [None]:
# Build the SGD variant on top of the two feature extractors defined above
model = Model(inception_model, resnet50_model)

In [None]:
# Train with the default number of epochs; returns the metric histories and the model
history, model = train_model(train_dl, validation_dl, model)
# Save the whole module object first, then only its state_dict
T.save(model, 'resnet-inception-sgd.pt')
T.save(model.state_dict(), 'resnet-inception-sgd-weights.pt')

### Model with AdamW

In [None]:
class Model(nn.Module):
    """Linear classifier on top of two fixed feature extractors (AdamW variant).

    The input batch is passed through both extractors, the two feature maps are
    flattened and concatenated, and a dropout + linear head produces 120 logits.
    Only the head is registered with the optimizer; this variant defines no
    learning-rate scheduler.

    Args:
        inception_model: First feature extractor module.
        resnet50_model: Second feature extractor module.

    NOTE(review): the head expects 4096 concatenated features per sample
    (e.g. 2048 from each extractor) -- confirm if the extractors change.
    """

    def __init__(self, inception_model, resnet50_model):
        super().__init__()

        self.inception_model = inception_model
        self.resnet50_model = resnet50_model

        self.output = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096,120)
        )

        self.to(device)
        # Optimizer (only the classification head is optimized)
        self.optim = T.optim.AdamW(self.output.parameters(), lr=0.005)
        # Loss
        self.criterion = T.nn.CrossEntropyLoss()

    def train(self, mode: bool = True):
        """Set the mode of the head while keeping both extractors in eval mode.

        The extractors are never optimized, so their normalization layers must
        not switch to batch statistics or update running statistics; those
        buffers are also not part of what ``get_weights`` snapshots.
        """
        super().train(mode)
        self.inception_model.eval()
        self.resnet50_model.eval()
        return self

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        # No parameter of the extractors is optimized, so skip building their graph
        with T.no_grad():
            X1 = self.inception_model(x)
            X2 = self.resnet50_model(x)

        # Flatten each feature map to (batch, features) and join them
        X = T.cat([T.flatten(X1, 1), T.flatten(X2, 1)], dim=1)

        return self.output(X)

    def get_weights(self):
        """Return an independent copy of the head's state_dict."""
        import copy  # local import keeps this cell self-contained

        # state_dict() hands out references to the live tensors; copy them so a
        # stored snapshot does not change when training continues
        return copy.deepcopy(self.output.state_dict())

    def load_weights(self, weights):
        """Load a state_dict previously obtained from ``get_weights`` into the head."""
        self.output.load_state_dict(weights)

In [None]:
def train_model(train_dl, val_dl, model, epochs=50):
    """Train ``model`` and restore the weights of its best validation epoch.

    Args:
        train_dl: DataLoader yielding ``(inputs, integer_labels)`` training batches.
        val_dl: DataLoader yielding ``(inputs, integer_labels)`` validation batches.
        model: Module exposing ``optim``, ``criterion``, ``get_weights()`` and
            ``load_weights(weights)``. If it also has a ``scheduler`` attribute,
            the scheduler is stepped once per epoch after validation.
        epochs: Number of passes over ``train_dl``.

    Returns:
        ``([train_acc_history, train_loss_history, val_acc_history,
        val_loss_history], model)``. Each history has one entry per epoch;
        the loss entries are the sum of the per-batch losses of that epoch.
    """
    import copy  # local import keeps this cell self-contained

    # Run on whatever device the model already lives on
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = T.device('cpu')
    # Not every model variant defines a learning-rate scheduler
    scheduler = getattr(model, 'scheduler', None)

    train_acc_history = []
    val_acc_history = []
    train_loss_history = []
    val_loss_history = []
    # Lowest validation loss seen so far; inf so that any finite loss improves on it
    best_val_loss = float('inf')
    # A state_dict holds references to the live parameter tensors, so every
    # snapshot must be a deep copy or it silently follows later updates.
    best_weights = copy.deepcopy(model.get_weights())

    for epoch in range(epochs):
        print("="*20, "Epoch: ", str(epoch), "="*20)

        train_correct_pred = 0
        val_correct_pred = 0
        train_loss = 0.0
        val_loss = 0.0

        # Set to training mode
        model.train()

        for x, y in train_dl:
            # Inputs need no gradient of their own; only parameters are optimized
            x = x.to(model_device)
            y = y.to(model_device).long()
            # Reset gradients
            model.optim.zero_grad()
            # Predict
            preds = model(x)
            # Compute the loss
            loss = model.criterion(preds, y)
            # Compute the gradients
            loss.backward()
            # Update weights
            model.optim.step()
            # Count the correct predictions
            train_correct_pred += (T.argmax(preds, dim=1) == y).sum().item()
            train_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty dataset
        train_acc = train_correct_pred / max(len(train_dl.dataset), 1)
        train_acc_history.append(train_acc)
        train_loss_history.append(train_loss)

        # Switch to evaluation mode
        model.eval()

        with T.no_grad():
            for x, y in val_dl:
                x = x.to(model_device)
                y = y.to(model_device).long()
                # Predict
                preds = model(x)
                # Compute the loss
                val_loss += model.criterion(preds, y).item()
                # Count the correct predictions
                val_correct_pred += (T.argmax(preds, dim=1) == y).sum().item()

        if scheduler is not None:
            scheduler.step()

        val_acc = val_correct_pred / max(len(val_dl.dataset), 1)
        val_acc_history.append(val_acc)
        val_loss_history.append(val_loss)

        # Keep an independent copy of the weights of the best epoch
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_weights = copy.deepcopy(model.get_weights())

        print("Train acc: {:.4f} | Train Loss: {:.4f} | Validation acc: {:.4f} | Validation Loss: {:.4f}".format(train_acc, train_loss, val_acc, val_loss))

    # Load best model
    model.load_weights(best_weights)

    return [train_acc_history, train_loss_history, val_acc_history, val_loss_history], model

In [None]:
# Build the AdamW variant on top of the same two feature extractors
model = Model(inception_model, resnet50_model)

In [None]:
# Train with the default number of epochs; returns the metric histories and the model
history, model = train_model(train_dl, validation_dl, model)
# Save the whole module object first, then only its state_dict
T.save(model, 'resnet-inception-adamw.pt')
T.save(model.state_dict(), 'resnet-inception-adamw-weights.pt')