# PyTorch - DenseNet201 - 16 classes - Training

In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

In [2]:
from torchvision import datasets, models, transforms



import matplotlib as mpl
#  ######################################################
#  #### Matplotlib X display error - removing for server#
#  ######################################################
mpl.use('Agg')  # This has to run before pyplot import

import matplotlib.pyplot as plt
import time
import datetime
import os
import copy
import sys
import pandas as pd

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import itertools
import numpy as np

import ipywidgets as widgets # for ipython widgets

In [None]:
# Switch pyplot to interactive mode.
# NOTE(review): the non-GUI 'Agg' backend is selected before pyplot is imported,
# so this probably has no visible effect here - confirm before relying on it.
plt.ion()   # interactive mode

# Training 

In [None]:
# Timestamp (YYYYMMDDHHMM) used to give every artefact of this run a unique name.
now = datetime.datetime.now()
date_and_time = "{:%Y%m%d%H%M}".format(now)

## Initial variables to change for each run

In [None]:
# Root folder of the dataset; the output folders are created underneath it.
main_data_dir = "../../../data/data_generated_medicotask_70_30_v2"

# Every name produced by this run starts with the timestamp plus the experiment id.
run_tag = date_and_time + "_13_3"
model_name = run_tag + "_densenet201_70_30"
# Template filled in by train_model with the epoch index and validation accuracy.
checkpoint_name_format = run_tag + "_weights-improvement-{epoch:02d}-{val_acc:.4f}.hdf5"

## Number of epochs and batchsize (to be changed)

In [None]:
# Number of passes over the training set and the mini-batch size for this run.
number_of_epochs, batch_size = 1, 25

## Directories to save output data

In [None]:
# Models, plots and history all live in sub-folders of the data directory.
data_dir = main_data_dir
model_dir, plot_dir, history_dir = (
    '{}/{}'.format(data_dir, folder)
    for folder in ('pytorch_models', 'pytorch_plots', 'pytorch_history')
)

In [None]:
# File names (without extension) for the plots of this run.
acc_loss_plot_name, accuracy_plot_name, loss_plot_name = (
    prefix + model_name
    for prefix in ('acc_loss_plot_', 'accuracy_plot_', 'loss_plot_')
)

In [None]:
########################################################################
#  Managing directory structure
########################################################################
# Create every output directory up front. makedirs(..., exist_ok=True) also
# creates missing parent folders and does not fail when the directory already
# exists, which avoids the check-then-create race of exists() + mkdir().
for output_dir in (model_dir, plot_dir, history_dir):
    os.makedirs(output_dir, exist_ok=True)



## Preparing Data - Training and Validation

In [None]:
# Per-channel statistics used to normalise both splits.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# Training pipeline: resize/crop, random flips and rotation for augmentation,
# then tensor conversion and normalisation.
train_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(229),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]

# Validation pipeline: the same resize/crop/normalisation, no augmentation.
validation_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(229),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]

data_transforms = {
    'train': transforms.Compose(train_steps),
    'validation': transforms.Compose(validation_steps),
}

In [None]:
# Build, for each split, the ImageFolder dataset, a shuffling DataLoader over it,
# and the number of samples it contains.
phases = ['train', 'validation']
image_datasets = {}
dataloaders = {}
dataset_sizes = {}

for phase in phases:
    split_dir = os.path.join(data_dir, phase)
    image_datasets[phase] = datasets.ImageFolder(split_dir, data_transforms[phase])
    dataloaders[phase] = torch.utils.data.DataLoader(
        image_datasets[phase],
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
    )
    dataset_sizes[phase] = len(image_datasets[phase])

## Selecting the computing device

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
backend_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(backend_name)

## The Main method to train a model

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with the best-validation-accuracy weights.

    Every epoch runs a 'train' phase followed by a 'validation' phase over the
    module-level ``dataloaders``; per-phase averages are computed with the
    module-level ``dataset_sizes`` and tensors are moved to the module-level
    ``device``.

    Args:
        model: network to optimise; updated in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        A tuple ``(model, history_tensor, checkpoint_name)`` where ``model``
        holds the weights of the epoch with the highest validation accuracy,
        ``history_tensor`` has shape ``(num_epochs, 4)`` with columns
        train_acc, train_loss, val_acc, val_loss, and ``checkpoint_name`` is
        ``checkpoint_name_format`` filled in for the best epoch (``None`` when
        no epoch reached a validation accuracy above 0).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # Columns: train_acc, train_loss, val_acc, val_loss
    history_tensor = torch.empty((num_epochs, 4), device=device)
    checkpoint_name = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Plain int so an empty loader still yields a usable count.
            running_corrects = 0

            # `indicator` is the batch index, printed only as a progress trace.
            for indicator, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    print(indicator, sep='-', end='=', flush=True)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # The scheduler must be stepped after the optimizer has been
            # stepped for this epoch; stepping it first skips the first value
            # of the learning-rate schedule.
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Collecting data for making plots
            if is_training:
                history_tensor[epoch, 0] = epoch_acc
                history_tensor[epoch, 1] = epoch_loss
            else:
                history_tensor[epoch, 2] = epoch_acc
                history_tensor[epoch, 3] = epoch_loss

                # Keep a copy of the weights whenever validation improves.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    checkpoint_name = checkpoint_name_format.format(
                        epoch=epoch, val_acc=best_acc)
                    print("Found a best model:", checkpoint_name)
                else:
                    print("No improvement from the previous best model ")

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, history_tensor, checkpoint_name


## A method to plot and save the training history

In [None]:
###########################################################
#  Plotting the training history and saving it to the plots directory
###########################################################
def plot_and_save_training_history(history_tensor):
    """Plot the per-epoch accuracy/loss curves and save the figure.

    ``history_tensor`` is copied to the CPU and interpreted as four columns
    (train_acc, train_loss, val_acc, val_loss). The figure is written into the
    module-level ``plot_dir`` under "_training_" + ``acc_loss_plot_name``.
    """
    column_names = ['train_acc', 'train_loss', 'val_acc', 'val_loss']
    history_frame = pd.DataFrame(history_tensor.cpu().numpy(), columns=column_names)
    axes = history_frame.plot()
    output_path = os.path.join(plot_dir, "_training_" + acc_loss_plot_name)
    axes.get_figure().savefig(output_path)

## Creating the model (without pretrained weights) and replacing the last layer

In [None]:
# DenseNet-201 with randomly initialised weights (no pretrained weights).
model_ft = models.densenet201(pretrained=False)
# torchvision's DenseNet names its final layer `classifier` (there is no `fc`).
# Assigning to `fc` would only add an unused attribute and leave the original
# 1000-way head in the forward pass, so replace `classifier` with a 16-class layer.
# Checkpoints are tied to this layout: weight files saved from a model that
# still had the extra `fc` attribute cannot be loaded into it.
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, 16)

## Setting parameters

In [None]:
# Loss function for multi-class classification.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model.
trainable_params = model_ft.parameters()
optimizer_ft = optim.SGD(trainable_params, momentum=0.9, lr=0.1)

# Multiply the learning rate by 0.1 every 10 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_ft, gamma=0.1, step_size=10)

## If multiple GPUs are there, then use all of them

In [None]:
##########################################################
# Spread the model over all GPUs when more than one is visible
##########################################################
gpu_count = torch.cuda.device_count()
if gpu_count == 1:
    print("Found only one GPU")
elif gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    # DataParallel splits each batch along dim 0 across the GPUs,
    # e.g. [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model_ft = nn.DataParallel(model_ft)
else:
    print("No GPU.. Runing on CPU")

## Moving the model to the selected device and training it

In [None]:
##############################################################
#  Move the model to the selected device
##############################################################
model_ft = model_ft.to(device)

#############################################################
#  Start training
#############################################################
# train_model returns the best model, the per-epoch history and the name
# to use for the checkpoint file.
training_result = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=number_of_epochs,
)
model_ft, history_tensor, check_point_name = training_result



## Save the best model to the model directory

In [None]:
############################################################
### Save the model to the directory
############################################################

# Make sure the checkpoint directory exists (no error if it already does).
os.makedirs(model_dir, exist_ok=True)

# train_model leaves the name as None when no epoch improved on the initial
# best accuracy; in that case there is nothing worth saving.
if check_point_name is not None:
    print(check_point_name)
    # Only the weights (state_dict) are stored. If model_ft is wrapped in
    # nn.DataParallel, the stored keys carry a "module." prefix.
    torch.save(model_ft.state_dict(), os.path.join(model_dir, check_point_name))
    print("Model saved")

## Plot and save training history 

In [None]:
# Plot the accuracy/loss history returned by train_model and write the figure
# into plot_dir.
plot_and_save_training_history(history_tensor)

print("Plots saved to", plot_dir)

# Re-Training

## Loading the model to retrain

In [None]:
# Fresh timestamp (YYYYMMDDHHMM) for the re-training run.
now = datetime.datetime.now()
date_and_time = "{:%Y%m%d%H%M}".format(now)

In [None]:
# Ask for the name of the weights file to continue training from; it is later
# joined with model_dir, so only the file name is expected here.
best_weight_file_name = input('Please, enter the best weights value file name:')

In [None]:
# Echo the entered file name (a bare expression is shown as the cell output).
best_weight_file_name

In [None]:
# Rebuild the DenseNet-201 architecture (random weights; the trained weights
# are loaded from the checkpoint afterwards).
model_ft = models.densenet201(pretrained=False)

# torchvision's DenseNet names its final layer `classifier` (there is no `fc`).
# Assigning to `fc` would only add an unused attribute and leave the original
# 1000-way head in the forward pass, so replace `classifier` with a 16-class layer.
# The checkpoint loaded into this model must come from the same layout; files
# saved from a model that still had the extra `fc` attribute will not load.
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, 16)

In [None]:
# Read the checkpoint onto the CPU first so that weights saved on a GPU machine
# can also be loaded on a CPU-only one.
checkpoint_path = os.path.join(model_dir, best_weight_file_name)
state_dict = torch.load(checkpoint_path, map_location='cpu')

# A state_dict saved from an nn.DataParallel-wrapped model prefixes every key
# with "module."; strip it so both kinds of checkpoint can be loaded.
state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in state_dict.items()
}

# Always load into the underlying network, whether or not it is wrapped.
load_target = model_ft.module if isinstance(model_ft, nn.DataParallel) else model_ft
load_target.load_state_dict(state_dict)
print('Model loaded')

In [None]:
#####################################################
#  Spread the model over all GPUs when more than one is visible
gpu_count = torch.cuda.device_count()
if gpu_count == 1:
    print("Found only one GPU")
elif gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    # DataParallel splits each batch along dim 0 across the GPUs,
    # e.g. [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model_ft = nn.DataParallel(model_ft)
else:
    print("No GPU.. Runing on CPU")

##############################################3

## Setting parameters

In [None]:
# Loss function for multi-class classification.
criterion = nn.CrossEntropyLoss()

# New SGD-with-momentum optimizer over every parameter of the reloaded model.
trainable_params = model_ft.parameters()
optimizer_ft = optim.SGD(trainable_params, momentum=0.9, lr=0.1)

# Multiply the learning rate by 0.1 every 10 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer_ft, gamma=0.1, step_size=10)

## Model to the GPU

In [None]:
##############################################################
#  Move the model to the selected device (GPU when available, else CPU)
##############################################################
model_ft = model_ft.to(device)

## Retraining number of epochs and bs

In [None]:
# Epoch count and batch size for the re-training run.
# NOTE(review): train_model reads the `dataloaders` built earlier, so changing
# batch_size here does not affect them unless they are rebuilt.
number_of_epochs, batch_size = 2, 25

In [None]:
# Continue training the reloaded model; train_model returns the best model,
# the per-epoch history and the name to use for the checkpoint file.
training_result = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=number_of_epochs,
)
model_ft, history_tensor, check_point_name = training_result

## Saving the best model from re-training

In [None]:
# Make sure the checkpoint directory exists (no error if it already does).
os.makedirs(model_dir, exist_ok=True)

# train_model leaves the name as None when no epoch improved on the initial
# best accuracy; in that case there is nothing worth saving.
if check_point_name is not None:
    print(check_point_name)
    # Only the weights (state_dict) are stored. If model_ft is wrapped in
    # nn.DataParallel, the stored keys carry a "module." prefix.
    torch.save(model_ft.state_dict(), os.path.join(model_dir, check_point_name))
    print("Model saved")

## Plot re-training history 

In [None]:
# Plot the accuracy/loss history of the re-training run and write the figure
# into plot_dir.
plot_and_save_training_history(history_tensor)

print("Plots saved to", plot_dir)

# Testing

## Get the weight file to test

In [None]:
# Ask for the name of the weights file that should be evaluated.
test_weight_file_name = input('Please, enter the best weights value file name:')

## Creating and Loading the test weights to the model and setting model to the device

In [None]:
# Rebuild the DenseNet-201 architecture (random weights; the trained weights
# are loaded from the checkpoint afterwards).
model_ft = models.densenet201(pretrained=False)
# torchvision's DenseNet names its final layer `classifier` (there is no `fc`).
# Assigning to `fc` would only add an unused attribute and leave the original
# 1000-way head in the forward pass, so replace `classifier` with a 16-class layer.
# The checkpoint loaded into this model must come from the same layout; files
# saved from a model that still had the extra `fc` attribute will not load.
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, 16)

print('Model architecture created..')

In [None]:
#####################################################
#  Spread the model over all GPUs when more than one is visible
gpu_count = torch.cuda.device_count()
if gpu_count == 1:
    print("Found only one GPU")
elif gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    # DataParallel splits each batch along dim 0 across the GPUs,
    # e.g. [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model_ft = nn.DataParallel(model_ft)
else:
    print("No GPU.. Runing on CPU")

##############################################3

In [None]:
# Load the weights file requested for testing (test_weight_file_name), not the
# one that was entered for re-training. The checkpoint is read onto the CPU
# first so that weights saved on a GPU machine also load on a CPU-only one.
checkpoint_path = os.path.join(model_dir, test_weight_file_name)
state_dict = torch.load(checkpoint_path, map_location='cpu')

# A state_dict saved from an nn.DataParallel-wrapped model prefixes every key
# with "module."; strip it so both kinds of checkpoint can be loaded.
state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in state_dict.items()
}

# Always load into the underlying network, whether or not it is wrapped.
load_target = model_ft.module if isinstance(model_ft, nn.DataParallel) else model_ft
load_target.load_state_dict(state_dict)
print('Model loaded')
model_ft = model_ft.to(device)

print('Model loaded... and set to device')

print('Started to test on testing data...')


## confusion matrix plot name

In [None]:
# File name (without extension) of the confusion-matrix plot for this model.
cm_plot_name = 'cm_{}'.format(model_name)

In [None]:
# Echo the model name (a bare expression is shown as the cell output).
model_name

## Preparing test data

In [None]:
print('Preparing test data...')

# Evaluate with exactly the preprocessing used for the validation split during
# training (same crop size and normalisation statistics). Feeding the network
# inputs that are cropped and normalised differently from what it was trained
# on makes the reported accuracy unreliable.
test_data_transforms = data_transforms['validation']

# The 'validation' folder is used as the test set.
test_datasets = datasets.ImageFolder(os.path.join(data_dir, 'validation'), test_data_transforms)

# No shuffling: the sample order is irrelevant for the metrics computed later.
test_dataloader = torch.utils.data.DataLoader(test_datasets, batch_size=batch_size, shuffle=False, num_workers=4)

print('Preparing test data finised')

## start testing

In [None]:
####################################################
#  Testing
####################################################
# Run the network once over the test set, counting correct predictions and
# collecting all labels/predictions for the confusion matrix.
model_ft.eval()
correct = 0
total = 0
# Batches are gathered in lists and concatenated once at the end; concatenating
# onto a growing tensor inside the loop copies all earlier data every batch.
label_batches = []
prediction_batches = []

with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_ft(inputs)
        _, predicted = torch.max(outputs, 1)
        # .item() turns the count into a plain int so `correct` and the final
        # accuracy are ordinary Python numbers rather than tensors.
        batch_correct = (predicted == labels).sum().item()
        print(batch_correct)
        total += labels.size(0)
        correct += batch_correct
        label_batches.append(labels)
        prediction_batches.append(predicted)

if label_batches:
    all_labels_d = torch.cat(label_batches, 0)
    all_predictions_d = torch.cat(prediction_batches, 0)
else:
    # Empty test loader: keep the same (empty, long, on-device) tensors as before.
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)

print('copying some data back to cpu for generating confusion matrix...')
testset_labels = all_labels_d.cpu()
testset_predicted_labels = all_predictions_d.cpu()   # to('cpu')

cm = confusion_matrix(testset_labels, testset_predicted_labels)  # confusion matrix

# Guard against an empty test set instead of dividing by zero.
accuracy_percent = 100.0 * correct / total if total else 0.0
print('Accuracy of the network on the %d test images: %f %%' % (total, accuracy_percent))





In [None]:
# Show the raw confusion matrix computed from the test labels and predictions.
print(cm)

In [None]:
################################################################
#  Plotting Confusion Matrix
################################################################

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          plt_size=(10, 10)):
    """Print, plot and save a confusion matrix.

    Args:
        cm: square confusion matrix as a NumPy array (rows are true labels,
            columns are predicted labels, matching the axis labels drawn below).
        classes: class names used as tick labels on both axes.
        normalize: when True, every row is divided by its sum before plotting;
            rows without any samples are left as zeros.
        title: figure title.
        cmap: matplotlib colormap used for the image.
        plt_size: figure size as (width, height); note that it is written to
            ``plt.rcParams`` and therefore also applies to later figures.

    The figure is saved into the module-level ``plot_dir`` under the
    module-level ``cm_plot_name``.
    """
    plt.rcParams['figure.figsize'] = list(plt_size)
    plt.figure()
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row has samples; empty rows stay 0 instead of
        # turning into NaN through a division by zero.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Write each cell value; use white text on dark (high-valued) cells.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are included in the layout.
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, cm_plot_name))



In [None]:
print("taking class names to plot CM")

# ImageFolder exposes the class names of the test set via `.classes`; they are
# used as tick labels of the confusion matrix.
class_names = test_datasets.classes  # taking class names for plotting confusion matrix

print("Generating confution matrix")

# Draws the (un-normalised) matrix and saves it into plot_dir.
plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')

print('confusion matrix saved to ', plot_dir)