**1. Imports**

In [None]:
import numpy as np
import matplotlib.pylab as plt
import plotly.graph_objects as go
import os
import shutil
import copy
import pickle

In [None]:
from torchvision import transforms, datasets, models
import torch 
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [None]:
# custom written code 
from xrays import create_datasets, summaries
from training_loop import Train
from train_model import fit
from imbalanced import class_weights
from resnet import ResNet
from testing_report import Test_Report

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# on Drive can be read and written from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**2. Download from Kaggle**


In [None]:
# Install the Kaggle API token from Drive.
# `-p` keeps this cell re-runnable when ~/.kaggle already exists, and
# chmod 600 keeps the token readable by the current user only (the Kaggle CLI
# warns when the credentials file is readable by other users).
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the COVID-19 Radiography Database archive from Kaggle into the
# current working directory (requires the API token installed beforehand).
! kaggle datasets download tawsifurrahman/covid19-radiography-database

Downloading covid19-radiography-database.zip to /content
100% 777M/778M [00:38<00:00, 22.8MB/s]
100% 778M/778M [00:38<00:00, 21.0MB/s]


In [None]:
# Extract the archive. `-o` overwrites without prompting so the cell can be
# re-run non-interactively; `-q` suppresses the per-file listing that would
# otherwise flood the cell output.
!unzip -o -q covid19-radiography-database.zip

**3. A first look at the data: Class distribution, Visualizations**

In [None]:
# Count the image files found on disk for each class and show the counts
# as a labelled bar chart.
classes = ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']
# One count per class, in the same order as `classes`.
points = [
    len(os.listdir('./COVID-19_Radiography_Dataset/' + class_name + '/images'))
    for class_name in classes
]
class_bars = go.Bar(x=classes, y=points, text=points)
fig = go.Figure([class_bars])
fig.update_layout(
    title='Covid-19 Radiography Dataset : Class distribution',
    title_x=0.5,
    width=600,
    height=400,
)
fig.show()

**4. Training, Validation and Test Datasets**

In [None]:
#shutil.rmtree('/content/COVID-19_Radiography_Dataset/training_dataset')
#shutil.rmtree('/content/COVID-19_Radiography_Dataset/validation_dataset')
#shutil.rmtree('/content/COVID-19_Radiography_Dataset/test_dataset')

In [None]:
# Settings shared by both preprocessing pipelines.
IMAGE_SIZE = (224, 224)          # recommended input size
CHANNEL_MEAN = [0.5, 0.5, 0.5]   # per-channel mean passed to Normalize
CHANNEL_STD = [0.5, 0.5, 0.5]    # per-channel std passed to Normalize


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps that close every pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(torch.Tensor(CHANNEL_MEAN), torch.Tensor(CHANNEL_STD)),
    ]


# Random augmentations, used for the training data only.
_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=20),
]

# Training pipeline: resize -> augment -> tensor -> normalize.
train_transforms = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), *_augmentations, *_tensor_and_normalize()]
)
# Validation / test pipeline: resize -> tensor -> normalize (no augmentation).
test_transforms = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), *_tensor_and_normalize()]
)

In [None]:
# Build the training, validation and test datasets, handing over the
# training-time and evaluation-time transform pipelines.
# NOTE(review): how the files are split is decided inside xrays.create_datasets,
# which is not visible in this notebook — confirm split ratios there.
train_dataset, validation_dataset, test_dataset = create_datasets(train_transforms, test_transforms)

In [None]:
# `class_to_idx` of the training dataset is handed to the project helper
# `summaries`, with a bar plot requested.
# NOTE(review): presumably class_to_idx maps class name -> integer label and
# `summaries` reports per-split class counts — confirm in xrays.py.
labels = train_dataset.class_to_idx
summaries(labels, bar_plot=True)

**5. ResNet34**

In [None]:
# Per-class weights for the imbalanced training set, later passed to the loss.
# In the saved run each printed weight equals 1 / (number of instances of the
# class), e.g. 1 / 2892 for COVID.
weights = class_weights('./COVID-19_Radiography_Dataset/training_dataset', train_dataset)

COVID class - instances 2892 - assigned weight 0.00034578146611341634
Lung_Opacity class - instances 4809 - assigned weight 0.0002079434393844874
Normal class - instances 8153 - assigned weight 0.00012265423770391266
Viral Pneumonia class - instances 1076 - assigned weight 0.0009293680297397769


In [None]:
# Project ResNet wrapper (resnet.py) requested as type '34' with 4 output classes.
# NOTE(review): trainable_layers presumably lists the indices of the child
# modules that stay unfrozen — confirm in resnet.py.
resnet34 = ResNet(type_ = '34', 
                  no_of_classes = 4, 
                  trainable_layers = [4,5,6,7])

# Note: the layer right before the Linear layer is a Flatten layer, even though
# it is still listed under the name '(avgpool)' (the name has not been changed yet).
resnet34.model



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Print the number of trainable parameters of the wrapped model.
resnet34.trainable_params()

No. of trainable params 21374468


>**training**

In [None]:
# Class-weighted cross-entropy to counter the class imbalance, optimised with Adam.
# The weight tensor has to live on the same device as the model outputs. Pick
# the device explicitly instead of hard-coding `.cuda()`, which raises on a
# CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_fct = torch.nn.CrossEntropyLoss(weight=weights.to(device))
optimizer = optim.Adam(resnet34.model.parameters(), lr=0.0001)
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

# Run the training loop; it returns the per-epoch histories of both phases.
# NOTE(review): assumes fit() places the model on CUDA when available and on
# CPU otherwise, matching `device` above — confirm in train_model.py.
# NOTE(review): labels_of_normal_classes=[2] presumably refers to the 'Normal'
# class index — confirm against train_dataset.class_to_idx.
training_dict, validation_dict = fit(train_dataset=train_dataset,
                                     validation_dataset=validation_dataset,
                                     batch_size=64,
                                     model=resnet34,
                                     loss_fct=loss_fct,
                                     optimizer=optimizer,
                                     scheduler=None,
                                     epochs=100,
                                     patience=20,
                                     no_of_classes=4,
                                     labels_of_normal_classes=[2])

Device: cuda:0
Starting training..
-----------------------------------
Epoch 1/100
->Training phase
  Loss=0.3613
  Accuracy=0.84 - Recall per class=[0.86, 0.84, 0.83, 0.94]
->Validation phase
  Loss=0.1926
  Accuracy=0.93 - Recall per class=[0.93, 0.92, 0.92, 0.99]
->New model saved!
-----------------------------------
Epoch 2/100
->Training phase
  Loss=0.2208
  Accuracy=0.9 - Recall per class=[0.94, 0.88, 0.89, 0.97]
->Validation phase
  Loss=0.1766
  Accuracy=0.93 - Recall per class=[0.94, 0.91, 0.94, 1.0]
->New model saved!
-----------------------------------
Epoch 3/100
->Training phase
  Loss=0.1834
  Accuracy=0.92 - Recall per class=[0.96, 0.9, 0.91, 0.98]
->Validation phase
  Loss=0.2313
  Accuracy=0.89 - Recall per class=[0.91, 0.94, 0.84, 1.0]
-----------------------------------
Epoch 4/100
->Training phase
  Loss=0.1597
  Accuracy=0.93 - Recall per class=[0.97, 0.91, 0.92, 0.98]
->Validation phase
  Loss=0.3792
  Accuracy=0.89 - Recall per class=[0.72, 0.94, 0.9, 1.0]
-----

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Training complete !


In [None]:
# Persist the training and validation histories returned by fit() on Google Drive.
drive_path = './drive/MyDrive/Colab_Notebooks/dataset_models/Covid-19_Radiography_Dataset/Models/'


def _dump_history(history, file_name):
    """Pickle `history` into `file_name` inside `drive_path`."""
    with open(drive_path + file_name, 'wb') as f:
        pickle.dump(history, f)


_dump_history(training_dict, 'xrays_resnet34_training_dict.pickle')
_dump_history(validation_dict, 'xrays_resnet34_validation_dict.pickle')

>**plot**

In [None]:
# Loss curves of the training and validation phases over the epochs.
def _loss_trace(history, phase_name):
    """Build a line trace of history['loss'] against 1-based epoch numbers."""
    losses = history['loss']
    epochs = list(range(1, len(losses) + 1))
    return go.Scatter(x=epochs, y=losses, name=phase_name)


fig = go.Figure()
fig.add_trace(_loss_trace(training_dict, 'Training'))
fig.add_trace(_loss_trace(validation_dict, 'Validation'))
fig.update_layout(
    title='ResNet34 Loss history (Optimal model at epoch 25)',
    title_x=0.5,
    xaxis_title='Epochs',
    yaxis_title='Loss',
    height=400,
    width=800,
)
fig.show()

>**testing**

In [None]:
# Load the saved model and prepare the test report on the held-out test split.
# map_location lets a checkpoint that was saved from a GPU run also be opened
# on a CPU-only runtime instead of failing while deserialising CUDA tensors.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True; if
# this file holds a fully pickled model it must be loaded with
# weights_only=False (only do that for files you trust).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet34 = torch.load('xrays_resnet34.pt', map_location=device)
# shuffle=False keeps the sample order fixed across evaluations.
test_loader = DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)
classes = [ 'COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']
test_report = Test_Report(test_loader, resnet34, classes)

Device: cuda:0


In [None]:
# Bold header (ANSI escape codes), then the per-class metrics and the balanced accuracy.
BOLD, RESET = '\033[1m', '\033[0m'
print(BOLD + 'Model: xrays_resnet34.pt' + RESET + ' \n')
test_report.classification_report()
test_report.balanced_accuracy()

[1mModel: xrays_resnet34.pt[0m 

                 precision    recall  f1-score   support

          COVID       0.99      0.99      0.99       362
   Lung_Opacity       0.91      0.95      0.93       602
         Normal       0.97      0.94      0.95      1020
Viral Pneumonia       0.96      0.99      0.97       135

       accuracy                           0.95      2119
      macro avg       0.96      0.97      0.96      2119
   weighted avg       0.95      0.95      0.95      2119

Balanced Accuracy Score: 0.97


In [None]:
# Bold header (ANSI escape codes), then the ROC and precision-recall curves.
header = 'Model: xrays_resnet34.pt'
print(f'\033[1m{header}\033[0m')
test_report.roc_curve()
test_report.pr_curve()

[1mModel: xrays_resnet34.pt[0m
