### Importing relevant libraries 

In [None]:
import torchxrayvision as xrv
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit

In [None]:
from torchvision import transforms
from torch.utils import data
import torch
import time, copy
import random

In [None]:
from torchvision.transforms.functional import normalize, resize, to_tensor, to_pil_image
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

### Custom Classes for preprocessing 

In [None]:
class TransformToPil:
    """Turn a single-channel ``(1, H, W)`` array into an ``(H, W, 3)`` array.

    Despite the name, no PIL image is created here: the input is reshaped to
    height x width x 1 and that single channel is repeated three times along
    the last axis.
    """

    def __call__(self, t):
        rows, cols = t.shape[1], t.shape[2]
        # Move the channel axis to the end (valid because there is one channel).
        single_channel = t.reshape(rows, cols, 1)
        # Replicate the lone channel so the result has three identical channels.
        return np.tile(single_channel, (1, 1, 3))
    
class Rescale:
    """Callable transform that divides its input by 255."""

    def __call__(self, t):
        scaled = t / 255
        return scaled

class ToGray:
    """Keep only the channel at index 1 of a ``(C, H, W)`` input.

    The selected plane is returned with a leading axis of length one, i.e.
    with shape ``(1, H, W)``.
    """

    def __call__(self, t):
        height, width = t.shape[1], t.shape[2]
        # Channel index 1 is the one that is kept; the others are dropped.
        chosen_plane = t[1, :, :]
        return chosen_plane.reshape(1, height, width)

In [None]:
# Preprocessing applied to every image, in order.
image_transformer = transforms.Compose([
    # single-channel array -> H x W x 3 array
    TransformToPil(),
    # array -> tensor -> PIL image, so the PIL-based resize/crop can run
    transforms.ToTensor(),
    transforms.ToPILImage(),
    transforms.Resize(size=(256, 256)),
    transforms.CenterCrop(size=224),
    # back to a tensor, then per-channel normalisation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # keep a single channel again
    ToGray(),
])

### Reading Datasets 

In [None]:
# Load the dataset; every image is passed through image_transformer.
all_data = xrv.datasets.COVID19_Dataset(imgpath='images\\', transform=image_transformer, csvpath='metadata.csv')

# Keep only column 2 of the label matrix as the binary target.
# NOTE(review): presumably the COVID-19 column - confirm against all_data.pathologies.
mat = all_data.labels
covid_labels = mat[:, 2]

# The dataset now carries a single label per sample.
all_data.labels = covid_labels
stlabels = all_data.labels

# Stratified 80/20 train/test split over sample indices, then a second
# stratified 80/20 split of the training part into train/validation.
# The index list is sized from the labels instead of a hard-coded 149, so the
# split keeps working if the number of samples changes.
x_train, x_test, y_train, y_test = train_test_split(list(range(len(stlabels))), stlabels, test_size=.2, stratify=stlabels)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=.2, stratify=y_train)

In [None]:
# Mean of the test-split labels (the positive fraction when labels are 0/1).
sum(y_test)/len(y_test)

In [None]:
# Mean of the training-split labels (the positive fraction when labels are 0/1).
sum(y_train)/len(y_train)

In [None]:
# Mean of the validation-split labels (the positive fraction when labels are 0/1).
# Sum first, then divide once, matching how the train and test ratios are computed.
sum(y_val)/len(y_val)

## Getting statistics about our dataset

In [None]:
# Show the dataset's `pathologies` attribute (the class label names).
all_data.pathologies

### 103 positive (+ve) and 46 negative (-ve) cases in the dataset

In [None]:
# Samplers that draw the train / validation indices in random order.
st_train = data.SubsetRandomSampler(x_train)
st_val = data.SubsetRandomSampler(x_val)

# Keyword arguments for the DataLoaders. 'shuffle' stays False because an
# explicit sampler is supplied.
params_train = dict(batch_size=16, shuffle=False, num_workers=0, sampler=st_train)
# No batch_size is given for validation, so DataLoader's default applies.
params_val = dict(shuffle=False, num_workers=0, sampler=st_val)

max_epochs = 100


In [None]:
# One DataLoader per phase over the same dataset; the sampler inside each
# parameter dict restricts the loader to that phase's indices.
dataloaders = {
    'train': data.DataLoader(all_data, **params_train),
    'val': data.DataLoader(all_data, **params_val),
}

In [None]:
# Number of samples each phase iterates over, taken from the samplers.
dataset_sizes = {'train': len(st_train), 'val': len(st_val)}

### Trying out various architectures for transfer learning 

In [None]:
#tlmod = torch.hub.load('pytorch/vision:v0.5.0', 'resnet18', pretrained=True)
tlmod = xrv.models.DenseNet(weights='all')

# Freeze every pretrained parameter. An earlier guard, `name not in ['bn']`,
# compared each full parameter name against the literal string 'bn'; that
# never matches a real parameter name, so it already froze everything while
# suggesting that batch-norm layers stayed trainable. Say what is done.
for param in tlmod.parameters():
    param.requires_grad = False

In [None]:
# Swap the classifier for a new head: hidden layer of 200 units, ReLU, and a
# single output unit.
tlmod.classifier = nn.Sequential(
    nn.Linear(in_features=tlmod.classifier.in_features, out_features=200),
    nn.ReLU(),
    nn.Linear(in_features=200, out_features=1),
)

In [None]:
# Disabled experiment kept as a bare string literal: per-group learning rates
# for `avgpool` and `fc` sub-modules. Nothing inside the string is executed.
"""lrs= [
{ 'params': tlmod.avgpool.parameters(), 'lr': 1e-4},
{ 'params': tlmod.fc.parameters(), 'lr':  1e-4}
    ]"""

### Specifying criteria for NN 

In [None]:
# Binary loss computed on raw logits. pos_weight scales the loss contribution
# of positive samples; a value below 1 down-weights them.
# Alternatives previously noted here: nn.BCELoss(), nn.CrossEntropyLoss().
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([0.46]))

# Only the classifier head's parameters are handed to the optimizer.
optimizer_conv = optim.Adam(params=tlmod.classifier.parameters(), lr=3e-4)



In [None]:
def label_mapper(labels):
    """Pair each label with an indicator of whether it equals zero.

    For a 1-D sequence of ``n`` labels the result is an ``(n, 2)`` tensor:
    column 0 holds the labels unchanged, column 1 holds 1 where the label is
    0 and 0 everywhere else (in the labels' own dtype).
    """
    primary = np.array(labels)
    # 1 exactly where the original label is 0.
    complement = (primary == 0).astype(primary.dtype)
    stacked = np.vstack((primary, complement))
    return torch.tensor(stacked.T)

In [None]:
def train_model(model, criterion, optimizer, scheduler=None, num_epochs=25):
    """Train a single-logit binary classifier and restore its best weights.

    Batches are read from the module-level ``dataloaders`` dict (keys
    ``'train'`` and ``'val'``) and per-phase sample counts from the
    module-level ``dataset_sizes`` dict. Every batch is a mapping holding the
    input tensor under ``'PA'`` and the labels under ``'lab'``.

    Args:
        model: Network whose output can be reshaped to ``(batch, 1)`` logits.
        criterion: Loss called as ``criterion(logits, targets)``, both shaped
            ``(batch, 1)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Optional learning-rate scheduler. When given, its
            ``step()`` is called without arguments once per epoch, after the
            training phase.
        num_epochs: Number of train + validation passes.

    Returns:
        The same ``model`` object, loaded with the weights of the epoch that
        reached the highest validation accuracy.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode.
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0  # plain int so an empty loader cannot break the maths

            for batch in dataloaders[phase]:
                inputs = batch['PA']
                labels = batch['lab']

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs).reshape((-1, 1))
                    # Targets must match the logits in shape and dtype.
                    labels = labels.reshape((-1, 1)).type_as(outputs)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # A logit above 0 corresponds to a sigmoid probability above
                # 0.5. (Taking an argmax over a single column would always
                # give class 0.) preds and labels are both (batch, 1), so the
                # comparison is element-wise rather than broadcast.
                preds = (outputs > 0).type_as(labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += int((preds == labels).sum().item())

            if is_training and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Remember the weights of the best validation epoch so far.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Load the best model weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model


In [None]:
# Fine-tune for 40 epochs; `mod` is the model with its best validation weights loaded.
mod = train_model(
    model=tlmod,
    criterion=criterion,
    optimizer=optimizer_conv,
    num_epochs=40,
)