In [1]:
# Project root prefix. Every data/model path in this notebook is built by plain string
# concatenation (e.g. current_folder + "Data/jsons"), so a non-empty value must end
# with a path separator.
current_folder = ''
# User must define this

Mounted at /content/drive


In [3]:
import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import torchvision
from torchvision import transforms as T
from torchvision.models import detection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets.utils import download_url
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from torchvision.ops import box_convert
from scipy.stats import rayleigh
from scipy.ndimage import zoom
from torch.utils.data import random_split
from torchvision.utils import make_grid
from torchvision import transforms
from collections import Counter

import albumentations as A
from albumentations.pytorch import ToTensorV2

from copy import deepcopy

from PIL import Image
import cv2
from torch.cuda import is_available as check_cuda
from PIL.ImageOps import grayscale

import torchmetrics as tm

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [4]:
def rayleigh_clahe(img, clip_limit=2.0, tile_grid_size=(8,8)):
    """Apply CLAHE to an image and remap its intensities against a sampled Rayleigh histogram.

    Args:
        img: numpy array holding the image (the callers in this notebook pass the
            result of ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)``).
        clip_limit: forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        A float64 numpy array with the same shape as ``img``.

    Raises:
        ValueError: if ``img`` is None (which is what ``cv2.imread`` returns for an
            unreadable path).

    Note:
        The Rayleigh reference is drawn with ``rayleigh.rvs`` without an explicit
        ``random_state``, so the output differs between calls unless the random
        state used by scipy is seeded.
    """
    if img is None:
        raise ValueError("rayleigh_clahe received None instead of an image; check that the image path is readable")

    # Convert to float for processing (astype returns a copy, so the caller's array is untouched)
    img = np.asarray(img).astype(np.float64)

    # Normalize the image to range [0,1]. An all-black image has max 0: skip the
    # division there, otherwise NaNs would be cast to uint8 below.
    peak = np.max(img)
    if peak > 0:
        img /= peak

    # Create a CLAHE object (Arguments are optional)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    img_clahe = clahe.apply((img*255).astype(np.uint8))

    # Calculate scale for Rayleigh distribution based on the image
    scale = np.std(img_clahe)

    # Flatten the image for processing
    img_flat = img_clahe.flatten()

    # Generate Rayleigh distributed histogram
    rayleigh_hist, bin_edges = np.histogram(rayleigh.rvs(scale=scale, size=len(img_flat)), bins=256)

    # Generate the histogram of the input image
    img_hist, _ = np.histogram(img_flat, bins=256)

    # Calculate the CDFs
    img_cdf = img_hist.cumsum() / img_hist.sum()
    rayleigh_cdf = rayleigh_hist.cumsum() / rayleigh_hist.sum()

    # Evaluate the sampled Rayleigh CDF at every pixel value.
    # NOTE(review): textbook histogram matching maps the *image* CDF through the
    # inverse *target* CDF; here the roles look swapped. Left unchanged because the
    # saved models were trained on this output; confirm the intent.
    img_matched = np.interp(img_flat, bin_edges[:-1], rayleigh_cdf)

    # Map those probabilities back to bin edges through the image CDF
    img_matched = np.interp(img_matched, img_cdf, bin_edges[:-1])

    # Reshape the image back to its original shape
    img_matched = img_matched.reshape(img.shape)

    return img_matched

# function #2 to perform best-practice enhancements
def lanczos_interpolation(img, zoom_factor):
    """Resample ``img`` by ``zoom_factor`` with ``scipy.ndimage.zoom(order=3)``.

    Despite the function name, ``order=3`` selects cubic *spline* interpolation in
    ``scipy.ndimage.zoom``; no Lanczos kernel is involved. The name is kept because
    other cells call it.

    Args:
        img: array to resample.
        zoom_factor: scalar, or one factor per axis, passed straight to ``zoom``.

    Returns:
        The resampled array returned by ``zoom``.
    """
    return zoom(img, zoom_factor, order=3)

In [5]:
# Load all labels, boxes data
# Each annotation file contributes its FIRST shape only: the label plus the two
# corner points of the box, stored as (x1, y1, x2, y2).
labels_dict = {}
to_df = []
folder_name = current_folder + "Data/jsons"
# sorted() makes the row order of `df` independent of the filesystem listing order.
for path in sorted(os.listdir(folder_name)):
    # Skip anything that is not an annotation file (hidden files, checkpoint folders, ...);
    # json.load would fail on those.
    if not path.endswith('.json'):
        continue
    with open(os.path.join(folder_name,path)) as json_file:
        data = json.load(json_file)
    im = path.replace('.json','')
    first_shape = data['shapes'][0]
    labels = [first_shape['label']]
    pts = first_shape['points']
    pts_correct_format = [np.array([pts[0][0],pts[0][1],pts[1][0],pts[1][1]])]
    labels_dict[im] = {'label':labels,'boxes':pts_correct_format}
    to_df.append([im,int(pts[0][0]),int(pts[0][1]),int(pts[1][0]),int(pts[1][1]),labels[0]])

df = pd.DataFrame(to_df,columns=['image_id','x1','y1','x2','y2','label'])
# Binary target: 0 for 'benign', 1 for every other label (vectorised, no per-row .loc lookups)
df['label_bool'] = (df['label'] != 'benign').astype(int)
df = df.drop(['label'],axis=1)
unique_imgs = df['image_id'].unique()

In [6]:
root_path = current_folder + 'Data/'
# Keep only image files that have an annotation row in `df`. The file stem is obtained
# by dropping the last four characters (".jpg"/".png"-style extension), matching how
# CustData_trainval derives the image id.
# A set gives O(1) membership tests; previously list(df['image_id']) was rebuilt per file.
annotated_ids = set(df['image_id'])
train_imgs = [i for i in os.listdir(root_path+'train/') if i[:len(i)-4] in annotated_ids]
val_imgs = [i for i in os.listdir(root_path+'val/') if i[:len(i)-4] in annotated_ids]

In [8]:
# Loading External Test Set
# Layout walked here: <class folder>/<patient folder>/<image>.jpg with a sibling <image>.json
# NOTE(review): CustDataEXTERNALTEST reads images from 'Data/Test Set/' by default while this
# cell reads 'Data/External Test Set/' — confirm which folder name is the real one.
benign_path = current_folder + 'Data/External Test Set/External_test_set-benign/'
malignant_path = current_folder + 'Data/External Test Set/External_test_set-malignant/'
root_paths = [benign_path,malignant_path]
test_to_df = []
# The label is the position of the class folder in root_paths: 0 = benign, 1 = malignant.
# A dedicated loop variable is used so the global `root_path` (train/val root) is not overwritten.
for class_label, class_root in enumerate(root_paths):
  for folder in os.listdir(class_root):
    img_name = folder

    temp_path = os.path.join(class_root, folder)
    # Stray files next to the patient folders would make os.listdir fail
    if not os.path.isdir(temp_path):
      continue
    temp_files = os.listdir(temp_path)
    # Strip only the trailing extension (str.replace would also remove '.jpg' mid-name)
    files_base = [name[:-len('.jpg')] for name in temp_files if name.endswith('.jpg')]
    for f in files_base:
      img_specificity = f
      with open(os.path.join(temp_path,f+'.json')) as json_file:
            data = json.load(json_file)
      # Only the first annotated shape is used; its two points are opposite box corners
      shapes = data['shapes'][0]['points']
      x1 = shapes[0][0]
      x2 = shapes[1][0]
      y1 = shapes[0][1]
      y2 = shapes[1][1]
      test_to_df.append((img_name,img_specificity,x1,y1,x2,y2,class_label))
ext_test_df = pd.DataFrame(test_to_df,columns=['folder_id','image_id','x1','y1','x2','y2','label_bool'])
# "<patient folder>/<image stem>" key: CustDataEXTERNALTEST looks rows up by this column and
# appends '.jpg' to it to build the image path, and the test DataLoader passes it as image_names.
ext_test_df['folder_image_path'] = ext_test_df['folder_id'] + '/' + ext_test_df['image_id']

In [11]:
# Albumentation for data augmentation
# Installation: https://albumentations.ai/docs/getting_started/installation/
# Example: https://albumentations.ai/docs/examples/pytorch_classification/


# Training-time pipeline. CustData_trainval calls it as train_transform(image=<HxWx3 array>)['image']
# on a lesion crop that has already been resized to 160x160.
# The order of the entries matters: each transform receives the output of the previous one.
train_transform = A.Compose(
    [
        A.SmallestMaxSize(max_size=160),
        # A.SmallestMaxSize(max_size=320),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RandomCrop(height=128, width=128),
        # A.RandomCrop(height=256, width=256),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        # These mean/std values match the commonly used ImageNet statistics — presumably
        # chosen because the backbones are ImageNet-pretrained; confirm.
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # NOTE(review): the image is already 128x128 after RandomCrop, so this resize
        # looks like a no-op — confirm before removing.
        A.SmallestMaxSize(max_size=128),
        ToTensorV2(),

    ]
)

# Evaluation-time pipeline (used for validation and the external test set): no random
# augmentation, only resize, centre crop to 128x128, normalisation and tensor conversion.
val_transform = A.Compose(
    [
        A.SmallestMaxSize(max_size=160),
        # A.SmallestMaxSize(max_size=320),
        # A.CenterCrop(height=256, width=256),
        A.CenterCrop(height=128, width=128),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        # A.SmallestMaxSize(max_size=128),
        ToTensorV2(),
    ]
)

In [12]:
# This torch Dataset is used when images are conglomerated together into two folders: train/ and val/, within the data folder.
class CustData_trainval(torch.utils.data.Dataset):
    """Dataset of lesion crops read from ``Data/train/`` or ``Data/val/``.

    Each item is ``(image_tensor, label)``: the image is read as grayscale, passed
    through ``rayleigh_clahe`` and ``lanczos_interpolation``, converted to RGB, cropped
    to the annotated box, resized to 160x160 and finally passed through the transform.

    Relies on the notebook-level names ``current_folder``, ``rayleigh_clahe``,
    ``lanczos_interpolation``, ``train_transform`` and ``val_transform``.

    Args:
        df: DataFrame with columns ``image_id``, ``x1``, ``y1``, ``x2``, ``y2``, ``label_bool``.
        image_names: file names (with a 4-character extension such as ``.jpg``).
        train_bool: True to read from ``train/`` and default to ``train_transform``;
            False to read from ``val/`` and default to ``val_transform``.
        transform: optional callable used as ``transform(image=array)['image']``.
            When None, the default for the split is used.
    """
    def __init__(self,df,image_names,train_bool, transform=None):
        self.df = df
        self.image_names = image_names
        self.train_bool = train_bool
        # Previously accepted but dropped; now honoured when provided.
        self.transform = transform
    def __len__(self):
        return len(self.image_names)
    def __getitem__(self,idx):
        image_name = self.image_names[idx]
        # Drop the 4-character extension to obtain the annotation key
        image_name_base = image_name[:len(image_name)-4]
        matches = self.df[self.df['image_id']==image_name_base]
        if len(matches) == 0:
            raise KeyError(f"No annotation row for image_id {image_name_base!r}")
        # Work on a single row: int() on a one-element Series is deprecated in pandas
        row = matches.iloc[0]
        left = int(round(row['x1'],0))
        upper = int(round(row['y1'],0))
        right = int(round(row['x2'],0))
        lower = int(round(row['y2'],0))

        resize_factor = 160
        label = int(row['label_bool'])

        folder = 'train/' if self.train_bool else 'val/'
        img_path = current_folder+'Data/'+folder+image_name
        img = cv2.imread(img_path,cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img_eq = rayleigh_clahe(img)
        img_int = lanczos_interpolation(img_eq, 1)
        img_cfr = img_int
        final_img = Image.fromarray(img_cfr).convert('RGB').crop((left,upper,right,lower)).resize((resize_factor,resize_factor))

        transform = self.transform
        if transform is None:
            transform = train_transform if self.train_bool else val_transform
        img_transformed = transform(image=np.array(final_img))['image']

        return img_transformed, label

# This torch dataset is used when test images are placed in various folders separating patients into multiple individual folders, with parent folders separating malignant and benign cases
class CustDataEXTERNALTEST(torch.utils.data.Dataset):
    """Dataset of lesion crops for the external test set.

    Images are expected at
    ``<root_dir>/External_test_set-{benign,malignant}/<folder_image_path>.jpg``.
    Each item is ``(image_tensor, label)`` produced with the same preprocessing as the
    validation split (``rayleigh_clahe`` -> ``lanczos_interpolation`` -> RGB -> crop ->
    resize to 160x160 -> transform).

    Args:
        df: DataFrame with columns ``folder_image_path``, ``x1``, ``y1``, ``x2``, ``y2``,
            ``label_bool``.
        image_names: values of ``folder_image_path`` ("<patient folder>/<image stem>").
        containing_folders: patient folder name for each entry of ``image_names``.
        transform: optional callable used as ``transform(image=array)['image']``;
            defaults to the notebook-level ``val_transform``.
        root_dir: folder containing the two class folders; defaults to
            ``current_folder + 'Data/Test Set/'``.
            NOTE(review): the cell that builds the test DataFrame reads from
            'Data/External Test Set/' — confirm which folder name is correct.
    """
    def __init__(self,df,image_names,containing_folders, transform=None, root_dir=None):
        self.df = df
        self.image_names = image_names
        self.containing_folders = containing_folders
        # Previously accepted but dropped; now honoured when provided.
        self.transform = transform
        self.root_dir = root_dir
    def __len__(self):
        return len(self.image_names)
    def __getitem__(self,idx):
        image_name = self.image_names[idx]
        image_name_base = image_name
        containing_folder = self.containing_folders[idx]
        matches = self.df[self.df['folder_image_path']==image_name_base]
        if len(matches) == 0:
            raise KeyError(f"No annotation row for folder_image_path {image_name_base!r}")
        # Work on a single row: int() on a one-element Series is deprecated in pandas
        row = matches.iloc[0]
        left = int(round(row['x1'],0))
        upper = int(round(row['y1'],0))
        right = int(round(row['x2'],0))
        lower = int(round(row['y2'],0))
        resize_factor = 160

        label = int(row['label_bool'])

        # Resolved lazily so constructing the dataset does not touch the filesystem
        folder = self.root_dir if self.root_dir is not None else current_folder + 'Data/Test Set/'
        # Find which class folder holds this patient folder. A direct existence check
        # replaces two full directory listings per item.
        for root in ('External_test_set-benign/', 'External_test_set-malignant/'):
            if os.path.exists(os.path.join(folder + root, containing_folder)):
                break
        else:
            # Previously this fell through with `root` unbound (UnboundLocalError)
            raise FileNotFoundError(f"Folder {containing_folder!r} not found under {folder!r}")
        final_folder = folder + root + image_name_base + '.jpg'
        img = cv2.imread(final_folder,cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f"Could not read image: {final_folder}")
        img_eq = rayleigh_clahe(img)
        img_int = lanczos_interpolation(img_eq, 1)
        img_cfr = img_int
        final_img = Image.fromarray(img_cfr).convert('RGB').crop((left,upper,right,lower)).resize((resize_factor,resize_factor))

        transform = self.transform if self.transform is not None else val_transform
        img_transformed = transform(image=np.array(final_img))['image']

        return img_transformed, label

In [13]:
# Batch iterators for the three splits. Only the training loader shuffles.
# pin_memory is enabled exactly when a CUDA device is available.
# The dataset class defined in this notebook is CustData_trainval (there is no `CustData`).
train_dl = torch.utils.data.DataLoader(CustData_trainval(df,train_imgs,True),
                                      batch_size = 32,
                                      shuffle = True,
                                      pin_memory = torch.cuda.is_available())

val_dl = torch.utils.data.DataLoader(CustData_trainval(df,val_imgs,False),
                                    batch_size = 16,
                                    shuffle = False,
                                    pin_memory = torch.cuda.is_available()
                                    )


# Requires ext_test_df to carry a 'folder_image_path' column in addition to 'folder_id'.
test_dl = torch.utils.data.DataLoader(CustDataEXTERNALTEST(ext_test_df,list(ext_test_df['folder_image_path']),list(ext_test_df['folder_id'])),
                                    batch_size = 16,
                                    shuffle = False,
                                    pin_memory = torch.cuda.is_available()
                                    )

In [22]:
def _freeze_parameters(parameters):
  """Turn off gradient tracking for every tensor in ``parameters``."""
  for param in parameters:
      param.requires_grad = False


def _mlp_head(in_features, dropout):
  """Linear(in,512) -> ReLU -> Dropout -> Linear(512,2) -> LogSoftmax head."""
  return nn.Sequential(nn.Linear(in_features, 512),
                       nn.ReLU(),
                       nn.Dropout(dropout),
                       nn.Linear(512, 2),
                       nn.LogSoftmax(dim=1))


def _linear_head(in_features):
  """Single Linear(in,2) -> LogSoftmax head."""
  return nn.Sequential(nn.Linear(in_features, 2,bias=True),
                       nn.LogSoftmax(dim=1))


def _make_optimizer(opt, parameters, lr, weight_decay):
  """Build the optimizer selected by ``opt`` ("Adam", "SGD" or "AdamW")."""
  if opt == "Adam":
      return optim.Adam(parameters, lr=lr, weight_decay=weight_decay,amsgrad=False)
  if opt == "SGD":
      # optim.SGD has no `amsgrad` argument; passing it raised TypeError.
      return optim.SGD(parameters, lr=lr, weight_decay=weight_decay)
  if opt == "AdamW":
      return optim.AdamW(parameters, lr=lr, weight_decay=weight_decay,amsgrad=False)
  raise ValueError(f"Unknown opt {opt!r}; expected 'Adam', 'SGD' or 'AdamW'")


def model_loader(model_input = "resnet", lr=0.0001, weight_decay=0.01,lr_decay_gamma=0.9,last_layer_dropout=.15,no_grad=False,opt="Adam", model_path = '',crit_type=nn.BCEWithLogitsLoss(),crit_pos_weight=1):
  """Build a 2-class classifier together with its loss, optimizer and LR scheduler.

  Args:
      model_input: "resnet", "mobilenet", "densenet", "inception", "resnext" (torchvision
          backbones with ``pretrained=True`` and a new head) or "pre_trained" (a whole
          model loaded from ``model_path`` with ``torch.load``).
      lr: learning rate for the optimizer.
      weight_decay: weight decay for the optimizer.
      lr_decay_gamma: ``gamma`` of the ``ExponentialLR`` scheduler.
      last_layer_dropout: dropout probability in the MLP head (resnet/mobilenet/densenet).
      no_grad: if True, freeze the backbone before the new head is attached. "mobilenet"
          is always frozen. For "pre_trained", every parameter tensor except the last one
          yielded by ``model.parameters()`` is frozen.
      opt: "Adam", "SGD" or "AdamW". Only used for "densenet" and "pre_trained"; "resnet"
          always uses AdamW and the remaining backbones always use Adam.
      model_path: file to load when ``model_input == "pre_trained"``.
      crit_type: accepted for compatibility; NOT used.
      crit_pos_weight: accepted for compatibility; NOT used.
          NOTE(review): callers pass class weights here that have no effect — confirm
          whether they should be forwarded to the loss.

  Returns:
      ``(model, criterion, optimizer, scheduler)``; the model is moved to ``device``.
      The optimizer only receives the parameters of the classification head.

  Raises:
      ValueError: for an unknown ``model_input`` or ``opt``.
      AttributeError: if a "pre_trained" model has neither ``classifier`` nor ``fc``.
  """
  known_models = ("resnet", "mobilenet", "densenet", "inception", "resnext", "pre_trained")
  if model_input not in known_models:
      # Previously an unknown name fell through to the return with `model` unbound.
      raise ValueError(f"Unknown model_input {model_input!r}; expected one of {known_models}")

  if model_input == "resnet":
      # (fc): Linear(in_features=2048, out_features=1000, bias=True)
      model = torchvision.models.resnet50(pretrained=True)
      if no_grad:
          _freeze_parameters(model.parameters())
      model.fc = _mlp_head(2048, last_layer_dropout)
      optimizer = optim.AdamW(model.fc.parameters(), lr=lr, weight_decay=weight_decay,amsgrad=False)

  elif model_input == "mobilenet":
      # (classifier): Sequential(Dropout(p=0.2), Linear(in_features=1280, out_features=1000))
      model = torchvision.models.mobilenet_v2(pretrained=True)
      # This backbone is frozen regardless of `no_grad`.
      _freeze_parameters(model.parameters())
      model.classifier = _mlp_head(1280, last_layer_dropout)
      optimizer = optim.Adam(model.classifier.parameters(), lr=lr, weight_decay=weight_decay)

  elif model_input == "densenet":
      # (classifier): Linear(in_features=1024, out_features=1000, bias=True)
      model = torchvision.models.densenet121(pretrained=True)
      if no_grad:
          _freeze_parameters(model.parameters())
      model.classifier = _mlp_head(1024, last_layer_dropout)
      optimizer = _make_optimizer(opt, model.classifier.parameters(), lr, weight_decay)

  elif model_input == "inception":
      # (fc): Linear(in_features=2048, out_features=1000, bias=True)
      # NOTE(review): torchvision's inception_v3 returns an auxiliary output in training
      # mode by default — confirm the training loop handles that before using this option.
      model = torchvision.models.inception_v3(pretrained=True)
      if no_grad:
          _freeze_parameters(model.parameters())
      model.fc = _linear_head(2048)
      optimizer = optim.Adam(model.fc.parameters(), lr=lr, weight_decay=weight_decay,amsgrad=False)

  elif model_input == "resnext":
      # (fc): Linear(in_features=2048, out_features=1000, bias=True)
      model = torchvision.models.resnext50_32x4d(pretrained=True)
      if no_grad:
          _freeze_parameters(model.parameters())
      model.fc = _linear_head(2048)
      optimizer = optim.Adam(model.fc.parameters(), lr=lr, weight_decay=weight_decay,amsgrad=False)

  else:  # "pre_trained"
      # SECURITY: torch.load unpickles the file, which can execute arbitrary code;
      # only load checkpoints you trust.
      # NOTE(review): newer PyTorch releases may need weights_only=False to load a
      # whole pickled model — confirm against the installed version.
      # map_location lets a checkpoint saved on GPU be opened on a CPU-only machine.
      model = torch.load(model_path, map_location=device)

      if no_grad:
          # parameters() is a generator, so materialise it before slicing
          # (len() on the generator raised TypeError).
          _freeze_parameters(list(model.parameters())[:-1])

      # Saved models come from the branches above, whose head is either
      # `.classifier` (densenet/mobilenet) or `.fc` (resnet/resnext/inception).
      head = getattr(model, 'classifier', None)
      if head is None:
          head = getattr(model, 'fc', None)
      if head is None:
          raise AttributeError("pre_trained model has neither a 'classifier' nor an 'fc' head")
      optimizer = _make_optimizer(opt, head.parameters(), lr, weight_decay)

  # Every head ends in LogSoftmax. CrossEntropyLoss applies log-softmax again, which
  # leaves log-probabilities unchanged, so this behaves like NLLLoss on the head output.
  criterion = nn.CrossEntropyLoss()
  scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = lr_decay_gamma)
  model.to(device)

  return model,criterion, optimizer, scheduler

In [15]:
def _batch_accuracy(logps, labels):
  """Fraction of samples in one batch whose top-scoring class equals the label."""
  top_p, top_class = torch.exp(logps).topk(1, dim=1)
  equals = top_class == labels.view(*top_class.shape)
  return torch.mean(equals.type(torch.FloatTensor)).item()


def model_train(model, criterion, optimizer, scheduler, epochs=60,model_number='1',save_model_path = current_folder+'saved_trained_models/'):
  """Train ``model`` on the global ``train_dl`` and validate on the global ``val_dl``.

  After every epoch the loss and accuracy on both loaders are printed (accuracy is the
  mean of the per-batch accuracies) and the scheduler is stepped. Whenever the
  validation accuracy improves, the model is copied and saved as
  ``<save_model_path><model_number>_BEST.pth``. After the last epoch the final model is
  saved as ``<save_model_path><model_number>.pth`` and accuracy/precision/recall/F1 of
  the best model on ``val_dl`` are printed with torchmetrics.

  Args:
      model: network whose output is treated as per-class log-probabilities.
      criterion: loss called as ``criterion(model_output, labels)``.
      optimizer: optimizer stepped once per training batch.
      scheduler: LR scheduler stepped once per epoch.
      epochs: number of passes over ``train_dl``.
      model_number: string inserted into the checkpoint file names.
      save_model_path: path prefix for checkpoints (the file name is appended directly).

  Returns:
      None.
  """
  best_accuracy = 0
  best_epoch = 0
  best_model = None

  # torch.save does not create missing folders
  save_dir = os.path.dirname(save_model_path + model_number)
  if save_dir:
      os.makedirs(save_dir, exist_ok=True)

  for epoch in range(epochs):
      # Explicitly enter train mode: a freshly loaded checkpoint keeps whatever mode
      # it was pickled in, and the previous epoch ends in eval mode.
      model.train()
      running_loss = 0
      runs = 0
      for inputs, labels in train_dl:
          runs += 1
          inputs, labels = inputs.to(device), labels.to(device)
          optimizer.zero_grad()
          logps = model(inputs)
          loss = criterion(logps, labels)
          loss.backward()
          optimizer.step()
          running_loss += loss.item()

      test_loss = 0
      accuracy = 0
      train_accuracy = 0
      model.eval()
      with torch.no_grad():
          for inputs, labels in val_dl:
              inputs, labels = inputs.to(device),labels.to(device)
              logps = model(inputs)
              test_loss += criterion(logps, labels).item()
              accuracy += _batch_accuracy(logps, labels)
          # Second pass over the training data, in eval mode, for the train accuracy
          for inputs, labels in train_dl:
              inputs, labels = inputs.to(device),labels.to(device)
              train_accuracy += _batch_accuracy(model(inputs), labels)

      print(f"Epoch {epoch+1}/{epochs}.. "
            f"Train loss: {running_loss/runs:.3f}.. "
            f"Test loss: {test_loss/len(val_dl):.3f}.. "
            f"Train accuracy: {train_accuracy/len(train_dl):.3f}.. "
            f"Test accuracy: {accuracy/len(val_dl):.3f}")

      # Save best model. This check must run every epoch: it used to sit after the
      # loop, so the "best" model was always just the last one.
      val_accuracy = accuracy/len(val_dl)
      if best_model is None or val_accuracy > best_accuracy:
          best_epoch = epoch
          best_accuracy = val_accuracy
          best_model = deepcopy(model)
          torch.save(model, save_model_path + model_number +'_BEST.pth')
          print("saved best model!")

      scheduler.step()

  # Save last model
  save_model_p = save_model_path+model_number+'.pth'
  torch.save(model,save_model_p )
  print("saved!")

  # Output results for best model (falls back to `model` when epochs == 0)
  report_model = best_model if best_model is not None else model
  report_model.eval()
  preds = []
  true_y = []
  with torch.no_grad():
      for inputs, labels in val_dl:
          true_y += labels.tolist()
          inputs, labels = inputs.to(device),labels.to(device)
          logps = report_model(inputs).detach()
          top_p, top_class = logps.topk(1, dim=1)
          preds += top_class.flatten().tolist()

  preds = torch.Tensor(preds)
  true_y = torch.Tensor(true_y)


  print("Best Epoch: "+ str(best_epoch))
  print("Accuracy: "+ str(tm.functional.accuracy(preds, true_y, task="binary").item()))
  print("Precision: "+ str(tm.functional.precision(preds,true_y,task="binary").item()))
  print("Recall: "+ str(tm.functional.recall(preds,true_y,task="binary").item()))
  print("F1-Score: "+ str(tm.functional.f1_score(preds,true_y,task="binary").item()))


In [95]:
# Trainer
# One dict per ensemble member; each is trained for 60 epochs and saved under the
# model numbers 8, 9, ... (position in `params` + 8).
# NOTE(review): the 'opt' entries of m_4 and m_5 are not forwarded to model_loader below,
# so those two runs use model_loader's default optimizer, like m_2.
m_1 = {'model':'resnet','lr':.0001,'weight_decay':0.01,'lr_decay_gamma':0.9,'last_layer_dropout':.15,'no_grad':False}
m_2 = {'model':'densenet','lr':.0005,'weight_decay':0.01,'lr_decay_gamma':0.80,'last_layer_dropout':.15,'no_grad':False}
m_3 = {'model':'resnext','lr':.0001,'weight_decay':0.01,'lr_decay_gamma':0.80,'last_layer_dropout':.15,'no_grad':False}

m_4 = {'model':'densenet','lr':.0005,'weight_decay':0.01,'lr_decay_gamma':0.80,'last_layer_dropout':.15,'no_grad':False,'opt':'SGD'}
m_5 = {'model':'densenet','lr':.0005,'weight_decay':0.01,'lr_decay_gamma':0.80,'last_layer_dropout':.15,'no_grad':False,'opt':'AdamW'}
m_6 = {'model':'densenet','lr':.0005,'weight_decay':0.01,'lr_decay_gamma':0.70,'last_layer_dropout':.15,'no_grad':False}
m_7 = {'model':'densenet','lr':.0005,'weight_decay':0.00,'lr_decay_gamma':0.80,'last_layer_dropout':.05,'no_grad':False}

params = [m_1,m_2,m_3,m_4,m_5,m_6,m_7]
for run_idx, cfg in enumerate(params):
  model,criterion, optimizer, scheduler = model_loader(
      model_input = cfg['model'],
      lr=cfg['lr'],
      weight_decay=cfg['weight_decay'],
      lr_decay_gamma=cfg['lr_decay_gamma'],
      last_layer_dropout=cfg['last_layer_dropout'],
      no_grad=cfg['no_grad'],
  )
  model_train(model, criterion, optimizer, scheduler, epochs=60,model_number=str(run_idx+8))

Epoch 1/100.. Train loss: 5.071.. Test loss: 1.780.. Train accuracy: 0.438.. Test accuracy: 0.406
Epoch 2/100.. Train loss: 1.292.. Test loss: 1.201.. Train accuracy: 0.549.. Test accuracy: 0.542
Epoch 3/100.. Train loss: 1.117.. Test loss: 0.993.. Train accuracy: 0.564.. Test accuracy: 0.557
Epoch 4/100.. Train loss: 1.006.. Test loss: 0.898.. Train accuracy: 0.605.. Test accuracy: 0.615
Epoch 5/100.. Train loss: 0.945.. Test loss: 0.832.. Train accuracy: 0.632.. Test accuracy: 0.641
Epoch 6/100.. Train loss: 0.849.. Test loss: 0.767.. Train accuracy: 0.657.. Test accuracy: 0.672
Epoch 7/100.. Train loss: 0.817.. Test loss: 0.706.. Train accuracy: 0.669.. Test accuracy: 0.672
Epoch 8/100.. Train loss: 0.765.. Test loss: 0.645.. Train accuracy: 0.702.. Test accuracy: 0.703
Epoch 9/100.. Train loss: 0.799.. Test loss: 0.661.. Train accuracy: 0.672.. Test accuracy: 0.703
Epoch 10/100.. Train loss: 0.732.. Test loss: 0.669.. Train accuracy: 0.699.. Test accuracy: 0.688
Epoch 11/100.. Trai

In [None]:
#Ensemble Evaluator
# Loads the seven saved "_BEST" checkpoints one after another, predicts the validation
# set with each, prints per-model metrics and keeps every prediction vector in
# `preds_master` for the vote in the next cell.

torch.cuda.empty_cache()
preds_master = []

for m_num in range(7):

  model = torch.load(f"/content/drive/MyDrive/Practicum Model Saves/ensemble/{m_num}_BEST.pth")
  model.eval()
  preds, true_y = [], []
  with torch.no_grad():
      for inputs, labels in val_dl:
          true_y.extend(labels.tolist())
          inputs, labels = inputs.to(device),labels.to(device)
          logps = model.forward(inputs).detach()
          # index of the highest-scoring class per sample
          top_p, top_class = logps.topk(1, dim=1)
          preds.extend(top_class.flatten().tolist())

  preds = torch.Tensor(preds)
  true_y = torch.Tensor(true_y)
  preds_master.append(preds)
  for metric_label, metric_fn in (("Accuracy: ", tm.functional.accuracy),
                                  ("Precision: ", tm.functional.precision),
                                  ("Recall: ", tm.functional.recall),
                                  ("F1-Score: ", tm.functional.f1_score)):
      print(metric_label + str(metric_fn(preds, true_y, task="binary").item()))
  print('======================================')

Accuracy: 0.8770949840545654
Precision: 0.8441558480262756
Recall: 0.8666666746139526
F1-Score: 0.8552631735801697
Accuracy: 0.9273743033409119
Precision: 0.887499988079071
Recall: 0.9466666579246521
F1-Score: 0.9161290526390076
Accuracy: 0.7932960987091064
Precision: 0.8275862336158752
Recall: 0.6399999856948853
F1-Score: 0.7218044996261597
Accuracy: 0.8882681727409363
Precision: 0.8571428656578064
Recall: 0.8799999952316284
F1-Score: 0.8684210777282715
Accuracy: 0.8994413614273071
Precision: 0.8518518805503845
Recall: 0.9200000166893005
F1-Score: 0.8846153616905212
Accuracy: 0.8826815485954285
Precision: 0.8461538553237915
Recall: 0.8799999952316284
F1-Score: 0.8627451062202454
Accuracy: 0.8938547372817993
Precision: 0.8783783912658691
Recall: 0.8666666746139526
F1-Score: 0.8724831938743591


In [None]:
# Majority vote: average the 0/1 predictions of all models per sample and call the
# sample positive when at least half of the models did.
raw_vote = torch.stack(preds_master).mean(dim=0)
final_vote = torch.where(raw_vote>=0.5,1,0)
for metric_label, metric_fn in (("Accuracy: ", tm.functional.accuracy),
                                ("Precision: ", tm.functional.precision),
                                ("Recall: ", tm.functional.recall),
                                ("F1-Score: ", tm.functional.f1_score)):
    print(metric_label + str(metric_fn(final_vote, true_y, task="binary").item()))
print('======================================')

Accuracy: 0.9050279259681702
Precision: 0.8717948794364929
Recall: 0.9066666960716248
F1-Score: 0.8888888955116272


# Testing against external test set

In [25]:
best_model_path = ''
# Above is defined by user, for their fine-tuned saved model .pth file.
# Load that checkpoint and collect its class predictions on the external test loader.
model,criterion, optimizer, scheduler = model_loader(
    model_input = "pre_trained",
    lr=.00001,
    weight_decay=0,
    lr_decay_gamma=1,
    last_layer_dropout=0,
    no_grad=False,
    model_path=best_model_path,
    crit_pos_weight=torch.Tensor([0.3,0.7]),
)
model.eval()
preds, true_y = [], []
with torch.no_grad():
    for inputs, labels in test_dl:
        true_y.extend(labels.tolist())
        inputs, labels = inputs.to(device),labels.to(device)
        logps = model.forward(inputs).detach()
        # index of the highest-scoring class per sample
        top_p, top_class = logps.topk(1, dim=1)
        preds.extend(top_class.flatten().tolist())

preds = torch.Tensor(preds)
true_y = torch.Tensor(true_y)

In [26]:
# Binary metrics of the external-test predictions, followed by the confusion matrix
# (shown as the cell's output).
for metric_label, metric_fn in (("Accuracy: ", tm.functional.accuracy),
                                ("Precision: ", tm.functional.precision),
                                ("Recall: ", tm.functional.recall),
                                ("F1-Score: ", tm.functional.f1_score)):
    print(metric_label + str(metric_fn(preds, true_y, task="binary").item()))
print('======================================')
confmat = tm.ConfusionMatrix(task="binary", num_classes=2)
confmat(preds, true_y)

Accuracy: 0.8007968068122864
Precision: 0.8037382960319519
Recall: 0.747826099395752
F1-Score: 0.7747747898101807


tensor([[115,  21],
        [ 29,  86]])

# Loading one model and fine-tuning it for higher precision

In [21]:
# First loading best model, using it to predict, and then fine-tuning misclassified samples with higher attention to precision
# NOTE(review): as written, model_train iterates over the global train_dl, so this
# continues training on the whole training set rather than only on misclassified samples.
# NOTE(review): model_loader accepts crit_pos_weight but never reads it, so the [0.3, 0.7]
# weights have no effect on the loss.
# `best_model_path` is assigned in the external-test cell (In [25]); run that cell first.
model,criterion, optimizer, scheduler = model_loader(model_input = "pre_trained", lr=.00001, weight_decay=0,lr_decay_gamma=1,last_layer_dropout=0,no_grad=False,model_path=best_model_path,crit_pos_weight=torch.Tensor([0.3,0.7]))
# model_number '9' is also produced by the trainer loop (second entry, 1 + 8), so this
# call overwrites that run's '9.pth' / '9_BEST.pth' files.
model_train(model, criterion, optimizer, scheduler, epochs=60,model_number=str(9))

Epoch 1/60.. Train loss: 0.298.. Test loss: 0.241.. Train accuracy: 0.876.. Test accuracy: 0.906
Epoch 2/60.. Train loss: 0.295.. Test loss: 0.255.. Train accuracy: 0.885.. Test accuracy: 0.901
Epoch 3/60.. Train loss: 0.299.. Test loss: 0.246.. Train accuracy: 0.888.. Test accuracy: 0.906
Epoch 4/60.. Train loss: 0.298.. Test loss: 0.242.. Train accuracy: 0.893.. Test accuracy: 0.896
Epoch 5/60.. Train loss: 0.326.. Test loss: 0.243.. Train accuracy: 0.883.. Test accuracy: 0.906
Epoch 6/60.. Train loss: 0.292.. Test loss: 0.245.. Train accuracy: 0.896.. Test accuracy: 0.896
Epoch 7/60.. Train loss: 0.283.. Test loss: 0.237.. Train accuracy: 0.894.. Test accuracy: 0.906
Epoch 8/60.. Train loss: 0.319.. Test loss: 0.241.. Train accuracy: 0.897.. Test accuracy: 0.917
Epoch 9/60.. Train loss: 0.299.. Test loss: 0.241.. Train accuracy: 0.875.. Test accuracy: 0.901
Epoch 10/60.. Train loss: 0.287.. Test loss: 0.252.. Train accuracy: 0.880.. Test accuracy: 0.901
Epoch 11/60.. Train loss: 0.2

# Using ensemble for images with low confidence

In [None]:
# Re-load the best single model and record, for every validation image, the predicted
# class together with the probability assigned to it.
model,criterion, optimizer, scheduler = model_loader(
    model_input = "pre_trained",
    lr=.00001,
    weight_decay=0,
    lr_decay_gamma=1,
    last_layer_dropout=0,
    no_grad=False,
    model_path=best_model_path,
    crit_pos_weight=torch.Tensor([0.3,0.7]),
)

model.eval()
preds_master, confidence, true_y = [], [], []
with torch.no_grad():
    for inputs, labels in val_dl:
        true_y.extend(labels.tolist())
        inputs, labels = inputs.to(device),labels.to(device)
        logps = model.forward(inputs).detach()
        # the model output is treated as log-probabilities
        probs = torch.exp(logps)
        # confidence = probability of the most likely class
        to_conf = probs.max(dim=1).values
        confidence.extend(to_conf.flatten().tolist())
        top_p, top_class = probs.topk(1, dim=1)
        preds_master.extend(top_class.flatten().tolist())

preds_master = torch.Tensor(preds_master)
true_y = torch.Tensor(true_y)
confidence = torch.Tensor(confidence)

for metric_label, metric_fn in (("Accuracy: ", tm.functional.accuracy),
                                ("Precision: ", tm.functional.precision),
                                ("Recall: ", tm.functional.recall),
                                ("F1-Score: ", tm.functional.f1_score)):
    print(metric_label + str(metric_fn(preds_master, true_y, task="binary").item()))
print('======================================')

Accuracy: 0.8994413614273071
Precision: 0.8607594966888428
Recall: 0.9066666960716248
F1-Score: 0.8831169009208679


In [None]:
# Positions of the validation samples whose prediction differs from the true label
incorrect_inds = torch.nonzero(preds_master != true_y).flatten()
incorrect_inds

tensor([  0,   3,  20,  26,  30,  38,  39,  40,  41,  50,  79,  92,  97, 105,
        109, 112, 121, 131, 136, 156])

In [None]:
# Top-class probability the model assigned to each misclassified validation sample
confidence[incorrect_inds]

tensor([0.9282, 0.5528, 0.5174, 0.5671, 0.6766, 0.5200, 0.5306, 0.8518, 0.6422,
        0.6003, 0.6627, 0.5362, 0.5545, 0.5219, 0.9196, 0.8078, 0.5726, 0.7115,
        0.8084, 0.6537])

In [None]:
# Class predicted (0 or 1) for each misclassified validation sample
preds_master[incorrect_inds]

tensor([0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
        1., 1.])

In [None]:
next_best_model_path = ''
# User must define this
# Same evaluation as for the best model, repeated for the runner-up checkpoint:
# predictions and top-class probabilities on the validation set, then binary metrics.
model,criterion, optimizer, scheduler = model_loader(
    model_input = "pre_trained",
    lr=.00001,
    weight_decay=0,
    lr_decay_gamma=1,
    last_layer_dropout=0,
    no_grad=False,
    model_path=next_best_model_path,
    crit_pos_weight=torch.Tensor([0.3,0.7]),
)

model.eval()
preds_master, confidence, true_y = [], [], []
with torch.no_grad():
    for inputs, labels in val_dl:
        true_y.extend(labels.tolist())
        inputs, labels = inputs.to(device),labels.to(device)
        logps = model.forward(inputs).detach()
        # the model output is treated as log-probabilities
        probs = torch.exp(logps)
        # confidence = probability of the most likely class
        to_conf = probs.max(dim=1).values
        confidence.extend(to_conf.flatten().tolist())
        top_p, top_class = probs.topk(1, dim=1)
        preds_master.extend(top_class.flatten().tolist())

preds_master = torch.Tensor(preds_master)
true_y = torch.Tensor(true_y)
confidence = torch.Tensor(confidence)

for metric_label, metric_fn in (("Accuracy: ", tm.functional.accuracy),
                                ("Precision: ", tm.functional.precision),
                                ("Recall: ", tm.functional.recall),
                                ("F1-Score: ", tm.functional.f1_score)):
    print(metric_label + str(metric_fn(preds_master, true_y, task="binary").item()))
print('======================================')

Accuracy: 0.8882681727409363
Precision: 0.8571428656578064
Recall: 0.8799999952316284
F1-Score: 0.8684210777282715
