<a href="https://colab.research.google.com/github/yaara-dev/brain-tumor/blob/main/vgg16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time
import platform
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, utils, models
from torch.utils.data import Dataset, DataLoader ,Subset
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Resize

import warnings
warnings.filterwarnings("ignore", module="matplotlib")


In [None]:
# Colab-only helper used to mount Google Drive inside the runtime.
from google.colab import drive

# Mount Drive at /content/drive; force_remount=True is passed so an existing mount is replaced.
drive.mount('/content/drive', force_remount=True)
# Drive folder and notebook file name consumed by the %cp command below.
FOLDERNAME = 'project_test'
ASSIGNMENTNAME = 'vgg16.ipynb'

# Enter the Drive root, copy the notebook two directories up, then move back up two levels.
# NOTE(review): the relative paths presumably assume the kernel starts in /content - confirm.
%cd drive/My\ Drive
%cp -r $FOLDERNAME/$ASSIGNMENTNAME ../../
%cd ../../
# Select the compute device: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# To test your code, change the following paths so they point to samples on your drive.
# Once you finish testing your code, revert the changes and restore these paths.
PATH_TUMOR_Folder = 'drive/MyDrive/project_test/TUMOR_TEST'
PATH_NOTUMOR_Folder = 'drive/MyDrive/project_test/NOTUMOR_TEST'

# Root directory of the training data.
PATH_Folder_Train = 'drive/MyDrive/project_test/ALLDATA_TRAIN'

# Test-time preprocessing. It has to match what the model is trained on
# (see `data_transforms`): resize to 224x224, convert to a tensor and apply the
# same per-channel normalization. Without the Normalize step the model would be
# evaluated on inputs with a different value range than it saw during training.
test_transforms = Compose([
    Resize((224, 224)),
    ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
dataset_yes_test = ImageFolder(PATH_TUMOR_Folder, transform=test_transforms)
dataset_no_test = ImageFolder(PATH_NOTUMOR_Folder, transform=test_transforms)

In [None]:
##########
# Plot every image of a batch in a grid, with its label as the subplot title.
def PlotImages(images, labels):
  """Show a batch of images in a grid, each titled with its label.

  The grid is 10 columns wide and at least 3 rows high (the layout used for a
  30-image batch); extra rows are added when the batch is larger.

  Args:
    images: indexable batch of image tensors. Each image is transposed with
      axes (1, 2, 0) before display, i.e. it is expected channel-first.
    labels: label tensors, paired with `images` by position.
  """
  with warnings.catch_warnings():
    # Silence any warnings emitted while the figure is being drawn.
    warnings.simplefilter("ignore")
    figure = plt.figure(figsize=(18, 6))
    n_cols = 10
    # Ceiling division; never fewer than 3 rows so small batches keep the usual layout.
    n_rows = max(3, -(-len(images) // n_cols))
    # Subplot positions are 1-based while the batch is 0-based, so enumerate from 1
    # over the items themselves. Indexing the batch with the subplot position
    # would skip the first image.
    for position, (image, label) in enumerate(zip(images, labels), start=1):
      figure.add_subplot(n_rows, n_cols, position)
      # .cpu() is a no-op for CPU tensors and makes GPU batches plottable too.
      plt.title(label.cpu().numpy())
      plt.axis("off")
      plt.imshow(
          np.transpose(image.detach().cpu().numpy(), (1, 2, 0)),
          cmap=plt.get_cmap('gist_gray'),
      )

    plt.show()

In [None]:
# Data locations used for training and validation.
train_path = PATH_Folder_Train
validation_path = 'drive/MyDrive/project_test/ALLDATA_VALDATION/'
# Hyperparameters shared by the rest of the notebook.
nepochs = 60
batch_size = 30
num_workers = 2
learningRate = 0.0001
image_size = (224, 224)
# Preview-only dataset: images are resized and converted to tensors,
# but not normalized.
dataset = torchvision.datasets.ImageFolder(
    root=train_path,
    transform=transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ]))
# Loader for the preview; no shuffling is requested.
dataset_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=num_workers,
)
# Fetch the first batch exactly once. Building a second iterator only to
# discard its result would start the worker processes and load the same
# batch a second time for nothing.
data = next(iter(dataset_loader))
images, labels = data
PlotImages(images, labels)

In [None]:
# Preprocessing applied to the model inputs: resize to `image_size`, convert
# to a tensor, then normalize each of the three channels with the mean / std
# values below (presumably the statistics the pretrained VGG weights expect -
# TODO confirm).
data_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

In [None]:
# Two image-folder datasets, one per split; both go through `data_transforms`.
train_data = ImageFolder(train_path, transform=data_transforms)
validation_data = ImageFolder(validation_path, transform=data_transforms)

In [None]:
# Batch loaders for both splits. Only the training loader shuffles its samples;
# the validation loader keeps the dataset order.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
validation_loader = DataLoader(
    dataset=validation_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

In [None]:
# Report the class-name -> index mapping and the size of each split,
# one "caption value" line per entry.
print("\n".join(
    "{} {}".format(caption, value)
    for caption, value in (
        ("Classes : ", train_data.class_to_idx),
        ("Number of Training Samples : ", len(train_data)),
        ("Number of Validation Samples : ", len(validation_data)),
    )
))

In [None]:
# Draw one batch from the training loader and display it.
batch = next(iter(train_loader))
# The batch is an (images, labels) pair, so it can be unpacked straight into the call.
PlotImages(*batch)
images, labels = batch


In [None]:
# Instantiate VGG16 with pretrained weights requested (pretrained=True).
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=` - confirm against the installed version before changing it.
model = torchvision.models.vgg16(pretrained=True)


In [None]:
# Parameter statistics of the model as loaded, before any layer is replaced.
total_params = sum(weight.numel() for weight in model.parameters())
# Count the frozen parameters directly; the trainable count is the remainder.
non_trainable_params = sum(
    weight.numel() for weight in model.parameters() if not weight.requires_grad
)
total_trainable_params = total_params - non_trainable_params
print('Total Parameters: ', total_params)
print('Total Trainable Parameters: ', total_trainable_params)
print('Total Non Trainable Parameters: ', non_trainable_params)

In [None]:
# Freeze every parameter of the `features` sub-module so the optimizer
# will not update them.
for param in model.features.parameters():
    param.requires_grad=False
# New layers: a pooling layer with a 1x1 output, three fully connected layers
# (512 -> 1024 -> 1024 -> 2 outputs), and two dropout layers.
# NOTE(review): 512 presumably equals the channel count produced by
# `model.features` - confirm.
gap = nn.AdaptiveAvgPool2d((1, 1))
first_fc_layer=nn.Linear(512, 1024)
second_fc_layer=nn.Linear(1024, 1024)
last_fc_layer=nn.Linear(1024, 2)
# p=0 means these dropout layers never drop any activation.
do1=torch.nn.Dropout(p=0, inplace=False)
do2=torch.nn.Dropout(p=0, inplace=False)
# Swap the new layers into the existing model: pooling first, then slots
# 0/3/6 (linear layers) and 2/5 (dropout) of the classifier.
# NOTE(review): assumes the stock classifier keeps its activations at
# indices 1 and 4 - verify against the torchvision VGG definition.
model.avgpool = gap
model.classifier[0]=first_fc_layer
model.classifier[3]=second_fc_layer
model.classifier[6]=last_fc_layer
model.classifier[2]=do1
model.classifier[5]=do2

# Move the model to the selected device. As the last expression of the cell,
# the returned model is also displayed by the notebook.
model.to(device)


In [None]:
# Parameter statistics of the current model (run after the layers were replaced).
total_params = sum(weight.numel() for weight in model.parameters())
# Count the frozen parameters directly; the trainable count is the remainder.
non_trainable_params = sum(
    weight.numel() for weight in model.parameters() if not weight.requires_grad
)
total_trainable_params = total_params - non_trainable_params
print('Total Parameters: ', total_params)
print('Total Trainable Parameters: ', total_trainable_params)
print('Total Non Trainable Parameters: ', non_trainable_params)

In [None]:
# Loss function minimized during training: cross-entropy between the model
# outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam optimizer over the model's parameters, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learningRate)

In [None]:
##################
# Plot training / validation loss and accuracy per epoch on one figure.
def PlotLossAccuracy(loss_history, val_loss_history, corrects_history, val_corrects_history):
  """Draw the four per-epoch curves (loss and accuracy, train and validation).

  Args:
    loss_history: training loss, one value per epoch.
    val_loss_history: validation loss, one value per epoch.
    corrects_history: training accuracy, one value per epoch.
    val_corrects_history: validation accuracy, one value per epoch.

  Every value may be a Python number or a 0-d tensor.
  """
  # 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in newer matplotlib
  # releases. Use whichever name this installation provides and keep the
  # current style if neither is available.
  for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
      plt.style.use(style_name)
      break
  plt.figure()
  curves = (
      (loss_history, 'red'),
      (val_loss_history, 'darkgreen'),
      (corrects_history, 'magenta'),
      (val_corrects_history, 'blue'),
  )
  for values, color in curves:
    # float() unwraps 0-d tensors (including ones stored on the GPU), which
    # matplotlib cannot convert to an array on its own.
    plt.plot([float(value) for value in values], color = color)
  plt.title('Model Loss/Accuracy')
  plt.ylabel('Loss/Accuracy')
  plt.xlabel('Epoch #')
  plt.legend(['Training loss', 'Validation loss', 'Training accuracy', 'Validation accuracy'], loc='center right')
  plt.grid(axis = 'y', c = 'black', alpha = 0.2)
  plt.grid(axis = 'x', c = 'black', alpha = 0.2)

In [None]:
########################
# Plot training / validation accuracy per epoch and save the figure to disk.
def PlotAccuracy(corrects_history, val_corrects_history):
  """Draw the accuracy curves and save them as graphs/vgg16_acc.png.

  Args:
    corrects_history: training accuracy, one value per epoch.
    val_corrects_history: validation accuracy, one value per epoch.

  Every value may be a Python number or a 0-d tensor.
  """
  import os

  # 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in newer matplotlib
  # releases. Use whichever name this installation provides and keep the
  # current style if neither is available.
  for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
      plt.style.use(style_name)
      break
  plt.figure()
  # float() unwraps 0-d tensors (including ones stored on the GPU), which
  # matplotlib cannot convert to an array on its own.
  plt.plot([float(value) for value in corrects_history], color = '#1ddb1d')
  plt.plot([float(value) for value in val_corrects_history], color = '#fcebfa')
  plt.title('Model Accuracy')
  plt.ylabel('Accuracy')
  plt.xlabel('Epoch #')
  plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
  plt.grid(axis = 'x', c = 'black', alpha = 0.2)
  # Build the path with os.path.join: a hard-coded backslash is a directory
  # separator on Windows only and becomes part of the file name elsewhere.
  # Create the target directory first so savefig cannot fail on a missing folder.
  output_dir = 'graphs'
  os.makedirs(output_dir, exist_ok=True)
  plt.savefig(os.path.join(output_dir, 'vgg16_acc.png'), dpi = 100)

In [None]:
##############################
# Plot training / validation loss per epoch.
def PlotLoss(loss_history, val_loss_history):
  """Draw the loss curves of the training and validation splits.

  Args:
    loss_history: training loss, one value per epoch.
    val_loss_history: validation loss, one value per epoch.
  """
  # 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in newer matplotlib
  # releases. Use whichever name this installation provides and keep the
  # current style if neither is available.
  for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
      plt.style.use(style_name)
      break
  plt.figure()
  # float() keeps plain numbers unchanged and unwraps 0-d tensors if any were stored.
  plt.plot([float(value) for value in loss_history], color = 'magenta')
  plt.plot([float(value) for value in val_loss_history], color = '#606060')
  plt.title('Model Loss')
  plt.ylabel('Loss')
  plt.xlabel('Epoch #')
  plt.legend(['Training loss', 'Validation loss'], loc='upper right')
  plt.grid(axis = 'y', c = 'black', alpha = 0.2)
  plt.grid(axis = 'x', c = 'black', alpha = 0.2)

In [None]:
##############
# Round a number down (towards negative infinity) to a number of decimals.
def round_down(value, decimals):
    """Return `value` rounded down to `decimals` decimal places, as a float.

    The rounding is done in decimal arithmetic on the shortest repr of the
    float. Binary floor-division by 10**-decimals is off by one step for
    values that already sit on the grid, e.g. 1.0 -> 0.9999 and
    0.5 -> 0.4999 for four decimals.

    Args:
        value: number to round (anything accepted by float()).
        decimals: number of decimal places to keep; a negative value rounds
            to tens, hundreds, ...

    Returns:
        The largest multiple of 10**-decimals that is <= value. Non-finite
        inputs (nan, +/-inf) are returned unchanged.
    """
    import math
    from decimal import Decimal, ROUND_FLOOR, localcontext

    value = float(value)
    if not math.isfinite(value):
        return value
    with localcontext() as ctx:
        # Generous precision so quantize never rejects large magnitudes.
        ctx.prec = 1000
        quantum = Decimal(1).scaleb(-int(decimals))
        floored = Decimal(repr(value)).quantize(quantum, rounding=ROUND_FLOOR)
    return float(floored)

In [None]:
#############
# Train the global `model` on `train_loader` and validate it on
# `validation_loader` after every epoch.
def training(
  loss_history=None,
  corrects_history=None,
  val_loss_history=None,
  val_corrects_history=None):
  """Run `nepochs` epochs of training with per-epoch validation.

  Uses the notebook-level `model`, `criterion`, `optimizer`, `device`,
  `train_loader`, `validation_loader` and `nepochs`. Whenever the validation
  loss is not higher than the best one seen so far, a checkpoint (epoch,
  model state, optimizer state, validation loss) is written to
  checkpoint/vgg16_lowest_val_loss_epoch_<epoch>.pth.

  Args:
    loss_history: optional list to append the training loss of each epoch to.
    corrects_history: optional list for the training accuracy of each epoch.
    val_loss_history: optional list for the validation loss of each epoch.
    val_corrects_history: optional list for the validation accuracy.
    A fresh list is created for every argument that is left as None.

  Returns:
    (loss_history, val_loss_history, corrects_history, val_corrects_history),
    each a list with one Python float per epoch.
  """
  import os

  # Create the lists per call: mutable default arguments are shared between
  # calls, so a second run would keep appending to the first run's history.
  if loss_history is None:
      loss_history = []
  if corrects_history is None:
      corrects_history = []
  if val_loss_history is None:
      val_loss_history = []
  if val_corrects_history is None:
      val_corrects_history = []

  checkpoint_dir = 'checkpoint'
  n_train = len(train_loader.dataset)
  n_val = len(validation_loader.dataset)

  val_loss_min = np.inf
  for epoch in range(nepochs):

      t0 = time.time()
      running_loss = 0.0
      running_corrects = 0
      val_running_loss = 0.0
      val_running_corrects = 0

      # ---- training pass ----
      model.train()
      for images, labels in train_loader:
          # use gpu if possible
          images = images.to(device)
          labels = labels.to(device)

          outputs = model(images)
          loss = criterion(outputs, labels)

          # one optimization step
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

          # The loss is a per-batch mean (assuming the criterion keeps the
          # default 'mean' reduction), so weight it by the batch size. The sum
          # divided by the dataset size is then the true per-sample mean.
          running_loss += loss.item() * images.size(0)
          _, preds = torch.max(outputs, 1)
          # .item() keeps the counter a plain Python int (no device tensors).
          running_corrects += torch.sum(preds == labels.data).item()

      # ---- validation pass (no gradient tracking, no weight updates) ----
      model.eval()
      with torch.no_grad():
          for val_images, val_labels in validation_loader:
              # use gpu if possible
              val_images = val_images.to(device)
              val_labels = val_labels.to(device)

              val_outputs = model(val_images)
              val_loss = criterion(val_outputs, val_labels)

              val_running_loss += val_loss.item() * val_images.size(0)
              _, val_preds = torch.max(val_outputs, 1)
              val_running_corrects += torch.sum(val_preds == val_labels.data).item()

      # training statistics of this epoch
      epoch_loss = running_loss / n_train
      epoch_acc = running_corrects / n_train
      loss_history.append(epoch_loss)
      corrects_history.append(epoch_acc)

      # validation statistics of this epoch
      val_epoch_loss = val_running_loss / n_val
      val_epoch_acc = val_running_corrects / n_val
      val_loss_history.append(val_epoch_loss)
      val_corrects_history.append(val_epoch_acc)

      print('epoch: ', (epoch + 1))

      print('training loss: {:.4f}, acc: {:.4f}'.format(epoch_loss, round_down(epoch_acc, 4)))
      print('validation loss: {:.4f}, acc: {:.4f}'.format(val_epoch_loss, round_down(val_epoch_acc, 4)))
      print('epoch time: {:.4f} seconds'.format(time.time() - t0))

      # save a checkpoint when the validation loss is not worse than the best so far
      if val_epoch_loss <= val_loss_min:
          print('validation loss decreased from {:.4f} to {:.4f}, saving model...'.format(val_loss_min, val_epoch_loss))
          checkpoint = {
              'epoch': epoch + 1,
              'state_dict': model.state_dict(),
              'optimizer': optimizer.state_dict(),
              'val_loss_min': val_epoch_loss,
          }
          # os.path.join instead of a hard-coded backslash (which is only a
          # separator on Windows); make sure the directory exists before saving.
          os.makedirs(checkpoint_dir, exist_ok=True)
          torch.save(
              checkpoint,
              os.path.join(checkpoint_dir, 'vgg16_lowest_val_loss_epoch_{}.pth'.format(epoch + 1)),
          )
          val_loss_min = val_epoch_loss
  return loss_history, val_loss_history, corrects_history, val_corrects_history

TEST provided by Zach

In [None]:
# Indices of the first 30 samples; both test subsets are cut to these.
idx = np.arange(30)

# Each container maps the single key 'test' to its subset / loader.
test_yes = {'test': Subset(dataset_yes_test, idx)}
test_no = {'test': Subset(dataset_no_test, idx)}

# One shuffled loader per class folder, with a batch size of 30 and two workers.
test_set_yes = {
    'test': DataLoader(test_yes['test'], batch_size=30, shuffle=True, num_workers=2),
}
test_set_no = {
    'test': DataLoader(test_no['test'], batch_size=30, shuffle=True, num_workers=2),
}




Set the label values your model outputs when a tumor is present (`predict_tumor`) and when no tumor is present (`predict_no_tumor`).

In [None]:
# Class values for "tumor" and "no tumor", in that order.
predict_tumor, predict_no_tumor = 1, 0

In [None]:
def test(model):
    """Evaluate `model` on the tumor and no-tumor test loaders and print the accuracy.

    Every batch of `test_set_yes['test']` and `test_set_no['test']` is
    evaluated, and the accuracy is computed over the number of samples that
    were actually seen.

    Args:
        model: network to evaluate; called on batches moved to `device`.

    Returns:
        The accuracy as a 0-d double tensor (it is also printed).

    Raises:
        ValueError: if the two loaders yield no samples at all.
    """
    model.eval()
    corrects = 0
    total = 0

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        for loader in (test_set_yes['test'], test_set_no['test']):
            # NOTE(review): labels are used exactly as the datasets provide them -
            # verify they line up with the class values the model predicts.
            for inputs, labels in loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                corrects += torch.sum(preds == labels.data)
                total += labels.size(0)

    if total == 0:
        raise ValueError('the test loaders did not yield any samples')
    # Divide by the real sample count rather than a hard-coded 60.
    acc = corrects.double() / total
    print('Test accuracy = ', acc)
    return acc
   

In [None]:
if __name__ == '__main__':
  # Train the model and collect the per-epoch loss / accuracy histories.
  (loss_history,
   val_loss_history,
   corrects_history,
   val_corrects_history) = training()

  # Figures: combined view first, then loss only, then accuracy only.
  PlotLossAccuracy(loss_history, val_loss_history, corrects_history, val_corrects_history)
  PlotLoss(loss_history, val_loss_history)
  PlotAccuracy(corrects_history, val_corrects_history)

  # Finally, evaluate the trained model on the test loaders.
  test(model)
