### 1. Download the Data from Kaggle and Unzip


In [1]:
!pip install -q kaggle

In [None]:
from google.colab import files

# files.upload() returns the uploaded files (name -> content). Binding the
# result keeps the cell from echoing kaggle.json, which holds the Kaggle API
# key, into the saved notebook output.
_uploaded_files = files.upload()

In [3]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d 'maedemaftouni/large-covid19-ct-slice-dataset'

Downloading large-covid19-ct-slice-dataset.zip to /content
100% 2.06G/2.06G [01:01<00:00, 37.8MB/s]
100% 2.06G/2.06G [01:01<00:00, 35.7MB/s]


In [4]:
import shutil
from google.colab import drive

# Google Drive is mounted because the model weights are saved there later on.
drive.mount("/content/gdrive")

# Extract the archive downloaded from Kaggle into /tmp/.
shutil.unpack_archive(
    filename="/content/large-covid19-ct-slice-dataset.zip",
    extract_dir="/tmp/",
)

Mounted at /content/gdrive


### 1. OR Unzip the data if you have uploaded it to drive

In [None]:
import shutil
from google.colab import drive

drive.mount("/content/gdrive")

# Change the archive path below if the dataset is stored elsewhere in your Drive.
shutil.unpack_archive(
    filename="/content/gdrive/MyDrive/archive.zip",
    extract_dir="/tmp/",
)

Mounted at /content/gdrive


### 2. Splitting

In [5]:
import pandas as pd

# Metadata tables of the three groups (the covid file is read as windows-1252).
meta_normal = pd.read_csv("/tmp/meta_data_normal.csv")
meta_covid = pd.read_csv("/tmp/meta_data_covid.csv", encoding='windows-1252')
meta_cap = pd.read_csv("/tmp/meta_data_cap.csv")

# Define the variables below using meta dataframes

def _group_counts(meta):
  """Return (number of distinct patient IDs, number of non-null file names)."""
  return meta['Patient ID'].nunique(), meta['File name'].count()

normal_pt_nb, normal_img_nb = _group_counts(meta_normal)  # patients / images in normal group
covid_pt_nb, covid_img_nb = _group_counts(meta_covid)     # patients / images in covid group
cap_pt_nb, cap_img_nb = _group_counts(meta_cap)           # patients / images in CAP group

for heading, patient_count, image_count in (
    ("Normal", normal_pt_nb, normal_img_nb),
    ("\nCovid", covid_pt_nb, covid_img_nb),
    ("\nCAP", cap_pt_nb, cap_img_nb),
):
  print(heading)
  print("Number of patient: ", patient_count)
  print("Number of image: ", image_count)


Normal
Number of patient:  604
Number of image:  6893

Covid
Number of patient:  464
Number of image:  7593

CAP
Number of patient:  54
Number of image:  2618


In [6]:
import numpy as np

# Set seed to get the same result (this seed was specifically chosen after a couple of tries
# so that we'll have approximately the same split ratios on slice level as well)
np.random.seed(58)
val_split_size = .2
test_split_size = .5

normal_val_file_list, normal_test_file_list = [], []
covid_val_file_list, covid_test_file_list = [], []
##### START OF YOUR CODE #####
def _files_of_patients(meta, patient_ids):
  """Collect the file names that belong to `patient_ids`.

  The result is ordered patient by patient (in the order of `patient_ids`) and,
  within a patient, in metadata row order. The table is scanned once instead of
  once per patient.
  """
  files_by_patient = {}
  for patient_id, file_name in zip(meta['Patient ID'], meta['File name']):
    files_by_patient.setdefault(patient_id, []).append(file_name)
  return [file_name
          for patient_id in patient_ids
          for file_name in files_by_patient.get(patient_id, [])]

patient_id_normal = meta_normal['Patient ID'].unique()
patient_id_covid = meta_covid['Patient ID'].unique()

# random shuffle (in place; the call order matters for the seeded result)
np.random.shuffle(patient_id_normal)
np.random.shuffle(patient_id_covid)

# validation and test size, in number of patients
normal_val_size = int(normal_pt_nb * val_split_size)
normal_test_size = int(normal_pt_nb * test_split_size)
covid_val_size = int(covid_pt_nb * val_split_size)
covid_test_size = int(covid_pt_nb * test_split_size)

# The split is done on patient level: first the val patients, then the test
# patients, and whatever is left is used for training.
pt_id_normal_val = patient_id_normal[:normal_val_size]
pt_id_normal_test = patient_id_normal[normal_val_size:normal_val_size + normal_test_size]
pt_id_normal_train = patient_id_normal[normal_val_size + normal_test_size:]
pt_id_covid_val = patient_id_covid[:covid_val_size]
pt_id_covid_test = patient_id_covid[covid_val_size:covid_val_size + covid_test_size]
pt_id_covid_train = patient_id_covid[covid_val_size + covid_test_size:]

# val, test, train file lists for normal
normal_val_file_list.extend(_files_of_patients(meta_normal, pt_id_normal_val))
normal_test_file_list.extend(_files_of_patients(meta_normal, pt_id_normal_test))
normal_train_file_list = _files_of_patients(meta_normal, pt_id_normal_train)

# val, test, train file lists for covid
covid_val_file_list.extend(_files_of_patients(meta_covid, pt_id_covid_val))
covid_test_file_list.extend(_files_of_patients(meta_covid, pt_id_covid_test))
covid_train_file_list = _files_of_patients(meta_covid, pt_id_covid_train)
##### END OF YOUR CODE #####

print("Slice-based val size: ")
print("Normal: ", round(len(normal_val_file_list)/normal_img_nb, 2))
print("Covid: ", round(len(covid_val_file_list)/covid_img_nb, 2))

print("\nSlice-based test size: ")
print("Normal: ", round(len(normal_test_file_list)/normal_img_nb, 2))
print("Covid: ", round(len(covid_test_file_list)/covid_img_nb, 2))

print("\nSlice-based train size: ")
print("Normal: ", round(len(normal_train_file_list)/normal_img_nb, 2))
print("Covid: ", round(len(covid_train_file_list)/covid_img_nb, 2))

Slice-based val size: 
Normal:  0.21
Covid:  0.18

Slice-based test size: 
Normal:  0.51
Covid:  0.5

Slice-based train size: 
Normal:  0.29
Covid:  0.32


In [7]:
import os

##### START OF YOUR CODE #####
import shutil

#make the directories (exist_ok so that re-running the cell does not fail)
for i in ["train/", "test/", "val/"]:
  for j in ["normal/", "covid/"]:
    os.makedirs('/tmp/curated_data/data/' + i + j, exist_ok=True)

#paths for moving operation
src_normal_path = "/tmp/curated_data/curated_data/1NonCOVID/"
src_covid_path = "/tmp/curated_data/curated_data/2COVID/"
dst_normal_train_path = "/tmp/curated_data/data/train/normal/"
dst_normal_test_path = "/tmp/curated_data/data/test/normal/"
dst_normal_val_path = "/tmp/curated_data/data/val/normal/"
dst_covid_train_path = "/tmp/curated_data/data/train/covid/"
dst_covid_test_path = "/tmp/curated_data/data/test/covid/"
dst_covid_val_path = "/tmp/curated_data/data/val/covid/"

def _move_listed_files(file_names, src_dir, dst_dir):
  """Move every file of `file_names` that is present in `src_dir` into `dst_dir`.

  Names that are not found in `src_dir` are skipped. The source directory is
  listed once, instead of once per file.
  """
  available = set(os.listdir(src_dir))
  for file_name in file_names:
    if file_name in available:
      shutil.move(src_dir + file_name, dst_dir)
      available.discard(file_name)

#moving normal train, test, val
_move_listed_files(normal_train_file_list, src_normal_path, dst_normal_train_path)
_move_listed_files(normal_test_file_list, src_normal_path, dst_normal_test_path)
_move_listed_files(normal_val_file_list, src_normal_path, dst_normal_val_path)

#moving covid train, test, val
_move_listed_files(covid_train_file_list, src_covid_path, dst_covid_train_path)
_move_listed_files(covid_test_file_list, src_covid_path, dst_covid_test_path)
_move_listed_files(covid_val_file_list, src_covid_path, dst_covid_val_path)

##### END OF YOUR CODE #####

data_counts = {x+y: len(os.listdir("/tmp/curated_data/data/"+x+y)) for x in ["train/", "val/", "test/"] for y in ["normal/", "covid/"]}
for i in ["train", "val", "test"]:
  print("\nPercentage of {} set: {:.2f}" .format(i, (data_counts[i+"/normal/"]+data_counts[i+"/covid/"])/sum(data_counts.values())))
  print("Percentage of Covid + slices in {} set is: {:.2f}" .format(i, data_counts[i+"/covid/"]/(data_counts[i+"/normal/"]+data_counts[i+"/covid/"])))


Percentage of train set: 0.30
Percentage of Covid + slices in train set is: 0.55

Percentage of val set: 0.19
Percentage of Covid + slices in val set is: 0.49

Percentage of test set: 0.51
Percentage of Covid + slices in test set is: 0.52


### 3. Dataloader

In [None]:
!pip install torchio

In [9]:
from IPython.core.display import Path
import os
import numpy as np
import torch
from PIL import Image
import torchio as tio
import torchvision.transforms as T


class CustomDataset(torch.utils.data.Dataset):
  """Binary image dataset read from <data_folder>/<partition>/{normal,covid}/.

  Each item is an (image, label) pair: the image is a grayscale tensor resized
  to image_size x image_size, the label is a length-1 numpy array holding 1 for
  files of the "covid" directory and 0 for files of the "normal" directory.
  Images of the "train" partition additionally go through `augmentation`.

  Args:
    image_size: edge length (in pixels) of the square output images.
    data_folder: root folder that contains the partition folders.
    partition: name of the partition folder, e.g. "train", "val" or "test".
  """

  def __init__(self, image_size, data_folder, partition):
    # Define attributes
    ##### START OF YOUR CODE #####
    self.image_size = image_size
    self.data_folder = data_folder
    self.partition = partition

    ##### END OF YOUR CODE #####
    # Directory of this partition (data_folder may or may not end with "/").
    self.paths = os.path.join(self.data_folder, self.partition)

    # The file list is built (and shuffled) exactly once, so that an index
    # keeps referring to the same image for the lifetime of the dataset.
    self._image_paths = self._list_image_paths()

  def __len__(self):
    ##### START OF YOUR CODE #####
    return len(self._image_paths)
    ##### END OF YOUR CODE #####

  def __getitem__(self, idx):
    """Return the (image, label) pair stored at position `idx`."""
    ##### START OF YOUR CODE #####
    path_img = self._image_paths[idx]

    img = self.read_and_resize_img(path_img)

    if self.partition == "train":
      img = self.augmentation(img)

    # The label comes from the class directory itself, not from a substring
    # match on the whole path (which would also hit e.g. a "covid" root folder).
    class_name = os.path.basename(os.path.dirname(path_img))
    if class_name == "covid":
      label = np.ones(1)
    else:
      label = np.zeros(1)

    return img, label
    ##### END OF YOUR CODE #####

  def img_paths(self):
    """Return a copy of the image path list, in the order used for indexing."""
    ##### START OF YOUR CODE #####
    return list(self._image_paths)
    ##### END OF YOUR CODE #####

  def _list_image_paths(self):
    """List the normal and covid image paths of the partition, shuffled once."""
    paths = []
    for class_name in ("normal", "covid"):
      class_dir = os.path.join(self.data_folder, self.partition, class_name)
      # sorted(): os.listdir order is arbitrary; sorting makes the shuffle
      # below reproducible for a given numpy seed.
      paths.extend(os.path.join(class_dir, name) for name in sorted(os.listdir(class_dir)))
    np.random.shuffle(paths)
    return paths

  def read_and_resize_img(self, path):
    """Load the image at `path` as a grayscale tensor of the configured size."""
    ##### START OF YOUR CODE #####
    target_size = (self.image_size, self.image_size)
    steps = [T.Grayscale()]

    # The context manager closes the underlying file once the tensor is built.
    with Image.open(path) as img:
      # PIL's Image.size is a (width, height) pair; resize only when needed.
      if img.size != target_size:
        steps.append(T.Resize(target_size))
      steps.append(T.ToTensor())
      return T.Compose(steps)(img)
    ##### END OF YOUR CODE #####

  def augmentation(self, data):
    """Randomly augment one image tensor with torchio transforms.

    With probability 0.8 the whole chain is applied; inside the chain every
    transform is itself applied with probability 0.2.
    """
    ##### START OF YOUR CODE #####
    transform = T.Compose([
       T.RandomApply([
          tio.transforms.RandomBiasField(p=0.2),
          tio.transforms.RandomNoise(p=0.2),
          tio.transforms.RandomGhosting(p=0.2),
          tio.transforms.RandomSpike(p=0.2),
          tio.transforms.RandomAffine(degrees=10, scales=0., translation=0., p=0.2)],
          p=0.8)
    ])

    # A trailing axis is added for the transforms and removed again afterwards
    # (presumably because torchio expects 4D tensors - TODO confirm).
    expanded = data.unsqueeze(-1)
    augmented = transform(expanded)

    return augmented.squeeze(-1)
    ##### END OF YOUR CODE #####

In [None]:
# from torch.utils.data import DataLoader
# import matplotlib.pyplot as plt

# image_size = 256
# data_dir = "/tmp/curated_data/data"
# batch_size = 16

# data_dict = {x: CustomDataset(image_size, data_dir, x) for x in ["train", "val", "test"]}
# img_data, lab_data = next(iter(data_dict["train"]))
# print(img_data.shape, lab_data.shape)

# dataloader_dict = {x: DataLoader(data_dict[x], batch_size) for x in ["train", "val", "test"]}
# img, lab = next(iter(dataloader_dict["train"]))
# print(img.shape, lab.shape)


# label_dict = {
#     0: "Normal",
#     1: "Covid"
# }
# plt.figure(figsize=(10, 10))
# for i in range(4):
#   plt.subplot(2,2, i+1)
#   plt.imshow(img[i, 0], cmap="gray")
#   plt.title(label_dict[int(lab[i].item())], fontsize=16)
#   plt.axis("off")
# plt.show()

### 4. ResNet-18

In [10]:
import torch
from torch import nn
from torch.nn import functional as F

def _build_activation(act):
    """Return a new activation module for a (case-insensitive) activation name.

    Raises:
        ValueError: if the name is not one of "relu", "leaky_relu", "gelu".
    """
    factories = {"relu": nn.ReLU, "leaky_relu": nn.LeakyReLU, "gelu": nn.GELU}
    key = str(act).lower()
    if key not in factories:
        raise ValueError(
            "Unknown activation {!r}; expected one of {}".format(act, sorted(factories)))
    return factories[key]()


class ConvBlock(nn.Module):
    """Residual block: two 3x3 convolutions plus a 1x1-convolution skip path.

    Args:
        ch_in: number of input channels.
        ch_out: number of output channels.
        s: stride of the first 3x3 convolution and of the 1x1 skip convolution.
        act: activation name ("relu", "leaky_relu" or "gelu").
    """

    def __init__(self, ch_in, ch_out, s, act):
        super(ConvBlock, self).__init__()
        # Initialize layers
        ##### START OF YOUR CODE #####
        self.ch_in = ch_in
        self.ch_out = ch_out
        self.s = s
        self.act = _build_activation(act)

        # Skip path: 1x1 projection to ch_out channels with the block's stride.
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=(1, 1), stride=s, padding=0)
        # Main path: only the first 3x3 convolution is strided; the second one
        # always uses stride 1 so that both paths end with the same shape.
        self.conv2 = nn.Conv2d(ch_in, ch_out, kernel_size=(3, 3), stride=s, padding=1)
        self.conv3 = nn.Conv2d(ch_out, ch_out, kernel_size=(3, 3), stride=1, padding=1)

        # Every convolution gets its own batch norm: sharing one instance would
        # mix the running statistics of three different feature maps.
        self.bn_skip = nn.BatchNorm2d(ch_out)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.bn2 = nn.BatchNorm2d(ch_out)
        ##### END OF YOUR CODE #####

    def forward(self, X):
        ##### START OF YOUR CODE #####
        skip = self.bn_skip(self.conv1(X))

        out = self.act(self.bn1(self.conv2(X)))
        out = self.bn2(self.conv3(out))

        X = self.act(out + skip)  # skip connection
        ##### END OF YOUR CODE #####
        return X


class ResNet18(nn.Module):
    """ResNet-18 style network for 1-channel images with a single sigmoid output.

    Args:
        act: activation name ("relu", "leaky_relu" or "gelu").
        drop_rate: dropout probability applied to the pooled features.
        image_size: nominal input edge length; stored on the module. The
            global pooling is adaptive, so it does not depend on this value.
    """

    def __init__(self, act, drop_rate, image_size):
        super(ResNet18, self).__init__()
        # Initialize layers
        ##### START OF YOUR CODE #####
        self.drop_rate = drop_rate
        self.image_size = image_size
        self.act = _build_activation(act)

        # stem
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        self.bn = nn.BatchNorm2d(64)
        self.max_pool = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

        # convolutional blocks
        self.conv2_x = nn.Sequential(ConvBlock(64, 64, 1, act),
                                     ConvBlock(64, 64, 1, act))

        self.conv3_x = nn.Sequential(ConvBlock(64, 128, 2, act),
                                     ConvBlock(128, 128, 1, act))

        self.conv4_x = nn.Sequential(ConvBlock(128, 256, 2, act),
                                     ConvBlock(256, 256, 1, act))

        self.conv5_x = nn.Sequential(ConvBlock(256, 512, 2, act),
                                     ConvBlock(512, 512, 1, act))

        # head: adaptive pooling always yields 512 x 1 x 1, whatever the
        # spatial size left by the five stride-2 stages.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.drop_out = nn.Dropout2d(drop_rate)
        self.flatten = nn.Flatten()
        self.fully_connected = nn.Linear(512, 1)
        self.sigmoid = nn.Sigmoid()
        ##### END OF YOUR CODE #####

    def forward(self, X):
        ##### START OF YOUR CODE #####
        X = self.conv1(X)
        X = self.bn(X)
        X = self.act(X)
        X = self.max_pool(X)
        X = self.conv2_x(X)
        X = self.conv3_x(X)
        X = self.conv4_x(X)
        X = self.conv5_x(X)
        X = self.avg_pool(X)
        X = self.drop_out(X)
        X = self.flatten(X)
        X = self.fully_connected(X)
        X = self.sigmoid(X)
        ##### END OF YOUR CODE #####
        return X

In [None]:
 from torchsummary import summary

 # Fall back to the CPU when no GPU is attached to the runtime.
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = ResNet18("relu", .5, 128).to(device)
 # torchsummary defaults to device="cuda"; pass the device actually in use.
 summary(model, (1, 128, 128), device=device.type)

# Assignment 3
<p>In this assignment you will

* write helper functions
* train the model
* hyperparameter search using W&B

Read the comments carefully and insert your code where you see: <br><br><b>##### START OF YOUR CODE #####</b><br><br><b>##### END OF YOUR CODE #####</b><br><br>or for the inline codes you will see<br><br><b>##### INSERT YOUR CODE HERE #####</b>

#### I. AverageMeter
First we will write a helper class. `AverageMeter` computes the running mean of the loss and the accuracy. 

*   It will have 2 functions which are reset and update.
*   reset will be called on initialization and set the attributes to 0. 
*   update takes 2 arguments for the value and the size. It will add the value to the sum and the size to the count. Attribute "avg" (use this name) will also be updated as sum/count.

In [11]:
class AverageMeter:
    """Keeps a running, size-weighted average of a value.

    Attributes:
      sum: running total of value * size.
      count: running total of size.
      avg: sum / count (0 while count is 0).
    """

    def __init__(self):
      ##### START OF YOUR CODE #####
      self.reset()

    def reset(self):
      """Set sum, count and avg back to 0."""
      self.sum = 0
      self.count = 0
      self.avg = 0

    def update(self, value, size):
      """Add `value` with weight `size` and refresh `avg`.

      Args:
        value: mean value over the new samples (number or 0-dim tensor/array).
        size: number of samples `value` was computed over.
      """
      # 0-dim tensors / numpy scalars are converted to plain Python numbers so
      # that the meter does not keep tensors (and their autograd graph) alive.
      to_number = getattr(value, "item", None)
      if callable(to_number) and getattr(value, "ndim", None) == 0:
        value = to_number()

      self.sum += value * size
      self.count += size
      self.avg = self.sum / self.count if self.count else 0
      ##### END OF YOUR CODE #####

In [12]:
# Quick sanity check: two updates of five samples each.
avg_meter = AverageMeter()
for sample_value, sample_size in ((100, 5), (50, 5)):
    avg_meter.update(sample_value, sample_size)

print(avg_meter.avg, avg_meter.count)

75.0 10


#### II. Train Loop
Now we will write the training and validation loops. Detailed instructions are given within the code.

In [13]:
# Use the GPU when one is attached to the runtime, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def training(train_loader, model, criterion, optimizer):
  """Train `model` for one epoch.

  Args:
    train_loader: iterable of (inputs, labels) batches.
    model: network to train; batches are moved to the device of its parameters.
    criterion: loss called as criterion(output, labels).
    optimizer: optimizer that updates the parameters of `model`.

  Returns:
    dict with the sample-weighted mean 'loss' and 'acc' over the epoch.
  """
  # Let's start by initializing our AverageMeters.
  avg_meters = {'loss': AverageMeter(),
                'acc': AverageMeter()}

  # We will go through the train_loader.
  # Zero the gradients.
  # Make prediction.
  # Calculate the loss and the accuracy using prediction and labels.
  # Update the average meters.
  # Compute gradients and adjust learning weights.

  ##### START OF YOUR CODE #####
  # Evaluation switches the model to eval mode; switch dropout and batch norm
  # back to training behaviour before every training epoch.
  model.train()
  model_device = next(model.parameters()).device

  #load data
  for inputs, labels in train_loader:
    inputs, labels = inputs.to(model_device), labels.to(model_device)
    # Weight of this batch in the running means (the last batch may be smaller).
    batch_size = inputs.size(0)

    #zero gradients
    optimizer.zero_grad()

    #prediction
    output = model(inputs)

    #loss and accuracy
    loss = criterion(output.float(), labels.float())
    acc = float((output.round() == labels).float().mean())

    #gradients and learning weights
    loss.backward()
    optimizer.step()

    # .item(): store a plain number, not a tensor that keeps the graph alive.
    avg_meters['loss'].update(loss.item(), batch_size)
    avg_meters['acc'].update(acc, batch_size)

  ##### END OF YOUR CODE #####

  return dict([('loss', avg_meters['loss'].avg),
                ('acc', avg_meters['acc'].avg)])

def validation(val_loader, model, criterion):
  """Evaluate `model` on `val_loader` without computing gradients.

  The model is switched to eval mode and left in that mode.

  Args:
    val_loader: iterable of (inputs, labels) batches.
    model: network to evaluate; batches are moved to the device of its parameters.
    criterion: loss called as criterion(output, labels).

  Returns:
    dict with the sample-weighted mean 'loss' and 'acc'.
  """
  avg_meters = {'loss': AverageMeter(),
                'acc': AverageMeter()}

  # Validation is almost the same as training, but the model is put in eval mode and run under torch.no_grad().
  # You don't need to compute gradients or adjust learning weights for evaluation.

  ##### START OF YOUR CODE #####
  model.eval()
  model_device = next(model.parameters()).device

  with torch.no_grad():
    #load data
    for inputs, labels in val_loader:
      inputs, labels = inputs.to(model_device), labels.to(model_device)
      # Weight of this batch in the running means (the last batch may be smaller).
      batch_size = inputs.size(0)

      #prediction
      output = model(inputs)

      #loss and accuracy
      loss = criterion(output.float(), labels.float())
      acc = float((output.round() == labels).float().mean())

      avg_meters['loss'].update(loss.item(), batch_size)
      avg_meters['acc'].update(acc, batch_size)

  ##### END OF YOUR CODE #####

  return dict([('loss', avg_meters['loss'].avg),
              ('acc', avg_meters['acc'].avg)])

In [14]:
#test function

def testing(test_loader, model, criterion):
  avg_meters = {'loss': AverageMeter(),
                'acc': AverageMeter()}
  
  model.eval()
 
  with torch.no_grad():
     #load data
    for data in test_loader:
      
      inputs,labels = data
      inputs, labels = inputs.to(device), labels.to(device)

      #prediction
      output = model(inputs).to(device)

      #accuracy
      loss = criterion(output.float(), labels.float())
      acc = float((output.round() == labels).float().mean())
      
      avg_meters['loss'].update(loss, len(data))
      avg_meters['acc'].update(acc, len(data))

  ##### END OF YOUR CODE #####

  return dict([('loss', avg_meters['loss'].avg),
               ('acc', avg_meters['acc'].avg)])


We will use Weights & Biases for hyperparameter search. This is only an introduction; we highly recommend that you read the <a href="https://docs.wandb.ai/?_gl=1*1xon9b*_ga*NDg5OTYzNTM3LjE2NzUwNjYzNjk.*_ga_JH1SJHJQXJ*MTY3Njc0MDEyNi4xMi4xLjE2NzY3NDAxMjguNTguMC4w">documentation</a> for more information.

In [None]:
!pip install wandb

In [None]:
# exist_ok: the folder is kept between sessions, so re-running must not fail.
os.makedirs("/content/gdrive/MyDrive/CMPE_runs/", exist_ok=True)

In [16]:
from torch import optim
import wandb
import os
from torch.utils.data import DataLoader

def main():
  """Train, validate and test one ResNet18 run tracked with Weights & Biases.

  Hyperparameters start from `initial_config` and are read back from
  `wandb.config` after `wandb.init`, so that values supplied by a sweep agent
  (under the same key names) replace the defaults. The best weights are saved
  to the run directory and reloaded for the final test evaluation.

  Raises:
    ValueError: if config["optimizer"] is not "Adam", "SGD" or "RMSprop".
  """
  # Set the initial configuration
  initial_config = {
      "data_dir": "/tmp/curated_data/data/",
      "image_size": 128,
      "train_batch_size": 64,
      "val_batch_size": 32,
      "test_batch_size": 1,
      "activation": "relu",
      "drop_rate": .2,
      "optimizer": "Adam",
      "learning_rate": 1e-3,
      "l2_reg": 1e-4, # Weight decay
      "nb_epoch": 50,
      "early_stopping": 15, # trigger value for early stopping

  }

  # Using this configuration dictionary:
  # initialize wandb
  # Create a run directory in your drive ("/content/gdrive/MyDrive/CMPE_runs/" + the current run name that you'll get from wandb)
  # Create the model
  # Create dataloader dictionary with "train", "val", "test" keys
  # Define binary cross entropy loss
  # Define optimizer with weight decay
  # Set lr scheduler to ReduceLROnPlateau:
    # It will decrease the lr by .1 if the val_loss did not decrease > .01. The minimum lr value can be 1e-9.
  # Print train and val results and log them to wandb at the end of each epoch
  # Save best model weights to your run directory when the val accuracy is at least .01 better than the best val accuracy.
  # Set early stopping with the trigger in config["early_stopping"], monitoring val accuracy. config["early_stopping"] = -1 means no early stopping.
  # Print when a new model is saved or early stopping trigger is reached.
  # After the final epoch (or early stopping), load the best model weights and log the test results to wandb*

  ##### START OF YOUR CODE #####
  #init wandb
  wandb.init(
      config = initial_config,
      project = "cmpe58p_assignment",
      reinit = True
  )
  # Read the hyperparameters back from wandb: inside a sweep the agent's values
  # take the place of the defaults given above.
  config = wandb.config

  #directories
  run_path = "/content/gdrive/MyDrive/CMPE_runs/" + wandb.run.name + "/"
  save_path = run_path + "models.pth"
  os.makedirs(run_path, exist_ok=True)

  #model
  model = ResNet18(config['activation'], config['drop_rate'], config['image_size']).to(device)

  #dataloader dictionary (only the training data is reshuffled every epoch)
  partitions = ["train", "val", "test"]
  data_dict = {x: CustomDataset(config['image_size'], config['data_dir'], x) for x in partitions}
  dataloader = {x: DataLoader(data_dict[x],
                              batch_size = config[f'{x}_batch_size'],
                              shuffle = (x == "train"))
                for x in partitions}

  #loss, optimizer and lr scheduler
  criterion = nn.BCELoss()

  optimizers = {"Adam": optim.Adam, "SGD": optim.SGD, "RMSprop": optim.RMSprop}
  if config['optimizer'] not in optimizers:
    raise ValueError(f"Unknown optimizer {config['optimizer']!r}; expected one of {sorted(optimizers)}")
  optimizer = optimizers[config['optimizer']](model.parameters(),
                                              lr = config['learning_rate'],
                                              weight_decay = config['l2_reg'])

  lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', factor = 0.1, threshold = 0.01, min_lr = 1e-9)

  best_val_acc = 0
  min_delta = 0.01  # smallest val-accuracy gain that counts as an improvement
  patience = config['early_stopping']
  trigger_times = 0

  # Register the gradient/parameter hooks once, not once per epoch.
  wandb.watch(model, criterion, log = "all")

  #training and validation
  for epoch in range(1, config['nb_epoch'] + 1):
    train = training(dataloader['train'], model, criterion, optimizer)
    val = validation(dataloader['val'], model, criterion)

    # Plain floats for logging, printing and the scheduler.
    train_loss, train_acc = float(train['loss']), float(train['acc'])
    val_loss, val_acc = float(val['loss']), float(val['acc'])

    lr_scheduler.step(val_loss)

    wandb.log({'epoch' : epoch, 'train_loss' : train_loss, 'train_acc' : train_acc, 'val_loss' : val_loss, 'val_acc' : val_acc})
    print(f"epoch : {epoch}  train_loss : {train_loss}  train_acc : {train_acc}  val_loss : {val_loss}  val_acc : {val_acc}")

    # Checkpointing and early stopping share one improvement criterion.
    if val_acc - best_val_acc >= min_delta:
      torch.save(model.state_dict(), save_path)
      best_val_acc = val_acc
      trigger_times = 0
      print("The model is saved")
    else:
      trigger_times += 1
      # A non-positive patience (e.g. -1) disables early stopping.
      if patience > 0 and trigger_times >= patience:
        print("Early Stopping!")
        break

  #testing
  if os.path.exists(save_path):
    model.load_state_dict(torch.load(save_path, map_location = device))
  else:
    # No epoch reached the saving threshold, so there is nothing to reload.
    print("No saved model found, testing the final weights")

  tester = testing(dataloader['test'], model, criterion)
  wandb.log({'Test Result ' : tester['acc']})
  print(f"Test Result : {tester['acc']}" )

  ##### END OF YOUR CODE #####

In [None]:
# Define the parameters that we will fine tune with, which are:
  # Activation function
  # Optimizer
  # Drop rate: should be chosen randomly from a uniform distribution between [0., 0.9]
  # Weight decay: should be chosen randomly from a uniform distribution between [0., 0.1]
  # Learning rate: should be chosen randomly from a uniform distribution between [0.0001, 0.1]
  
# The parameter names must be the same as the keys of the run configuration:
# a sweep value only replaces a default when it is stored under the same name.
parameter_dict = {
    ##### START OF YOUR CODE #####
    'activation' : {'values' : ['relu', 'leaky_relu', 'gelu']},
    'optimizer' : {'values' : ['Adam', 'SGD', 'RMSprop']},
    'drop_rate' : {'distribution' : 'uniform', 'min' : .0, 'max' : 0.9},
    'l2_reg' : {'distribution' : 'uniform', 'min' : .0, 'max' : 0.1}, # Weight decay
    'learning_rate' : {'distribution' : 'uniform', 'min' : 0.0001, 'max' : 0.1}
    ##### END OF YOUR CODE #####
}

# Define a sweep configuration that tells wandb to choose randomly from the parameter dict, with the goal of maximizing the validation accuracy
sweep_config = {
    ##### START OF YOUR CODE #####
    'method' : 'random',
    'name' : 'sweep',
    'metric' : {'goal' : 'maximize', 'name' : 'val_acc'},
    'parameters' : {}
    ##### END OF YOUR CODE #####
}
sweep_config['parameters'] = parameter_dict

# Start the sweep
# Register the sweep in the project; the returned id is what the agent needs.
sweep_id = wandb.sweep(sweep_config, project='cmpe58p_assignment')

In [None]:
# Run the sweep: the agent calls main() once per hyperparameter combination.
# NOTE(review): no `count` is passed, so the number of runs is not limited
# here - presumably the agent keeps going until it is stopped; confirm.
wandb.agent(sweep_id, function=main)