# Training a Neural Network on PCam Data

## Imports

In [None]:
import pickle 
import torch
from torch import nn, optim
from torchvision import transforms, models
import matplotlib.pyplot as plt
import numpy as np
from time import time
import torch.nn.functional as F

## Training a perceptron on the PCam data

Load the pickle file and create the normalization function

In [None]:
# Load the small PCam subset from Google Drive. The path is absolute (like the
# full-dataset paths used later in this notebook) so it does not depend on the
# current working directory, and the `with` block closes the file handle.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('/content/drive/My Drive/pcam.pkl', 'rb') as pcam_file:
    pcam = pickle.load(pcam_file)
train_images, train_y = pcam['train']
valid_images, valid_y = pcam['valid']
test_images, test_y = pcam['test']

# Per-channel (x - 0.5) / 0.5, applied to images that have been scaled by 1/255.
transform = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

Create the dataloaders

In [None]:
# Training inputs: divide each image by 255, move its last axis (channels) to
# the front, normalize it, then stack all images into a single tensor.
train_tensor_X = torch.stack([
    transform(torch.as_tensor(image / 255.0, dtype=torch.float32).permute(2, 0, 1))
    for image in train_images
])
# Integer class labels, as required by NLLLoss.
train_tensor_y = torch.as_tensor(train_y, dtype=torch.long)
train_dataset = torch.utils.data.TensorDataset(train_tensor_X, train_tensor_y)
# Shuffled mini-batches of 64 samples.
trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=64)

In [None]:
# Validation inputs, preprocessed exactly like the training images:
# scale by 1/255, channels-first, normalize, stack.
valid_tensor_X = torch.stack([
    transform(torch.as_tensor(image / 255.0, dtype=torch.float32).permute(2, 0, 1))
    for image in valid_images
])
valid_tensor_y = torch.as_tensor(valid_y, dtype=torch.long)
valid_dataset = torch.utils.data.TensorDataset(valid_tensor_X, valid_tensor_y)
# Shuffled mini-batches of 64 samples.
valloader = torch.utils.data.DataLoader(valid_dataset, shuffle=True, batch_size=64)

In [None]:
# Test inputs, preprocessed exactly like the training images:
# scale by 1/255, channels-first, normalize, stack.
test_tensor_X = torch.stack([
    transform(torch.as_tensor(image / 255.0, dtype=torch.float32).permute(2, 0, 1))
    for image in test_images
])
test_tensor_y = torch.as_tensor(test_y, dtype=torch.long)
test_dataset = torch.utils.data.TensorDataset(test_tensor_X, test_tensor_y)
# Shuffled mini-batches of 64 samples.
testloader = torch.utils.data.DataLoader(test_dataset, shuffle=True, batch_size=64)

View 60 of the images

In [None]:
# Draw one shuffled batch from the training loader. The builtin next() works on
# every PyTorch version; the iterator's own .next() method is absent from
# recent releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

figure = plt.figure()
num_of_images = 60
# Tensor indices are 0-based while subplot positions are 1-based, so the first
# image of the batch lands in the first grid cell. Never index past the batch.
for index in range(min(num_of_images, len(images))):
    plt.subplot(6, 10, index + 1)
    plt.axis('off')
    # Channels-last layout for imshow; x/2 + 0.5 inverts the (x - 0.5)/0.5
    # normalization so values are displayed in [0, 1].
    plt.imshow(images[index].permute(1,2,0).numpy()/2 + 0.5)
                              

Create a model that takes a flattened version of the image and has three linear layers (similar to what we used for MNIST yesterday) and train it.

Before you move on, you must achieve an accuracy of 0.65 or higher on the test set. You may change any parameters or model layers. You must set the learning rate for the optimizer.

In [None]:
# Fully connected classifier on flattened images (96 * 96 pixels * 3 channels).
input_size = 96*96*3
hidden_sizes = [528, 256, 64]
# Two output classes, matching the 2-unit output layer of the CNN defined later
# in this notebook (the previous value of 10 was a leftover from MNIST).
output_size = 2

# Three hidden Linear+ReLU stages followed by a log-probability output layer,
# which is the input format NLLLoss expects.
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[2], output_size),
                      nn.LogSoftmax(dim=1))
model = model.to('cuda')

criterion = nn.NLLLoss()
### YOUR CODE HERE - find a learning rate that allows the model to achieve above 65% on the test set
optimizer = optim.SGD(model.parameters(), lr= , momentum=0.96, weight_decay = 0.001)
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1).to('cuda')
    
        # Training pass
        optimizer.zero_grad()
        
        output = model(images)
        loss = criterion(output, labels.to('cuda'))
        
        #This is where the model learns by backpropagating
        loss.backward()
        
        #And optimizes its weights here
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

# Measure accuracy on the test set, one forward pass per batch instead of one
# per image.
model.eval()  # inference mode, in case dropout/batch-norm layers are added
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        # Flatten each image the same way the training loop does
        logps = model(images.view(images.shape[0], -1).to('cuda'))
        # exp() is monotonic, so the argmax of the log-probabilities is the
        # most probable class
        pred_labels = logps.argmax(dim=1).cpu()
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy on Test Set =", (correct_count/all_count))

## Training a convolutional network on PCam

Below, we will add convolutional layers into our model. These convolutional layers use the same convolution we learned in the "Linear Filtering" notebook, but with learned weights for the kernel. 

In [None]:
class Net(nn.Module):
    """Small convolutional classifier that returns log-probabilities for 2 classes.

    Three 3x3 convolutions (3 -> 64 -> 32 -> 16 channels), each followed by
    ReLU and 2x2 max pooling, feed three fully connected layers
    (1600 -> 256 -> 128 -> 2). With 96x96 inputs the spatial size goes
    96 -> 94 -> 47 -> 45 -> 22 -> 20 -> 10, which gives the 16*10*10 features
    expected by ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        # Fully connected classifier head
        self.fc1 = nn.Linear(16 * 10 * 10, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 2)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        # Each stage: convolution -> ReLU -> 2x2 max pooling with stride 2
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2, stride=2)
        # Collapse channel and spatial dimensions into one feature vector
        features = x.view(-1, 16 * 10 * 10)
        hidden = F.relu(self.fc2(F.relu(self.fc1(features))))
        return F.log_softmax(self.fc3(hidden), dim=1)

This time, be sure to achieve an accuracy of 0.75 or higher on the test set before moving on. Set the learning rate (lr), momentum, and weight_decay parameters. Feel free to change the model as well, but changing the model is not necessary to achieve 0.75.

In [None]:
model = Net().to('cuda')
model.train()

criterion = nn.NLLLoss()
### Your code here ###
optimizer = optim.SGD(model.parameters(), lr=, momentum=, weight_decay = )
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.to('cuda')
        labels = labels.to('cuda')
        
        # Training pass
        optimizer.zero_grad()
        
        output = model(images)
        loss = criterion(output, labels)
        
        #This is where the model learns by backpropagating
        loss.backward()
        
        #And optimizes its weights here
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

# Measure accuracy on the test set, one forward pass per batch instead of one
# per image.
model.eval()  # counterpart of model.train(); matters if dropout/batch-norm is added
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        logps = model(images.to('cuda'))
        # exp() is monotonic, so the argmax of the log-probabilities is the
        # most probable class
        pred_labels = logps.argmax(dim=1).cpu()
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Great! Once you've finished finding parameters that work for the network, we will try to train the exact same model with ALL of the data. This will take longer, so make sure you are happy with your parameters before you start training on all of the data.

## Training on ALL of the PCam data

Load the full PCam dataset.

In [None]:
# Load the full PCam train/test splits from Google Drive. The `with` blocks
# close each file handle as soon as it has been read.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('/content/drive/My Drive/pcam_train.pkl', 'rb') as train_file:
    train_images, train_y = pickle.load(train_file)
with open('/content/drive/My Drive/pcam_test.pkl', 'rb') as test_file:
    test_images, test_y = pickle.load(test_file)
# Per-channel (x - 0.5) / 0.5, applied to images that have been scaled by 1/255.
transform = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

In [None]:
# Full training set: scale each image by 1/255, move its last axis (channels)
# to the front, normalize it, then stack all images into a single tensor.
train_tensor_X = torch.stack([
    transform(torch.as_tensor(image / 255.0, dtype=torch.float32).permute(2, 0, 1))
    for image in train_images
])
train_tensor_y = torch.as_tensor(train_y, dtype=torch.long)
train_dataset = torch.utils.data.TensorDataset(train_tensor_X, train_tensor_y)
trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=64)

# Full test set, preprocessed the same way.
test_tensor_X = torch.stack([
    transform(torch.as_tensor(image / 255.0, dtype=torch.float32).permute(2, 0, 1))
    for image in test_images
])
test_tensor_y = torch.as_tensor(test_y, dtype=torch.long)
test_dataset = torch.utils.data.TensorDataset(test_tensor_X, test_tensor_y)
testloader = torch.utils.data.DataLoader(test_dataset, shuffle=True, batch_size=64)

Set your parameters and run the training on the full dataset. 

In [None]:
model = Net().to('cuda')
model.train()

criterion = nn.NLLLoss()
### Your code here ###
optimizer = optim.SGD(model.parameters(), lr=, momentum=, weight_decay = )
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.to('cuda')
        labels = labels.to('cuda')
        
        # Training pass
        optimizer.zero_grad()
        
        output = model(images)
        loss = criterion(output, labels)
        
        #This is where the model learns by backpropagating
        loss.backward()
        
        #And optimizes its weights here
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

# Measure accuracy on the full test set, one forward pass per batch instead of
# one per image.
model.eval()  # counterpart of model.train(); matters if dropout/batch-norm is added
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        logps = model(images.to('cuda'))
        # exp() is monotonic, so the argmax of the log-probabilities is the
        # most probable class
        pred_labels = logps.argmax(dim=1).cpu()
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

## Discussion Questions

1. Why did adding more data help?


2. What parameter/model change did you make that helped improve your score the most?


3. What features can we capture with neural nets that we didn't capture with average pixel value or histograms?