In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np
'''
Inputs: datafile
The default input is "notMNIST.npz", which is the dataset we are using in this assignment.

Output: trainData, validData, testData, trainTarget, validTarget, testTarget
The outputs are images and annotations in the form of Numpy matrices. trainData and trainTarget are the images and annotations for training. Similarly, validData and validTarget are the images and annotations for validation and testData and testTarget are the images and annotations for testing.
'''
# Load, shuffle and split the notMNIST dataset
def loadData(datafile="notMNIST.npz"):
    """Load the notMNIST archive, shuffle it reproducibly and split it.

    The archive must provide an "images" array and a "labels" array. Images
    are cast to float32 and divided by 255.0. Samples are permuted with a
    fixed NumPy seed (521); the first 10000 samples form the training set,
    samples 10000-15999 the validation set and everything from index 16000
    onwards the test set.

    Note: this reseeds NumPy's global random generator as a side effect.

    Returns:
        trainData, validData, testData, trainTarget, validTarget, testTarget
    """
    with np.load(datafile) as archive:
        images = archive["images"].astype(np.float32)
        labels = archive["labels"]

    # Fixed seed so that every run produces the same permutation and split.
    np.random.seed(521)
    order = np.arange(len(images))
    np.random.shuffle(order)
    images = images[order] / 255.0
    labels = labels[order]

    train_part = slice(None, 10000)
    valid_part = slice(10000, 16000)
    test_part = slice(16000, None)
    return (
        images[train_part], images[valid_part], images[test_part],
        labels[train_part], labels[valid_part], labels[test_part],
    )

# Map-style Dataset over in-memory images and their annotations.
class notMNIST(Dataset):
    """Dataset pairing each image with its label.

    Args:
        annotations: indexable collection of labels; its length defines the
            length of the dataset.
        images: indexable collection of images, indexed in parallel with
            `annotations`.
        transform: optional callable applied to each image on access.
        target_transform: optional callable applied to each label on access.
    """

    def __init__(self, annotations, images, transform=None, target_transform=None):
        self.imgs = images
        self.img_labels = annotations
        self.target_transform = target_transform
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the (image, label) pair at `idx`, with any transforms applied."""
        sample = self.imgs[idx]
        target = self.img_labels[idx]
        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target

# Convolutional classifier
class CNN(nn.Module):
    """Two conv blocks followed by a two-layer fully connected head.

    Each conv block is Conv2d(kernel 4) -> ReLU -> BatchNorm2d -> MaxPool2d(2).
    The head is Dropout -> Linear(1024, 784) -> ReLU -> Linear(784, 10).

    Args:
        drop_out_p: scalar dropout rate of the dropout layer applied to the
            flattened features before the first linear layer.
    """

    def __init__(self, drop_out_p=0.0):
        super().__init__()

        # First conv block: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=4)
        self.batchnorm1 = nn.BatchNorm2d(num_features=32)
        self.pooling1 = nn.MaxPool2d(kernel_size=(2, 2))

        # Second conv block: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4)
        self.batchnorm2 = nn.BatchNorm2d(num_features=64)
        self.pooling2 = nn.MaxPool2d(kernel_size=(2, 2))

        # Classifier head; 1024 = 64 channels * 4 * 4 for a 28x28 input.
        self.dropout = nn.Dropout(p=drop_out_p)
        self.linear1 = nn.Linear(in_features=1024, out_features=784)
        self.linear2 = nn.Linear(in_features=784, out_features=10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: batch of images of size (BATCH SIZE, 1, 28, 28).

        Returns:
            Tensor of size (BATCH SIZE, 10); entry (i, j) is the logit score
            of class j for image i.
        """
        block1 = self.pooling1(self.batchnorm1(F.relu(self.conv1(x))))
        block2 = self.pooling2(self.batchnorm2(F.relu(self.conv2(block1))))

        # Collapse channel and spatial dimensions, keep the batch dimension.
        features = torch.flatten(block2, start_dim=1)

        hidden = F.relu(self.linear1(self.dropout(features)))
        return self.linear2(hidden)


# Fully connected classifier
class FNN(nn.Module):
    """Three-layer fully connected network with 10 hidden units per layer.

    The pipeline is Linear(784, 10) -> ReLU -> Linear(10, 10) -> ReLU ->
    Dropout -> Linear(10, 10).

    Args:
        drop_out_p: scalar dropout rate of the dropout layer applied just
            before the output layer.
    """

    def __init__(self, drop_out_p=0.0):
        super().__init__()
        self.linear1 = nn.Linear(in_features=784, out_features=10)
        self.linear2 = nn.Linear(in_features=10, out_features=10)
        self.linear3 = nn.Linear(in_features=10, out_features=10)
        self.dropout = nn.Dropout(p=drop_out_p)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: batch of images of size (BATCH SIZE, 1, 28, 28).

        Returns:
            Tensor of size (BATCH SIZE, 10); entry (i, j) is the logit score
            of class j for image i.
        """
        # Flatten everything except the batch dimension into a 784-vector.
        flat = torch.flatten(x, start_dim=1)
        hidden = F.relu(self.linear1(flat))
        hidden = F.relu(self.linear2(hidden))
        return self.linear3(self.dropout(hidden))

'''
Inputs: model, dataloader
model is an instance of the neural network class. dataloader is an instance of the notMNIST dataloader (see the function experiment).

Output:
The output of this function is a scalar, which is the accuracy over all images in dataloader.
'''
# Compute accuracy
def get_accuracy(model, dataloader):
    """Return the classification accuracy of `model` over `dataloader`, in percent.

    The model is switched to evaluation mode and left in that mode. Each batch
    is moved to the device that holds the model's parameters, so the function
    works on CPU-only machines as well as on a GPU.

    Args:
        model: network whose output has one row of class scores per sample.
        dataloader: iterable yielding (images, labels) batches of tensors.

    Returns:
        Percentage (0-100) of samples whose arg-max prediction equals the
        label; 0.0 if the dataloader yields no samples.
    """
    model.eval()
    # Evaluate wherever the model lives rather than assuming CUDA.
    device = next(model.parameters()).device
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            predictions = torch.argmax(model(images), dim=1)
            correct += (predictions == labels).sum().item()
            total += len(images)

    # Avoid a ZeroDivisionError on an empty dataloader.
    if total == 0:
        return 0.0
    return 100 * correct / total
'''
Inputs: model, device, learning_rate, weight_decay, train_loader, val_loader, test_loader, num_epochs=50, verbose
model is an instance of the neural network class. To train the neural networks with GPU, device should be "cuda:0" as shown in the experiment function. learning_rate and
weight_decay are float scalars that specify the learning rate and L2 weight decay of the model.
train_loader, val_loader and test_loader are the notMNIST dataloaders for the training, validation and testing sets respectively. num_epochs is the number of passes through the dataset
we would like in our training (default is 50 in this assignment). Finally, setting verbose to True will print out the training progress.

Output: model, acc_hist
This function returns the trained model and the per-epoch history of training, validation and testing accuracy.
'''
def train(model, device, learning_rate, weight_decay, train_loader, val_loader, test_loader, num_epochs=50, verbose=False):
    """Train `model` with AdamW and cross-entropy loss, tracking accuracy per epoch.

    Args:
        model: network to train; it is not moved here, so it must already be
            on `device`.
        device: device every training batch is moved to before the forward
            pass (e.g. "cuda:0" or "cpu").
        learning_rate: learning rate passed to AdamW.
        weight_decay: weight decay coefficient passed to AdamW.
        train_loader: iterable of (images, labels) batches used for the
            parameter updates and for the training accuracy.
        val_loader: iterable of (images, labels) batches for validation accuracy.
        test_loader: iterable of (images, labels) batches for test accuracy.
        num_epochs: number of full passes over `train_loader`.
        verbose: if True, print the three accuracies after every epoch.

    Returns:
        (model, acc_hist), where acc_hist is a dict with keys 'train', 'val'
        and 'test', each holding one accuracy value (in percent) per epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    acc_hist = {'train': [], 'val': [], 'test': []}

    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            # Honour the requested device instead of forcing CUDA, so that
            # training also runs on machines without a GPU.
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass and loss.
            logits = model(images)
            loss = criterion(logits, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Disable dropout / use running batch-norm statistics while measuring.
        model.eval()
        acc_hist['train'].append(get_accuracy(model, train_loader))
        acc_hist['val'].append(get_accuracy(model, val_loader))
        acc_hist['test'].append(get_accuracy(model, test_loader))

        if verbose:
            print('Epoch: %d | Train Accuracy: %.2f | Validation Accuracy: %.2f | Test Accuracy: %.2f'
                  % (epoch, acc_hist['train'][-1], acc_hist['val'][-1], acc_hist['test'][-1]))

    return model, acc_hist

def experiment(model_type='CNN', learning_rate=0.0001, dropout_rate=0.5, weight_decay=0.01, num_epochs=50, verbose=False):
    """Build the data loaders and a model, train it, and return the results.

    Args:
        model_type: 'CNN' or 'FNN', selecting which network class to train.
        learning_rate: learning rate forwarded to `train`.
        dropout_rate: dropout rate passed to the model constructor.
        weight_decay: weight decay forwarded to `train`.
        num_epochs: number of training epochs forwarded to `train`.
        verbose: if True, `train` prints the accuracies after each epoch.

    Returns:
        (model, acc_hist): the trained model, moved back to the CPU, and the
        accuracy history returned by `train`.

    Raises:
        ValueError: if `model_type` is neither 'CNN' nor 'FNN'.
    """
    # Fail fast, before loading the dataset, instead of hitting an unbound
    # `model` variable later on.
    if model_type not in ('CNN', 'FNN'):
        raise ValueError("model_type must be 'CNN' or 'FNN', got %r" % (model_type,))

    # Use GPU if it is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Input batch size.
    BATCH_SIZE = 32

    # Convert images to tensors.
    transform = transforms.Compose([transforms.ToTensor()])

    # Get train, validation and test data loaders.
    trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()

    train_data = notMNIST(trainTarget, trainData, transform=transform)
    val_data = notMNIST(validTarget, validData, transform=transform)
    test_data = notMNIST(testTarget, testData, transform=transform)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

    # Specify which model to use.
    if model_type == 'CNN':
        model = CNN(dropout_rate)
    else:
        model = FNN(dropout_rate)

    # Load the model onto the device and train it.
    model = model.to(device)
    model, acc_hist = train(model, device, learning_rate, weight_decay, train_loader, val_loader, test_loader, num_epochs=num_epochs, verbose=verbose)

    # Release the model from the GPU (else the memory won't hold up).
    model.cpu()

    return model, acc_hist

#Experiment 1
def compare_arch(num_epochs=50):
    """Train a CNN and an FNN without regularisation and plot their accuracies.

    Both models are trained with dropout_rate=0.0 and weight_decay=0.0. Two
    figures are shown: training accuracy per epoch and testing accuracy per
    epoch, each with one curve per architecture.

    Args:
        num_epochs: number of epochs to train each model for; also the length
            of the x-axis of the plots.
    """
    # Forward num_epochs so the histories always match the x-axis length.
    CNNmodel, CNNacc_hist = experiment(model_type='CNN', dropout_rate=0.0, weight_decay=0.0, num_epochs=num_epochs, verbose=True)
    print("done CNN")
    FNNmodel, FNNacc_hist = experiment(model_type='FNN', dropout_rate=0.0, weight_decay=0.0, num_epochs=num_epochs, verbose=True)
    epochs = np.arange(0, num_epochs, 1)

    plt.title("Traning Accuracy for CNN and FNN")
    plt.plot(epochs, CNNacc_hist['train'], markeredgecolor='r', label='CNN')
    # Lowercase 'g' matches the colour codes used by the other experiments.
    plt.plot(epochs, FNNacc_hist['train'], markeredgecolor='g', label='FNN')
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy in %")
    plt.legend()
    plt.show()

    plt.title("Testing Accuracy for CNN and FNN")
    plt.plot(epochs, CNNacc_hist['test'], markeredgecolor='r', label='CNN')
    plt.plot(epochs, FNNacc_hist['test'], markeredgecolor='g', label='FNN')
    plt.xlabel("Epoch")
    plt.ylabel("Testing Accuracy in %")
    plt.legend()
    plt.show()
# Run Experiment 1 (CNN vs. FNN) with the default number of epochs.
compare_arch()

#Experiment 2
def compare_dropout(num_epochs=50):
    """Train three CNNs with dropout rates 0.5, 0.8 and 0.95 and plot their accuracies.

    All models are trained with weight_decay=0.0. Two figures are shown:
    training accuracy per epoch and testing accuracy per epoch, each with one
    curve per dropout rate.

    Args:
        num_epochs: number of epochs to train each model for; also the length
            of the x-axis of the plots.
    """
    # Forward num_epochs so the histories always match the x-axis length.
    model1, acc_hist1 = experiment(model_type='CNN', dropout_rate=0.5, weight_decay=0.0, num_epochs=num_epochs, verbose=True)
    model2, acc_hist2 = experiment(model_type='CNN', dropout_rate=0.8, weight_decay=0.0, num_epochs=num_epochs, verbose=True)
    model3, acc_hist3 = experiment(model_type='CNN', dropout_rate=0.95, weight_decay=0.0, num_epochs=num_epochs, verbose=True)
    epochs = np.arange(0, num_epochs, 1)

    plt.title("Traning Accuracy for three different CNN dropout rates")
    plt.plot(epochs, acc_hist1['train'], label='DR=0.5', markeredgecolor='r')
    plt.plot(epochs, acc_hist2['train'], label='DR=0.8', markeredgecolor='g')
    plt.plot(epochs, acc_hist3['train'], label='DR=0.95', markeredgecolor='b')
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy in %")
    plt.legend()
    plt.show()

    plt.title("Testing Accuracy for three different CNN dropout rates")
    plt.plot(epochs, acc_hist1['test'], label='DR=0.5', markeredgecolor='r')
    plt.plot(epochs, acc_hist2['test'], label='DR=0.8', markeredgecolor='g')
    plt.plot(epochs, acc_hist3['test'], label='DR=0.95', markeredgecolor='b')
    plt.xlabel("Epoch")
    plt.ylabel("Testing Accuracy in %")
    plt.legend()
    plt.show()
# Run Experiment 2 (CNN dropout rates) with the default number of epochs.
compare_dropout()

#Experiment 3
def compare_l2(num_epochs=50):
    """Train three CNNs with weight decays 0.1, 1.0 and 10.0 and plot their accuracies.

    All models are trained with dropout_rate=0.0. Two figures are shown:
    training accuracy per epoch and testing accuracy per epoch, each with one
    curve per weight decay value.

    Args:
        num_epochs: number of epochs to train each model for; also the length
            of the x-axis of the plots.
    """
    # Forward num_epochs so the histories always match the x-axis length.
    model1, acc_hist1 = experiment(model_type='CNN', dropout_rate=0.0, weight_decay=0.1, num_epochs=num_epochs, verbose=True)
    model2, acc_hist2 = experiment(model_type='CNN', dropout_rate=0.0, weight_decay=1.0, num_epochs=num_epochs, verbose=True)
    model3, acc_hist3 = experiment(model_type='CNN', dropout_rate=0.0, weight_decay=10.0, num_epochs=num_epochs, verbose=True)
    epochs = np.arange(0, num_epochs, 1)

    plt.title("Traning Accuracy for three different CNN weight decays")
    plt.plot(epochs, acc_hist1['train'], label='WD=0.1', markeredgecolor='r')
    plt.plot(epochs, acc_hist2['train'], label='WD=1.0', markeredgecolor='g')
    plt.plot(epochs, acc_hist3['train'], label='WD=10.0', markeredgecolor='b')
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy in %")
    plt.legend()
    plt.show()

    plt.title("Testing Accuracy for three different CNN weight decays")
    plt.plot(epochs, acc_hist1['test'], label='WD=0.1', markeredgecolor='r')
    plt.plot(epochs, acc_hist2['test'], label='WD=1.0', markeredgecolor='g')
    plt.plot(epochs, acc_hist3['test'], label='WD=10.0', markeredgecolor='b')
    plt.xlabel("Epoch")
    plt.ylabel("Testing Accuracy in %")
    plt.legend()
    plt.show()
# Run Experiment 3 (CNN weight decay values) with the default number of epochs.
compare_l2()


Epoch: 0 | Train Accuracy: 87.61 | Validation Accuracy: 84.77 | Test Accuracy: 85.98
Epoch: 1 | Train Accuracy: 92.20 | Validation Accuracy: 87.65 | Test Accuracy: 88.51


KeyboardInterrupt: ignored

# New section