# Chem277B: Machine Learning Algorithms

## Homework assignment #7: Deeper Learning and Regularization

In [164]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import SGD, Adam
from sklearn.model_selection import train_test_split, KFold
from sklearn import cluster, datasets, mixture
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice
# NOTE: the wildcard import below can rebind names (e.g. `random`); the imports
# that follow it must stay after it so they take precedence.
from pylab import *
import seaborn as sns
from functools import wraps
from time import time
import random
from tqdm import tqdm
import warnings

sns.set()

### 1. Bias-variance tradeoff.

(a) I loaded the training and test sets and normalized each 32 × 32 image by dividing its pixels by that image's own maximum pixel value.

In [153]:
# Read the pickled MNIST bundle; the outer container and its first two entries
# are converted to lists so they can be indexed and reassigned.
# NOTE(review): unpickling executes arbitrary code - only load trusted files.
mnist = list(pd.read_pickle('mnist.pkl'))
mnist[0], mnist[1] = list(mnist[0]), list(mnist[1])

# Entry 0 is the training split and entry 1 the test split; within each split
# position 0 holds the images and position 1 the labels.
train_X, train_y = mnist[0][0], mnist[0][1]
test_X, test_y = mnist[1][0], mnist[1][1]

# Report the array shapes as a quick sanity check
print('Train: X=%s, y=%s' % (train_X.shape, train_y.shape))
print('Test: X=%s, y=%s' % (test_X.shape, test_y.shape))

Train: X=(60000, 32, 32), y=(60000,)
Test: X=(10000, 32, 32), y=(10000,)


In [154]:
# Define a function to normalize the training and testing data sets
def normalize_pixels(train_X, test_X):
    """Scale every image by its own maximum pixel value.

    Each image (one entry along axis 0) is divided by the largest value found
    in that image, so the brightest pixel of every non-blank image becomes 1.
    The inputs are not modified.

    train_X: np.array, shape (n_train, height, width)
    test_X: np.array, shape (n_test, height, width)
        Any number of images and any image size is accepted; all axes after
        the first are treated as pixel axes.

    @return
    train_X_norm, test_X_norm: float32 arrays with the same shapes as the inputs
    """
    def _scale_per_image(images):
        # Work on a float32 copy so integer inputs are not truncated
        scaled = np.asarray(images).astype('float32')
        pixel_axes = tuple(range(1, scaled.ndim))
        # keepdims leaves size-1 pixel axes so the division broadcasts for any
        # dataset size (no hard-coded (60000, 32, 32) / (10000, 32, 32) shapes)
        peak = scaled.max(axis=pixel_axes, keepdims=True)
        # An all-zero image would give 0 / 0 = NaN; dividing by 1 keeps it all zeros
        peak[peak == 0] = 1
        return scaled / peak

    return _scale_per_image(train_X), _scale_per_image(test_X)

In [162]:
# Apply the per-image normalization, then print the pixel sums before and
# after scaling for each split as a quick check that the values changed
train_X_norm, test_X_norm = normalize_pixels(train_X, test_X)
for X_raw, X_scaled in ((train_X, train_X_norm), (test_X, test_X_norm)):
    print(X_raw.sum(), X_scaled.sum())

1567298545 6148662.5
264923200 1039329.2


(b)

In [165]:
def timing(f):
    """Decorator that reports how long each call to ``f`` takes.

    The returned callable forwards all positional and keyword arguments to
    ``f``, prints the function's name together with the elapsed seconds once
    the call has returned, and passes ``f``'s result back unchanged. Nothing
    is printed if ``f`` raises.
    """
    @wraps(f)
    def timed_call(*call_args, **call_kwargs):
        started_at = time()
        outcome = f(*call_args, **call_kwargs)
        elapsed = time() - started_at
        print('func:%r  took: %2.4f sec' % (f.__name__, elapsed))
        return outcome
    return timed_call

def create_chunks(complete_list, chunk_size=None, num_chunks=None):
    '''
    Split a sequence into consecutive slices.

    Give chunk_size to get slices of that length (the final slice may be
    shorter), or give num_chunks to get that many slices. Whichever of the two
    is omitted is derived from the other with a ceiling division.
    '''
    total = len(complete_list)
    if num_chunks is None:
        num_chunks = math.ceil(total / chunk_size)
    elif chunk_size is None:
        chunk_size = math.ceil(total / num_chunks)
    # Slice k covers indices [k * chunk_size, (k + 1) * chunk_size)
    return [
        complete_list[k * chunk_size: (k + 1) * chunk_size]
        for k in range(num_chunks)
    ]

class Trainer():
    """Mini-batch trainer for a PyTorch classifier using a cross-entropy loss."""

    def __init__(self, model, optimizer_type, learning_rate, epoch, batch_size, input_transform=lambda x: x,):
        """ The class for training the model
        model: nn.Module
            A pytorch model
        optimizer_type: 'adam' or 'sgd'
            'sgd' uses momentum 0.9. Any other value raises ValueError.
        learning_rate: float
        epoch: int
            Number of passes over the training data
        batch_size: int
        input_transform: func
            transforming input. Can do reshape here
        """
        self.model = model
        if optimizer_type == "sgd":
            self.optimizer = SGD(model.parameters(), learning_rate, momentum=0.9)
        elif optimizer_type == "adam":
            self.optimizer = optim.Adam(model.parameters(), learning_rate)
        else:
            # Fail here rather than with an AttributeError deep inside train()
            raise ValueError("optimizer_type must be 'sgd' or 'adam', got %r" % (optimizer_type,))

        self.epoch = epoch
        self.batch_size = batch_size
        self.input_transform = input_transform

    def _prepare(self, inputs, outputs):
        """Apply input_transform and convert features / labels to float / long tensors."""
        features = torch.as_tensor(self.input_transform(inputs)).float()
        labels = torch.as_tensor(outputs).long()
        return features, labels

    def _snapshot_weights(self):
        """Return a detached copy of the model's current state dict.

        state_dict() hands back tensors that share storage with the live
        parameters, so they must be cloned to survive further optimizer steps.
        """
        return {name: tensor.detach().clone() for name, tensor in self.model.state_dict().items()}

    def _score(self, features, labels):
        """Return (cross-entropy loss, accuracy) as floats for already-prepared tensors."""
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(features)
            loss = F.cross_entropy(predictions, labels).item()
            acc = (predictions.argmax(dim=1) == labels).float().mean().item()
        return loss, acc

    @timing
    def train(self, inputs, outputs, val_inputs, val_outputs, early_stop=False, l2=False, silent=False):
        """ train self.model on (inputs, outputs) and validate on (val_inputs, val_outputs)

        The model is trained on all of `inputs` and scored on the validation
        set after every epoch. For k-fold cross-validation, split the data
        outside this method and call train once per fold with a fresh
        model / Trainer; a single model cannot be cross-validated inside one
        training run without the folds leaking into each other.

        inputs: np.array, The shape of input_transform(input) should be (ndata,nfeatures)
        outputs: np.array shape (ndata,)
        val_inputs: np.array, The shape of input_transform(val_input) should be (ndata,nfeatures)
        val_outputs: np.array shape (ndata,)
        early_stop: bool. If True, all epochs still run, but the weights from the
            epoch with the lowest validation loss are restored at the end
        l2: bool. If True, add 0.01 * (sum of squared parameters) to the loss
        silent: bool. Controls whether or not to print the train and val error during training

        @return
        a dictionary of lists with per-epoch train and val losses and accuracies
        """
        inputs, outputs = self._prepare(inputs, outputs)
        # Validation tensors do not change between epochs, so build them once
        val_features, val_labels = self._prepare(val_inputs, val_outputs)
        n_samples = inputs.shape[0]

        losses = []
        accuracies = []
        val_losses = []
        val_accuracies = []
        best_weights = self._snapshot_weights() if early_stop else None
        lowest_val_loss = np.inf

        for n_epoch in tqdm(range(self.epoch), leave=False):
            self.model.train()
            batch_indices = list(range(n_samples))
            random.shuffle(batch_indices)
            batch_indices = create_chunks(batch_indices, chunk_size=self.batch_size)
            epoch_loss = 0
            epoch_acc = 0
            for batch in batch_indices:
                # Weight each batch by its share of the data so the epoch totals
                # are proper averages even when the last batch is smaller
                batch_importance = len(batch) / n_samples
                batch_input = inputs[batch]
                batch_output = outputs[batch]
                batch_predictions = self.model(batch_input)
                loss = F.cross_entropy(batch_predictions, batch_output)
                if l2:
                    # L2 penalty: lambda * sum of squared weights
                    l2_lambda = 0.01
                    l2_reg = sum(param.pow(2).sum() for param in self.model.parameters())
                    loss = loss + l2_lambda * l2_reg

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item() * batch_importance
                epoch_acc += (batch_predictions.argmax(dim=1) == batch_output).float().mean().item() * batch_importance

            val_loss, val_acc = self._score(val_features, val_labels)

            if n_epoch % 10 == 0 and not silent:
                print("Epoch %d/%d - Loss: %.3f - Acc: %.3f" % (n_epoch + 1, self.epoch, epoch_loss, epoch_acc))
                print("              Val_loss: %.3f - Val_acc: %.3f" % (val_loss, val_acc))
            losses.append(epoch_loss)
            accuracies.append(epoch_acc)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            if early_stop and val_loss < lowest_val_loss:
                lowest_val_loss = val_loss
                best_weights = self._snapshot_weights()

        if early_stop:
            self.model.load_state_dict(best_weights)

        return {"losses": losses, "accuracies": accuracies, "val_losses": val_losses, "val_accuracies": val_accuracies}

    def evaluate(self, inputs, outputs, print_acc=True):
        """ evaluate model on provided input and output
        inputs: np.array, The shape of input_transform(input) should be (ndata,nfeatures)
        outputs: np.array shape (ndata,)
        print_acc: bool

        The model is switched to eval mode and left in it.

        @return
        losses: float, cross-entropy loss over the provided data
        acc: float, fraction of samples whose arg-max prediction equals the label
        """
        features, labels = self._prepare(inputs, outputs)
        losses, acc = self._score(features, labels)
        if print_acc:
            print("Accuracy: %.3f" % acc)
        return losses, acc