In [27]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import numpy as np
import pandas as pd

In [28]:
epochs = 2
batch_size = 100
test_runs = 3

In [29]:
"""
For a dataset, will loop through all the optimizers and save the test losses after training + testing.
"""

def train_test(train_loader, test_loader, criterion, input_size, num_classes, epochs, batch_size, test_runs):
    
    # Logistic regression model.
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(input_size, num_classes),
        torch.nn.LogSoftmax(dim=1) 
    )
    
    # add all optimizers to a list
    optimizer_list=[]
    optimizer_list.append(optim.SGD(model.parameters(), lr=0.01))
    optimizer_list.append(optim.SGD(model.parameters(), lr=0.01,momentum=0.9))
    optimizer_list.append(optim.SGD(model.parameters(), lr=0.01,momentum=0.9,nesterov=True))
    optimizer_list.append(optim.Adagrad(model.parameters(), lr=0.01))
    optimizer_list.append(optim.RMSprop(model.parameters(), lr=0.01))
    optimizer_list.append(optim.Adam(model.parameters(), lr=0.01))
    
    test_losses = [] # store test losses for each optimizer
    
    # carry out training and testing for each optimizer and save the test losses for the number of test runs:
    
    for i in range(test_runs):
        
        # TRAIN
        for optimizer in optimizer_list:
            
            # Logistic regression model.
            model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
            )
            
            model.train()

            for epoch in range(epochs):
    #             print('Epoch {}'.format(epoch+1))
                for i, (images, labels) in enumerate(train_loader):
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward + backward + optimize
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    # Log the loss
    #                 log_interval = 100
    #                 if i % log_interval == 0:
    #                     print('Current loss: {}'.format(loss))


            # TEST
            model.eval()
            test_acc = 0
            total_data = 0
            loss = 0
            with torch.no_grad():
                for _, (images, labels) in enumerate(test_loader):
                    output = model(images)
                    pred = output.argmax(dim=1, keepdim=True)
                    test_acc += pred.eq(labels.view_as(pred)).sum().item()
                    total_data += len(images)
                    loss = criterion(output, labels)

#             print('Loss: {}'.format(loss))

            test_acc /= total_data
#             print('Test accuracy over {} data points: {}%'.format(total_data, test_acc * 100))
            
        test_losses.append(test_run_loss)
#         print(test_losses)

    return test_losses

In [30]:
"""
Return a list of the test losses at the end of each epoch.
"""

def train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs):

    test_trajectory = []
    
    for t in range(epochs):
        
        # TRAIN
        for i, (images, labels) in enumerate(train_loader):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        

        # TEST
        model.eval()
        test_acc = 0
        total_data = 0
        loss = 0
        with torch.no_grad():
            for _, (images, labels) in enumerate(test_loader):
                output = model(images)
                pred = output.argmax(dim=1, keepdim=True)
                test_acc += pred.eq(labels.view_as(pred)).sum().item()
                total_data += len(images)
                loss = criterion(output, labels)
#         print(loss.item())
        test_trajectory.append(loss.item())

    return test_trajectory

In [31]:
"""
For a dataset, will loop through all the optimizers and save the test loss trajectories after training + testing.
"""

def trajectory_loss(train_loader, test_loader, criterion, input_size, num_classes, epochs, batch_size, test_runs):
    
    # Logistic regression model.
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(input_size, num_classes),
        torch.nn.LogSoftmax(dim=1) 
    )
    
    test_losses = [] # store test losses for each optimizer
    
    # carry out training and testing for each optimizer and save the test losses for the number of test runs:
    
    for i in range(test_runs):
        test_run_loss = []
        
        # SGD       
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
        
        # Momentum        
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.SGD(model.parameters(), lr=0.01,momentum=0.9)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
        
        # Adadelta        
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.Adadelta(model.parameters(), lr=1.0)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
        
        # Adagrad        
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.Adagrad(model.parameters(), lr=0.01)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
        
        # RMSprop       
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.RMSprop(model.parameters(), lr=0.01)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
        
        # Adam
        model = torch.nn.Sequential(
                torch.nn.Flatten(),
                torch.nn.Linear(input_size, num_classes),
                torch.nn.LogSoftmax(dim=1) 
        )
        optimizer = optim.Adam(model.parameters(), lr=0.01)
        trajectory = train_test_trajectory(model, optimizer, train_loader, test_loader, criterion, epochs)
        for i in trajectory:
            test_run_loss.append(i)
   
            
        test_losses.append(test_run_loss)         

    return test_losses

In [18]:
"""
Calculate the average loss for each optimizer over several test runs.
"""
def calc_task_avg_loss(loss_list):
    avg_loss = len(loss_list[0])*[0]
    for test_run in range(len(loss_list)):
        for optimizer in range(len(loss_list[test_run])):
            avg_loss[optimizer] += loss_list[test_run][optimizer]

    for i in range(len(avg_loss)):
        avg_loss[i] /= len(loss_list) 
    
    return avg_loss

In [32]:
# Use NLL since we include softmax as part of model.  
criterion = nn.NLLLoss()  

In [33]:
# all_logistic_regression_loss = []

In [34]:
all_losses = []

In [35]:
all_trajectories = []

In [12]:
# CIFAR-10 dataset

# Normalizer
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Data loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [36]:
# Hyperparameters 
input_size = 32*32*3
num_classes = 10

In [37]:
loss_trajectory = trajectory_loss(train_loader, test_loader, criterion, input_size, num_classes, epochs, batch_size, test_runs)
for i in loss_trajectory:
    all_trajectories.append(i)

In [38]:
print(all_trajectories)

[[1.385009765625, 1.5343563556671143, 9.765871047973633, 16.975919723510742, 3.4305944442749023, 2.4465889930725098, 1.5743026733398438, 1.7565749883651733, 18.759689331054688, 5.984659194946289, 10.059686660766602, 11.810853958129883], [2.2807648181915283, 2.4239768981933594, 27.430213928222656, 26.346155166625977, 1.2262992858886719, 3.0819363594055176, 1.8389556407928467, 1.748749852180481, 10.621614456176758, 19.63835334777832, 6.363015651702881, 10.001669883728027], [1.9121567010879517, 2.067091464996338, 17.15863609313965, 3.398571491241455, 2.518282890319824, 4.72898006439209, 1.3821437358856201, 1.544044017791748, 12.258016586303711, 10.70290470123291, 10.811775207519531, 8.383513450622559]]


In [39]:
df = pd.DataFrame(data=all_trajectories)

tasks = ['cifar10_logistic_regression']
runs = range(test_runs)
df.index = pd.MultiIndex.from_product([tasks, runs])

optimizers = ['SGD','Momentum','Nesterov','Adagrad','RMSProp','Adam']
epoch_ind = range(epochs)
df.columns = pd.MultiIndex.from_product([optimizers, epoch_ind])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,SGD,SGD,Momentum,Momentum,Nesterov,Nesterov,Adagrad,Adagrad,RMSProp,RMSProp,Adam,Adam
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,0,1,0,1,0,1,0,1,0,1
cifar10_logistic_regression,0,1.38501,1.534356,9.765871,16.97592,3.430594,2.446589,1.574303,1.756575,18.759689,5.984659,10.059687,11.810854
cifar10_logistic_regression,1,2.280765,2.423977,27.430214,26.346155,1.226299,3.081936,1.838956,1.74875,10.621614,19.638353,6.363016,10.00167
cifar10_logistic_regression,2,1.912157,2.067091,17.158636,3.398571,2.518283,4.72898,1.382144,1.544044,12.258017,10.702905,10.811775,8.383513


In [None]:
loss_results = train_test(train_loader, test_loader, criterion, input_size, num_classes, num_epochs, batch_size, test_runs)
all_logistic_regression_loss.append(calc_task_avg_loss(loss_results))

In [None]:
# CIFAR-100 dataset

# Normalizer
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                        download=True, transform=transform)

testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                       download=True, transform=transform)

# Data loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

In [None]:
# Hyperparameters 
input_size = 32*32*3
num_classes = 100

In [None]:
loss_results = train_test(train_loader, test_loader, criterion, input_size, num_classes, num_epochs, batch_size, 3)
all_logistic_regression_loss.append(calc_task_avg_loss(loss_results))

In [None]:
# F-MNIST dataset

# Normalizer
transform = transforms.Compose([transforms.ToTensor(),
  transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.FashionMNIST(root='./data', train=True,
                                        download=False, transform=transform)

testset = torchvision.datasets.FashionMNIST(root='./data', train=False,
                                       download=False, transform=transform)

# Data loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

In [None]:
# Hyperparameters 
input_size = 28*28
num_classes = 10

In [None]:
loss_results = train_test(train_loader, test_loader, criterion, input_size, num_classes, num_epochs, batch_size, 3)
all_logistic_regression_loss.append(calc_task_avg_loss(loss_results))

In [None]:
# MNIST dataset (images and labels)
train_dataset = torchvision.datasets.MNIST(root='../../data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data', 
                                          train=False, 
                                          transform=transforms.ToTensor())

# Data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

In [None]:
# Hyperparameters 
input_size = 28 * 28    # 784
num_classes = 10

In [None]:
loss_results = train_test(train_loader, test_loader, criterion, input_size, num_classes, num_epochs, batch_size, 3)
all_logistic_regression_loss.append(calc_task_avg_loss(loss_results))

In [None]:
# SVHN dataset

# Normalizer
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.SVHN(root='./data', split="train",
                                        download=True, transform=transform)

testset = torchvision.datasets.SVHN(root='./data', split="test",
                                       download=True, transform=transform)

# Data loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

In [None]:
# Hyperparameters 
input_size = 32*32*3
num_classes = 10

In [None]:
loss_results = train_test(train_loader, test_loader, criterion, input_size, num_classes, num_epochs, batch_size, 3)
all_loss.append(loss_results)
# all_logistic_regression_loss.append(calc_task_avg_loss(loss_results)) # get avg loss for all test runs

In [None]:
# save avg test run results to csv
index = ['logistic_regression_cifar100','logistic_regression_cifar10','logistic_regression_fmnist','logistic_regression_mnist','logistic_regression_svhn']
col = ['SGD','Momentum','Nesterov','Adagrad','RMSProp','Adam']
df = pd.DataFrame(data=all_logistic_regression_loss, index=index, columns=col)
df

In [None]:
df.to_csv('all_avg_losses.csv')

In [None]:
# test_losses = np.asarray(all_logistic_regression_loss)
# test_losses
# normalized_test_losses = []

# for i in range(len(test_losses)):
#     mean = np.mean(test_losses[i])
#     minus_mean = test_losses[i] - mean
#     normalized_test_losses.append((minus_mean)/np.linalg.norm(minus_mean))

In [None]:
# index = ['logistic_regression_cifar100','logistic_regression_cifar10','logistic_regression_fmnist','logistic_regression_mnist','logistic_regression_svhn']
# col = ['SGD','Momentum','Nesterov','Adagrad','RMSProp','Adam']
# df = pd.DataFrame(data=normalized_test_losses, index=index, columns=col)
# df

In [None]:
# df.to_csv('all_logistic_regression_avg_normalized_loss.csv')