In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
"""
Return a list of the test losses at the end of each epoch.
"""

def train_test_trajectory(optimizer, model, x_dataset, y_dataset, x_test, y_test, criterion, epochs):
    """Train `model` with full-batch steps and record the test loss after each one.

    Args:
        optimizer: object whose `zero_grad()` and `step()` are called once per
            epoch. It is expected to have been constructed over `model`'s
            parameters; this function never binds them itself.
        model: callable mapping a batch of inputs to predictions that
            `criterion` accepts.
        x_dataset, y_dataset: training inputs and targets. The whole set is fed
            as a single batch in every epoch.
        x_test, y_test: held-out inputs and targets, used for evaluation only.
        criterion: callable `criterion(predictions, targets)` returning a scalar
            tensor that supports `.backward()` and `.item()`.
        epochs: number of optimisation steps to perform.

    Returns:
        A list of `epochs + 1` Python floats: the test loss with the initial
        weights, followed by the test loss measured after each epoch.
    """

    def evaluate():
        # Evaluation never back-propagates, so skip building the autograd graph.
        with torch.no_grad():
            return criterion(model(x_test), y_test).item()

    # loss with initial weights (to match deepOBS data)
    test_trajectory = [evaluate()]

    # Main optimization loop: one full-batch step per epoch.
    for _ in range(epochs):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        current_loss = criterion(model(x_dataset), y_dataset)
        current_loss.backward()
        optimizer.step()

        test_trajectory.append(evaluate())

    return test_trajectory

In [None]:
"""
Return the final test loss for each optimizer.
"""

def get_final_losses(samples, dim, optimizer_list, criterion, test_runs):
    """Return the final test loss for each optimizer, for every test run.

    One training set and one test set are sampled once and reused for all runs.
    A fresh two-class model is created for every (run, optimizer) pair.

    NOTE(review): this function calls `train(...)` and `test(...)`, which are not
    defined in the visible part of this notebook. Confirm they exist before
    calling it. How `train` attaches an entry of `optimizer_list` to the freshly
    created model is not visible here either.

    Args:
        samples: number of points drawn from each of the two Gaussians.
        dim: input dimensionality.
        optimizer_list: iterable of optimizer specifications, each passed
            unchanged to `train`.
        criterion: loss passed unchanged to `train` and `test`.
        test_runs: number of repetitions.

    Returns:
        A list with `test_runs` entries. Each entry is a list holding the value
        returned by `test` for every element of `optimizer_list`, in order.
    """
    # Two classes, one per Gaussian. The layer width must not depend on how
    # many points are sampled.
    output_dim = 2

    # means of the distributions
    mean1 = 0
    mean2 = 1 / math.sqrt(dim)

    def draw_split():
        # First `samples` rows come from N(mean1, 1) and are labelled 1,
        # the next `samples` rows come from N(mean2, 1) and are labelled 0.
        features = np.vstack((np.random.normal(mean1, 1, size=(samples, dim)),
                              np.random.normal(mean2, 1, size=(samples, dim))))
        labels = [1] * int(samples) + [0] * int(samples)
        return torch.Tensor(features), torch.tensor(labels, dtype=torch.long)

    x_dataset, y_dataset = draw_split()
    x_test, y_test = draw_split()

    loss_list = []
    for _ in range(test_runs):
        test_run_loss = []
        for optimizer in optimizer_list:

            # Logistic regression model
            model = torch.nn.Sequential(
                torch.nn.Linear(dim, output_dim),
                torch.nn.LogSoftmax(dim=1)
            )

            train(samples, optimizer, model, x_dataset, y_dataset, criterion)
            test_run_loss.append(test(model, x_test, y_test, criterion))

        loss_list.append(test_run_loss)

    return loss_list

In [None]:
"""
Return the test loss trajectories for each optimizer.
"""

def get_trajectory_losses(samples, dim, test_runs, epochs):
    """Return the test loss trajectories for each optimizer.

    One training set and one test set are sampled once and shared by all runs.
    In every run a fresh two-class model is trained from scratch with each of
    six optimizers, always in this order: SGD, SGD with momentum 0.9, Adadelta,
    Adagrad, RMSprop, Adam.

    Args:
        samples: number of points drawn from each of the two Gaussians.
        dim: input dimensionality.
        test_runs: number of repetitions.
        epochs: number of full-batch optimisation steps per optimizer.

    Returns:
        A list with `test_runs` rows. Each row is a flat list of
        `6 * (epochs + 1)` floats: the six trajectories returned by
        `train_test_trajectory`, concatenated in the optimizer order above.
    """
    # Two classes, one per Gaussian. The layer width must not depend on how
    # many points are sampled.
    output_dim = 2

    # means of the distributions
    mean1 = 0
    mean2 = 1 / math.sqrt(dim)

    criterion = nn.NLLLoss()

    def draw_split():
        # First `samples` rows come from N(mean1, 1) and are labelled 1,
        # the next `samples` rows come from N(mean2, 1) and are labelled 0.
        features = np.vstack((np.random.normal(mean1, 1, size=(samples, dim)),
                              np.random.normal(mean2, 1, size=(samples, dim))))
        labels = [1] * int(samples) + [0] * int(samples)
        return torch.Tensor(features), torch.tensor(labels, dtype=torch.long)

    def build_model():
        # Logistic regression model; LogSoftmax pairs with the NLLLoss criterion.
        return torch.nn.Sequential(
            torch.nn.Linear(dim, output_dim),
            torch.nn.LogSoftmax(dim=1)
        )

    x_dataset, y_dataset = draw_split()
    x_test, y_test = draw_split()

    # One factory per optimizer; the order fixes the layout of each output row.
    optimizer_factories = (
        lambda params: optim.SGD(params, lr=0.01),                # SGD
        lambda params: optim.SGD(params, lr=0.01, momentum=0.9),  # Momentum
        lambda params: optim.Adadelta(params, lr=1.0),            # Adadelta
        lambda params: optim.Adagrad(params, lr=0.01),            # Adagrad
        lambda params: optim.RMSprop(params, lr=0.01),            # RMSprop
        lambda params: optim.Adam(params, lr=0.01),               # Adam
    )

    loss_list = []
    for _ in range(test_runs):
        test_run_loss = []
        for make_optimizer in optimizer_factories:
            # Every optimizer starts from its own freshly initialised model.
            model = build_model()
            optimizer = make_optimizer(model.parameters())
            test_run_loss.extend(
                train_test_trajectory(optimizer, model, x_dataset, y_dataset,
                                      x_test, y_test, criterion, epochs)
            )
        loss_list.append(test_run_loss)

    return loss_list

In [None]:
"""
Calculate the average loss for each optimizer over several test runs.
"""
def calc_task_avg_loss(loss_list):
    """Average the losses position by position over several test runs.

    Args:
        loss_list: sequence of runs. Each run is a sequence of numbers, and the
            first run determines how many positions are averaged.

    Returns:
        A list with one mean per position of the first run, or an empty list
        when `loss_list` contains no runs.
    """
    run_count = len(loss_list)
    if run_count == 0:
        # Nothing to average; avoids indexing into an empty sequence below.
        return []

    totals = [0] * len(loss_list[0])
    for run in loss_list:
        for position, value in enumerate(run):
            totals[position] += value

    return [total / run_count for total in totals]

In [None]:
# Shared experiment configuration.

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same numbers.
SEED = 0
np.random.seed(SEED)     # the datasets are sampled with np.random.normal
torch.manual_seed(SEED)  # torch's generator drives the random weight initialisation

all_losses = []  # one row per (task, run); filled by the experiment cells below
test_runs = 2    # independent repetitions per task
output_dim = 2   # number of classes
epochs = 3       # optimisation steps per optimizer

In [None]:
# Exactly parameterized task: as many input features as samples per class.
samples = 20  # number of samples from each distribution
dim = 20      # input dimensionality

exact_param = get_trajectory_losses(samples, dim, test_runs, epochs)

# Add one row per run to the shared results table.
all_losses.extend(exact_param)

In [None]:
# Overparameterized task: many more input features than samples per class.
samples = 10  # number of samples from each distribution
dim = 300     # input dimensionality

over_param = get_trajectory_losses(samples, dim, test_runs, epochs)

# Add one row per run to the shared results table.
all_losses.extend(over_param)

In [None]:
# Underparameterized task: far fewer input features than samples per class.
samples = 50  # number of samples from each distribution
dim = 3       # input dimensionality

under_param = get_trajectory_losses(samples, dim, test_runs, epochs)

# Add one row per run to the shared results table.
all_losses.extend(under_param)

In [None]:
# Raw test-loss table: one row per (task, run), one column per (optimizer, epoch).

# Row labels: every task crossed with every run index.
tasks = ['logistic_regression_gaussian_exact_param','logistic_regression_gaussian_over_param','logistic_regression_gaussian_under_param']
runs = range(test_runs)

# Column labels: every optimizer crossed with epochs 0..epochs (epoch 0 = initial weights).
optimizers = ['GradientDescentOptimizer','MomentumOptimizer','Adadelta','AdagradOptimizer','RMSPropOptimizer','AdamOptimizer']
epoch_ind = range(epochs+1)

df = pd.DataFrame(data=all_losses)
df.index = pd.MultiIndex.from_product([tasks, runs])
df.columns = pd.MultiIndex.from_product([optimizers, epoch_ind])

df

In [None]:
# Persist the raw trajectories (rows: task/run, columns: optimizer/epoch).
df.to_csv(
    path_or_buf='logistic_regression_gaussian_trajectory__indiv_traj___rows__obj__runSeed___cols__optim__epoch.csv'
)

In [None]:
# Normalize results: centre every row on its own mean, then scale it to unit
# Euclidean norm.

def _center_and_scale(row):
    """Return `row` minus its mean, divided by the norm of that difference."""
    centered = row - np.mean(row)
    # NOTE(review): a constant row has zero norm here and would divide by zero.
    return centered / np.linalg.norm(centered)

test_losses = np.asarray(all_losses)
normalized_test_losses = [_center_and_scale(row) for row in test_losses]

In [None]:
# Normalized test-loss table, labelled exactly like the raw table:
# one row per (task, run), one column per (optimizer, epoch).

# Row labels: every task crossed with every run index.
tasks = ['logistic_regression_gaussian_exact_param','logistic_regression_gaussian_over_param','logistic_regression_gaussian_under_param']
runs = range(test_runs)

# Column labels: every optimizer crossed with epochs 0..epochs (epoch 0 = initial weights).
optimizers = ['GradientDescentOptimizer','MomentumOptimizer','Adadelta','AdagradOptimizer','RMSPropOptimizer','AdamOptimizer']
epoch_ind = range(epochs+1)

df_norm = pd.DataFrame(data=normalized_test_losses)
df_norm.index = pd.MultiIndex.from_product([tasks, runs])
df_norm.columns = pd.MultiIndex.from_product([optimizers, epoch_ind])

df_norm

In [None]:
# Persist the normalized trajectories. This must write `df_norm`: exporting
# `df` would store the raw losses under the "_norm_" file name.
df_norm.to_csv('logistic_regression_gaussian_trajectory_norm__indiv_traj___rows__obj__runSeed___cols__optim__epoch.csv')

In [None]:
# index = ['logistic_regression_gaussian_exact_param','logistic_regression_gaussian_over_param','logistic_regression_gaussian_under_param']
# col = ['SGD','Momentum','Nesterov','Adagrad','RMSProp','Adam']
# df = pd.DataFrame(data=all_losses, index=index, columns=col)
# df