In [1]:
import numpy as np
from neuralnet import Neuralnetwork

In [2]:
import copy
import os, gzip
import yaml
import numpy as np
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import constants

In [3]:
import gradient
from constants import *
from train import *
from gradient import *
import util
import argparse
import neuralnet

In [41]:
def main(args):

    # Read the required config
    # Create different config files for different experiments
    configFile = None #Will contain the name of the config file to be loaded
    if (args.experiment == 'test_gradients'):  #3b
        configFile = 'config_3b.yaml' # Create a config file for 3b and change None to the config file name
    elif(args.experiment=='test_momentum'):  #3c
        configFile = "config_3c.yaml" # Create a config file for 3c and change None to the config file name
    elif (args.experiment == 'test_regularization'): #3d
        configFile = None # Create a config file for 3d and change None to the config file name
    elif (args.experiment == 'test_activation'): #3e
        configFile = None # Create a config file for 3e and change None to the config file name
    elif (args.experiment == 'test_hidden_units'):  #3f-i
        configFile = None # Create a config file for 3f-i and change None to the config file name
    elif (args.experiment == 'test_hidden_layers'):  #3f-ii
        configFile = None # Create a config file for 3f-ii and change None to the config file name
    elif (args.experiment == 'test_100_classes'):  #3g
        configFile = None # Create a config file for 3g and change None to the config file name. Please make the necessaty changes to load_data()
        # in util.py first before running this experiment

    # Load the data
    x_train, y_train, x_valid, y_valid, x_test, y_test = util.load_data(path='')  # Set datasetDir in constants.py

    # Load the configuration from the corresponding yaml file. Specify the file path and name
    config = util.load_config('configs/' + configFile) # Set configYamlPath, configFile  in constants.py

    if(args.experiment == 'test_gradients'):
        subsetSize = 5  #Feel free to change this
        sample_idx = np.random.randint(0,len(x_train),subsetSize)
        x_train_sample, y_train_sample = x_train[sample_idx], y_train[sample_idx]
        model = Neuralnetwork(config)
        results  = check_grad(model, x_train_sample, y_train_sample)
      
        return results

    # Create a Neural Network object which will be our model
    model = neuralnet.Neuralnetwork(config)

    # train the model. Use train.py's train method for this
    model = modeltrain(model, x_train, y_train, x_valid, y_valid, config)

    # test the model. Use train.py's modelTest method for this
    test_acc, test_loss =  modelTest(model, x_test, y_test)

    # Print test accuracy and test loss
    print('Test Accuracy:', test_acc, ' Test Loss:', test_loss)




In [6]:
parser = argparse.ArgumentParser()
parser.add_argument('--experiment', type=str, default='test_gradients', help='Specify the experiment that you want to run')

args, unknown = parser.parse_known_args()
#args = parser.parse_args()# This specifies the number of layers and number of hidden neurons in each layer.



In [37]:
results = main(args)

In [40]:
print('weight type               ', 'numerical gradient          ', 'true gradient        ', 'absolute difference')
for i in range(len(results)):
    print(results[i])
    print('\n')

weight type                numerical gradient           true gradient         absolute difference
('output layer bias weight', -7.473294338029568e-05, 5.1535591235313295e-06, 7.9886502503827e-05)


('hiden layer bias weight', -5.879822996135431e-07, 5.1535591235313295e-06, 5.741541423144872e-06)


('hidden to output weight #1', -2.065973648862851e-05, 9.803444316535384e-05, 0.00011869417965398235)


('hidden to output weight #2', -1.4476268714016528e-05, 4.889707240854833e-05, 6.337334112256485e-05)


('input to hidden weight #1', -4.136323141423759e-07, 4.889707240854833e-05, 4.9310704722690706e-05)


('input to hidden weight #2', -6.562391244635535e-07, 9.803444316535384e-05, 9.86906822898174e-05)




In [35]:
def check_grad(model, x_train, y_train):

    """
    TODO
        Checks if gradients computed numerically are within O(epsilon**2)

        args:
            model
            x_train: Small subset of the original train dataset
            y_train: Corresponding target labels of x_train

        Prints gradient difference of values calculated via numerical approximation and backprop implementation
    """
    epsilon = 10**(-2)
    results = []
    #1 output layer bias weight
    output_layer = model.layers[1]
    out_bias_w = output_layer.w[0][1]
    model(x_train, y_train)
    deltas = model.backward(False)
    delta = deltas[0][1]
    
    output_layer.w[0][1] = out_bias_w + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    output_layer.w[0][1] = out_bias_w - epsilon
    loss_minus, acc = model(x_train, y_train)
    output_layer.w[0][1] = out_bias_w
    
    num_delta = (loss_plus - loss_minus) / 2*epsilon

    abs_diff = abs(delta - num_delta)
    
    results.append(('output layer bias weight',num_delta, delta, abs_diff))
    
    #2 hidden bias weight
    hidden_layer = model.layers[0]
    hidden_bias_w = hidden_layer.w[0][1]
    model(x_train, y_train)
    deltas = model.backward(False)
    delta2 = deltas[0][1]
    
    hidden_layer.w[0][1] = hidden_bias_w + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    hidden_layer.w[0][1] = hidden_bias_w - epsilon
    loss_minus, acc = model(x_train, y_train)
    hidden_layer.w[0][1] = hidden_bias_w
    
    num_delta2 = (loss_plus - loss_minus) / 2*epsilon
    abs_diff2 = abs(delta2 - num_delta2)
    
    results.append(('hiden layer bias weight',num_delta2, delta2, abs_diff2))
    
    
    #3 hidden to output weight #1
    output_layer = model.layers[1]
    w3 = output_layer.w[4][0]
    
    model(x_train, y_train)
    deltas = model.backward(False)
    delta3 = deltas[4][0]
    
    output_layer.w[4][0] = w3 + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    output_layer.w[4][0] = w3 - epsilon
    loss_minus, acc = model(x_train, y_train)
    output_layer.w[4][0] = w3 

    num_delta3 = (loss_plus - loss_minus) / 2*epsilon
    abs_diff3 = abs(delta3 - num_delta3)
    
    results.append(('hidden to output weight #1', num_delta3, delta3, abs_diff3))
    
    
    #4 hidden to output weight #2
    output_layer = model.layers[1]
    w1 = output_layer.w[2][0]
    
    model(x_train, y_train)
    deltas = model.backward(False)
    delta4 = deltas[2][0]
    
    output_layer.w[2][0] = w1 + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    output_layer.w[2][0] = w1 - epsilon
    loss_minus, acc = model(x_train, y_train)
    output_layer.w[2][0] = w1 

    num_delta4 = (loss_plus - loss_minus) / 2*epsilon
    abs_diff4 = abs(delta4 - num_delta4)
    
    results.append(('hidden to output weight #2', num_delta4, delta4, abs_diff4))
    
    #5 input to hidden layer weight #1
    hidden_layer = model.layers[0]
    w1 = hidden_layer.w[2][0]
    
    model(x_train, y_train)
    deltas = model.backward(False)
    delta5 = deltas[2][0]
    
    hidden_layer.w[2][0] = w1 + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    hidden_layer.w[2][0] = w1 - epsilon
    loss_minus, acc = model(x_train, y_train)
    hidden_layer.w[2][0] = w1 

    num_delta5 = (loss_plus - loss_minus) / 2*epsilon
    abs_diff5 = abs(delta5 - num_delta5)
    results.append(('input to hidden weight #1', num_delta5, delta5, abs_diff5))
    
    
    #6 input to hidden layer weight #2
    hidden_layer = model.layers[0]
    w1 = hidden_layer.w[4][0]
    
    model(x_train, y_train)
    deltas = model.backward(False)
    delta6 = deltas[4][0]
    
    hidden_layer.w[4][0] = w1 + epsilon
    loss_plus, acc = model(x_train, y_train)
    
    hidden_layer.w[4][0] = w1 - epsilon
    loss_minus, acc = model(x_train, y_train)
    hidden_layer.w[4][0] = w1 

    num_delta6 = (loss_plus - loss_minus) / 2*epsilon
    abs_diff6 = abs(delta6 - num_delta6)
    results.append(('input to hidden weight #2', num_delta6, delta6, abs_diff6))
    
    return results

In [None]:
abs(0.991790 - 0.998552)

In [None]:
a

In [None]:
def checkGradient(x_train,y_train,config):

    subsetSize = 10  #Feel free to change this
    sample_idx = np.random.randint(0,len(x_train),subsetSize)
    x_train_sample, y_train_sample = x_train[sample_idx], y_train[sample_idx]

    model = Neuralnetwork(config)
    check_grad(model, x_train_sample, y_train_sample)