In [1]:
# import utils and modules
import StochasticGradientAscentUtils as sga
import derivatives as der
import activationFunctions as activate
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
import os,glob

# Read and store our data from MNISTDATA

In [2]:
# Collect the paths of all data files in a deterministic (sorted) order.
def make_filePaths_to_arr():
    """Return every ``<subdirectory>/*.txt`` path below the working directory, sorted.

    ``glob.glob`` yields its matches in arbitrary, OS-dependent order, while
    the rest of this notebook relies on the position of each path in the
    returned list. Sorting makes that order reproducible on every platform.

    Returns:
        list[str]: matching relative paths in ascending lexicographic order.
    """
    print("Trying to find all files in mnistdata directory!")
    return sorted(glob.glob('*/*.txt'))
        

In [3]:
# Split the discovered files into test and train paths by file name rather
# than by list position, so the split does not depend on the order in which
# the files were found or on unrelated .txt files being present.
# Sorting keeps file <digit> at index <digit>, which is what the label
# construction further down relies on (single-digit file indices).
files = make_filePaths_to_arr()
test_file_paths = sorted(f for f in files if os.path.basename(f).startswith('test'))
train_file_paths = sorted(f for f in files if os.path.basename(f).startswith('train'))

Trying to find all files in mnistdata directory!


In [4]:
# Display the test paths; the position of each file in this list is later used as its class index.
test_file_paths

['mnistdata\\test0.txt',
 'mnistdata\\test1.txt',
 'mnistdata\\test2.txt',
 'mnistdata\\test3.txt',
 'mnistdata\\test4.txt',
 'mnistdata\\test5.txt',
 'mnistdata\\test6.txt',
 'mnistdata\\test7.txt',
 'mnistdata\\test8.txt',
 'mnistdata\\test9.txt']

In [5]:
# Display the train paths; the position of each file in this list is later used as its class index.
train_file_paths

['mnistdata\\train0.txt',
 'mnistdata\\train1.txt',
 'mnistdata\\train2.txt',
 'mnistdata\\train3.txt',
 'mnistdata\\train4.txt',
 'mnistdata\\train5.txt',
 'mnistdata\\train6.txt',
 'mnistdata\\train7.txt',
 'mnistdata\\train8.txt',
 'mnistdata\\train9.txt']

In [6]:
# Reads every train and test .txt file (one image per row, one pixel value
# per space-separated column) and returns all images as two 2D arrays.
# train_images : 2D np.array holding every train image (one row per image)
# test_images  : 2D np.array holding every test image (one row per image)
def load_images(paths):
    """Load the train and test images listed in ``paths``.

    Args:
        paths: two-element sequence; ``paths[0]`` is the list of training
            file paths and ``paths[1]`` the list of test file paths. Each
            file is parsed as a header-less, space-separated table.

    Returns:
        (train_images, test_images): float arrays in which every row is one
        image and every value has been divided by 255. An empty path list
        yields an empty 1-D float array.
    """
    def _read_rows(file_paths):
        # Convert each file to an array in one call instead of slicing the
        # DataFrame row by row; the resulting values are the same.
        blocks = [
            pd.read_csv(file_path, header=None, sep=' ').to_numpy()
            for file_path in file_paths
        ]
        if not blocks:
            return np.array([], dtype=float)
        # Stack the per-file row blocks in file order, then scale by 255.
        return np.concatenate(blocks, axis=0).astype(float) / 255

    train_images = _read_rows(paths[0])
    test_images = _read_rows(paths[1])
    return train_images, test_images

In [7]:
def store_true_values(paths):
    """Build the one-hot target matrices for the train and test files.

    The i-th file of each list is treated as class i: every row of that file
    receives the same one-hot vector, whose length equals the number of files
    in that list.

    Args:
        paths: two-element sequence; ``paths[0]`` holds the training file
            paths and ``paths[1]`` the test file paths.

    Returns:
        (y_train, y_test): arrays with one one-hot row per image row.
    """
    def one_hot_rows(file_paths):
        n_classes = len(file_paths)
        labels = []
        for class_idx, file_path in enumerate(file_paths):
            encoding = [int(class_idx == pos) for pos in range(n_classes)]
            # The file is parsed only to learn how many rows it contains.
            n_rows = pd.read_csv(file_path, header=None, sep=' ').shape[0]
            labels.extend([encoding] * n_rows)
        return np.array(labels)

    y_train = one_hot_rows(paths[0])
    y_test = one_hot_rows(paths[1])
    return y_train, y_test
        

## Checking data

In [8]:
# Sanity check: load images and labels, then print the shape of each array
# (X_train, X_test, Y_train, Y_test — one per line) to confirm every file was read.
paths = [train_file_paths, test_file_paths]
X_train, X_test = load_images(paths)
Y_train, Y_test = store_true_values(paths)
print(*(arr.shape for arr in (X_train, X_test, Y_train, Y_test)), sep="\n")

(60000, 784)
(10000, 784)
(60000, 10)
(10000, 10)


# Gradcheck for random hyperParameters

In [9]:
%%time
# Returns two pairs of gradients for W1 and W2 that the next cells compare element-wise.
# NOTE(review): the positional arguments after Y_train (0.1, 200, 100, 10, "h1") are defined by
# sga.gradcheck, whose signature is not visible here — presumably regularisation strength,
# sample/batch size, hidden units, number of classes and activation name; TODO confirm and
# consider passing them as keyword arguments.
gradW1 , num_w1, gradW2, num_w2 = sga.gradcheck(X_train, Y_train, 0.1, 200, 100, 10, "h1")

Wall time: 1min 42s


In [10]:
# The printed value is the largest absolute element-wise difference between gradW1 and num_w1
# (not the gradient itself); a value close to zero means the two gradients agree.
print( "Gradient of W1 is : ", np.max(np.abs(gradW1 - num_w1)) )

Gradient of W1 is :  7.170281368651388e-09


In [11]:
# The printed value is the largest absolute element-wise difference between gradW2 and num_w2
# (not the gradient itself); a value close to zero means the two gradients agree.
print( "Gradient of W2 is : ", np.max(np.abs(gradW2 - num_w2)) )

Gradient of W2 is :  7.069218543875877e-09


# Testing hyperParameters for mnistdata

In [12]:
def testParameters(X, T, x_test, y_test):
    
    X_Test = np.copy(x_test)
    
    add_one = np.ones((X_Test.shape[0],1))
    X_Test = np.concatenate((add_one,X_Test), axis=1)
    
    lamdas = [0.1, 0.5, 0.01]
    learning_rates = [0.01, 0.001, 0.05]
    EPOCHS = [10, 20, 30]
    Ms = [100, 200, 300]
    activation_h = ["h1", "h2", "h3"]
    batch_size = [200]
    
    results = {"batch_size" : [],
              "HiddenLayers M" : [],
              "activation_h" : [],
              "epochs" : [],
              "learning_rate" : [],
              "lamda" : [],
               "error" : [],
              "accuracy" : []
              }
    
    scores = []
    for batchSize in batch_size:
        for M in Ms:
            for h in activation_h:
                for epoch in EPOCHS:
                    print("Current M: ", M, " | Current h: ", h, " | Current epoch: ", epoch)
                    for l_rate in learning_rates:
                        for l in lamdas:
                            w1, w2 ,costs = sga.mini_batches_SGA(X, T,activation_h = h, hiddenLayers = M, classesK = 10, learning_rate = l_rate, lamda = l, epochs = epoch, batch_size = batchSize)
                            pred, _, _ = sga.NeuralNetwork(X_Test, w1, w2, hiddenLayerSize = M, outLayerSize = 10, activation_h = h)
                            acc = np.mean( np.argmax(pred,1) == np.argmax(y_test ,1) )
                            scores.append(acc) # save score we got
                            results['batch_size'].append(batchSize)
                            results['HiddenLayers M'].append(M)
                            results['activation_h'].append(h)
                            results['epochs'].append(epoch)
                            results['learning_rate'].append(l_rate)
                            results['lamda'].append(l)
                            results['error'].append(1-acc)
                            results['accuracy'].append(acc)
    return scores, results

        

In [14]:
%%time
# Run the grid search over the full train/test data, then write the collected
# results (one row per evaluated combination) to a CSV file.
scores, data = testParameters(X_train, Y_train, X_test, Y_test)
df = pd.DataFrame(
    data,
    columns=[
        'batch_size', 'HiddenLayers M', 'activation_h', 'epochs',
        'learning_rate', 'lamda', 'error', 'accuracy',
    ],
)
df.to_csv("MNISTDATA_RESULTS_TEST_PARAMETERS.csv")

Current M:  100  | Current h:  h1  | Current epoch:  10
Current M:  100  | Current h:  h1  | Current epoch:  20
Current M:  100  | Current h:  h1  | Current epoch:  30
Current M:  100  | Current h:  h2  | Current epoch:  10
Current M:  100  | Current h:  h2  | Current epoch:  20
Current M:  100  | Current h:  h2  | Current epoch:  30
Current M:  100  | Current h:  h3  | Current epoch:  10
Current M:  100  | Current h:  h3  | Current epoch:  20
Current M:  100  | Current h:  h3  | Current epoch:  30
Current M:  200  | Current h:  h1  | Current epoch:  10
Current M:  200  | Current h:  h1  | Current epoch:  20
Current M:  200  | Current h:  h1  | Current epoch:  30
Current M:  200  | Current h:  h2  | Current epoch:  10
Current M:  200  | Current h:  h2  | Current epoch:  20
Current M:  200  | Current h:  h2  | Current epoch:  30
Current M:  200  | Current h:  h3  | Current epoch:  10
Current M:  200  | Current h:  h3  | Current epoch:  20
Current M:  200  | Current h:  h3  | Current epo