In [2]:
import importlib
import time
from pathlib import Path

import numpy as np
import pandas as pd

# display is imported from the public IPython.display module; importing it from
# IPython.core.display is deprecated
from IPython.display import display
from sklearn import naive_bayes as nb
from sklearn import neural_network as nn
from sklearn.datasets import fetch_covtype

# project-local helper package used throughout the notebook
import codes as c

# re-import the local package so edits to it are picked up without restarting the kernel
importlib.reload(c)

<module 'codes' from '/Users/pmccauley/analysis/comp5318/comp5318_assignment_2/codes/__init__.py'>

In [13]:
def read_covtype(frey=False, crop=False):
    '''
    Name:
        read_covtype
    
    Purpose: 
        Read covtype dataset using the sklearn.datasets function fetch_covtype 
        (or the project loader in the codes package when frey is set) and 
        return in X, y array format along with class name and number arrays
    
    Parameters: 
        No Required Inputs:
        
        2 Optional Settings:
                 
        frey = Boolean, default=False. Use Virginia's read and preprocessing
               method (c.load_covertype_data + c.create_features_and_labels)
               
        crop = Boolean, default=False. Set to only keep the first 10 columns
               of X. Accepted because other cells in this notebook call 
               read_covtype(crop=...). 
    
    Returns: 
        4 Ouputs: 
        
        X = NumPy array, data array
        y = NumPy array, class labels
        cnames = list, class names
        cnums = NumPy array, class number (numeric class labels)
    '''     
    
    if frey:
        #the loader used to be referenced without being called, which handed the 
        #function object itself to create_features_and_labels; call it when callable
        #NOTE(review): assumes load_covertype_data needs no arguments - confirm in codes
        loader = c.load_covertype_data
        data = loader() if callable(loader) else loader
        X, y = c.create_features_and_labels(data)
        
    else:
        data = fetch_covtype()
        X = data['data']
        y = data['target']

    if crop:
        #assumes X is a 2-D array with samples along the first axis - TODO confirm
        #for the frey=True path, whose return type is defined in the codes package
        X = X[:, :10]

    cnames = ['Spruce/Fir','Lodgepole Pine','Ponderosa Pine','Cottonwood/Willow','Aspen','Douglas-fir','Krummholz']
    cnums = np.arange(1,8)
    
    return X, y, cnames, cnums
    

<h2>Naive Bayes Classification Results</h2>

The following two cells perform a 10-fold cross validation using the Gaussian Naive Bayes classifier in scikit-learn

In [4]:
def covtype_naive_bayes(kfold=10, style='prop', frey=False):
    '''
    Name:
        covtype_naive_bayes
    
    Purpose: 
        Wrapper for Naive Bayes classifier: reads the data, cross validates 
        scikit-learn's GaussianNB via c.cross_validate_classifier, and 
        formats/displays the averaged metrics via c.metrics_wrapper
    
    Parameters: 
        No Required Inputs: Data will be (re)read automatically 
        
        3 Optional Settings:
        
        kfold = Integer, default=10. Passed to c.cross_validate_classifier 
                 as its kfold argument
                 
        style = String, default='prop', options are ['prop', 'random', 'equal'].
                 Passed to c.cross_validate_classifier as its style argument
                 
        frey = Boolean, default=False. Use Virginia's read and preprocessing
               method
    
    Returns: 
        3 Ouputs: 
        
        df_total = Pandas DataFrame containing the average metrics (accuracy, 
                    F1, precision, recall) over the cross validation cycles
        df_class = Pandas DataFrame containing the average cross validation 
                    metrics for each separate class (f1, precision, recall)
        df_conf = Pandas DataFrame containing the average confusion matrix 
                    across the cross validation cycles
        
    ''' 
    
    #read data (read_covtype is called with frey only, so this works whether or 
    #not the reader accepts a crop option)
    X, y, cnames, cnums = read_covtype(frey=frey)
    
    #define classifier; the class itself (not an instance) is handed to the cross validator
    _classifier = nb.GaussianNB
    
    #pass classifier, data, and labels shifted down by one to the cross validator from evaluations.py
    confs, runs = c.cross_validate_classifier(_classifier, X, y-1, kfold=kfold, style=style)
    
    #average the confusion matrices
    conf = np.mean(confs, axis=0)
    
    #average runtime
    run = np.mean(runs)
    
    #calculate performance metrics, format into datatables, and display
    df_total, df_class, df_conf = c.metrics_wrapper(conf, cnames, runtime=run, do_display=True)
        
    return df_total, df_class, df_conf
    

In [28]:
#classify and display results: run the Gaussian Naive Bayes wrapper with kfold=10
#and keep the three returned result tables
df_total, df_class, df_conf = covtype_naive_bayes(kfold=10)

Average/overall metrics:


Unnamed: 0,Avg Runtime,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
Class-Averaged or Overall:,1.340539,0.37,0.41,0.59,0.46


Class-specific metrics:


Unnamed: 0_level_0,Class,F-Meas,Precision,Recall
Confusion Matrix Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Spruce/Fir,0.61,0.5,0.78
1,Lodgepole Pine,0.27,0.84,0.16
2,Ponderosa Pine,0.57,0.44,0.8
3,Cottonwood/Willow,0.35,0.22,0.84
4,Aspen,0.14,0.08,0.62
5,Douglas-fir,0.14,0.38,0.09
6,Krummholz,0.52,0.38,0.81


Confusion matrix (yellow = col max; red = row max):


Predicted,0,1,2,3,4,5,6
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,16545.2,854.5,123.7,0.0,1135.0,57.5,2468.1
1,15883.8,4587.7,1992.8,26.9,5383.6,159.4,296.8
2,7.4,4.0,2874.5,612.0,57.7,19.4,0.0
3,0.0,0.0,45.3,229.7,0.0,0.0,0.0
4,193.5,15.4,139.9,0.0,591.2,9.0,0.0
5,42.6,5.0,1279.8,165.9,92.5,151.2,0.0
6,350.3,11.4,6.9,0.0,12.1,0.0,1670.3


<h2>Multi-Layer Perceptron (MLP) Hyperparameter Exploration</h2>

The following cells explore the MLP hyperparameters by varying each one over a range of values while keeping the others at their default values. See the PDF report for more details.

In [7]:
def mlp_explore_param(param, values, X_train, y_train, X_test, y_test, args=None, redo=False):
    '''
    Name:
        mlp_explore_param
    
    Purpose: 
        Explore a given scikit-learn MLP hyperparameter by looping through an 
        array of values and recording classification performance for each step. 
        Results are written to CSV files in the mlp_explore_results directory 
        and read from there on subsequent calls unless redo=True.
    
    Parameters: 
        6 Required Inputs:
        
        param = String, hyperparameter to test (e.g. 'hidden_layer_sizes')
        values = List or numpy array containing param values to be tested 
                 (must not be empty)
        X_train = NumPy array, Training data
        y_train = NumPy array, Training labels
        X_test = NumPy array, Test data
        y_test = NumPy array, Test labels
        
        2 Optional Settings:
        
        args = Dictionary, default={'solver':'sgd', 'early_stopping':True}. 
               Arguments passed to the MLP classifier via **kwargs that 
               will be kept constant for each test. The dictionary is copied, 
               never modified. 
        redo = Boolean, default=False. Results will be written to and read 
               from a CSV file. Set redo=True to remake an existing CSV
        
    Returns: 
        Out: Pandas DataFrame containing the metrics (accuracy, f1, precision, 
             recall, run time, loss, and iteration count) for each parameter value
             
    Raises:
        ValueError if values is empty
    ''' 
    
    n_values = len(values)
    if n_values == 0:
        raise ValueError('values must contain at least one entry for param = '+param)
    
    #default arguments; always work on a copy so the caller's dictionary is left untouched
    kwargs = dict(args) if args is not None else {'solver':'sgd', 'early_stopping':True}
    
    #add input parameter to kwargs if necessary
    kwargs.setdefault(param, values[0])
    
    #define csv file name for storing results table
    results_dir = Path('mlp_explore_results')
    if param in ('learning_rate_init', 'tol'):
        #fall back to scikit-learn's default schedule name when none was supplied, 
        #instead of failing with a KeyError
        rate = kwargs.get('learning_rate', 'constant')
        file = results_dir / ('mlp_explore_'+param+'_'+rate+'.csv')
    elif param=='hidden_layer_sizes':
        #a plain list of layer lists is the depth test; anything else (e.g. array) is the width test
        config = 'depth' if isinstance(values, list) else 'width'
        file = results_dir / ('mlp_explore_'+param+'_'+config+'.csv')
    else:
        file = results_dir / ('mlp_explore_'+param+'.csv')
    
    #if the file exists, read results from there unless redo keyword is set
    if file.is_file() and not redo:
        print('Reading existing results from '+file.name)
        return pd.read_csv(file.resolve(), index_col=0)
    
    #create blank output arrays    
    runtimes = np.zeros(n_values)
    n_iter = np.zeros(n_values)
    loss = np.zeros(n_values)
    
    print('Exploring param = '+param+' from '+str(values[0])+' to '+str(values[-1]))
    print('Working on '+param+' = ', sep=' ', end='', flush=True)
    
    #loop through input values for param, collecting one metrics table per value
    frames = []
    for i, param_value in enumerate(values):
        
        #insert current value into kwargs and print
        kwargs[param] = param_value
        print(param_value, sep=' ', end=',', flush=True)
        
        start = time.time()
        
        #initialise classifier, train, and predict
        _classifier = nn.MLPClassifier(**kwargs)
        _classifier = _classifier.fit(X_train, y_train)
        y_pred = _classifier.predict(X_test)
        
        #store runtime, number of iterations, and final loss function value
        runtimes[i] = time.time()-start
        n_iter[i] = _classifier.n_iter_
        loss[i] = _classifier.loss_
        
        #construct confusion matrix and format metrics into dataframe
        #NOTE: cnames is not an argument; it is read from the notebook's global 
        #namespace, so the cell that unpacks read_covtype must have been run first
        conf = c.construct_confusion_matrix(y_test-1, y_pred-1, dim=7)
        df_total, df_class, df_conf = c.metrics_wrapper(conf, cnames, do_display=False)
        frames.append(df_total)
    
    #combine the per-value tables (DataFrame.append no longer exists in pandas 2.x); 
    #a single table is kept as-is, several are stacked with a fresh 0..n-1 index
    df_totals = frames[0].copy() if len(frames)==1 else pd.concat(frames, ignore_index=True)
    
    #insert additional information into output    
    df_totals.insert(0, 'n_iter', n_iter)    
    df_totals.insert(0, 'loss', loss)    
    df_totals.insert(0, 'Run Time', runtimes)
    df_totals.insert(0, param, values)
    
    #write output dataframe to csv file, creating the results directory if needed 
    #so a long run is not lost at the final write
    file.parent.mkdir(parents=True, exist_ok=True)
    df_totals.to_csv(path_or_buf=file)
    print(' ')
    print('Wrote results to '+file.name)
    
    return df_totals


In [8]:
def mlp_explore_params(X_train, y_train, X_test, y_test, redo=False):
    '''
    Name:
        mlp_explore_params
    
    Purpose: 
        Explore scikit-learn MLP hyperparameters by looping through arrays 
        of possible values. This code is a wrapper for mlp_explore_param 
        that defines the values to be tested and passes them to the main 
        routine. 
    
    Parameters: 
        4 Required Inputs:
    
        X_train = NumPy array, Training data
        y_train = NumPy array, Training labels
        X_test = NumPy array, Test data
        y_test = NumPy array, Test labels
        
        1 Optional Settings:
        
        redo = Boolean, default=False. Results will be written to and read 
               from CSV files. Set redo=True to remake existing CSVs
        
    
    Returns: 
        2 Outputs:
        
        output = List with Pandas DataFrames containing the metrics (accuracy, f1, 
                 precision, recall, run time, loss, and iteration count) for each 
                 parameter value. 'learning_rate_init' and 'tol' contribute one 
                 table per learning rate schedule.
        param_titles = List of display titles, one per table in output
    ''' 
    
    #hyperparameters accepted by MLPClassifier to be tested
    params = ['hidden_layer_sizes',
              'hidden_layer_sizes',
              'activation',
              'alpha',
              'batch_size',
              'momentum',
              'learning_rate_init',
              'shuffle',
              'nesterovs_momentum',
              'power_t',
              'tol']
    
    #one title per output table (learning_rate_init and tol expand to three tables each)
    param_titles = ['Single Hidden Layer (Width)',
                    'Multiple Hidden Layers (Depth)',
                    'Activation Function',
                    'Alpha (L2 Penalty)',
                    'Mini Batch Size',
                    'Momentum',
                    'Initial Learning Rate (Rate = Constant)',
                    'Initial Learning Rate (Rate = Inverse Scaling)',
                    'Initial Learning Rate (Rate = Adaptive)',
                    'Shuffle',
                    "Nesterov's Momentum",
                    'Inverse Scaling Exponent',
                    'Tolerance (Rate = Constant)',
                    'Tolerance (Rate = Inverse Scaling)',
                    'Tolerance (Rate = Adaptive)']    
    
    #test values corresponding to params array above
    values = [np.concatenate([np.arange(1,10,1), np.arange(10,100,10), np.arange(100,1100,100)]),
              [[100],[100,100],[100,100,100],[100,100,100,100],[100,100,100,100,100]],
              ['identity', 'logistic', 'tanh', 'relu'],
              np.sort(np.append(np.geomspace(1e-6,1e-1,num=6),np.geomspace(5e-6,5e-1,num=6))),
              np.sort(np.append(np.geomspace(1e1,1e5,num=5),np.geomspace(5e1,5e4,num=4))).round().astype(int),
              np.linspace(0.01, 0.99, 50),
              np.sort(np.append(np.geomspace(1e-5,1e0,num=6),np.geomspace(5e-5,5e-1,num=5))),
              [False,True],
              [False,True],
              np.linspace(0.1, 2, 20),
              np.sort(np.append(np.geomspace(1e-6,1e1,num=8),np.geomspace(5e-6,5e0,num=7)))]
    
    #possible learning rates, used for params==learning_rate_init and tol
    learning_rates = ['constant','invscaling','adaptive']
    
    #default classifier arguments
    base_args = {'solver':'sgd', 'early_stopping':True}
    
    #cycle through params and pass to mlp_explore_param for the hard work
    output = []
    for param, param_values in zip(params, values):
        
        #for these params, cycle value through the possible learning rates
        if param in ('learning_rate_init', 'tol'):
            for rate in learning_rates:
                args = {**base_args, 'learning_rate':rate}
                output.append(mlp_explore_param(param, param_values, X_train, y_train, X_test, y_test, args=args, redo=redo))
        
        #this param is only used for learning_rate=invscaling, so ensure that
        elif param=='power_t':
            #the invscaling arguments were previously built but never forwarded; 
            #a mlp_explore_power_t.csv cached before this fix needs redo=True to be regenerated
            args = {**base_args, 'learning_rate_init':0.1, 'learning_rate':'invscaling'}
            output.append(mlp_explore_param(param, param_values, X_train, y_train, X_test, y_test, args=args, redo=redo))
            
        else:
            output.append(mlp_explore_param(param, param_values, X_train, y_train, X_test, y_test, redo=redo))
        
    return output, param_titles
        

In [9]:
def mlp_hyper_awesome(tables, param_titles, do_display=True):
    '''
    Name:
        mlp_hyper_awesome
    
    Purpose: 
        Take tables from mlp_explore_params, choose "best" parameter value 
        based on accuracy and f1 score, output results in a dataframe, 
        and format/display all the tables
    
    Parameters: 
        2 Required Inputs:
    
        tables = list containing Pandas DataFrames from mlp_explore_params. 
                 Each needs 'Overall Accuracy', 'Average F-Meas' and 'Run Time' 
                 columns, with the tested parameter values in the first column. 
        param_titles = string array containing the associated parameter names, 
                 one per table
        
        1 Optional Settings:
        
        do_display = Boolean, default=True. Set to display all tables.
        
    
    Returns: 
        Out: Pandas DataFrame with "best" value for each parameter based on 
        highest average of accuracy and f1 score. Ties on the score are broken 
        by the shortest run time, and any remaining tie by the first row. 
        
    Raises:
        ValueError if tables and param_titles differ in length
    ''' 
    
    if len(tables) != len(param_titles):
        raise ValueError('tables and param_titles must have the same length')
    
    best_values = []
    best_acc = np.zeros(len(param_titles))
    best_f1 = np.zeros(len(param_titles))
    
    for i, table in enumerate(tables):
        acc = table['Overall Accuracy'].values
        f1 = table['Average F-Meas'].values
        score = (acc + f1)/2.
        runtime = table['Run Time'].values
        values = table.iloc[:,0].values

        #rows sharing the top score, narrowed to those with the shortest run time
        highscore = np.flatnonzero(score == np.amax(score))
        fastest = highscore[runtime[highscore] == np.amin(runtime[highscore])]
        
        #take a single scalar row; an exact tie on both score and run time used to 
        #leave several rows here, which cannot be stored in best_acc[i] / best_f1[i]
        ind = int(fastest[0])
        
        #length-1 array so the reported string keeps its bracketed form, e.g. '[0.1]'
        value = values[[ind]]
        
        best_values.append(str(value))
        best_acc[i] = acc[ind]
        best_f1[i] = f1[ind]
        
        if do_display:
            print('')
            print('"Best" value = '+str(value)+' for param = '+param_titles[i])
            #highlight the chosen row; best is bound as a default so each styler keeps its own row
            display(table.style.apply(c.color_max, axis=0).apply(c.color_min).apply(
                lambda row, best=ind: ['background: yellow' if row.name == best else '' for _ in row], axis=1))
    
    output = pd.DataFrame(index=param_titles, data={'Best Value':best_values, 'F1':best_f1, 'Overall Accuracy':best_acc})
    
    if do_display:
        print('Best values from the tables above...')
        display(output)
    
    return output

In [173]:
#read data (called without crop: the reader defined in this notebook takes no crop argument)
X, y, cnames, cnums = read_covtype()
#split into training and test sets, regularise
X_train, y_train, X_test, y_test = c.split_dataset(X,y,regularize=True)

#obtain parameter exploration results tables (cached CSVs are reused because redo=False)
tables, param_titles = mlp_explore_params(X_train, y_train, X_test, y_test, redo=False)

#obtain "best" values for each parameter and display results
print('')
best = mlp_hyper_awesome(tables, param_titles, do_display=True)

Reading existing results from mlp_explore_hidden_layer_sizes_width.csv
Reading existing results from mlp_explore_hidden_layer_sizes_depth.csv
Reading existing results from mlp_explore_activation.csv
Reading existing results from mlp_explore_alpha.csv
Reading existing results from mlp_explore_batch_size.csv
Reading existing results from mlp_explore_momentum.csv
Reading existing results from mlp_explore_learning_rate_init_constant.csv
Reading existing results from mlp_explore_learning_rate_init_invscaling.csv
Reading existing results from mlp_explore_learning_rate_init_adaptive.csv
Reading existing results from mlp_explore_shuffle.csv
Reading existing results from mlp_explore_nesterovs_momentum.csv
Reading existing results from mlp_explore_power_t.csv
Reading existing results from mlp_explore_tol_constant.csv
Reading existing results from mlp_explore_tol_invscaling.csv
Reading existing results from mlp_explore_tol_adaptive.csv


"Best" value = [1000] for param = Single Hidden Layer (Widt

Unnamed: 0,hidden_layer_sizes,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1,4.71715,0.996071,5,0.18,0.17,0.2,0.6
1,2,26.4291,0.709037,27,0.38,0.41,0.4,0.71
2,3,14.7004,0.710617,14,0.34,0.41,0.36,0.71
3,4,19.6812,0.65746,19,0.41,0.45,0.43,0.72
4,5,15.7901,0.652057,12,0.41,0.46,0.43,0.73
5,6,23.0706,0.639999,15,0.43,0.47,0.44,0.73
6,7,18.7216,0.629666,13,0.44,0.48,0.44,0.74
7,8,40.7661,0.607699,31,0.47,0.59,0.46,0.74
8,9,37.8995,0.599644,29,0.5,0.65,0.49,0.74
9,10,43.8272,0.586098,32,0.52,0.7,0.49,0.75



"Best" value = ['[100, 100, 100]'] for param = Multiple Hidden Layers (Depth)


Unnamed: 0,hidden_layer_sizes,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,[100],181.774,0.425178,60,0.69,0.77,0.65,0.83
1,"[100, 100]",207.346,0.37552,37,0.72,0.8,0.68,0.84
2,"[100, 100, 100]",253.366,0.327365,31,0.75,0.8,0.72,0.86
3,"[100, 100, 100, 100]",264.145,0.3238,25,0.75,0.81,0.71,0.86
4,"[100, 100, 100, 100, 100]",124.826,0.414679,10,0.66,0.76,0.62,0.81



"Best" value = ['tanh'] for param = Activation Function


Unnamed: 0,activation,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,identity,19.9921,0.636018,9,0.49,0.57,0.48,0.72
1,logistic,389.6,0.511144,138,0.6,0.72,0.57,0.78
2,tanh,284.749,0.390859,93,0.73,0.8,0.68,0.84
3,relu,205.431,0.420116,70,0.7,0.78,0.66,0.83



"Best" value = [1.e-05] for param = Alpha (L2 Penalty)


Unnamed: 0,alpha,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-06,255.23,0.404065,80,0.71,0.79,0.67,0.83
1,5e-06,207.298,0.419526,67,0.69,0.78,0.64,0.83
2,1e-05,353.098,0.378021,119,0.74,0.8,0.71,0.85
3,5e-05,135.691,0.451277,45,0.66,0.76,0.62,0.81
4,0.0001,202.692,0.419221,69,0.7,0.78,0.66,0.83
5,0.0005,220.663,0.417998,74,0.69,0.78,0.66,0.83
6,0.001,173.797,0.435759,58,0.68,0.77,0.64,0.82
7,0.005,208.028,0.422282,70,0.7,0.79,0.66,0.83
8,0.01,162.276,0.444854,55,0.68,0.76,0.64,0.82
9,0.05,206.153,0.463443,69,0.68,0.75,0.65,0.82



"Best" value = [10] for param = Mini Batch Size


Unnamed: 0,batch_size,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,10,256.119,0.403205,17,0.75,0.78,0.73,0.83
1,50,112.98,0.412581,23,0.69,0.77,0.65,0.82
2,100,129.685,0.410432,39,0.71,0.77,0.67,0.83
3,500,317.477,0.428576,140,0.68,0.75,0.64,0.82
4,1000,234.87,0.505575,109,0.61,0.73,0.58,0.78
5,5000,328.22,0.585946,150,0.54,0.66,0.51,0.75
6,10000,240.266,0.641078,106,0.45,0.58,0.45,0.73
7,50000,432.454,0.701637,176,0.4,0.44,0.41,0.72
8,100000,507.495,0.741914,200,0.38,0.43,0.39,0.7



"Best" value = [0.99] for param = Momentum


Unnamed: 0,momentum,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,0.01,477.408,0.526691,144,0.59,0.7,0.56,0.78
1,0.03,318.161,0.548998,109,0.56,0.67,0.53,0.77
2,0.05,397.082,0.532404,135,0.59,0.71,0.55,0.77
3,0.07,382.823,0.535173,126,0.59,0.71,0.56,0.77
4,0.09,432.673,0.530249,144,0.59,0.7,0.56,0.77
5,0.11,280.75,0.54604,95,0.57,0.69,0.54,0.77
6,0.13,390.086,0.531713,122,0.59,0.71,0.56,0.78
7,0.15,333.01,0.542375,110,0.58,0.7,0.55,0.77
8,0.17,243.786,0.55671,79,0.57,0.7,0.54,0.76
9,0.19,344.981,0.536977,117,0.59,0.7,0.56,0.77



"Best" value = [0.005] for param = Initial Learning Rate (Rate = Constant)


Unnamed: 0,learning_rate_init,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-05,318.888,0.676339,109,0.42,0.56,0.43,0.72
1,5e-05,402.187,0.577155,137,0.56,0.68,0.52,0.75
2,0.0001,393.795,0.535097,140,0.59,0.7,0.56,0.77
3,0.0005,288.379,0.438278,102,0.68,0.77,0.64,0.82
4,0.001,140.502,0.441402,50,0.66,0.76,0.62,0.81
5,0.005,83.5311,0.389008,30,0.69,0.8,0.65,0.84
6,0.01,50.6411,0.40527,18,0.68,0.79,0.63,0.83
7,0.05,14.2906,0.440352,5,0.64,0.79,0.61,0.79
8,0.1,16.7617,0.442782,6,0.71,0.75,0.69,0.81
9,0.5,30.7693,0.507072,13,0.64,0.67,0.63,0.79



"Best" value = [0.1] for param = Initial Learning Rate (Rate = Inverse Scaling)


Unnamed: 0,learning_rate_init,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-05,19.9899,1.55777,7,0.17,0.19,0.18,0.42
1,5e-05,20.0377,1.1544,7,0.23,0.29,0.23,0.58
2,0.0001,14.3145,0.952088,5,0.28,0.34,0.29,0.64
3,0.0005,11.6909,0.731994,4,0.39,0.43,0.4,0.71
4,0.001,19.7821,0.67503,7,0.42,0.6,0.42,0.72
5,0.005,25.5388,0.602458,9,0.51,0.65,0.49,0.74
6,0.01,19.8956,0.564047,7,0.56,0.7,0.53,0.76
7,0.05,86.9392,0.487317,31,0.63,0.74,0.6,0.79
8,0.1,78.0304,0.465246,28,0.67,0.76,0.63,0.8
9,0.5,102.51,0.469164,42,0.64,0.73,0.6,0.8



"Best" value = [0.1] for param = Initial Learning Rate (Rate = Adaptive)


Unnamed: 0,learning_rate_init,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-05,365.659,0.666314,121,0.45,0.66,0.44,0.73
1,5e-05,449.38,0.575338,155,0.55,0.69,0.52,0.76
2,0.0001,519.465,0.521971,180,0.59,0.72,0.56,0.78
3,0.0005,318.642,0.455245,108,0.65,0.74,0.62,0.81
4,0.001,302.215,0.400269,103,0.72,0.79,0.68,0.83
5,0.005,129.443,0.394181,44,0.72,0.78,0.68,0.84
6,0.01,129.074,0.362184,45,0.74,0.79,0.7,0.85
7,0.05,134.759,0.334388,49,0.79,0.82,0.76,0.86
8,0.1,121.849,0.305015,45,0.81,0.84,0.78,0.87
9,0.5,139.463,0.37115,59,0.75,0.81,0.71,0.85



"Best" value = [ True] for param = Shuffle


Unnamed: 0,shuffle,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,False,144.88,0.437117,52,0.68,0.76,0.65,0.82
1,True,223.504,0.409401,80,0.69,0.78,0.65,0.83



"Best" value = [ True] for param = Nesterovs Momentum


Unnamed: 0,nesterovs_momentum,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,False,231.251,0.404003,86,0.7,0.78,0.66,0.83
1,True,218.758,0.402426,79,0.7,0.77,0.66,0.83



"Best" value = [1.4] for param = Inverse Scaling Exponent


Unnamed: 0,power_t,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,0.1,198.364,0.420775,71,0.68,0.77,0.64,0.83
1,0.2,221.797,0.412569,78,0.71,0.77,0.68,0.83
2,0.3,234.688,0.407098,82,0.7,0.77,0.66,0.83
3,0.4,242.877,0.399237,84,0.71,0.78,0.67,0.84
4,0.5,155.7,0.433795,54,0.65,0.75,0.62,0.82
5,0.6,214.615,0.413485,73,0.69,0.77,0.65,0.83
6,0.7,172.261,0.43473,58,0.67,0.75,0.64,0.82
7,0.8,172.597,0.434768,60,0.67,0.75,0.64,0.82
8,0.9,230.571,0.407458,81,0.71,0.76,0.68,0.83
9,1.0,191.925,0.421533,68,0.7,0.76,0.66,0.82



"Best" value = [5.e-06] for param = Tolerance (Rate = Constant)


Unnamed: 0,tol,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-06,269.231,0.40525,82,0.71,0.79,0.67,0.83
1,5e-06,256.67,0.405978,79,0.71,0.79,0.67,0.83
2,1e-05,130.247,0.456996,41,0.65,0.77,0.61,0.81
3,5e-05,205.558,0.419962,64,0.69,0.76,0.66,0.83
4,0.0001,202.911,0.429458,63,0.68,0.78,0.64,0.82
5,0.0005,192.724,0.422463,61,0.69,0.77,0.65,0.83
6,0.001,126.265,0.46384,39,0.65,0.75,0.62,0.81
7,0.005,19.8741,0.59417,6,0.54,0.71,0.51,0.75
8,0.01,16.7839,0.603304,5,0.53,0.64,0.51,0.75
9,0.05,13.7562,0.616323,4,0.54,0.68,0.52,0.75



"Best" value = [1.] for param = Tolerance (Rate = Inverse Scaling)


Unnamed: 0,tol,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-06,13.3275,0.678232,4,0.46,0.6,0.45,0.72
1,5e-06,19.7406,0.678617,6,0.44,0.56,0.44,0.73
2,1e-05,20.0989,0.678369,6,0.43,0.49,0.43,0.73
3,5e-05,16.5243,0.68135,5,0.43,0.49,0.43,0.72
4,0.0001,16.4378,0.679352,5,0.43,0.53,0.43,0.72
5,0.0005,12.8708,0.680625,4,0.42,0.52,0.43,0.72
6,0.001,12.1235,0.685058,4,0.42,0.5,0.43,0.72
7,0.005,12.177,0.678885,4,0.42,0.46,0.42,0.72
8,0.01,12.151,0.675463,4,0.44,0.57,0.44,0.72
9,0.05,12.0395,0.675915,4,0.43,0.52,0.43,0.72



"Best" value = [5.e-06] for param = Tolerance (Rate = Adaptive)


Unnamed: 0,tol,Run Time,loss,n_iter,Average F-Meas,Average Precision,Average Recall,Overall Accuracy
0,1e-06,327.64,0.399041,117,0.71,0.78,0.67,0.83
1,5e-06,295.849,0.398319,102,0.71,0.79,0.67,0.83
2,1e-05,230.4,0.425509,77,0.69,0.77,0.65,0.82
3,5e-05,274.683,0.412678,89,0.69,0.78,0.65,0.83
4,0.0001,298.081,0.407793,100,0.7,0.78,0.66,0.83
5,0.0005,229.337,0.421732,77,0.68,0.79,0.64,0.82
6,0.001,214.147,0.430667,71,0.67,0.76,0.63,0.82
7,0.005,60.2215,0.587172,20,0.55,0.68,0.52,0.75
8,0.01,59.798,0.587568,20,0.54,0.68,0.51,0.75
9,0.05,56.5418,0.600146,19,0.54,0.71,0.51,0.75


Best values from the tables above...


Unnamed: 0,Best Value,F1,Overall Accuracy
Single Hidden Layer (Width),[1000],0.78,0.88
Multiple Hidden Layers (Depth),"['[100, 100, 100]']",0.75,0.86
Activation Function,['tanh'],0.73,0.84
Alpha (L2 Penalty),[1.e-05],0.74,0.85
Mini Batch Size,[10],0.75,0.83
Momentum,[0.99],0.73,0.84
Initial Learning Rate (Rate = Constant),[0.005],0.69,0.84
Initial Learning Rate (Rate = Inverse Scaling),[0.1],0.67,0.8
Initial Learning Rate (Rate = Adaptive),[0.1],0.81,0.87
Shuffle,[ True],0.69,0.83


<h2>Multi-Layer Perceptron (MLP) Parameter Classification Results</h2>

This section uses the hyperparameter exploration results from above to inform the final values used for our classifier comparison.

In [None]:
#fit one MLP on the train/test split, print the elapsed time in minutes, and display its metrics
#hidden layer number = mean([features,columns])
#NOTE(review): mlp_args is not defined in any cell visible here - confirm it exists before running
#NOTE(review): relies on X_train, y_train, X_test, y_test and cnames left in the kernel by an earlier cell
start = time.time()

kwargs = mlp_args()

_classifier = nn.MLPClassifier(**kwargs)
_classifier = _classifier.fit(X_train, y_train)
y_pred = _classifier.predict(X_test)
#elapsed wall-clock time for fit + predict, converted from seconds to minutes
print((time.time()-start)/60)

#labels are shifted down by one before building the 7x7 confusion matrix
conf = c.construct_confusion_matrix(y_test-1, y_pred-1, dim=7)
df_total, df_class, df_conf = c.metrics_wrapper(conf, cnames, do_display=True)

In [30]:
def covtype_mlp(kfold=10, style='prop', frey=False, redo=False):
    '''
    Name:
        covtype_mlp
    
    Purpose: 
        Wrapper for MLP cross validation. Results are written to three CSV 
        files in the mlp_explore_results directory and read back from there 
        on subsequent calls unless redo=True. 
    
    Parameters: 
        No Required Inputs: Data will be (re)read automatically 
        
        4 Optional Settings:
        
        kfold = Integer, default=10. Passed to c.cross_validate_classifier 
                 as its kfold argument
                 
        style = String, default='prop', options are ['prop', 'random', 'equal'].
                 Passed to c.cross_validate_classifier as its style argument
                 
        frey = Boolean, default=False. Use Virginia's read and preprocessing
               method
               
        redo = Boolean, default=False. Set redo=True to recompute and 
               overwrite the cached CSV files
    
    Returns: 
        3 Ouputs: 
        
        df_total = Pandas DataFrame containing the average metrics (accuracy, 
                    F1, precision, recall) over the cross validation cycles
        df_class = Pandas DataFrame containing the average cross validation 
                    metrics for each separate class (f1, precision, recall)
        df_conf = Pandas DataFrame containing the average confusion matrix 
                    across the cross validation cycles
        
    ''' 
    
    #the three cached result tables
    results_dir = Path('mlp_explore_results')
    total_file = results_dir / 'mlp_final_total.csv'
    class_file = results_dir / 'mlp_final_class.csv'
    conf_file = results_dir / 'mlp_final_conf.csv'
    
    #if all files exist, read results from there unless redo keyword is set 
    #(a partial cache is recomputed rather than failing on the missing file)
    if not redo and all(f.is_file() for f in (total_file, class_file, conf_file)):
        
        print('Reading existing results from '+total_file.name)
        df_total = pd.read_csv(total_file.resolve(), index_col=0)
        
        print('Reading existing results from '+class_file.name)
        df_class = pd.read_csv(class_file.resolve(), index_col=0)
        
        print('Reading existing results from '+conf_file.name)
        df_conf = pd.read_csv(conf_file.resolve(), index_col=0)
        
        display(df_total) 
        print('Class-specific metrics:')
        display(df_class.style.applymap(c.color_good_bad, subset=['Precision', 'Recall', 'F-Meas']))
        print('Confusion matrix (yellow = col max; blue = row max):')
        display(df_conf.style.apply(c.highlight_max, axis=0).apply(c.color_max, axis=1)) 
        
    else:
    
        #read data (read_covtype is called with frey only, so this works whether or 
        #not the reader accepts a crop option)
        X, y, cnames, cnums = read_covtype(frey=frey)

        #split into training and test sets
        #NOTE(review): the split outputs are never used below - the cross validator 
        #receives the full X and y. The call is kept in case split_dataset has side 
        #effects on X; confirm and remove if it does not.
        X_train, y_train, X_test, y_test = c.split_dataset(X,y,regularize=True)

        #define classifier; the class itself (not an instance) is handed to the cross validator
        _classifier = nn.MLPClassifier 

        #define keyword arguments
        kwargs = {'hidden_layer_sizes':[500,500,500],
                  'activation':'tanh',
                  'solver':'sgd',
                  'alpha':1.e-5,
                  'batch_size':10,
                  'learning_rate':'adaptive',
                  'learning_rate_init':0.1,
                  'shuffle':True,
                  'tol':5.e-6,
                  'momentum':0.99,
                  'nesterovs_momentum':True,
                  'early_stopping':False}

        #pass classifier, data, and labels shifted down by one to the cross validator from evaluations.py
        confs, runs = c.cross_validate_classifier(_classifier, X, y-1,
            kfold=kfold, style=style, kwargs=kwargs)

        #average the confusion matrices
        conf = np.mean(confs, axis=0)

        #average runtimes
        run = np.mean(runs)

        #calculate performance metrics, format into datatables, and display
        df_total, df_class, df_conf = c.metrics_wrapper(conf, cnames, runtime=run, do_display=True)

        #write the tables, creating the results directory if needed so a long 
        #run is not lost at the final write
        results_dir.mkdir(parents=True, exist_ok=True)
        df_total.to_csv(path_or_buf=total_file)
        df_class.to_csv(path_or_buf=class_file)
        df_conf.to_csv(path_or_buf=conf_file)        
    
    return df_total, df_class, df_conf
    

In [None]:
#rerun the final MLP evaluation; redo=True bypasses the cached CSVs and rewrites them
#NOTE(review): kfold=1 is forwarded to c.cross_validate_classifier - confirm a single fold is intended
df_total, df_class, df_conf = covtype_mlp(kfold=1, redo=True)