# Submission creator  
Given all the models obtained, create a submission file for each of them

In [1]:
# General imports
import sys
import os
import numpy as np
import pandas as pd

# Make the parent of the current working directory importable, so the local
# `tools` package imported below can be found.
_cwd = os.getcwd()
# Split on the platform separator instead of a hard-coded "/": on Windows the
# old split returned a single element and the derived path collapsed to ''.
# The name is kept in case another cell reads it.
pathArr = _cwd.split(os.sep)
scriptPath = os.path.dirname(_cwd)
# Guard the append so re-running this cell does not stack duplicate entries.
if scriptPath not in sys.path:
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

In [2]:
# load the data
_loaded = getdata.load_data(0, test=True, nonas=True)

FEATURES = _loaded['features']
print 'Number of features:', len(FEATURES)

train_data = _loaded['training']['data']
train_labels = _loaded['training']['labels']
print 'Training dataset size: ', train_data.shape

test_data = _loaded['test']['data']
print 'Test dataset size: ', test_data.shape

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Define Accuracy Functions

In [9]:
# Divisor that turns an absolute coordinate error into a fraction
# (presumably the image side length in pixels -- confirm).
IMAGE_SIZE = 96

def get_accuracy(models_list, verbose=False, ret_acc=True, test=None):
    ''' Calculates the accuracy for a given suite of models

    The model at position i is scored against column i of `train_labels` as
    mean(1 - |label - prediction| / IMAGE_SIZE).

    Args:
        models_list: iterable of (feature, model) pairs. Each model must expose
            `predict`. Only the position in the list is used to pick the label
            column and the `FEATURES` name; the first pair element is ignored.
        verbose: when True, print a header and one formatted line per model.
        ret_acc: when True, return the list of accuracies.
        test: samples handed to `model.predict`. Defaults to
            `train_data.tolist()`. Scores are always computed against
            `train_labels`, so this is presumably expected to be a (possibly
            transformed) view of the training samples -- NOTE(review): confirm.

    Returns:
        A list with one accuracy per model when `ret_acc` is True, else None.
    '''
    if verbose:
        print("{:30} Accuracy".format("Model"))

    if test is None:
        test = train_data.tolist()

    # Under Python 2, `/` floors when both operands are integer-typed. With an
    # int IMAGE_SIZE, integer labels and predictions would collapse every error
    # below IMAGE_SIZE to 0 and report 100% accuracy. A float divisor forces
    # true division and leaves float inputs unchanged.
    scale = float(IMAGE_SIZE)

    acc_list = []

    for index, (_feat, model) in enumerate(models_list):
        predictions = model.predict(test)
        error = abs(train_labels[:, index] - predictions)
        accuracy = np.mean(1 - error / scale)
        acc_list.append(accuracy)

        if verbose:
            print(" - {f:<27} {a:.3%}".format(f=FEATURES[index], a=accuracy))

    if ret_acc:
        return acc_list
    return None
    
def compare_accuracies(iter_model_lists, model_labels):
    ''' Compares the accuracy of different model suites

    Scores every suite with `get_accuracy` (default samples, silent mode) and
    prints a header line followed by one row per feature, with one percentage
    column per suite.
    '''
    per_suite = [get_accuracy(suite, verbose=False) for suite in iter_model_lists]

    # Header: one space separates the fixed title from the joined labels.
    print(' '.join([" Feature     |   ACCURACIES:    ", '   '.join(model_labels)]))

    # The first suite determines how many feature rows are printed.
    for row in range(len(per_suite[0])):
        cells = ''.join(" {:.2%}       ".format(scores[row]) for scores in per_suite)
        print("{:<27}   ".format(FEATURES[row]) + cells)

## Logistic Regression

In [5]:
# Restore the three saved logistic-regression suites, in the same order.
logistic, logistic_mask, logistic_PCA = map(
    save4later.load_model,
    ('Logistic', 'Logistic_Mask', 'Logistic_PCA'),
)

Loaded pk
Loaded pk
Loaded pk


In [8]:
# Side-by-side report for the plain and the masked logistic suites.
compare_accuracies(
    iter_model_lists=[logistic, logistic_mask],
    model_labels=['LogistR', 'Log_mask'],
)

 Feature     |   ACCURACIES:     LogistR   Log_mask
left_eye_center_x              100.00%        99.69%       
left_eye_center_y              100.00%        99.73%       
right_eye_center_x             100.00%        99.62%       
right_eye_center_y             100.00%        99.74%       
left_eye_inner_corner_x        100.00%        99.61%       
left_eye_inner_corner_y        100.00%        99.70%       
left_eye_outer_corner_x        100.00%        99.45%       
left_eye_outer_corner_y        100.00%        99.65%       
right_eye_inner_corner_x       100.00%        99.55%       
right_eye_inner_corner_y       100.00%        99.70%       
right_eye_outer_corner_x       100.00%        99.41%       
right_eye_outer_corner_y       100.00%        99.63%       
left_eyebrow_inner_end_x       100.00%        99.39%       
left_eyebrow_inner_end_y       100.00%        99.54%       
left_eyebrow_outer_end_x       100.00%        99.29%       
left_eyebrow_outer_end_y       100.00%        99

In [10]:
# One submission file per suite; the label is passed to create_generate.
for _label, _suite in (('logistic', logistic), ('logistic_mask', logistic_mask)):
    submit.create_generate(test_data, _suite, label=_label, verbose=False)


... Created the csv file: ../../data/submissions/logistic_submission.csv

... Created the csv file: ../../data/submissions/logistic_mask_submission.csv


### Logistic Regression on PCA

Since the PCA transformation needs to be applied to the test data as well, let's create this submission separately.

In [15]:
# Saved PCA outputs: the first is used for the submission, the second for scoring.
pca_test, pca_train = (
    save4later.load_preprod('pca_transformed'),
    save4later.load_preprod('pca_trained'),
)

Loaded pk
Loaded pk


In [17]:
# Print the per-feature report for the PCA suite, predicting from pca_train;
# ret_acc=False means nothing is returned.
get_accuracy(
    logistic_PCA,
    verbose=True,
    ret_acc=False,
    test=pca_train,
)

Model                          Accuracy
 - left_eye_center_x           99.577%
 - left_eye_center_y           99.363%
 - right_eye_center_x          99.556%
 - right_eye_center_y          99.638%
 - left_eye_inner_corner_x     99.288%
 - left_eye_inner_corner_y     99.606%
 - left_eye_outer_corner_x     99.395%
 - left_eye_outer_corner_y     99.510%
 - right_eye_inner_corner_x    99.448%
 - right_eye_inner_corner_y    99.574%
 - right_eye_outer_corner_x    99.449%
 - right_eye_outer_corner_y    99.445%
 - left_eyebrow_inner_end_x    98.849%
 - left_eyebrow_inner_end_y    99.006%
 - left_eyebrow_outer_end_x    99.310%
 - left_eyebrow_outer_end_y    99.045%
 - right_eyebrow_inner_end_x   99.249%
 - right_eyebrow_inner_end_y   99.010%
 - right_eyebrow_outer_end_x   98.674%
 - right_eyebrow_outer_end_y   98.832%
 - nose_tip_x                  98.912%
 - nose_tip_y                  99.518%
 - mouth_left_corner_x         99.031%
 - mouth_left_corner_y         99.577%
 - mouth_right_corner_x 

In [18]:
# The PCA suite is fed pca_test rather than the raw test_data.
submit.create_generate(
    pca_test,
    logistic_PCA,
    label='logistic_PCA',
    verbose=False,
)


... Created the csv file: ../../data/submissions/logistic_PCA_submission.csv
