# Logistic Regression Model

## Import libraries

In [1]:
# Put the parent of the current working directory on the import path
# (presumably where the project-local `tools` package imported below lives).
import sys, os

# os.path.dirname copes with the platform's path separator; splitting the
# path on "/" by hand only works for POSIX-style paths.
scriptPath = os.path.dirname(os.getcwd())

# Guard against stacking up duplicate entries when this cell is re-run.
if scriptPath not in sys.path:
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

# import the sklearn libraries and numpy
from sklearn.linear_model import LogisticRegression
from sklearn.grid_search import GridSearchCV
import numpy as np

## Load the data

In [2]:
# Fetch the dataset bundle through the project helper; it is indexed below
# by the keys 'features', 'training' and 'test'.
_loaded = getdata.load_data(0, test=True, nonas=True)

# Names of the prediction targets (one model is trained per entry further down).
FEATURES = _loaded['features']
print('Number of features: {}'.format(len(FEATURES)))

# Training split: inputs together with their labels.
_training = _loaded['training']
train_data, train_labels = _training['data'], _training['labels']
print('Training dataset size:  {}'.format(train_data.shape))

# Test split: only the inputs are read here.
test_data = _loaded['test']['data']
print('Test dataset size:  {}'.format(test_data.shape))

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Train a logistic regression model for each feature

In [6]:
# Fit one logistic regression per facial feature on the raw training data and
# collect the results as (feature name, fitted model) pairs.
logistics = []

# A GridSearchCV over C in [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 100.0] was
# sketched for this step but is disabled; every model uses the default C.

# Convert the training inputs to a list once, outside the loop: the
# conversion does not depend on which feature is being fitted.
train_inputs = train_data.tolist()

# loop through every facial feature
for index, facial_feature in enumerate(FEATURES):

    # column `index` of train_labels holds the targets for this feature
    fitted_model = LogisticRegression().fit(train_inputs, train_labels[:, index])

    # keep the feature name next to its model
    logistics.append((facial_feature, fitted_model))

# save the models for later, under the name 'Logistic'
save4later.save_model(logistics, 'Logistic', 
                      'Logistic regression with non-preprocessed data with no NAs',overwrite=True)

## Load the model

In [4]:
# Reload the model suite stored under "Logistic" (the name used by the
# save_model call in the training cell) and rebind `logistics` to the result.
logistics = save4later.load_model("Logistic")

Loaded pk


## Run the models on the test data

In [7]:
# Run the raw-data models over the test inputs through the project's submit
# helper, labelled 'logistics'. With verbose=True the helper reports progress
# per feature (see the cell output).
submit.create_generate(test_data, logistics, 'logistics', verbose=True)

Predicting "left_eye_center_x"... done! (0.3s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Calculate the accuracies on the training data

In [8]:
# Report, per facial feature, how close the raw-data models get on the data
# they were fitted on. This is the training set, not a held-out dev set, so
# the figures say nothing about generalisation.
print("{:30} Accuracy".format("Model"))

# Training inputs in the same form used for fitting; converted once for all models.
train_inputs = train_data.tolist()

for index, (feat, model) in enumerate(logistics):
    predictions = model.predict(train_inputs)
    # Accuracy = 1 - mean absolute error as a fraction of 96 (the value this
    # notebook names IMAGE_SIZE further down). The float divisor keeps this a
    # true division under Python 2 even if the labels are integer-typed.
    accuracy = np.mean(1 - np.abs(train_labels[:, index] - predictions) / 96.0)
    print(" - {f:<27} {a:.3%}".format(f=FEATURES[index], a=accuracy))

Model                          Accuracy
 - left_eye_center_x           100.000%
 - left_eye_center_y           100.000%
 - right_eye_center_x          100.000%
 - right_eye_center_y          100.000%
 - left_eye_inner_corner_x     100.000%
 - left_eye_inner_corner_y     100.000%
 - left_eye_outer_corner_x     100.000%
 - left_eye_outer_corner_y     100.000%
 - right_eye_inner_corner_x    100.000%
 - right_eye_inner_corner_y    100.000%
 - right_eye_outer_corner_x    100.000%
 - right_eye_outer_corner_y    100.000%
 - left_eyebrow_inner_end_x    100.000%
 - left_eyebrow_inner_end_y    100.000%
 - left_eyebrow_outer_end_x    100.000%
 - left_eyebrow_outer_end_y    100.000%
 - right_eyebrow_inner_end_x   100.000%
 - right_eyebrow_inner_end_y   100.000%
 - right_eyebrow_outer_end_x   100.000%
 - right_eyebrow_outer_end_y   100.000%
 - nose_tip_x                  100.000%
 - nose_tip_y                  100.000%
 - mouth_left_corner_x         100.000%
 - mouth_left_corner_y         100.000%


## Fit the logistic regression on 'masked' preprocessed data

In [9]:
# load the masked training data
train_masked = save4later.load_preprod("masked_nonas")

Loaded pk


In [10]:
# Train the masked-data suite: one default LogisticRegression per facial
# feature, fitted on train_masked against the matching column of train_labels.
# Each entry pairs the feature name with its fitted model.
#
# (A GridSearchCV over C in [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 100.0]
# is sketched for this step but is not active.)
Mask_logistics = [
    (feature_name, LogisticRegression().fit(train_masked, train_labels[:, column]))
    for column, feature_name in enumerate(FEATURES)
]

# Store the suite for later under the name 'Logistic_Mask'.
save4later.save_model(Mask_logistics, 'Logistic_Mask', 
                      'Logistic regression with masked data with no NAs',overwrite=True)

## Load the model

In [None]:
# Reload the masked-data model suite stored under "Logistic_Mask" (the name
# used by the save_model call in the training cell).
Mask_logistics = save4later.load_model("Logistic_Mask")

## Run the model on the test data

In [11]:
# Run the masked-data models over the test inputs, labelled 'Mask_logistics'.
# NOTE(review): these models were fitted on train_masked, while test_data is
# the test set as loaded at the top of the notebook -- confirm whether the
# same masking is applied to it somewhere before prediction.
submit.create_generate(test_data, Mask_logistics, 'Mask_logistics', verbose=True)

Predicting "left_eye_center_x"... done! (0.3s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Calculate accuracies on the masked data

In [12]:
# Report, per facial feature, how close the masked-data models get on the
# data they were fitted on. This is the training set, not a held-out dev set,
# so the figures say nothing about generalisation.
print("{:30} Accuracy".format("Model"))

for index, (feat, model) in enumerate(Mask_logistics):
    predictions = model.predict(train_masked)
    # Accuracy = 1 - mean absolute error as a fraction of 96 (the value this
    # notebook names IMAGE_SIZE further down). The float divisor keeps this a
    # true division under Python 2 even if the labels are integer-typed.
    accuracy = np.mean(1 - np.abs(train_labels[:, index] - predictions) / 96.0)
    print(" - {f:<27} {a:.3%}".format(f=FEATURES[index], a=accuracy))

Model                          Accuracy
 - left_eye_center_x           100.000%
 - left_eye_center_y           100.000%
 - right_eye_center_x          100.000%
 - right_eye_center_y          100.000%
 - left_eye_inner_corner_x     100.000%
 - left_eye_inner_corner_y     100.000%
 - left_eye_outer_corner_x     100.000%
 - left_eye_outer_corner_y     100.000%
 - right_eye_inner_corner_x    100.000%
 - right_eye_inner_corner_y    100.000%
 - right_eye_outer_corner_x    100.000%
 - right_eye_outer_corner_y    100.000%
 - left_eyebrow_inner_end_x    100.000%
 - left_eyebrow_inner_end_y    100.000%
 - left_eyebrow_outer_end_x    100.000%
 - left_eyebrow_outer_end_y    100.000%
 - right_eyebrow_inner_end_x   100.000%
 - right_eyebrow_inner_end_y   100.000%
 - right_eyebrow_outer_end_x   100.000%
 - right_eyebrow_outer_end_y   100.000%
 - nose_tip_x                  100.000%
 - nose_tip_y                  100.000%
 - mouth_left_corner_x         100.000%
 - mouth_left_corner_y         100.000%


## Fit the logistic regression model on dimension-reduced data
We reduce the dimensionality of our data with principal component analysis (PCA) to remove some of the excess noise.

In [14]:
# load the PCA training data
train_PCA = save4later.load_preprod("pca_trained")

Loaded pk


In [15]:
# Build the PCA model suite: for every facial feature, fit a default
# LogisticRegression on train_PCA against that feature's label column and
# record it together with the feature name.
#
# (A GridSearchCV over C in [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 100.0]
# is sketched for this step but is not active.)
PCA_logistics = []

for column, feature_name in enumerate(FEATURES):
    targets = train_labels[:, column]
    PCA_logistics.append((feature_name, LogisticRegression().fit(train_PCA, targets)))

# Store the suite for later under the name 'Logistic_PCA'.
save4later.save_model(PCA_logistics, 'Logistic_PCA', 
                      'Logistic regression with PCA data with no NAs',overwrite=True)

In [16]:
# save the models for later
# NOTE(review): this repeats, argument for argument, the save_model call that
# ends the PCA training cell; one of the two can probably be removed.
save4later.save_model(PCA_logistics, 'Logistic_PCA', 
                      'Logistic regression with PCA data with no NAs',overwrite=True)

In [18]:
# Show the inventory of stored models (file name and description, as seen in
# the cell output) to check that the suites saved above are present.
save4later.list_models()


    STORED MODELS
 Total: 14

  + Multi_NB_sobel.pk : Multinomial naive bayes with sobel data with no NAs
  + MultinomialNB_HOG.pk : Multinomial naive bayes with blurred HOG data with no NAs
  + Multi_NB_mask.pk : Multinomial naive bayes with masked data with no NAs
  + Logistic_PCA.pk : Logistic regression with PCA data with no NAs
  + Logistic.pk : Logistic regression with non-preprocessed data with no NAs
  + Logistic_Mask.pk : Logistic regression with masked data with no NAs
  + Multi_NB.pk : Multinomial naive bayes without preprocessed data with no NAs
  + MultinomialNB_sobel.pk : Multinomial naive bayes with masked data with no NAs
  + MultinomialNB.pk : Multinomial naive bayes with non-preprocessed data with no NAs
  + MultinomialNB_gauss.pk : Multinomial naive bayes with Gaussian blurred data with no NAs
  + Multi_NB_LapG.pk : Multinomial naive bayes with Laplace and Gaussian transformed data with no NAs
  + Multi_NB_HOG.pk : Multinomial naive bayes with blurred HOG data with 

## Load the model

In [None]:
# Reload the PCA model suite stored under "Logistic_PCA" (the name used by
# the save_model calls above).
PCA_logistics = save4later.load_model("Logistic_PCA")

## Run the model on the test data

In [None]:
# Generate the test-set predictions for the PCA model suite. The label is
# 'PCA_logistics', following the variable-name convention of the other
# submit calls; the earlier 'Mask_logistics' label was carried over from the
# masked-model cell and would presumably have clashed with that run's output.
# NOTE(review): the PCA models were fitted on train_PCA, while test_data is
# the test set as loaded at the top of the notebook -- confirm whether
# create_generate applies the PCA transform, otherwise PCA-transformed test
# data has to be passed here.
submit.create_generate(test_data, PCA_logistics, 'PCA_logistics', verbose=True)

## Accuracy functions

In [24]:
IMAGE_SIZE = 96

def get_accuracy(models_list, verbose=False, ret_acc=True, data=None, labels=None):
    ''' Calculates the accuracy for a given suite of models

    The accuracy of one model is 1 minus its mean absolute prediction error
    expressed as a fraction of IMAGE_SIZE.

    models_list -- iterable of (feature_name, fitted_model) pairs; the model
                   at position i is scored against column i of the labels
    verbose     -- if True, print a header and one line per model
    ret_acc     -- if True, return the list of accuracies; otherwise the
                   function returns None
    data        -- inputs handed to every model's predict(); defaults to
                   train_PCA.tolist(), the data set this function used to be
                   hard-wired to, so existing calls behave as before
    labels      -- 2-D array of targets, one column per model; defaults to
                   train_labels
    '''
    # Resolve the defaults lazily so the notebook globals are only needed
    # when the caller does not supply their own data. The conversion of
    # train_PCA happens once here rather than once per model.
    if data is None:
        data = train_PCA.tolist()
    if labels is None:
        labels = train_labels

    if verbose:
        print("{:30} Accuracy".format("Model"))

    acc_list = []

    for index, (feat, model) in enumerate(models_list):
        predictions = model.predict(data)
        # float() keeps this a true division under Python 2 even when the
        # labels and predictions are integer-typed
        errors = np.abs(labels[:, index] - predictions) / float(IMAGE_SIZE)
        accuracy = np.mean(1 - errors)
        acc_list.append(accuracy)

        if verbose:
            # the name stored alongside the model is the FEATURES entry it
            # was trained for, so no global lookup is needed
            print(" - {f:<27} {a:.3%}".format(f=feat, a=accuracy))

    if ret_acc:
        return acc_list
    
def compare_accuracies(iter_model_lists, model_labels):
    ''' Compares the accuracy of different model suites

    iter_model_lists -- iterable of model suites, each one a list accepted
                        by get_accuracy
    model_labels     -- iterable of column titles (strings), shown in the
                        report header

    Prints a table with one row per feature and one accuracy column per
    suite. Returns nothing.
    '''
    # one list of per-feature accuracies for every suite
    accuracies = [get_accuracy(mod, verbose=False) for mod in iter_model_lists]

    # Print report
    header = "   Feature     |   ACCURACIES:    "
    print(header + " " + '   '.join(model_labels))

    # With no suites there are no rows to print; indexing accuracies[0]
    # below would raise IndexError.
    if not accuracies:
        return

    for f in range(len(accuracies[0])):  # Num of FEATURES
        # format all the accuracies of this feature, one cell per suite
        cells = ''.join(" {:.2%}  ".format(suite_acc[f]) for suite_acc in accuracies)
        print(" - {:<27}   ".format(FEATURES[f]) + cells)

In [26]:
# Score the PCA suite on its training data. Because this is the last
# expression of the cell, the returned accuracy list is also echoed beneath
# the printed table.
get_accuracy(PCA_logistics,verbose=True)

Model                          Accuracy
 - left_eye_center_x           99.577%
 - left_eye_center_y           99.363%
 - right_eye_center_x          99.556%
 - right_eye_center_y          99.638%
 - left_eye_inner_corner_x     99.288%
 - left_eye_inner_corner_y     99.606%
 - left_eye_outer_corner_x     99.395%
 - left_eye_outer_corner_y     99.510%
 - right_eye_inner_corner_x    99.448%
 - right_eye_inner_corner_y    99.574%
 - right_eye_outer_corner_x    99.449%
 - right_eye_outer_corner_y    99.445%
 - left_eyebrow_inner_end_x    98.849%
 - left_eyebrow_inner_end_y    99.006%
 - left_eyebrow_outer_end_x    99.310%
 - left_eyebrow_outer_end_y    99.045%
 - right_eyebrow_inner_end_x   99.249%
 - right_eyebrow_inner_end_y   99.010%
 - right_eyebrow_outer_end_x   98.674%
 - right_eyebrow_outer_end_y   98.832%
 - nose_tip_x                  98.912%
 - nose_tip_y                  99.518%
 - mouth_left_corner_x         99.031%
 - mouth_left_corner_y         99.577%
 - mouth_right_corner_x 

[0.99577005451713385,
 0.99362830996884721,
 0.99555588006230533,
 0.99638337227414342,
 0.99288356697819324,
 0.99606211059190042,
 0.99394957165109021,
 0.99510319314641749,
 0.99447527258566959,
 0.99573598130841123,
 0.99448987538940792,
 0.99445093457943923,
 0.98848812305295952,
 0.9900603582554518,
 0.99309774143302176,
 0.99044976635514015,
 0.99248929127725838,
 0.99010416666666679,
 0.98674065420560753,
 0.98832262461059184,
 0.98912091121495327,
 0.99517620716510913,
 0.99031347352024934,
 0.99577492211838015,
 0.9913843457943925,
 0.99535144080996873,
 0.99265965732087247,
 0.99100467289719629,
 0.99357476635514019,
 0.99030373831775698]