# Logistic Regression Model

## Import libraries

In [1]:
# set the path
import sys, os

pathArr = os.getcwd().split("/")
scriptPath = '/'.join(map(str, pathArr[:len(pathArr)-1]))
sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

# import the sklearn libraries and numpy
from sklearn.linear_model import LogisticRegression
from sklearn.grid_search import GridSearchCV
import numpy as np

## Load the data

In [2]:
# load the data
_loaded = getdata.load_data(0, test=True, nonas=True)

FEATURES = _loaded['features']
print 'Number of features:', len(FEATURES)

train_data = _loaded['training']['data']
train_labels = _loaded['training']['labels']
print 'Training dataset size: ', train_data.shape

test_data = _loaded['test']['data']
print 'Test dataset size: ', test_data.shape

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Train a logistic regression model for each feature

In [6]:
# create a list to hold logistic regressions
logistics = []

# initalize a potential set of reasonable C values
#Lparameters = {'C':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 100.0]}

# loop through every facial feature
for index,facial_feature in enumerate(FEATURES):

    # initalize the logistic regression model
    logistic = LogisticRegression()

    # set the C search with the given C options and the logistic model
    #C_search = GridSearchCV(logistic,Lparameters)

    # fit the Gridsearch model to the data
    #C_search.fit(train_data.tolist(),train_labels[:,index])

    # find the best C parameter
    #best_C = C_search.best_params_

    # initalize a model with the best C
    #logistic_optimal = LogisticRegression(C = best_C['C'])
    #logistic_optimal.fit(train_data.tolist(),train_labels[:,index])
    logistic_optimal = logistic.fit(train_data.tolist(),train_labels[:,index])
    
    # create a tuple with the name of the feature and the model
    appending = facial_feature, logistic_optimal
    
    # append the name and the model to our list of facial feature models
    logistics.append(appending)

# save the models for later
save4later.save_model(logistics, 'Logistic', 
                      'Logistic regression with non-preprocessed data with no NAs',overwrite=True)

## Load the model

In [4]:
logistics = save4later.load_model("Logistic")

Loaded pk


## Run the models on the test data

In [7]:
submit.create_generate(test_data, logistics, 'logistics', verbose=True)

Predicting "left_eye_center_x"... done! (0.3s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Calculate the accuracies on the training data

In [8]:
print "{:30} Accuracy".format("Model")

# use the models to predict the dev data
for index,(feat,model) in enumerate(logistics):
    predications = model.predict(train_data.tolist())
    accuracy = np.mean(1 - abs(train_labels[:,index] - predications)/96)
    print " - {f:<27} {a:.3%}".format(f=FEATURES[index],a=accuracy)

Model                          Accuracy
 - left_eye_center_x           100.000%
 - left_eye_center_y           100.000%
 - right_eye_center_x          100.000%
 - right_eye_center_y          100.000%
 - left_eye_inner_corner_x     100.000%
 - left_eye_inner_corner_y     100.000%
 - left_eye_outer_corner_x     100.000%
 - left_eye_outer_corner_y     100.000%
 - right_eye_inner_corner_x    100.000%
 - right_eye_inner_corner_y    100.000%
 - right_eye_outer_corner_x    100.000%
 - right_eye_outer_corner_y    100.000%
 - left_eyebrow_inner_end_x    100.000%
 - left_eyebrow_inner_end_y    100.000%
 - left_eyebrow_outer_end_x    100.000%
 - left_eyebrow_outer_end_y    100.000%
 - right_eyebrow_inner_end_x   100.000%
 - right_eyebrow_inner_end_y   100.000%
 - right_eyebrow_outer_end_x   100.000%
 - right_eyebrow_outer_end_y   100.000%
 - nose_tip_x                  100.000%
 - nose_tip_y                  100.000%
 - mouth_left_corner_x         100.000%
 - mouth_left_corner_y         100.000%


## Fit the logistic regression on 'masked' preprocessed data

In [9]:
# load the masked training data
train_masked = save4later.load_preprod("masked_nonas")

Loaded pk


In [None]:
# create a list to hold logistic regressions
Mask_logistics = []

# initalize a potential set of reasonable C values
#Lparameters = {'C':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0, 100.0]}

# loop through every facial feature
for index,facial_feature in enumerate(FEATURES):

    # initalize the logistic regression model
    logistic = LogisticRegression()

    # set the C search with the given C options and the logistic model
    #C_search = GridSearchCV(logistic,Lparameters)

    # fit the Gridsearch model to the data
    #C_search.fit(train_masked,train_labels[:,index])

    # find the best C parameter
    #best_C = C_search.best_params_

    # initalize a model with the best C
    #logistic_optimal = LogisticRegression(C = best_C['C'])
    #logistic_optimal.fit(train_masked,train_labels[:,index])
    logistic_optimal = logistic.fit(train_masked, train_labels[:,index])
    
    # create a tuple with the name of the feature and the model
    appending = facial_feature, logistic_optimal
    
    # append the name and the model to our list of facial feature models
    Mask_logistics.append(appending)

# save the models for later
save4later.save_model(Mask_logistics, 'Logistic_Mask', 
                      'Logistic regression with masked data with no NAs',overwrite=True)

## Calculate accuracies on the masked data

In [None]:
print "{:30} Accuracy".format("Model")

# use the models to predict the dev data
for index,(feat,model) in enumerate(Mask_logistics):
    predications = model.predict(train_masked)
    accuracy = np.mean(1 - abs(train_labels[:,index] - predications)/96)
    print " - {f:<27} {a:.3%}".format(f=FEATURES[index],a=accuracy)