# Multinomial Naive Bayes Model

## Import libraries

In [32]:
# set the path
import sys, os

# Put the parent of the current working directory on the import path
# (presumably where the project-local `tools` package imported below lives).
# os.path.dirname is used instead of splitting on "/" so the parent is resolved
# correctly regardless of the platform's path separator.
scriptPath = os.path.dirname(os.getcwd())

# guard against piling up duplicate entries when this cell is re-run
if scriptPath not in sys.path:
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

# import the sklearn libraries and numpy
from sklearn.naive_bayes import MultinomialNB
from sklearn.grid_search import GridSearchCV
import numpy as np

## Load the data

In [33]:
# fetch the dataset bundle through the project loader
# NOTE(review): the meaning of the positional 0 and of test/nonas is defined in
# tools.getdata — confirm there
_loaded = getdata.load_data(0, test=True, nonas=True)

# unpack the pieces the rest of the notebook relies on
FEATURES = _loaded['features']
train_data, train_labels = _loaded['training']['data'], _loaded['training']['labels']
test_data = _loaded['test']['data']

# summarise what was loaded
print('Number of features: %d' % len(FEATURES))
print('Training dataset size:  %s' % (train_data.shape,))
print('Test dataset size:  %s' % (test_data.shape,))

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Train Naive Bayes models for each feature

In [34]:
# list of (facial feature name, fitted model) pairs, one per facial feature
multinomials = []

# initialize a set of reasonable alphas that we would like to search for the optimal alpha
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# convert the training data once up front: the conversion does not depend on the
# facial feature, so there is no need to repeat it for every fit
train_rows = train_data.tolist()

# loop through all the facial features; column `index` of train_labels holds the
# labels for the feature at position `index` in FEATURES
for index, facial_feature in enumerate(FEATURES):

    feature_labels = train_labels[:, index]

    # cross-validated grid search over the alpha candidates for this feature
    alpha_search = GridSearchCV(MultinomialNB(), MNparameters)
    alpha_search.fit(train_rows, feature_labels)

    # With the default refit=True, GridSearchCV has already refit a model using the
    # best alpha on the full training data, so reuse it instead of fitting again.
    Multinomial_optimal = alpha_search.best_estimator_

    # store the model together with the facial feature it predicts
    multinomials.append((facial_feature, Multinomial_optimal))

# save the models for later
save4later.save_model(multinomials, 'MultinomialNB',
                      'Multinomial naive bayes with non-preprocessed data with no NAs',overwrite=True)

## Load the model

In [35]:
# reload the models stored under the name "MultinomialNB" through the project helper,
# replacing the in-memory list built by the training cell
multinomials = save4later.load_model("MultinomialNB")

Loaded pk


## Run the models on the test data

In [36]:
# run the (feature, model) pairs in `multinomials` on the test data through the project's
# submit helper; verbose=True is what produces the per-feature progress lines
# NOTE(review): presumably this also writes a submission file labelled 'multinomials' —
# confirm in tools.submit
submit.create_generate(test_data, multinomials, 'multinomials', verbose=True)

Predicting "left_eye_center_x"... done! (0.2s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.2s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.2s)
Predicting "right_eye_outer_corner_y"... done! (0.2s)
Predicting "left_eyebrow_inner_end_x"... done! (0.2s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.2s)
Predicting "right_eyebrow_inner_end_x"... done! (0.2s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Calculate accuracies for training data

In [37]:
# Normalising constant for the prediction error.
# NOTE(review): 96 is presumably the image side length in pixels — confirm.
# It is a float on purpose: under Python 2, dividing an integer array by the int 96
# would floor the result and silently report ~100% accuracy.
MAX_COORD = 96.0

print("{:30} Accuracy".format("Model"))

# score each model on the data it was trained on (training accuracy, not dev accuracy)
train_rows = train_data.tolist()  # converted once; identical for every model
for index, (feat, model) in enumerate(multinomials):
    predictions = model.predict(train_rows)
    # "accuracy" is 1 minus the mean absolute error expressed as a fraction of MAX_COORD
    accuracy = np.mean(1 - abs(train_labels[:, index] - predictions) / MAX_COORD)
    print(" - {f:<27} {a:.3%}".format(f=feat, a=accuracy))

Model                          Accuracy
 - left_eye_center_x           96.023%
 - left_eye_center_y           96.255%
 - right_eye_center_x          97.168%
 - right_eye_center_y          96.625%
 - left_eye_inner_corner_x     97.402%
 - left_eye_inner_corner_y     96.869%
 - left_eye_outer_corner_x     96.665%
 - left_eye_outer_corner_y     96.036%
 - right_eye_inner_corner_x    97.664%
 - right_eye_inner_corner_y    96.663%
 - right_eye_outer_corner_x    96.258%
 - right_eye_outer_corner_y    96.286%
 - left_eyebrow_inner_end_x    96.369%
 - left_eyebrow_inner_end_y    95.002%
 - left_eyebrow_outer_end_x    96.137%
 - left_eyebrow_outer_end_y    95.255%
 - right_eyebrow_inner_end_x   96.482%
 - right_eyebrow_inner_end_y   95.688%
 - right_eyebrow_outer_end_x   95.911%
 - right_eyebrow_outer_end_y   94.327%
 - nose_tip_x                  95.722%
 - nose_tip_y                  95.219%
 - mouth_left_corner_x         95.021%
 - mouth_left_corner_y         94.796%
 - mouth_right_corner_x 

## Fit naive bayes models on 'masked' preprocessed data
We use face detection to remove the background before passing the data through our naive Bayes models.

In [38]:
# load the preprocessed ("masked") training data stored under the name "masked_nonas"
# NOTE(review): assumed to be row-aligned with train_labels, which the cells below
# reuse as the targets — confirm
train_masked = save4later.load_preprod("masked_nonas")

Loaded pk


In [39]:
# list of (facial feature name, fitted model) pairs trained on the masked data
Mask_multinomials = []

# initialize a set of reasonable alphas that we would like to search for the optimal alpha
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# loop through all the facial features; column `index` of train_labels holds the
# labels for the feature at position `index` in FEATURES
for index, facial_feature in enumerate(FEATURES):

    feature_labels = train_labels[:, index]

    # cross-validated grid search over the alpha candidates for this feature
    alpha_search = GridSearchCV(MultinomialNB(), MNparameters)
    alpha_search.fit(train_masked, feature_labels)

    # With the default refit=True, GridSearchCV has already refit a model using the
    # best alpha on the full masked training data, so reuse it instead of fitting again.
    Multinomial_optimal = alpha_search.best_estimator_

    # store the model together with the facial feature it predicts
    Mask_multinomials.append((facial_feature, Multinomial_optimal))

# save the models for later
# (the masked-data models must be saved here; previously the raw-data `multinomials`
# list was written under the 'MultinomialNB_Mask' name by mistake)
save4later.save_model(Mask_multinomials, 'MultinomialNB_Mask',
                      'Multinomial naive bayes with masked data with no NAs',overwrite=True)

## Calculate accuracies on the masked data

In [40]:
# Normalising constant for the prediction error.
# NOTE(review): 96 is presumably the image side length in pixels — confirm.
# It is a float on purpose: under Python 2, dividing an integer array by the int 96
# would floor the result and silently report ~100% accuracy.
MAX_COORD = 96.0

print("{:30} Accuracy".format("Model"))

# score each masked-data model on the masked training data it was fitted on
# (training accuracy, not dev accuracy)
for index, (feat, model) in enumerate(Mask_multinomials):
    predictions = model.predict(train_masked)
    # "accuracy" is 1 minus the mean absolute error expressed as a fraction of MAX_COORD
    accuracy = np.mean(1 - abs(train_labels[:, index] - predictions) / MAX_COORD)
    print(" - {f:<27} {a:.3%}".format(f=feat, a=accuracy))

Model                          Accuracy
 - left_eye_center_x           96.362%
 - left_eye_center_y           96.372%
 - right_eye_center_x          95.991%
 - right_eye_center_y          96.502%
 - left_eye_inner_corner_x     96.714%
 - left_eye_inner_corner_y     95.895%
 - left_eye_outer_corner_x     94.128%
 - left_eye_outer_corner_y     94.607%
 - right_eye_inner_corner_x    97.217%
 - right_eye_inner_corner_y    97.173%
 - right_eye_outer_corner_x    94.001%
 - right_eye_outer_corner_y    93.601%
 - left_eyebrow_inner_end_x    95.809%
 - left_eyebrow_inner_end_y    95.797%
 - left_eyebrow_outer_end_x    93.704%
 - left_eyebrow_outer_end_y    93.538%
 - right_eyebrow_inner_end_x   95.535%
 - right_eyebrow_inner_end_y   95.396%
 - right_eyebrow_outer_end_x   92.221%
 - right_eyebrow_outer_end_y   91.992%
 - nose_tip_x                  93.300%
 - nose_tip_y                  90.154%
 - mouth_left_corner_x         90.837%
 - mouth_left_corner_y         90.917%
 - mouth_right_corner_x 