# Multinomial Naive Bayes Model

## Import libraries

In [25]:
# set the path
import sys, os

# Put the parent of the current working directory on the import path
# (presumably where the `tools` package imported below lives).
# os.path.dirname honours the platform's path separator, unlike splitting
# the path on a hard-coded "/".
scriptPath = os.path.dirname(os.getcwd())
if scriptPath not in sys.path:
    # the guard keeps re-running this cell from stacking duplicate entries
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

# import the sklearn libraries and numpy
from sklearn.naive_bayes import MultinomialNB
from sklearn.grid_search import GridSearchCV
import numpy as np

## Define accuracy calculation functions

In [26]:
# Scale used to normalise absolute label errors (96 * 96 = 9216 matches the
# row width reported for the raw images in the recorded tracebacks).
IMAGE_SIZE = 96

def _as_model_input(data):
    ''' Returns `data` as plain lists when it offers `tolist`, else unchanged '''
    return data.tolist() if hasattr(data, 'tolist') else data

def get_accuracy(models_list, verbose=False, ret_acc=True, data=None, labels=None):
    ''' Calculates the accuracy for a given suite of models

    models_list -- iterable of (feature name, fitted model) tuples; the model
                   at position i is scored against label column i
    verbose     -- when True, prints one line per model
    ret_acc     -- when True, returns the list of accuracies (else None)
    data        -- samples to predict on; must be the same representation the
                   suite was fitted on. Defaults to the global `train_data`.
    labels      -- 2-D label array, one column per feature. Defaults to the
                   global `train_labels`.

    Accuracy is mean(1 - |label - prediction| / IMAGE_SIZE).
    '''
    if data is None:
        data = train_data
    if labels is None:
        labels = train_labels
    labels = np.asarray(labels)

    # convert once instead of once per model
    samples = _as_model_input(data)

    if verbose:
        print("{:30} Accuracy".format("Model"))

    acc_list = []

    for index, (feat, model) in enumerate(models_list):
        predictions = model.predict(samples)
        # float() forces true division: with integer labels, Python 2 would
        # otherwise floor every error below IMAGE_SIZE to 0
        error = abs(labels[:, index] - predictions) / float(IMAGE_SIZE)
        accuracy = np.mean(1 - error)
        acc_list.append(accuracy)

        if verbose:
            print(" - {f:<27} {a:.3%}".format(f=feat, a=accuracy))

    if ret_acc:
        return acc_list

def compare_accuracies(iter_model_lists, model_labels, datasets=None, labels=None):
    ''' Compares the accuracy of different model suites

    iter_model_lists -- iterable of model suites (see `get_accuracy`)
    model_labels     -- display names, one per suite, for the report header
    datasets         -- optional sequence parallel to `iter_model_lists` giving
                        the samples each suite must be scored on (the data it
                        was fitted on); a None entry, or omitting the argument,
                        falls back to `get_accuracy`'s default
    labels           -- optional label array forwarded to `get_accuracy`

    Prints one report row per feature; returns None.
    Raises ValueError when `datasets` and the suites differ in length.
    '''
    # materialise so the suites can be both scored and read for their names
    suites = [list(suite) for suite in iter_model_lists]

    if datasets is None:
        datasets = [None] * len(suites)
    else:
        datasets = list(datasets)
        if len(datasets) != len(suites):
            raise ValueError("datasets must contain one entry per model suite")

    accuracies = [get_accuracy(suite, verbose=False, data=suite_data, labels=labels)
                  for suite, suite_data in zip(suites, datasets)]

    # Print report
    print(' '.join(["   Feature     |   ACCURACIES:    ", '   '.join(model_labels)]))

    if not suites:
        # nothing to tabulate
        return

    # row labels come from the first suite's (feature name, model) tuples
    for f, (feat, _model) in enumerate(suites[0]):
        # format all the accuracies
        _entry = " - {:<27}   ".format(feat)
        for suite_acc in accuracies:
            _entry += " {:.2%}  ".format(suite_acc[f])

        print(_entry)

## Load the data

In [27]:
# load the data
_loaded = getdata.load_data(0, test=True, nonas=True)

FEATURES = _loaded['features']
print 'Number of features:', len(FEATURES)

train_data = _loaded['training']['data']
train_labels = _loaded['training']['labels']
print 'Training dataset size: ', train_data.shape

test_data = _loaded['test']['data']
print 'Test dataset size: ', test_data.shape

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Train Naive Bayes models for each feature

In [28]:
# collects one (feature name, fitted model) tuple per facial feature
multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# convert the training samples once instead of on every fit call
train_samples = train_data.tolist()

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_samples,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
save4later.save_model(multinomials, 'MultinomialNB',
                      'Multinomial naive bayes with non-preprocessed data with no NAs',overwrite=True)

## Load the model

In [29]:
# rebind `multinomials` to the copy persisted under 'MultinomialNB' by the
# training cell's save_model call
multinomials = save4later.load_model("MultinomialNB")

Loaded pk


## Run the models on the test data

In [30]:
# run every per-feature model of the raw-data suite on the raw test samples;
# what create_generate writes is defined in tools.submit (not shown here)
submit.create_generate(test_data, multinomials, 'multinomials', verbose=True)

Predicting "left_eye_center_x"... done! (0.2s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Fit naive bayes models on 'masked' preprocessed data
We use face detection to remove the background from each image before fitting our naive Bayes models on the data.

In [31]:
# load the masked training data
train_masked = save4later.load_preprod("masked_nonas")

Loaded pk


In [32]:
# collects one (feature name, fitted model) tuple per facial feature
Mask_multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas on the masked data
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_masked,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    Mask_multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
save4later.save_model(Mask_multinomials, 'MultinomialNB_Mask',
                      'Multinomial naive bayes with masked data with no NAs',overwrite=True)

## Predict the test data

In [33]:
# NOTE(review): Mask_multinomials was fitted on train_masked, but this call is
# given the raw test_data loaded earlier -- presumably the test images need the
# same masking first; confirm whether create_generate preprocesses internally.
submit.create_generate(test_data, Mask_multinomials, 'Mask_multinomials', verbose=True)

Predicting "left_eye_center_x"... done! (0.2s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.2s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Run Naive Bayes on 'sobel' training data

In [34]:
# load the sobel training data
train_sobel = save4later.load_preprod("sobel_nonas")

Loaded pk


In [35]:
# collects one (feature name, fitted model) tuple per facial feature
sobel_multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas on the sobel data
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_sobel,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    sobel_multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
# (the description previously said "masked data", copied from the masked cell)
save4later.save_model(sobel_multinomials, 'MultinomialNB_sobel',
                      'Multinomial naive bayes with sobel data with no NAs',overwrite=True)

## Load the model

In [36]:
# rebind `sobel_multinomials` to the copy persisted under 'MultinomialNB_sobel'
# by the training cell's save_model call
sobel_multinomials = save4later.load_model("MultinomialNB_sobel")

Loaded pk


## Predict the test data using the model

In [37]:
# NOTE(review): sobel_multinomials was fitted on train_sobel, but this call is
# given the raw test_data -- presumably the test images need the same sobel
# transform first; confirm whether create_generate preprocesses internally.
submit.create_generate(test_data, sobel_multinomials, 'multinomials_sobel', verbose=True)

Predicting "left_eye_center_x"... done! (0.1s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Fit the naive bayes model on blurred HOG data

In [38]:
# load the blurred HOG training data
train_HOG = save4later.load_preprod("bhog_nonas")

Loaded pk


In [39]:
# collects one (feature name, fitted model) tuple per facial feature
HOG_multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas on the blurred HOG data
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_HOG,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    HOG_multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
save4later.save_model(HOG_multinomials, 'MultinomialNB_HOG',
                      'Multinomial naive bayes with blurred HOG data with no NAs',overwrite=True)

## Load the model

In [40]:
# rebind `HOG_multinomials` to the copy persisted under 'MultinomialNB_HOG'
# by the training cell's save_model call
HOG_multinomials = save4later.load_model("MultinomialNB_HOG")

Loaded pk


## Create the submission

In [41]:
# NOTE(review): HOG_multinomials was fitted on train_HOG, but this call is
# given the raw test_data -- presumably the test images need the same blurred
# HOG transform first; confirm whether create_generate preprocesses internally.
submit.create_generate(test_data, HOG_multinomials, 'multinomials_HOG', verbose=True)

Predicting "left_eye_center_x"... done! (0.2s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Fit a naive bayes model on data with a Laplace & Gaussian transformation

In [42]:
# load the laplace & gaussian training data
train_LapG = save4later.load_preprod("lapgauss_nonas")

Loaded pk


In [43]:
# collects one (feature name, fitted model) tuple per facial feature
LapG_multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas on the
    # Laplace & Gaussian transformed data
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_LapG,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    LapG_multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
save4later.save_model(LapG_multinomials, 'MultinomialNB_LapG',
                      'Multinomial naive bayes with Laplace and Gaussian transformed data with no NAs',overwrite=True)

## Load the model

In [44]:
# rebind `LapG_multinomials` to the copy persisted under 'MultinomialNB_LapG'
# by the training cell's save_model call
LapG_multinomials = save4later.load_model("MultinomialNB_LapG")

Loaded pk


## Create the submission

In [45]:
# NOTE(review): LapG_multinomials was fitted on train_LapG, but this call is
# given the raw test_data -- presumably the test images need the same Laplace &
# Gaussian transform first; confirm whether create_generate preprocesses internally.
submit.create_generate(test_data, LapG_multinomials, 'multinomials_LapG', verbose=True)

Predicting "left_eye_center_x"... done! (0.2s)
Predicting "left_eye_center_y"... done! (0.1s)
Predicting "right_eye_center_x"... done! (0.1s)
Predicting "right_eye_center_y"... done! (0.1s)
Predicting "left_eye_inner_corner_x"... done! (0.1s)
Predicting "left_eye_inner_corner_y"... done! (0.1s)
Predicting "left_eye_outer_corner_x"... done! (0.1s)
Predicting "left_eye_outer_corner_y"... done! (0.1s)
Predicting "right_eye_inner_corner_x"... done! (0.1s)
Predicting "right_eye_inner_corner_y"... done! (0.1s)
Predicting "right_eye_outer_corner_x"... done! (0.1s)
Predicting "right_eye_outer_corner_y"... done! (0.1s)
Predicting "left_eyebrow_inner_end_x"... done! (0.1s)
Predicting "left_eyebrow_inner_end_y"... done! (0.1s)
Predicting "left_eyebrow_outer_end_x"... done! (0.1s)
Predicting "left_eyebrow_outer_end_y"... done! (0.1s)
Predicting "right_eyebrow_inner_end_x"... done! (0.1s)
Predicting "right_eyebrow_inner_end_y"... done! (0.1s)
Predicting "right_eyebrow_outer_end_x"... done! (0.1s)
P

## Fit a naive bayes model on Gaussian blurred data

In [46]:
# load the gaussian blurred training data
# NOTE(review): the models fitted on this data end up with 18432 features (see
# the shape errors recorded in the later cells), twice the 9216 of the raw
# images -- confirm that "gauss_nonas" has the intended layout.
train_gauss = save4later.load_preprod("gauss_nonas")

Loaded pk


In [47]:
# collects one (feature name, fitted model) tuple per facial feature
gauss_multinomials = []

# candidate values of the smoothing parameter alpha for the grid search
MNparameters = {'alpha':[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]}

# fit one classifier per facial feature (label column `index`)
for index,facial_feature in enumerate(FEATURES):

    # cross-validated search over the candidate alphas on the
    # Gaussian blurred data
    alpha_search = GridSearchCV(MultinomialNB(),MNparameters)
    alpha_search.fit(train_gauss,train_labels[:,index])

    # GridSearchCV refits the best configuration on the full training data by
    # default (refit=True), so best_estimator_ is already the model that a
    # second MultinomialNB(alpha=best).fit(...) would produce
    gauss_multinomials.append((facial_feature, alpha_search.best_estimator_))

# save the models for later
save4later.save_model(gauss_multinomials, 'MultinomialNB_gauss',
                      'Multinomial naive bayes with Gaussian blurred data with no NAs',overwrite=True)

## Load the model

In [48]:
# rebind `gauss_multinomials` to the copy persisted under 'MultinomialNB_gauss'
# by the training cell's save_model call
gauss_multinomials = save4later.load_model("MultinomialNB_gauss")

Loaded pk


## Create the submission

In [49]:
# NOTE(review): this call fails in the recorded run with
# "shapes (1783,9216) and (18432,22) not aligned": the raw test_data rows are
# 9216 wide while the gauss models were fitted on 18432-wide rows. The test
# samples presumably need the same Gaussian preprocessing before prediction.
submit.create_generate(test_data, gauss_multinomials, 'multinomials_gauss', verbose=True)

Predicting "left_eye_center_x"...

ValueError: shapes (1783,9216) and (18432,22) not aligned: 9216 (dim 1) != 18432 (dim 0)

## Compare the accuracies of the different models

In [50]:
# Collect every suite with a display name and print the accuracy table.
# NOTE(review): by default each suite is scored on the raw train_data, not on
# the data it was fitted on. The recorded run fails with a shape error
# (9216-wide rows vs. a model fitted on 18432-wide rows, apparently the gauss
# suite), and the scores for the other preprocessed suites are questionable
# for the same reason.
all_models = [multinomials,Mask_multinomials,sobel_multinomials,HOG_multinomials,LapG_multinomials,gauss_multinomials]
all_model_names = ["Regular NB", "NB with masking", "NB with sobel", "NB with HOG", "NB with Laplace & Gaussian", "NB with Gaussian"]
compare_accuracies(all_models,all_model_names)

ValueError: shapes (2140,9216) and (18432,22) not aligned: 9216 (dim 1) != 18432 (dim 0)