# Ensemble Explorations

In [1]:
# General imports
import numpy as np

# set the path
import sys, os

# Make the parent of the working directory importable so the project-local
# `tools` package resolves. os.path handles the path separator portably and,
# unlike splitting on "/", never produces an empty string when the working
# directory sits directly under the filesystem root.
pathArr = os.getcwd().split(os.sep)  # retained in case a later cell reads it
scriptPath = os.path.dirname(os.getcwd())
if scriptPath not in sys.path:  # re-running this cell must not stack duplicates
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

In [2]:
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor 
from sklearn.ensemble import AdaBoostRegressor

In [3]:
# load the data
_loaded = getdata.load_data(0, test=True, nonas=True)

FEATURES = _loaded['features']
print 'Number of features:', len(FEATURES)

train_data = _loaded['training']['data']
train_labels = _loaded['training']['labels']
print 'Training dataset size: ', train_data.shape

test_data = _loaded['test']['data']
print 'Test dataset size: ', test_data.shape

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Random Forest

In [15]:
def random_forest(data, data_labels, model_label, description, verbose=True, save=False):
    ''' Initialize random forest regressor suite '''
    rf_list = []

    for feat in xrange(30):

        if verbose:
            print 'Training feature:', FEATURES[feat],

        rfc = RandomForestRegressor(criterion="mse", n_estimators=500, max_features="log2")
        rfc.fit(data.tolist(), data_labels[:,feat])

        rf_list.append( (FEATURES[feat], rfc) )
        
        if verbose:
            print "   SCORE: {:.3%}".format(rfc.score(data.tolist(), data_labels[:,feat]))
    
    if save:
        save4later.save_model(rf_list, model_label, description, overwrite=True)
    
    return rf_list


In [13]:
%%time
# Fit the baseline per-feature forests on the training split and persist them.
rf_orig = random_forest(
    train_data,
    train_labels,
    model_label='RForig',
    description='Base Random Forest noNAs',
    save=True,
)

Training feature: left_eye_center_x    SCORE: 90.945%
Training feature: left_eye_center_y    SCORE: 93.601%
Training feature: right_eye_center_x    SCORE: 91.594%
Training feature: right_eye_center_y    SCORE: 93.130%
Training feature: left_eye_inner_corner_x    SCORE: 92.355%
Training feature: left_eye_inner_corner_y    SCORE: 93.272%
Training feature: left_eye_outer_corner_x    SCORE: 92.013%
Training feature: left_eye_outer_corner_y    SCORE: 93.581%
Training feature: right_eye_inner_corner_x    SCORE: 91.570%
Training feature: right_eye_inner_corner_y    SCORE: 93.151%
Training feature: right_eye_outer_corner_x    SCORE: 93.012%
Training feature: right_eye_outer_corner_y    SCORE: 92.970%
Training feature: left_eyebrow_inner_end_x    SCORE: 92.778%
Training feature: left_eyebrow_inner_end_y    SCORE: 93.678%
Training feature: left_eyebrow_outer_end_x    SCORE: 92.275%
Training feature: left_eyebrow_outer_end_y    SCORE: 92.805%
Training feature: right_eyebrow_inner_end_x    SCORE: 

[('left_eye_center_x',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
             verbose=0, warm_start=False)),
 ('left_eye_center_y',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
             verbose=0, warm_start=False)),
 ('right_eye_center_x',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=F

In [14]:
# Show what save4later currently has stored; the listing is expected to
# include the RForig entry written by the random forest run.
save4later.list_models()


    STORED MODELS
 Total: 6

  + nnSobel.pk : NN on Sobel Filter
  + MultinomialNB.pk : Multinomial naive bayes with non-preprocessed data with no NAs
  + nnMasked.pk : NN on Masked Faces
  + nnOrigData.pk : NN on Original Data
  + RForig.pk : Base Random Forest noNAs
  + nnBhog.pk : NN on Blur HOG


In [None]:
%%time

abc = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100, learning_rate=0.1)

abc.fit(train_data.tolist(), train_labels)
print 'Accuracy (adaboost with decision trees):', abc.score(train_data, train_labels)

In [None]:
def adaboost(data, data_labels, model_label, description, verbose=True, save=False):
    ''' Initialize AdaBoost regressor suite based on Decision Trees '''
    rf_list = []
    
    if verbose:
        print "{:30} Accuracy".format("Model")

    for feat in xrange(30):

        if verbose:
            print 'Training feature:', FEATURES[feat],

        rfc = AdaBoostClassifier(criterion="mse", n_estimators=500, max_features="log2")
        rfc.fit(data.tolist(), data_labels[:,feat])

        rf_list.append( (FEATURES[feat], rfc) )
        
        if verbose:
            _acc = rfc.score(data.tolist(), data_labels[:,feat])
            print " - {f:<27} {a:.3%}".format(f=FEATURES[index],a=_acc)
    
    if save:
        save4later.save_model(rf_list, model_label, description)
    
    return rf_list


In [None]:
# Save models
# NOTE(review): `dt` and `rfc` are not defined at notebook level in the cells
# visible here (`rfc` only exists as a local inside the training functions).
# Define them before running this cell, otherwise it raises NameError.
save4later.save_model(dt, 'dtree', 'Basic decision tree (noNAs)')
save4later.save_model(rfc, 'rforest', 'Basic random forest (noNAs)')
# The description names the model actually being stored.
save4later.save_model(abc, 'adaboost', 'Basic AdaBoost (noNAs)')
