# Ensemble Explorations

In [6]:
# General imports
import numpy as np

# set the path
import sys, os

# Make the parent of the current working directory importable so that the
# project-local `tools` package imported below can be resolved.
# os.path.dirname honours the platform's path separator and returns the
# filesystem root (not an empty string) when the cwd sits directly under it.
scriptPath = os.path.dirname(os.getcwd())

# Guarded so that re-running this cell does not pile up duplicate entries.
if scriptPath not in sys.path:
    sys.path.append(scriptPath)

# import my tools
from tools import save4later, submit, getdata

In [7]:
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor


In [8]:
# load the data
# NOTE(review): the meaning of the positional 0 and of the test=/nonas= flags
# is defined in tools.getdata, which is not visible here -- confirm there.
# The result is indexed below with the keys 'features', 'training' and 'test'.
_loaded = getdata.load_data(0, test=True, nonas=True)

# Names of the prediction targets; the model-building functions in this
# notebook index FEATURES[i] to label the model fitted on label column i.
FEATURES = _loaded['features']
print 'Number of features:', len(FEATURES)

# Training inputs and their labels (labels are later sliced as [:, i]).
train_data = _loaded['training']['data']
train_labels = _loaded['training']['labels']
print 'Training dataset size: ', train_data.shape

# Test inputs; only the 'data' entry of the test split is read.
test_data = _loaded['test']['data']
print 'Test dataset size: ', test_data.shape

Number of features: 30
Training dataset size:  (2140,)
Test dataset size:  (1783,)


## Random Forest

In [28]:
def random_forest(data, data_labels, model_label, description, verbose=True, save=False):
    ''' Fit one RandomForestRegressor per target column.

    Parameters
    ----------
    data : samples, passed unchanged to the regressor's ``fit`` and ``score``.
    data_labels : 2-D array sliced as ``data_labels[:, i]``; column ``i`` is
        the target paired with ``FEATURES[i]``.
    model_label, description : forwarded to ``save4later.save_model`` when
        ``save`` is true.
    verbose : when true, print a header followed by one line per fitted
        model showing its score on the data it was trained on. For
        scikit-learn regressors ``score`` is R^2, printed under the
        "Accuracy" heading.
    save : when true, store the fitted list through
        ``save4later.save_model`` with ``overwrite=True``.

    Returns
    -------
    List of ``(feature_name, fitted_model)`` tuples in label-column order.
    '''
    rf_list = []

    if verbose:
        print("{:30} Accuracy".format("Model"))

    # One model per label column instead of a hard-coded count of 30.
    n_targets = data_labels.shape[1]

    for feat in range(n_targets):
        target = data_labels[:, feat]

        rfc = RandomForestRegressor(criterion="mse", n_estimators=500, max_features="log2")
        rfc.fit(data, target)

        rf_list.append((FEATURES[feat], rfc))

        if verbose:
            # Score the forest that was just fitted. The previous code scored
            # `abc`, a name that is not defined inside this function.
            _acc = rfc.score(data, target)
            print(" - {f:<27} {a:.3%}".format(f=FEATURES[feat], a=_acc))

    if save:
        save4later.save_model(rf_list, model_label, description, overwrite=True)

    return rf_list


In [13]:
%%time
# Fit the random forest suite on the training set. save=True makes
# random_forest hand the fitted list to save4later.save_model under the
# label 'RForig'.
rf_orig = random_forest(train_data.tolist(), train_labels, 'RForig',
                        'Base Random Forest noNAs', save=True)

Training feature: left_eye_center_x    SCORE: 90.945%
Training feature: left_eye_center_y    SCORE: 93.601%
Training feature: right_eye_center_x    SCORE: 91.594%
Training feature: right_eye_center_y    SCORE: 93.130%
Training feature: left_eye_inner_corner_x    SCORE: 92.355%
Training feature: left_eye_inner_corner_y    SCORE: 93.272%
Training feature: left_eye_outer_corner_x    SCORE: 92.013%
Training feature: left_eye_outer_corner_y    SCORE: 93.581%
Training feature: right_eye_inner_corner_x    SCORE: 91.570%
Training feature: right_eye_inner_corner_y    SCORE: 93.151%
Training feature: right_eye_outer_corner_x    SCORE: 93.012%
Training feature: right_eye_outer_corner_y    SCORE: 92.970%
Training feature: left_eyebrow_inner_end_x    SCORE: 92.778%
Training feature: left_eyebrow_inner_end_y    SCORE: 93.678%
Training feature: left_eyebrow_outer_end_x    SCORE: 92.275%
Training feature: left_eyebrow_outer_end_y    SCORE: 92.805%
Training feature: right_eyebrow_inner_end_x    SCORE: 

[('left_eye_center_x',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
             verbose=0, warm_start=False)),
 ('left_eye_center_y',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
             verbose=0, warm_start=False)),
 ('right_eye_center_x',
  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
             max_features='log2', max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_jobs=1, oob_score=F

In [16]:
# Rebind rf_orig from the model stored under the label 'RForig'.
# NOTE(review): the recorded output ("Loaded pk") suggests a pickle file --
# only load files produced by this project; confirm in tools.save4later.
rf_orig = save4later.load_model('RForig')

Loaded pk


In [17]:
# Build the submission for the test set from the random forest suite.
# The recorded output shows a CSV written as
# ../../data/submissions/RForig_submission.csv.
submit.create_generate(test_data, rf_orig, 'RForig')


... Created the csv file: ../../data/submissions/RForig_submission.csv


## AdaBoost

In [None]:
def adaboost(data, data_labels, model_label, description, verbose=True, save=False):
    ''' Fit one AdaBoostRegressor (decision-tree base learner) per target column.

    Parameters
    ----------
    data : samples. Either an object exposing ``.tolist()`` (e.g. a numpy
        array) or an already converted list; both are accepted.
    data_labels : 2-D array sliced as ``data_labels[:, i]``; column ``i`` is
        the target paired with ``FEATURES[i]``.
    model_label, description : forwarded to ``save4later.save_model`` when
        ``save`` is true.
    verbose : when true, print a header followed by one line per fitted
        model showing its score on the data it was trained on. For
        scikit-learn regressors ``score`` is R^2, printed under the
        "Accuracy" heading.
    save : when true, store the fitted list through
        ``save4later.save_model``.

    Returns
    -------
    List of ``(feature_name, fitted_model)`` tuples in label-column order.
    '''
    # Callers in this notebook pass `train_data.tolist()`, so `data` may
    # already be a list; convert only when conversion is available, and do it
    # once rather than on every fit/score call.
    samples = data.tolist() if hasattr(data, 'tolist') else data

    ab_list = []

    if verbose:
        print("{:30} Accuracy".format("Model"))

    # One model per label column instead of a hard-coded count of 30.
    n_targets = data_labels.shape[1]

    for feat in range(n_targets):
        target = data_labels[:, feat]

        # Regression targets need AdaBoostRegressor, not a classifier. The
        # base learner is passed positionally because its keyword name
        # differs between scikit-learn releases. max_depth=3 mirrors the
        # library's documented default base learner for AdaBoostRegressor.
        booster = AdaBoostRegressor(tree.DecisionTreeRegressor(max_depth=3),
                                    n_estimators=10)
        booster.fit(samples, target)

        ab_list.append((FEATURES[feat], booster))

        if verbose:
            _acc = booster.score(samples, target)
            print(" - {f:<27} {a:.3%}".format(f=FEATURES[feat], a=_acc))

    if save:
        save4later.save_model(ab_list, model_label, description)

    return ab_list


In [14]:
%%time

# Single-target experiment: boost with 50 estimators on the first label
# column only (train_labels[:,0]), leaving the base learner at the library
# default.
abc = AdaBoostRegressor(n_estimators=50, learning_rate=1)
abc.fit(train_data.tolist(), train_labels[:,0])

# Scored on the same data the model was fitted on, so this is a training
# score (R^2 for scikit-learn regressors), not a held-out estimate.
abc.score(train_data.tolist(), train_labels[:,0])

CPU times: user 2min 43s, sys: 4.04 s, total: 2min 48s
Wall time: 5min 1s


In [None]:
# Fit the AdaBoost suite on the training set. save=True makes adaboost hand
# the fitted list to save4later.save_model under the label 'Adaboost'.
# Note that the samples are converted with .tolist() here, before the call.
adaboost_suite = adaboost(train_data.tolist(), train_labels, 'Adaboost',
                        'Base AdaBoost noNAs', save=True)