# Stacking

In [1]:
import csv
import numpy as np
import pandas as pd
import random

from sklearn.model_selection import KFold

## Data Processing

In [2]:
#########################
# Load the datasets
train = pd.read_csv('data.csv', sep=",")
test_data = pd.read_csv('quiz.csv', sep=",")
train_data = train.iloc[:,:-1]
train_labels = train.iloc[:,-1]

all_data = pd.concat([train_data, test_data])
train_obs = len(train_data)
test_obs = len(test_data)

# Change dtype of categorical columns
categorical_columns = ['0','5','7','8','9','14','16','17','18','20','23','25','26','56','57','58']
for i in xrange(0,len(categorical_columns)):
    all_data[categorical_columns[i]] = all_data[categorical_columns[i]].astype('category')

#########################
# Only numerical data
print('Processing numerical data...')
all_data_num = all_data.drop(categorical_columns, axis=1)
train_data_num = all_data_num.iloc[0:train_obs,]
test_data_num = all_data_num.iloc[train_obs:,]

train_data_num['div'] = (train_data_num.loc[:,'60'] / train_data_num.loc[:,'59'])
train_data_num.loc[:,'div'] = train_data_num.loc[:,'div'].fillna(0)
test_data_num['div'] = (test_data_num.loc[:,'60'] / test_data_num.loc[:,'59'])
test_data_num.loc[:,'div'] = test_data_num.loc[:,'div'].fillna(0)

#########################
# Only categorial data
print('Processing categorical data...')
all_data_cat = pd.get_dummies(all_data[categorical_columns])
train_data_cat = all_data_cat.iloc[0:train_obs,]
test_data_cat = all_data_cat.iloc[train_obs:,]

#########################
# Ignoring two large columns ('slim')
print('Processing categorical data (slim)...')
categorical_columns_slim = ['0','5','7','8','9','14','16','17','18','20','25','26','56','57']

all_data_cat_slim = pd.get_dummies(all_data[categorical_columns_slim])
train_data_cat_slim = all_data_cat_slim.iloc[0:train_obs,]
test_data_cat_slim = all_data_cat_slim.iloc[train_obs:,]

#########################
# Combined sets
print('Combining data...')
train_data_combo = pd.concat([train_data_num, train_data_cat], axis=1)
test_data_combo = pd.concat([test_data_num, test_data_cat], axis=1)

train_data_combo_slim = pd.concat([train_data_num, train_data_cat_slim], axis=1)
test_data_combo_slim = pd.concat([test_data_num, test_data_cat_slim], axis=1)

#########################
# Clear memory
all_data, train, train_data, test_data = None, None, None, None
all_data_num, train_data_num, test_data_num = None, None, None
all_data_cat, train_data_cat, test_data_cat = None, None, None
all_data_cat_slim, train_data_cat_slim, test_data_cat_slim = None, None, None

print('Finished processing!')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Processing numerical data...
Processing categorical data...
Processing categorical data (slim)...
Combining data...
Finished processing!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [3]:
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVR

from sklearn.tree import ExtraTreeClassifier

def pred_and_error(model, test_data, test_labels):
    preds = model.predict(test_data)
    error = 1 - sum(preds == test_labels)/float(len(test_labels))
    return preds, error

def cv_run_ada(train_data, train_labels, test_data, test_labels):
    model = AdaBoostClassifier().fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_bag(train_data, train_labels, test_data, test_labels):
    model = BaggingClassifier(max_features=0.394512412319, n_estimators=435,
                              random_state=1, n_jobs=-1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_et(train_data, train_labels, test_data, test_labels):
    model = ExtraTreesClassifier(n_jobs=-1, min_samples_leaf=2, n_estimators=99,
                                 min_samples_split=3, random_state=1,
                                 max_features=1611, max_depth=None).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_knn(train_data, train_labels, test_data, test_labels, n_neigh):
    model = KNeighborsClassifier(n_neighbors = n_neigh, n_jobs=-1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_logistic(train_data, train_labels, test_data, test_labels):
    model = LogisticRegression(penalty='l1',
                               C=0.9029677391429398,
                               n_jobs=-1, random_state=1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_neural(train_data, train_labels, test_data, test_labels):
    model = MLPClassifier(hidden_layer_sizes=900).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_rf(train_data, train_labels, test_data, test_labels):
    model = RandomForestClassifier(n_jobs=-1, min_samples_leaf=1, n_estimators=77,
                                   min_samples_split=2, random_state=1, max_features=771,
                                   max_depth=None).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

####################
# Meta-classifiers
def cv_run_bag_meta(train_data, train_labels, test_data, test_labels):
    model = BaggingClassifier(max_features=0.7268891521595635, n_estimators=26,
                              random_state=1, n_jobs=-1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_et_meta(train_data, train_labels, test_data, test_labels):
    model = ExtraTreesClassifier(max_features=None,
                                 n_jobs=-1, random_state=1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

def cv_run_rf_meta(train_data, train_labels, test_data, test_labels):
    model = RandomForestClassifier(max_features=0.38988227030541617, min_samples_leaf=4,
                                   min_samples_split=2, n_estimators=112,
                                   random_state=1, n_jobs=-1).fit(train_data, train_labels)
    return pred_and_error(model, test_data, test_labels)

## Part One - Cross validation predictions and errors

In [4]:
kf = KFold(n_folds=5, shuffle=True, random_state=1)

cv_preds = []
indices = []
for i, (train, test) in enumerate(kf.split(train_data_combo)):
    # Collect the indices used for test sets
    indices = np.concatenate((indices, test))
    
    # Split into train and testing data/labels
    cv_train_data = train_data_combo.iloc[train,:]
    cv_train_data_slim = train_data_combo_slim.iloc[train,:]
    cv_train_labels = train_labels[train]
    
    cv_test_data = train_data_combo.iloc[test,:]
    cv_test_data_slim = train_data_combo_slim.iloc[test,:]
    cv_test_labels = train_labels[test]
    
    # CV predictions & errors for each classifier
    print("Starting fold #{}".format(i+1))
    preds_1, error_1 = cv_run_et(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_1))
    preds_2, error_2 = cv_run_rf(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_2))
    preds_3, error_3 = cv_run_bag(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels)
    print("Error: {}".format(error_3))
    preds_4, error_4 = cv_run_logistic(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_4))
    preds_5, error_5 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 1)
    print("Error: {}".format(error_5))
    preds_6, error_6 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 2)
    print("Error: {}".format(error_6))
    preds_7, error_7 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 4)
    print("Error: {}".format(error_7))
    preds_8, error_8 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 8)
    print("Error: {}".format(error_8))
    preds_9, error_9 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 16)
    print("Error: {}".format(error_9))
    preds_10, error_10 = cv_run_knn(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels, 32)
    print("Error: {}".format(error_10))
    preds_11, error_11 = cv_run_neural(cv_train_data_slim, cv_train_labels, cv_test_data_slim, cv_test_labels)
    print("Error: {}".format(error_11))
    
    # Collect all the fold predictions together, fold_length * 8
    fold_preds = np.column_stack((preds_1, preds_2, preds_3, preds_4, 
                                  preds_5, preds_6, preds_7, preds_8,
                                  preds_9, preds_10, preds_11))
    
    # Vertically stack the current fold predictions below the previous ones
    if len(cv_preds) == 0:
        cv_preds = fold_preds
    else:
        cv_preds = np.vstack((cv_preds, fold_preds))
        
    print('')

Starting fold #1
Error: 0.0527436140019
Error: 0.052270577105


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Error: 0.0507726269316
Error: 0.105605487228
Error: 0.0830179754021
Error: 0.0810864080732
Error: 0.0963418479975
Error: 0.118929359823
Error: 0.138087354147
Error: 0.158388520971
Error: 0.0599180069379

Starting fold #2
Error: 0.0538473667613
Error: 0.0512850835699


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Error: 0.0509697256386
Error: 0.10733995585
Error: 0.0827026174708
Error: 0.0818353831599
Error: 0.0969331441186
Error: 0.117589088616
Error: 0.136549984232
Error: 0.157481866919
Error: 0.0656732891832

Starting fold #3
Error: 0.0547561792881
Error: 0.0548744431742


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Error: 0.0529427997004
Error: 0.110892103915
Error: 0.0835731462136
Error: 0.0823510860567
Error: 0.0971340718256
Error: 0.119998423148
Error: 0.138211061615
Error: 0.16233689439
Error: 0.069026688217

Starting fold #4
Error: 0.0535735404265
Error: 0.0530610635865


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Error: 0.0530216422912
Error: 0.107580715102
Error: 0.0838490952813
Error: 0.0829818267828
Error: 0.0963062246225
Error: 0.11861867781
Error: 0.135806362597
Error: 0.159064926873
Error: 0.065123979974

Starting fold #5
Error: 0.0536523830173
Error: 0.0517601608389


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Error: 0.0512871052943
Error: 0.101233886546
Error: 0.0831789332597
Error: 0.0821934008752
Error: 0.0960696968502
Error: 0.11451886309
Error: 0.137343793117
Error: 0.15764576024
Error: 0.062561595774



descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

### Average CV errors for each classifier

In [5]:
cv_labels = train_labels[indices]
for i in xrange(cv_preds.shape[1]):
    print("Method #{}: {}".format(i, 1 - sum(cv_preds[:,i] == cv_labels)/float(len(cv_labels))))
cv_labels = cv_labels.as_matrix()

Method #0: 0.0537146100901
Method #1: 0.0526502518981
Method #2: 0.0517987653445
Method #3: 0.106530428818
Method #4: 0.0832643471542
Method #5: 0.0820896110756
Method #6: 0.0965569983522
Method #7: 0.117930887675
Method #8: 0.137199713017
Method #9: 0.158983577347
Method #10: 0.064460685762


### Save fold results

In [6]:
pd.DataFrame(cv_preds).to_pickle('20160422-cv_preds_stack.pkl')
np.save('20160422-cv_labels', cv_labels, allow_pickle=True)

## Part Two - Combining fold predictions

In [7]:
cv_preds_stack = pd.DataFrame(cv_preds)

kf = KFold(n_folds=10, shuffle=True, random_state=1)

cv_errors = []
for i, (train, test) in enumerate(kf.split(cv_preds_stack)):
    cv_train_data = cv_preds_stack.iloc[train,:]
    cv_train_labels = cv_labels[train]
    cv_test_data = cv_preds_stack.iloc[test,:]
    cv_test_labels = cv_labels[test]
    
    print("Starting fold #{}".format(i+1))
    preds_0, error_0 = cv_run_ada(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_0))
    preds_1, error_1 = cv_run_et_meta(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_1))
    preds_2, error_2 = cv_run_rf_meta(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_2))
    preds_3, error_3 = cv_run_bag_meta(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_3))
    preds_4, error_4 = cv_run_logistic(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}".format(error_4))
    preds_5, error_5 = cv_run_knn(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels, 1)
    print("Error: {}".format(error_5))
    preds_6, error_6 = cv_run_knn(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels, 2)
    print("Error: {}".format(error_6))
    preds_7, error_7 = cv_run_knn(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels, 4)
    print("Error: {}".format(error_7))
    preds_8, error_8 = cv_run_knn(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels, 8)
    print("Error: {}".format(error_8))
    preds_9, error_9 = cv_run_knn(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels, 16)
    print("Error: {}".format(error_9))
    preds_10, error_10 = cv_run_neural(cv_train_data, cv_train_labels, cv_test_data, cv_test_labels)
    print("Error: {}\n".format(error_10))
    
    fold_errors = [error_0, error_1, error_2, error_3, 
                   error_4, error_5, error_6, error_7, 
                   error_8, error_9, error_10]
    cv_errors.append(fold_errors)
    
method_errors = pd.DataFrame(cv_errors).mean(axis=0)
for i, method_error in enumerate(method_errors):
    print("Error for method #{}: {}".format(i, method_error))
    
print('\nBest method is #{}: {}'.format(method_errors.idxmin(), method_errors[method_errors.idxmin()]))

Starting fold #1
Error: 0.0514033427941
Error: 0.0521128981394
Error: 0.0514033427941
Error: 0.0502995900347
Error: 0.0514821822769
Error: 0.0630715862504
Error: 0.0588142541785
Error: 0.0540838852097
Error: 0.0517187007253
Error: 0.0517187007253
Error: 0.0503784295175

Starting fold #2
Error: 0.0523494165878
Error: 0.0499842321034
Error: 0.0496688741722
Error: 0.0500630715863
Error: 0.0520340586566
Error: 0.058183538316
Error: 0.060706401766
Error: 0.0555818353832
Error: 0.0509303058972
Error: 0.0502995900347
Error: 0.0505361084831

Starting fold #3
Error: 0.05353200883
Error: 0.0520340586566
Error: 0.0515610217597
Error: 0.0516398612425
Error: 0.0528224534847
Error: 0.0676442762535
Error: 0.0640176600442
Error: 0.0540838852097
Error: 0.052270577105
Error: 0.0517187007253
Error: 0.0507726269316

Starting fold #4
Error: 0.0577893409019
Error: 0.0557395143488
Error: 0.0553453169347
Error: 0.0563702302113
Error: 0.0573951434879
Error: 0.0646483759067
Error: 0.066619362977
Error: 0.058893

## Train all models for export

In [8]:
print('Model 1')
model = ExtraTreesClassifier(n_jobs=-1, min_samples_leaf=2, n_estimators=99,
                                 min_samples_split=3, random_state=1,
                                 max_features=1611, max_depth=None).fit(train_data_combo, train_labels)
preds_1 = model.predict(test_data_combo)

print('Model 2')
model = RandomForestClassifier(n_jobs=-1, min_samples_leaf=1, n_estimators=77,
                                   min_samples_split=2, random_state=1, max_features=771,
                                   max_depth=None).fit(train_data_combo, train_labels)
preds_2 = model.predict(test_data_combo)

print('Model 3')
model = BaggingClassifier(max_features=0.394512412319, n_estimators=435,
                              random_state=1, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_3 = model.predict(test_data_combo_slim)

print('Model 4')
model = LogisticRegression(penalty='l1', C=0.9029677391429398,
                               n_jobs=-1, random_state=1).fit(train_data_combo, train_labels)
preds_4 = model.predict(test_data_combo)

print('Model 5')
model = KNeighborsClassifier(n_neighbors=1, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_5 = model.predict(test_data_combo_slim)

print('Model 6')
model = KNeighborsClassifier(n_neighbors=2, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_6 = model.predict(test_data_combo_slim)

print('Model 7')
model = KNeighborsClassifier(n_neighbors=4, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_7 = model.predict(test_data_combo_slim)

print('Model 8')
model = KNeighborsClassifier(n_neighbors=8, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_8 = model.predict(test_data_combo_slim)

print('Model 9')
model = KNeighborsClassifier(n_neighbors=16, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_9 = model.predict(test_data_combo_slim)

print('Model 10')
model = KNeighborsClassifier(n_neighbors=32, n_jobs=-1).fit(train_data_combo_slim, train_labels)
preds_10 = model.predict(test_data_combo_slim)

print('Model 11')
model = MLPClassifier(hidden_layer_sizes=900).fit(train_data_combo_slim, train_labels)
preds_11 = model.predict(test_data_combo_slim)

preds = np.column_stack((preds_1, preds_2, preds_3, preds_4, 
                         preds_5, preds_6, preds_7, preds_8,
                         preds_9, preds_10, preds_11))

Model 1
Model 2
Model 3


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

Model 4
Model 5
Model 6
Model 7
Model 8
Model 9
Model 10
Model 11


descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensio

In [11]:
model = BaggingClassifier(max_features=0.6297698699152728,
                              n_estimators=60,
                              random_state=1, n_jobs=-1).fit(cv_preds_stack, cv_labels)
results = model.predict(preds)

In [12]:
with open('results/20160422-2(stack).csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(("Id","Prediction"))
    writer.writerows(zip(range(1,len(results)+1), results))