# Setup

In [100]:
%matplotlib inline
import numpy as np
import pandas as pd

import sklearn
import sklearn.datasets
import sklearn.preprocessing
import sklearn.model_selection

import sklearn.dummy
import sklearn.tree
import sklearn.linear_model
import sklearn.neighbors
import sklearn.neural_network
import sklearn.naive_bayes
import sklearn.svm

import ds_utils.explore
import ds_utils.preprocessing

from secrets import KAGGLE_USER, KAGGLE_PW

# Data
Reference: [A Kaggler's Guide to Model Stacking in Practice](http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/)

In [132]:
# Column roles: the two coordinate columns are the model inputs and
# 'Competitor' is the label to predict.
features = ['XCoord', 'YCoord']
target = 'Competitor'

# Both splits are read straight from the MLPB repository on GitHub
# (requires network access).
train = pd.read_csv('https://raw.githubusercontent.com/ben519/MLPB/master/Problems/Classify%20Dart%20Throwers/_Data/train.csv')
test = pd.read_csv('https://raw.githubusercontent.com/ben519/MLPB/master/Problems/Classify%20Dart%20Throwers/_Data/test.csv')

X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

# Baseline models

In [103]:
# Fit each untuned baseline classifier on the training set and record its
# accuracy on the test set, keyed by the estimator's class name.
baseline_scores = {}
for mdl in [
        sklearn.dummy.DummyClassifier(),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.naive_bayes.BernoulliNB(),
        sklearn.neighbors.KNeighborsClassifier(),
        sklearn.linear_model.Perceptron(),
        sklearn.svm.SVC(),
]:
    baseline_scores[type(mdl).__name__] = mdl.fit(X_train, y_train).score(X_test, y_test)

# A bare expression inside a loop body is not displayed under IPython's
# default settings, so the scores are collected above and shown once here,
# labeled by model.
pd.Series(baseline_scores, name='test_accuracy')

0.16216216216216217

0.78378378378378377

0.16216216216216217

0.70270270270270274

0.29729729729729731

0.32432432432432434

# Implementation-1 [How to Implement Stacked Generalization From Scratch With Python](http://machinelearningmastery.com/implementing-stacking-scratch-python/)

## Preprocessing

In [254]:
# Learn the class labels from the training target, then convert that target
# into indicator columns with the fitted encoder.
label_binarizer = sklearn.preprocessing.LabelBinarizer().fit(y_train)
y_train_encoded = label_binarizer.transform(y_train)

In [145]:
# Encode the test labels with the binarizer created in the cell above.
# NOTE(review): y_test_encoded is not read by any later cell visible here — confirm it is still needed.
y_test_encoded = label_binarizer.transform(y_test)

## Level 1

In [257]:
# First base learner: k-nearest neighbours (k=20) fitted on the full training set.
mdl_1 = sklearn.neighbors.KNeighborsClassifier(n_neighbors=20)
mdl_1.fit(X_train, y_train)
# Level-1 output is predicted on the same rows the model was fitted on, so the
# level-2 model is trained on in-sample predictions.
pred_level_1_mdl_1 = mdl_1.predict(X_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=20, p=2,
           weights='uniform')

In [258]:
# Second base learner: support vector classifier with default settings,
# fitted on the full training set.
mdl_2 = sklearn.svm.SVC()
mdl_2.fit(X_train, y_train)
# As with mdl_1, these are predictions on the rows the model was fitted on.
pred_level_1_mdl_2 = mdl_2.predict(X_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Level 2

In [259]:
# Fresh encoder for turning level-1 class predictions into indicator columns;
# rebinding the name replaces the binarizer created during preprocessing.
label_binarizer = sklearn.preprocessing.LabelBinarizer()

In [260]:
# Fit the encoder on the true training labels rather than on mdl_1's
# predictions: the indicator columns then cover every class, including any
# class that mdl_1 never predicted but mdl_2 (or a test-time prediction) does.
label_binarizer.fit(y_train)
pred_level_1_mdl_1_encoded = label_binarizer.transform(pred_level_1_mdl_1)
pred_level_1_mdl_2_encoded = label_binarizer.transform(pred_level_1_mdl_2)

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [261]:
# Level-2 design matrix: both models' encoded predictions placed side by side
# (mdl_1's columns first, then mdl_2's).
X_train_level_2 = np.hstack([pred_level_1_mdl_1_encoded, pred_level_1_mdl_2_encoded])

In [262]:
# Meta-learner that combines the level-1 predictions (default hyperparameters).
mdl_level_2 = sklearn.linear_model.LogisticRegression()

In [263]:
# Train the meta-learner on the stacked level-1 predictions against the true training labels.
mdl_level_2.fit(X_train_level_2, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

## Predict

In [264]:
# Test-set score of the KNN base model on its own, as a reference point for the stack.
mdl_1.score(X_test, y_test)

0.54054054054054057

In [265]:
# Test-set score of the SVC base model on its own, as a reference point for the stack.
mdl_2.score(X_test, y_test)

0.32432432432432434

In [266]:
# Push the test set through the same two-stage pipeline used for training.

# Base model 1: predict, then encode with the level-2 binarizer.
pred_level_1_mdl_1_test = mdl_1.predict(X_test)
pred_level_1_mdl_1_test_encoded = label_binarizer.transform(pred_level_1_mdl_1_test)

# Base model 2: same treatment.
pred_level_1_mdl_2_test = mdl_2.predict(X_test)
pred_level_1_mdl_2_test_encoded = label_binarizer.transform(pred_level_1_mdl_2_test)

# Column-wise join in the same order as the training matrix (mdl_1 first),
# then score the meta-learner on the true test labels.
X_test_level_2 = np.concatenate(
    [pred_level_1_mdl_1_test_encoded, pred_level_1_mdl_2_test_encoded],
    axis=1,
)
mdl_level_2.score(X_test_level_2, y_test)

0.56756756756756754

# Implementation-2
[Quora](https://www.quora.com/What-is-stacking-in-machine-learning) and [Stacking (RapidMiner Studio Core)](http://docs.rapidminer.com/studio/operators/modeling/predictive/ensembles/stacking.html)

1. Split the training set into two disjoint sets.
2. Train several base learners on the first part.
3. Test the base learners on the second part.
4. Using the predictions from 3) as the inputs, and the correct responses as the outputs, train a higher level learner.

## Preprocessing

In [221]:
# Hold out 30% of the training rows with a fixed seed: part 1 is used to fit
# the base learners, part 2 supplies the predictions the meta-learner trains on.
(X_train_part_1, X_train_part_2,
 y_train_part_1, y_train_part_2) = sklearn.model_selection.train_test_split(
    X_train, y_train, random_state=55, test_size=0.3)

In [222]:
# NOTE(review): label_binarizer here is whatever instance an earlier section left
# bound (this section only creates its own encoder further down), and
# y_test_encoded is not read by any later cell visible here — confirm this cell is needed.
y_test_encoded = label_binarizer.transform(y_test)

## Level 1

In [243]:
# First base learner: k-nearest neighbours (k=20) fitted on part 1 only.
mdl_1 = sklearn.neighbors.KNeighborsClassifier(n_neighbors=20)
mdl_1.fit(X_train_part_1, y_train_part_1)
# Predict on part 2, which the model was not fitted on, so the level-2
# training inputs are out-of-sample predictions.
pred_level_1_mdl_1 = mdl_1.predict(X_train_part_2)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=20, p=2,
           weights='uniform')

In [244]:
# Second base learner: support vector classifier with default settings,
# fitted on part 1 only.
mdl_2 = sklearn.svm.SVC()
mdl_2.fit(X_train_part_1, y_train_part_1)
# Out-of-sample predictions on part 2, matching what is done for mdl_1.
pred_level_1_mdl_2 = mdl_2.predict(X_train_part_2)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Level 2

In [245]:
# Fresh encoder for this implementation's level-1 predictions; rebinding the
# name replaces any binarizer bound by earlier cells.
label_binarizer = sklearn.preprocessing.LabelBinarizer()

In [246]:
# Fit the encoder on the true training labels rather than on mdl_1's
# predictions for part 2: the indicator columns then cover every class,
# including any class that mdl_1 never predicted on this subset but mdl_2
# (or a test-time prediction) does.
label_binarizer.fit(y_train)
pred_level_1_mdl_1_encoded = label_binarizer.transform(pred_level_1_mdl_1)
pred_level_1_mdl_2_encoded = label_binarizer.transform(pred_level_1_mdl_2)

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [247]:
# Level-2 design matrix for the part-2 rows: both models' encoded predictions
# placed side by side (mdl_1's columns first, then mdl_2's).
X_train_level_2 = np.hstack([pred_level_1_mdl_1_encoded, pred_level_1_mdl_2_encoded])

In [248]:
# Meta-learner that combines the level-1 predictions (default hyperparameters).
mdl_level_2 = sklearn.linear_model.LogisticRegression()

In [249]:
# Train the meta-learner on the part-2 predictions against the true part-2 labels.
mdl_level_2.fit(X_train_level_2, y_train_part_2)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

## Predict

In [250]:
# Test-set score of the KNN base model (fitted on part 1 only), as a reference point for the stack.
mdl_1.score(X_test, y_test)

0.54054054054054057

In [251]:
# Test-set score of the SVC base model (fitted on part 1 only), as a reference point for the stack.
mdl_2.score(X_test, y_test)

0.29729729729729731

In [252]:
# Push the test set through the same two-stage pipeline used for training.

# Base model 1: predict, then encode with the level-2 binarizer.
pred_level_1_mdl_1_test = mdl_1.predict(X_test)
pred_level_1_mdl_1_test_encoded = label_binarizer.transform(pred_level_1_mdl_1_test)

# Base model 2: same treatment.
pred_level_1_mdl_2_test = mdl_2.predict(X_test)
pred_level_1_mdl_2_test_encoded = label_binarizer.transform(pred_level_1_mdl_2_test)

# Column-wise join in the same order as the training matrix (mdl_1 first),
# then score the meta-learner on the true test labels.
X_test_level_2 = np.concatenate(
    [pred_level_1_mdl_1_test_encoded, pred_level_1_mdl_2_test_encoded],
    axis=1,
)
mdl_level_2.score(X_test_level_2, y_test)

0.72972972972972971

# Implementation-3 [A Kaggler's Guide to Model Stacking in Practice](http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/)

In [129]:
# TODO: implement the stacking procedure described in the guide linked in this section's heading.

# References
- http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/
- http://machinelearningmastery.com/implementing-stacking-scratch-python/
- http://docs.rapidminer.com/studio/operators/modeling/predictive/ensembles/stacking.html
- https://www.quora.com/What-is-stacking-in-machine-learning