## Stacking

Reference: [Stacking made easy with Sklearn](https://towardsdatascience.com/stacking-made-easy-with-sklearn-e27a0793c92b) (Towards Data Science).


In [25]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the iris data as a (features, labels) pair instead of a Bunch object.
X, y = load_iris(return_X_y=True)



In [21]:
# Base (level-0) learners for the stack, given as (label, estimator) pairs.
# Each label names the model it is attached to, so lookups by label
# (e.g. clf.named_estimators_['knn_1']) point at the estimator you expect.
base_learners = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5)),
]

In [22]:
# Stack the base learners under a logistic-regression meta learner.
# cv=10 sets the cross-validation used to produce the base learners'
# predictions that the meta learner is trained on.
meta_learner = LogisticRegression()
clf = StackingClassifier(estimators=base_learners, final_estimator=meta_learner, cv=10)

In [23]:

# Hold out a stratified test split; no test_size is given, so
# train_test_split falls back to its default of 25% of the samples.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)


In [24]:

# Fit the stack on the training split, then show accuracy on the held-out split.
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.9473684210526315

In [14]:
from sklearn.model_selection import LeaveOneOut

# Same stack as before, but with a leave-one-out splitter passed as cv.
# verbose=True turns on the progress messages printed during fit.
loo = LeaveOneOut()
clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=LogisticRegression(),
    cv=loo,
    verbose=True,
)

In [15]:
# Fit the leave-one-out stack, then show accuracy on the held-out split.
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 112 out of 112 | elapsed:    2.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 112 out of 112 | elapsed:    0.2s finished


0.9473684210526315

### Multi-layer Stacking

In [16]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

# Layer one: the learners that are fitted on the raw features.
layer_one_estimators = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5)),
]

# Layer two: a second stack that consumes layer one's predictions.
# The decision tree is seeded like the forests so that re-running the cell
# gives the same score (an unseeded tree breaks split ties randomly).
layer_two_estimators = [
    ('dt_2', DecisionTreeClassifier(random_state=42)),
    ('rf_2', RandomForestClassifier(n_estimators=50, random_state=42)),
]
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=LogisticRegression())

# Create the final model by plugging the layer-two stack in as the
# final estimator of the layer-one stack.
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

# Stratified hold-out split, then fit and show accuracy on the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
clf.fit(X_train, y_train).score(X_test, y_test)

0.9736842105263158

In [27]:
import pandas as pd

# Load the sonar dataset; the CSV has no header row.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = pd.read_csv(url, header=None)

# Raw values of the whole frame, kept for any later cell that reads `data`.
# Because the last column holds the class label, this array mixes numbers
# and labels and therefore should not be sliced to build the feature matrix.
data = dataframe.values

# Split into input and output elements: every column but the last is a
# feature, the last column is the target. Converting the feature columns
# directly gives a float array rather than an object-dtype slice of `data`.
X = dataframe.iloc[:, :-1].to_numpy(dtype=float)
y = dataframe.iloc[:, -1].to_numpy()
print(X.shape, y.shape)

(208, 60) (208,)


In [35]:
# Layer one: the learners that are fitted on the raw features.
layer_one_estimators = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5)),
]

# Layer two: a second stack that consumes layer one's predictions.
# The decision tree is seeded like the forests so that re-running the cell
# gives the same scores (an unseeded tree breaks split ties randomly).
layer_two_estimators = [
    ('dt_2', DecisionTreeClassifier(random_state=42)),
    ('rf_2', RandomForestClassifier(n_estimators=50, random_state=42)),
]
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=LogisticRegression())

# Create the final model by plugging the layer-two stack in as the
# final estimator of the layer-one stack.
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

# Stratified hold-out split, then keep the fitted model for the scoring cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
trained_model = clf.fit(X_train, y_train)




In [36]:
# Accuracy on the data the model was fitted on; compare it with the
# held-out score to see how much the model overfits.
trained_model.score(X_train, y_train)



0.9807692307692307

In [37]:
# Accuracy on the held-out split, which the model did not see during fit.
trained_model.score(X_test, y_test)

0.7692307692307693

In [29]:
# Base (level-0) learners for the single-layer stack, as (label, estimator) pairs.
# Each label names the model it is attached to, so lookups by label
# (e.g. clf.named_estimators_['knn_1']) point at the estimator you expect.
base_learners = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5)),
]

In [32]:
# Single-layer stack on whatever X, y currently hold: a logistic-regression
# meta learner on top of the base learners, with cv=10 for the
# cross-validated predictions it is trained on.
meta_learner = LogisticRegression()
clf = StackingClassifier(estimators=base_learners, final_estimator=meta_learner, cv=10)

# Stratified hold-out split, then fit. fit() returns the estimator itself,
# so trained_model and clf refer to the same fitted object.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
clf.fit(X_train, y_train)
trained_model = clf


In [33]:
# Accuracy on the data the model was fitted on; compare it with the
# held-out score to see how much the model overfits.
trained_model.score(X_train, y_train)


0.9615384615384616

In [34]:
# Accuracy on the held-out split, which the model did not see during fit.
trained_model.score(X_test, y_test)

0.75