<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#On-Stacking" data-toc-modified-id="On-Stacking-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>On Stacking</a></span><ul class="toc-item"><li><span><a href="#Initial-Data-Prep" data-toc-modified-id="Initial-Data-Prep-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Initial Data Prep</a></span></li><li><span><a href="#Splitting" data-toc-modified-id="Splitting-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Splitting</a></span></li><li><span><a href="#Setting-Up-a-Pipeline" data-toc-modified-id="Setting-Up-a-Pipeline-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Setting Up a Pipeline</a></span></li><li><span><a href="#Setting-Up-a-Stack" data-toc-modified-id="Setting-Up-a-Stack-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Setting Up a Stack</a></span></li><li><span><a href="#Comparison-with-Base-Estimators" data-toc-modified-id="Comparison-with-Base-Estimators-1.5"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>Comparison with Base Estimators</a></span></li></ul></li></ul></div>

In [None]:
import xlrd
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import StackingRegressor

# On Stacking

Remember weighted averaging? Stacking is about using a data science (DS) model to estimate those weights for us. This means we'll have one layer of base estimators and a second layer (the meta-estimator) that is "**trained to optimally combine the model predictions to form a new set of predictions**". See [this short blog post](https://blogs.sas.com/content/subconsciousmusings/2017/05/18/stacked-ensemble-models-win-data-science-competitions/) for more.

## Initial Data Prep

In [None]:
# Load the sales workbook. xlrd's diagnostic output is routed to the null
# device through `logfile`; the `with` block guarantees that handle is closed
# once the sheet has been read instead of leaking for the life of the kernel.
# NOTE: `wb` keeps a reference to the (now closed) log handle, so it should not
# be reused for further reads after this cell.
with open(os.devnull, 'w') as xlrd_log:
    wb = xlrd.open_workbook('data/Sales Report.xls',
                            logfile=xlrd_log)
    sales = pd.read_excel(wb)

# Keep only rows that have no missing value in any column.
sales = sales.dropna()

In [None]:
# Show the dtype pandas assigned to each column of the cleaned frame.
sales.dtypes

In [None]:
# Number of rows for each distinct value of 'Category'.
sales['Category'].value_counts()

In [None]:
# Number of rows for each distinct value of 'Sub-Category'.
sales['Sub-Category'].value_counts()

In [None]:
# Column labels grouped by kind; these become the column selectors handed to
# the ColumnTransformer further down.
numeric_labels = ['Discount', 'Profit']
categorical_labels = ['Category', 'Sub-Category']

# Selecting through `sales` yields pandas Index objects and fails fast with a
# KeyError if any label is absent from the loaded sheet.
X_num = sales[numeric_labels].columns
X_cat = sales[categorical_labels].columns

In [None]:
# Feature matrix: numeric columns first, then categorical ones. The labels are
# taken from X_num / X_cat rather than retyped, so the columns given to the
# model can never drift from the ones the preprocessing step selects.
X = sales[[*X_num, *X_cat]]

# Regression target.
y = sales['Sales']

## Splitting

In [None]:
# Split into train and test sets; the fixed random_state keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Setting Up a Pipeline

In [None]:
# Numeric features: standardize with StandardScaler.
numTrans = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical features: dense one-hot encoding, dropping the first level of
# each feature. scikit-learn renamed the dense-output flag from `sparse` to
# `sparse_output` in 1.2 and removed the old name in 1.4, so try the current
# keyword first and fall back to the legacy one on older installations.
try:
    one_hot = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    one_hot = OneHotEncoder(drop='first', sparse=False)

catTrans = Pipeline(steps=[
    ('ohe', one_hot)
])

In [None]:
# Pair each preprocessing pipeline with the columns it should receive, as
# (name, transformer, columns) triples.
column_steps = [
    ('num', numTrans, X_num),
    ('cat', catTrans, X_cat),
]

# Single preprocessor that applies both pipelines to their column groups.
pp = ColumnTransformer(transformers=column_steps)

In [None]:
# Fit the preprocessor on the training split only.
pp.fit(X_train)

In [None]:
# Apply the fitted preprocessor to the training features.
X_tr_pp = pp.transform(X_train)

## Setting Up a Stack

In [None]:
# Base layer of the stack as (name, estimator) pairs. The decision tree is
# seeded with the same value as the train/test split so that refitting the
# stack gives the same score on every run.
estimators = [
    ('lr', LinearRegression()),
    ('knn', KNeighborsRegressor()),
    ('rt', DecisionTreeRegressor(random_state=42))
]

# No final_estimator is passed, so scikit-learn's default meta-learner is used
# to combine the base predictions.
sr = StackingRegressor(estimators=estimators)

In [None]:
# Fit the stacked ensemble on the preprocessed training data.
sr.fit(X_tr_pp, y_train)

In [None]:
# Preprocess the test features with the transformer fitted on the training
# split (no refitting on test data).
X_test_pp = pp.transform(X_test)

In [None]:
# Score the stack on the test split (R² under scikit-learn's regressor
# `score` convention).
sr.score(X_test_pp, y_test)

## Comparison with Base Estimators

In [None]:
# Baseline: linear regression on its own, trained and scored on the same
# preprocessed split as the stack.
lr = LinearRegression()
lr.fit(X_tr_pp, y_train)
lr.score(X_test_pp, y_test)

In [None]:
# Baseline: k-nearest-neighbours regression on its own, trained and scored on
# the same preprocessed split as the stack.
knn = KNeighborsRegressor()
knn.fit(X_tr_pp, y_train)
knn.score(X_test_pp, y_test)

In [None]:
# Baseline: a single decision tree, trained and scored on the same
# preprocessed split as the stack. It is seeded with the same value as the
# train/test split so the reported score is reproducible across runs.
rt = DecisionTreeRegressor(random_state=42).fit(X_tr_pp, y_train)
rt.score(X_test_pp, y_test)