In [None]:
# import dependencies and global settings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from sklearn import preprocessing
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

import pickle

In [None]:
# Read the model inputs from disk: X holds the feature columns and y the
# target, each using the first CSV column as its index.
features_path = './data_frames/housing_X_features.csv'
target_path = './data_frames/housing_y_target.csv'
X, y = (pd.read_csv(csv_path, index_col=0) for csv_path in (features_path, target_path))

In [None]:
# Replace every object-dtype column of X with label-encoded values; the single
# encoder instance is re-fitted for each column in turn.
label_encoder = preprocessing.LabelEncoder()
object_columns = X.select_dtypes(include=['object']).columns.tolist()

for col in object_columns:
    X[col] = label_encoder.fit_transform(X[col])

In [None]:
# Base estimator for the search: gradient boosting with a fixed learning
# rate and seed, fitted with the squared-error loss.
reg_boost_model = GradientBoostingRegressor(
    loss='squared_error',
    learning_rate=0.1,
    random_state=0,
)

# Hyper-parameter grid: depths 1..20 crossed with 10, 30, ..., 190 estimators.
grid_para_forest = dict(
    max_depth=range(1, 21),
    n_estimators=range(10, 200, 20),
)

In [None]:
# Score each grid point by R^2 over k shuffled folds (seeded so the split is repeatable).
k = 5
cv = KFold(n_splits=k, shuffle=True, random_state=12)
reg_boost_clf = GridSearchCV(
    estimator=reg_boost_model,
    param_grid=grid_para_forest,
    scoring='r2',
    cv=cv,
)

In [None]:
%time reg_boost_clf.fit(X, np.ravel(y))

In [None]:
# Alias the grid search object under the name the cells below use.
reg_boost = reg_boost_clf

# Save the grid search object to a pickle file. The object is `reg_boost`
# (dumping the undefined name `reg_boosting` raised NameError), and the
# context manager closes the file even if pickling fails.
filename = './models/reg_boosting.pkl'
with open(filename, 'wb') as outfile:
    pickle.dump(reg_boost, outfile)

In [None]:
# reload the saved grid search from reg_boosting.pkl (uncomment to skip re-fitting)
# filename = './models/reg_boosting.pkl'
# file = open(filename, 'rb')
# reg_boost = pickle.load(file)

In [None]:
# Report the mean cross-validated test score at the grid search's best index.
best_cv_score = reg_boost.cv_results_['mean_test_score'][reg_boost.best_index_]
print('Max Mean Test Value (r^2):', best_cv_score, sep='')

# Mean absolute error of the best estimator, predicted on X itself -- the
# same data that was passed to fit(), not a held-out set.
y_pred = reg_boost.best_estimator_.predict(X)
mae = mean_absolute_error(y, y_pred)
print('Mean Absolute Error:', mae, sep='')

In [None]:
# Tabulate every grid point next to its mean CV score, then reshape so that
# rows are max_depth values and columns are n_estimators values.
grid_params = pd.DataFrame(reg_boost.cv_results_['params'])
grid_scores = pd.DataFrame(reg_boost.cv_results_['mean_test_score'], columns=['Score'])
df = pd.concat([grid_params, grid_scores], axis=1)

cv_table = df.pivot(index='max_depth', columns='n_estimators')
cv_table

In [None]:
# Surface of the mean CV score over the hyper-parameter grid.
# In a plotly Surface the columns of z run along x and the rows run along y.
# cv_table is indexed by max_depth (rows) with n_estimators as columns, so the
# x axis is n_estimators and the y axis is max_depth. Passing the real grid
# values as x/y lets plotly label the ticks itself instead of relying on
# hand-written tick text that can drift from the data.
depth_values = cv_table.index.to_numpy()
estimator_values = cv_table.columns.get_level_values(-1).to_numpy()

fig = go.Figure(data=[go.Surface(z=cv_table.values,
                                 x=estimator_values,
                                 y=depth_values)])

fig.update_layout(title='Boosting Grid Search',
                  autosize=False,
                  width=800,
                  height=800,
                  margin=dict(l=65, r=50, b=65, t=90),
                  scene=dict(xaxis_title='N Estimators',
                             yaxis_title='Max Depth',
                             zaxis_title='Score'))

fig.show()