In [None]:
# import dependencies and global settings
# numpy/pandas hold the tabulated results; matplotlib/seaborn draw the charts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# notebook-wide seaborn style; the plotting cells below switch grid lines off
# again per chart with plt.grid(False)
sns.set_style('whitegrid')

In [None]:
# Tabulate the original results, one record per model.
# Each record follows the order of `result_columns`.
result_columns = ('Model', 'MAE', 'Score (r^2)', 'Number of Features', 'CPU time')
model_records = [
    ('Linear Regression w/SBS', 15905, 0.89, 24, '< 1s'),
    ('Elastic Net', 13824, 0.89, 213, '12s'),
    ('Lasso', 14102, 0.90, 213, '1min 3s'),
    ('Random Forest', 5522, 0.90, 79, '1h 9min 52s'),
    ('Gradient Boosting', 6983, 0.92, 79, '1h 28min 54s'),
]

# Pivot the records into the column -> list-of-values mapping that
# pd.DataFrame(data) consumes in the cells below.
data = {
    column: list(values)
    for column, values in zip(result_columns, zip(*model_records))
}

In [None]:
# Rank the models by r^2 (highest first) and chart each model's
# shortfall from a perfect score, i.e. 1 - r^2.
df = pd.DataFrame(data).sort_values(by='Score (r^2)', ascending=False)
X, error = df['Model'], 1 - df['Score (r^2)']

fig, ax = plt.subplots()
ax.bar(X, error, label='Scoring Error (r^2)')
ax.set_title("Score")
ax.set_ylabel("Scoring Error (r^2)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# Rank the models by mean absolute error, smallest first, and chart it.
# `df` stays in scope afterwards: the summary-table cell below reads it.
df = pd.DataFrame(data).sort_values(by='MAE')
X, MAE = df['Model'], df['MAE']

fig, ax = plt.subplots()
ax.bar(X, MAE, label='Mean Absolute Error')
ax.set_title("Mean Absolute Error")
ax.set_ylabel("Home Sales Price (USD)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# Summary table for a single model: one column, one row per metric.
# Change the index to pick another entry of data['Model']:
#   0 'Linear Regression w/SBS' | 1 'Elastic Net' | 2 'Lasso'
#   3 'Random Forest'           | 4 'Gradient Boosting'
model = data['Model'][0]

# Look the model up by name (so the row order of `df` does not matter)
# and display its metrics as a one-column frame.
df.set_index('Model').loc[model].to_frame()

## Charts w/stacking model

In [None]:
# Tabulate the results again, now with the stacking model appended.
# Each record follows the order of `result_columns`.
# NOTE(review): the 'Stacking' record (MAE 1000, r^2 0.5, 0 features, '?')
# looks like placeholder values -- confirm or replace with measured results.
result_columns = ('Model', 'MAE', 'Score (r^2)', 'Number of Features', 'CPU time')
model_records = [
    ('Linear Regression w/SBS', 15905, 0.89, 24, '< 1s'),
    ('Elastic Net', 13824, 0.89, 213, '12s'),
    ('Lasso', 14102, 0.90, 213, '1min 3s'),
    ('Random Forest', 5522, 0.90, 79, '1h 9min 52s'),
    ('Gradient Boosting', 6983, 0.92, 79, '1h 28min 54s'),
    ('Stacking', 1000, 0.5, 0, '?'),
]

# Pivot the records into the column -> list-of-values mapping that
# pd.DataFrame(data) consumes in the cells below.
data = {
    column: list(values)
    for column, values in zip(result_columns, zip(*model_records))
}

In [None]:
# Rank every model in `data` by r^2 (highest first) and chart each
# model's shortfall from a perfect score, i.e. 1 - r^2.
df = pd.DataFrame(data).sort_values(by='Score (r^2)', ascending=False)
X, error = df['Model'], 1 - df['Score (r^2)']

fig, ax = plt.subplots()
ax.bar(X, error, label='Scoring Error (r^2)')
ax.set_title("Score")
ax.set_ylabel("Scoring Error (r^2)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# Rank every model in `data` by mean absolute error, smallest first.
# `df` stays in scope afterwards: the stacking summary cell below reads it.
df = pd.DataFrame(data).sort_values(by='MAE')
X, MAE = df['Model'], df['MAE']

fig, ax = plt.subplots()
ax.bar(X, MAE, label='Mean Absolute Error')
ax.set_title("Mean Absolute Error")
ax.set_ylabel("Home Sales Price (USD)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# Summary table for the stacking model: one column, one row per metric.
model = data['Model'][5]  # sixth entry of data['Model'], i.e. 'Stacking'

# Look the model up by name (so the row order of `df` does not matter)
# and display its metrics as a one-column frame.
df.set_index('Model').loc[model].to_frame()

## All metrics summarized

In [None]:
# Reference (stacking with scikit-learn): https://towardsdatascience.com/a-practical-guide-to-stacking-using-scikit-learn-91e8d021863d

In [None]:
# Rank every model in `data` by r^2 (highest first) and chart each
# model's shortfall from a perfect score, i.e. 1 - r^2.
# NOTE(review): this cell repeats the r^2 error chart already drawn in the
# "Charts w/stacking model" section -- consider keeping only one copy.
df = pd.DataFrame(data).sort_values(by='Score (r^2)', ascending=False)
X, error = df['Model'], 1 - df['Score (r^2)']

fig, ax = plt.subplots()
ax.bar(X, error, label='Scoring Error (r^2)')
ax.set_title("Score")
ax.set_ylabel("Scoring Error (r^2)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# ranked boxplots
# Rank the models by their mean r^2 score, best first.
df = pd.DataFrame(data).sort_values('Score (r^2)', axis=0, ascending=False)
score_rank = (
    df.groupby('Model', sort=False)['Score (r^2)']  # sort=False: keep df's row order
    .mean()
    .sort_values(ascending=False, kind='stable')    # stable: tied models keep that order
    .index
    .tolist()
)

# Draw one box per model, left to right in ranking order.
# With a single score per model in `data`, each box collapses to one line.
plt.figure(figsize=(10,6))
sns.boxplot(x='Model', y='Score (r^2)', data=df, order=score_rank)
plt.title('Mean r^2 Score')
plt.xticks(rotation=90)
plt.ylabel("Score (r^2)")
plt.show()

In [None]:
### bar graph

# Rank every model in `data` by r^2 (highest first) and chart each
# model's shortfall from a perfect score, i.e. 1 - r^2.
# NOTE(review): this cell repeats the r^2 error chart drawn earlier in the
# notebook -- consider keeping only one copy.
df = pd.DataFrame(data).sort_values(by='Score (r^2)', ascending=False)
X, error = df['Model'], 1 - df['Score (r^2)']

fig, ax = plt.subplots()
ax.bar(X, error, label='Scoring Error (r^2)')
ax.set_title("Score")
ax.set_ylabel("Scoring Error (r^2)")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)  # tilt the model names along the x axis
ax.grid(False)  # no grid lines on this chart

plt.show()

In [None]:
# 2x3 grid of panels: bars in panels 1, 2 and 6, lines in panels 3, 4 and 5.
# NOTE(review): the series, the 'graphN'/'xN'/'yN' labels and the "MY SHOP"
# super-title look like tutorial placeholders -- confirm or replace them with
# the real model metrics.
plt.figure(figsize=(10,8))

# Every panel gets its own copy of the same dummy x/y series.
x0, x1, x2, x3, x4, x5 = (np.array([0, 1, 2, 3]) for _ in range(6))
y0, y1, y2, y3, y4, y5 = (np.array([3, 8, 1, 10]) for _ in range(6))

# (drawing function, x values, y values) for each panel, in grid order
panels = [
    (plt.bar, x0, y0),
    (plt.bar, x1, y1),
    (plt.plot, x2, y2),
    (plt.plot, x3, y3),
    (plt.plot, x4, y4),
    (plt.bar, x5, y5),
]

for position, (draw, xs, ys) in enumerate(panels, start=1):
    plt.subplot(2, 3, position)  # activate panel `position` of the 2x3 grid
    plt.title(f'graph{position}')
    plt.xlabel(f'x{position}')
    plt.ylabel(f'y{position}')
    draw(xs, ys)
    plt.grid(False)  # no grid lines inside the panel

# spread the panels out so titles and axis labels do not overlap
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.suptitle("MY SHOP")
plt.grid(False)

plt.show()

#plt.title('Regression Score vs Number of Features')
# plt.xlabel('Number of Features')
# plt.ylabel('score')
# plt.show()