In [56]:
import pandas as pd
import numpy as np
import seaborn as sns
import math
from sklearn.linear_model import ElasticNetCV, ElasticNet
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor 
from bayes_opt import BayesianOptimization

import warnings
warnings.filterwarnings('ignore')

In [9]:
house_train = pd.read_csv('D:/NYC-Data-Science/Projects/HousingPricesML/Data/train_120feats_Dense_OutlierFree_LogTransform.csv')
house_train = house_train.drop("Unnamed: 0", axis = 1)
# house_train

In [10]:
X = house_train.loc[:, house_train.columns != 'SalePrice']
y = house_train.SalePrice

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
def EN_func(alpha, l1_ratio):
    val = cross_val_score(ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state = 42),
                         X_train,y_train, cv=5).mean()
    return val

In [39]:
def rf_func(n_estimators, max_depth):
    val = cross_val_score(RandomForestRegressor(n_estimators = int(n_estimators),
                                               max_depth = int(max_depth),
                                               random_state = 42),
                         X_train, y_train, cv=5).mean()
    return val

In [70]:
def krr_func(alpha, degree, coef0):
    val = cross_val_score(KernelRidge(alpha=alpha, kernel='polynomial', degree=degree, coef0=coef0),
                         X_train, y_train, cv=5).mean()
    return val

In [63]:
def rmse(y_true,y_pred):
   assert len(y_true) == len(y_pred)
   return np.square((y_pred + 1) - (y_true + 1)).mean() ** 0.5

In [64]:
elastic_BO = BayesianOptimization(EN_func, {"alpha":(1e-4,20), "l1_ratio":(0,1)})
elastic_BO.explore({"alpha":np.linspace(1e-4,20,10), 'l1_ratio':np.linspace(0,1,10)})
elastic_BO.maximize(n_iter=20)
print(elastic_BO.res['max'])

[31mInitialization[0m
[94m------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   l1_ratio | 
    1 | 00m00s | [35m   0.91195[0m | [32m   0.0001[0m | [32m    0.0000[0m | 
    2 | 00m00s |    0.66702 |    2.2223 |     0.1111 | 
    3 | 00m00s |    0.64873 |    4.4445 |     0.2222 | 
    4 | 00m00s |    0.64240 |    6.6667 |     0.3333 | 
    5 | 00m00s |    0.64011 |    8.8889 |     0.4444 | 
    6 | 00m00s |    0.63523 |   11.1112 |     0.5556 | 
    7 | 00m00s |    0.62634 |   13.3334 |     0.6667 | 
    8 | 00m00s |    0.61166 |   15.5556 |     0.7778 | 
    9 | 00m00s |    0.58910 |   17.7778 |     0.8889 | 
   10 | 00m00s |    0.55626 |   20.0000 |     1.0000 | 
   11 | 00m00s |    0.66775 |    0.8819 |     0.2260 | 
   12 | 00m00s |    0.64252 |    2.0989 |     0.9998 | 
   13 | 00m00s |    0.64215 |   18.6187 |     0.1322 | 
   14 | 00m00s |    0.65775 |    3.7053 |     0.1981 | 
   15 | 00m00s |    0.64142 |   17.9294 |    

KeyboardInterrupt: 

In [37]:
estimator = ElasticNet(alpha = 0.0023227894706349642,
                                  l1_ratio = 0.12359373609103728,
                                  random_state=42)
estimator.fit(X_train,y_train)

ElasticNet(alpha=0.002322789470634964, copy_X=True, fit_intercept=True,
      l1_ratio=0.12359373609103728, max_iter=1000, normalize=False,
      positive=False, precompute=False, random_state=42,
      selection='cyclic', tol=0.0001, warm_start=False)

In [38]:
y_pred = estimator.predict(X_test)
rmse(y_pred,y_test)

0.1365111341685229

In [55]:
rf_BO = BayesianOptimization(rf_func, {'n_estimators': (100,1000), 'max_depth': (10,500)})
rf_BO.explore({"n_estimators":np.linspace(100,1000,20), 'max_depth':np.linspace(10,500,20)})
rf_BO.maximize(n_inter=10)
print(rf_BO.res['max'])
#errors out on the 13th iteration. Not sure why since valid params

[31mInitialization[0m
[94m------------------------------------------------------------[0m
 Step |   Time |      Value |   max_depth |   n_estimators | 
    1 | 00m05s | [35m   0.88525[0m | [32m    10.0000[0m | [32m      100.0000[0m | 
    2 | 00m10s | [35m   0.88593[0m | [32m    35.7895[0m | [32m      147.3684[0m | 
    3 | 00m13s | [35m   0.88638[0m | [32m    61.5789[0m | [32m      194.7368[0m | 
    4 | 00m17s | [35m   0.88686[0m | [32m    87.3684[0m | [32m      242.1053[0m | 
    5 | 00m20s |    0.88663 |    113.1579 |       289.4737 | 
    6 | 00m23s |    0.88669 |    138.9474 |       336.8421 | 
    7 | 00m27s |    0.88664 |    164.7368 |       384.2105 | 
    8 | 00m30s |    0.88664 |    190.5263 |       431.5789 | 
    9 | 00m34s | [35m   0.88690[0m | [32m   216.3158[0m | [32m      478.9474[0m | 
   10 | 00m37s |    0.88688 |    242.1053 |       526.3158 | 
   11 | 00m40s | [35m   0.88696[0m | [32m   267.8947[0m | [32m      573.6842[0m | 


ValueError: Invalid parameter n_inter for estimator GaussianProcessRegressor(alpha=1e-10, copy_X_train=True,
             kernel=Matern(length_scale=1, nu=2.5),
             n_restarts_optimizer=25, normalize_y=False,
             optimizer='fmin_l_bfgs_b',
             random_state=<mtrand.RandomState object at 0x000001C18FFC9948>). Check the list of available parameters with `estimator.get_params().keys()`.

In [69]:
estimator = RandomForestRegressor(n_estimators= 700, 
                                  max_depth=350,
                                  random_state=42)
estimator.fit(X_train,y_train)

KeyboardInterrupt: 

In [54]:
y_pred = estimator.predict(X_test)
rmse(y_pred,y_test)

0.14946686617140614

In [71]:
krr_BO = BayesianOptimization(krr_func, {'alpha':(0,10000), 'degree':(1,5), 'coef0':(0,10000)})
krr_BO.explore({'alpha':np.linspace(.01,1,20), 'degree':np.linspace(2,6,20), 'coef0':np.linspace(0,10,20)})
krr_BO.maximize(n_inter=100)
print(krr_BO.res['max'])

[31mInitialization[0m
[94m-----------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |     coef0 |    degree | 
    1 | 00m00s | [35m-1157451804446.29858[0m | [32m   0.0100[0m | [32m   0.0000[0m | [32m   2.0000[0m | 
    2 | 00m00s | [35m-1723064.89315[0m | [32m   0.0621[0m | [32m   0.5263[0m | [32m   2.2105[0m | 
    3 | 00m00s | [35m-822402.02414[0m | [32m   0.1142[0m | [32m   1.0526[0m | [32m   2.4211[0m | 
    4 | 00m00s | [35m-5336.03491[0m | [32m   0.1663[0m | [32m   1.5789[0m | [32m   2.6316[0m | 
    5 | 00m00s | -6517741093.49666 |    0.2184 |    2.1053 |    2.8421 | 
    6 | 00m00s | -5389696830314.13086 |    0.2705 |    2.6316 |    3.0526 | 
    7 | 00m00s | -26045042.46852 |    0.3226 |    3.1579 |    3.2632 | 
    8 | 00m00s | -16381299.53463 |    0.3747 |    3.6842 |    3.4737 | 
    9 | 00m00s | -17804347.68949 |    0.4268 |    4.2105 |    3.6842 | 
   10 | 00m00s | -24996.83828 |    0.

ValueError: Invalid parameter n_inter for estimator GaussianProcessRegressor(alpha=1e-10, copy_X_train=True,
             kernel=Matern(length_scale=1, nu=2.5),
             n_restarts_optimizer=25, normalize_y=False,
             optimizer='fmin_l_bfgs_b',
             random_state=<mtrand.RandomState object at 0x000001C191C5A3A8>). Check the list of available parameters with `estimator.get_params().keys()`.