In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the dataset from the working directory, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer="concrete_data.csv")
data.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Pick the target by column name rather than by position, so a reordered or
# extended CSV cannot silently turn a different column into the label.
TARGET_COLUMN = "Strength"

# Feature matrix: every column except the target, kept in file order.
X = data.drop(columns=[TARGET_COLUMN]).to_numpy()
# Double brackets keep the target two-dimensional, shape (n_samples, 1).
Y = data[[TARGET_COLUMN]].to_numpy()

In [4]:
# Sanity check: show the (rows, columns) of the features and of the target, one per line.
print(X.shape, Y.shape, sep="\n")

(1030, 8)
(1030, 1)


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2021,
)
X_train, X_test, Y_train, Y_test = split_parts

In [6]:
from xgboost import XGBRegressor

# Base regressor handed to the grid search: only the seed is pinned here,
# every other setting is left unset.
xgb_seed = 2021
xgb_model = XGBRegressor(random_state=xgb_seed)

In [7]:
# Hyperparameter grid to search exhaustively:
# 3 (n_estimators) x 3 (max_depth) x 2 (gamma) x 4 (learning_rate) = 72 combinations.
search_space = dict(
    n_estimators=[100, 200, 500],
    max_depth=[3, 6, 9],
    gamma=[0.01, 0.1],
    learning_rate=[0.001, 0.01, 0.1, 1],
)

In [8]:
from sklearn.model_selection import GridSearchCV

# Metrics recorded for every candidate. Valid scorer names can be listed with
# sklearn.metrics.get_scorer_names() (sklearn.metrics.SCORERS.keys() on older releases).
scoring_metrics = ["r2", "neg_root_mean_squared_error"]

# Exhaustive 5-fold cross-validated search over search_space. Both metrics are
# reported, but only "r2" decides which candidate is refit as the best model.
GS = GridSearchCV(
    xgb_model,
    search_space,
    scoring=scoring_metrics,
    refit="r2",
    cv=5,
    verbose=4,
)

In [9]:
# Run the grid search on the training split only; the fitted search object is
# the cell's displayed result.
GS.fit(X=X_train, y=Y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-35.192, r2=-3.685, total=   0.1s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100 ..


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-36.463, r2=-4.022, total=   0.1s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-35.801, r2=-3.374, total=   0.1s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100 ..


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s


[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-35.460, r2=-3.748, total=   0.1s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-37.090, r2=-3.535, total=   0.1s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, neg_root_mean_squared_error=-32.121, r2=-2.903, total=   0.2s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, neg_root_mean_squared_error=-33.338, r2=-3.198, total=   0.2s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200 ..
[CV]  gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, neg_root_mean_squared_error=-32.709, r2=-2.652, total=   0.2s
[CV] gamma=0.01, learning_rate=0.001, max_depth=3, n_est

[CV]  gamma=0.01, learning_rate=0.001, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-25.887, r2=-1.209, total=   0.8s
[CV] gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-16.098, r2=0.020, total=   0.1s
[CV] gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-17.399, r2=-0.143, total=   0.1s
[CV] gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-16.667, r2=0.052, total=   0.1s
[CV] gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, neg_root_mean_squared_error=-16.650, r2=-0.047, total=   0.1s
[CV] gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators

[CV]  gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.362, r2=0.928, total=   1.2s
[CV] gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.791, r2=0.913, total=   1.2s
[CV] gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.751, r2=0.923, total=   1.2s
[CV] gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.731, r2=0.915, total=   1.2s
[CV] gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500 ...
[CV]  gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-5.733, r2=0.892, total=   1.2s
[CV] gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100 ....


[CV]  gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-4.613, r2=0.927, total=   1.0s
[CV] gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=200 ....
[CV]  gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-4.685, r2=0.917, total=   0.6s
[CV] gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=200 ....
[CV]  gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-5.613, r2=0.896, total=   0.5s
[CV] gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500 ....
[CV]  gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.145, r2=0.935, total=   1.0s
[CV] gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500 ....
[CV]  gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500, neg_root_mean_squared_error=-4.425, r2=0.926, total=   1.2s
[CV] gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500 ....
[CV] 

[CV]  gamma=0.01, learning_rate=1, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-6.977, r2=0.840, total=   0.3s
[CV] gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200 ......
[CV]  gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-7.556, r2=0.784, total=   0.6s
[CV] gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200 ......
[CV]  gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-6.750, r2=0.828, total=   0.4s
[CV] gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200 ......
[CV]  gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-6.873, r2=0.839, total=   0.4s
[CV] gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200 ......
[CV]  gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-6.153, r2=0.857, total=   0.5s
[CV] gamma=0.01, learning_rate=1, max_depth=9, n_estimators=200 ......
[CV]  gamma=0.0

[CV]  gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-36.465, r2=-4.022, total=   0.2s
[CV] gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100 ...
[CV]  gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-35.788, r2=-3.371, total=   0.2s
[CV] gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100 ...
[CV]  gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-35.434, r2=-3.741, total=   0.2s
[CV] gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100 ...
[CV]  gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-37.039, r2=-3.522, total=   0.2s
[CV] gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=200 ...
[CV]  gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=200, neg_root_mean_squared_error=-31.977, r2=-2.868, total=   0.3s
[CV] gamma=0.1, learning_rate=0.001, max_depth=9, n_estimator

[CV]  gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.775, r2=0.922, total=   0.8s
[CV] gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=500 ....
[CV]  gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.987, r2=0.906, total=   0.8s
[CV] gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=500 ....
[CV]  gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-5.889, r2=0.886, total=   0.8s
[CV] gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=100 ....
[CV]  gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-15.511, r2=0.090, total=   0.2s
[CV] gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=100 ....
[CV]  gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=100, neg_root_mean_squared_error=-16.758, r2=-0.061, total=   0.2s
[CV] gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=100 ....
[C

[CV]  gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200, neg_root_mean_squared_error=-5.683, r2=0.894, total=   0.4s
[CV] gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500 .....
[CV]  gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.022, r2=0.939, total=   0.7s
[CV] gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500 .....
[CV]  gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.773, r2=0.914, total=   0.8s
[CV] gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500 .....
[CV]  gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.262, r2=0.938, total=   0.8s
[CV] gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500 .....
[CV]  gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-4.342, r2=0.929, total=   0.7s
[CV] gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500 .....
[CV]  gamm

[CV]  gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200, neg_root_mean_squared_error=-6.324, r2=0.849, total=   0.7s
[CV] gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200 .......
[CV]  gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200, neg_root_mean_squared_error=-7.095, r2=0.828, total=   0.4s
[CV] gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200 .......
[CV]  gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200, neg_root_mean_squared_error=-5.581, r2=0.882, total=   0.3s
[CV] gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200 .......
[CV]  gamma=0.1, learning_rate=1, max_depth=6, n_estimators=200, neg_root_mean_squared_error=-6.833, r2=0.846, total=   0.3s
[CV] gamma=0.1, learning_rate=1, max_depth=6, n_estimators=500 .......
[CV]  gamma=0.1, learning_rate=1, max_depth=6, n_estimators=500, neg_root_mean_squared_error=-6.766, r2=0.827, total=   0.7s
[CV] gamma=0.1, learning_rate=1, max_depth=6, n_estimators=500 .......
[CV]  gamma=0.1, lea

[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed:  2.7min finished


GridSearchCV(cv=5,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n...
                                    num_parallel_tree=None, predictor=None,
                                    random_state=2021, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                 

In [10]:
# Complete description of the best model found by the search.
best_model = GS.best_estimator_
print(best_model)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0.1, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=500, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=2021,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)


In [11]:
# Only the hyperparameters that were searched, at their best values.
best_params = GS.best_params_
print(best_params)

{'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 500}


In [12]:
# Best score under the metric named in `refit`.
best_score = GS.best_score_
print(best_score)

0.9228097089517535


In [13]:
# Tabulate the cross-validation results of every candidate, ordered by R² rank
# (rank 1 first), and write the table to a CSV file in the working directory.
df = pd.DataFrame(GS.cv_results_).sort_values(by="rank_test_r2")
df.to_csv("cv_result.csv")