In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from xgboost.sklearn import XGBRegressor

In [2]:
# Names of the target columns.
TARGETS = [
    'Water_Absorption_%',
    'Hardness',
    'Thermal_Conductivity_(mW/m.K)',
]
# Read the training table from the CSV under ../data.
Dataset = pd.read_csv("../data/UD_867_formulation_training.csv")

In [3]:
# Reproducibility
SEED = 12345
# Select only features from dataset: every column except 'name' and the targets.
X_train = Dataset.drop(columns=['name'] + TARGETS)
# Select only targets from dataset (one column per entry in TARGETS).
Y_train = Dataset[TARGETS]
# Standardization (z-score) with per-column statistics of the training features.
# mu and sigma stay available so the same scaling can be applied to other data.
mu = X_train.mean(axis=0)
# A constant column has a standard deviation of 0 and would turn into NaN
# (0/0) after scaling; dividing by 1 instead leaves such a column at 0.
sigma = X_train.std(axis=0).replace(0, 1)
X_train = (X_train - mu) / sigma
# Only the first target in TARGETS is used as the regression response here.
y_train = Y_train[TARGETS[0]]

In [5]:
# Print the version of the `python3` found on the shell PATH (IPython `!` shell escape).
# NOTE(review): this may not be the interpreter the kernel runs on — confirm with sys.version if it matters.
!python3 --version

Python 3.7.12


In [49]:
# Hyper-parameter grids, keyed by the estimator name that set_model() accepts.
# Each inner dict maps a constructor argument to the candidate values that the
# grid search will combine exhaustively.
# Note: np.arange excludes the stop value, and with a float step the generated
# values carry floating-point round-off (e.g. 0.07500000000000001).
models = {
    'XGBRegressor': {
        'learning_rate': np.arange(0.025, 0.150, 0.025),
        'gamma': np.arange(0.05, 0.45, 0.05),
        'max_depth': np.arange(2, 14, 2),
        'min_child_weight': np.arange(1, 8, 1),
        'n_estimators': np.arange(10, 80, 5),
        'subsample': np.arange(0.60, 0.95, 0.05),
        'colsample_bytree': np.arange(0.60, 0.95, 0.05),
        'reg_alpha': [1e-5, 1e-2, 0.1, 0.5, 1, 5, 10],  # alpha
        'reg_lambda': [1e-5, 1e-2, 0.1, 0.5, 1, 5, 10],  # lambda
    },
    'KNeighborsRegressor': {
        'n_neighbors': np.arange(1, 10, 2),
        'weights': ['uniform', 'distance'],
        'p': [1, 2],
        'metric': ['minkowski'],
    },
    'RandomForestRegressor': {
        'n_estimators': np.arange(10, 80, 5),
        'criterion': ['squared_error', 'absolute_error', 'poisson'],
        # None means "consider all features", which is what 'auto' meant for a
        # regressor forest. 'auto' was deprecated in scikit-learn 1.1 and
        # removed in 1.3, so None keeps the grid valid across versions.
        'max_features': [None, 'sqrt', 'log2'],
    },
}

In [50]:
def set_model(name):
    """Return a new, default-configured regressor for the given class name.

    Parameters
    ----------
    name : str
        One of 'GradientBoostingRegressor', 'KNeighborsRegressor',
        'ExtraTreesRegressor', 'RandomForestRegressor',
        'DecisionTreeRegressor' or 'XGBRegressor'.

    Returns
    -------
    An unfitted estimator instance built with its default arguments.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported names.
    """
    if name == 'GradientBoostingRegressor':
        model = GradientBoostingRegressor()
    elif name == 'KNeighborsRegressor':
        model = KNeighborsRegressor()
    elif name == 'ExtraTreesRegressor':
        model = ExtraTreesRegressor()
    elif name == 'RandomForestRegressor':
        model = RandomForestRegressor()
    elif name == 'DecisionTreeRegressor':
        model = DecisionTreeRegressor()
    elif name == 'XGBRegressor':
        model = XGBRegressor()
    else:
        # Without this branch an unknown name reached `return model` with the
        # variable unbound and surfaced as an opaque UnboundLocalError.
        raise ValueError("Unknown model name: {!r}".format(name))
    return model

In [51]:
# Run an exhaustive 5-fold grid search (scored by R^2) for every estimator that
# has a parameter grid in `models`, and print the best configuration found.
# NOTE(review): the XGBRegressor grid is the product of nine value lists (on the
# order of 10^7 combinations, each fitted 5 times); confirm that an exhaustive
# search is intended, or consider a randomized search.
for m_i in models:
    model = set_model(m_i)
    # Pin the estimator's RNG when it has one, so repeated runs report the same
    # scores; estimators without a `random_state` parameter are left untouched.
    if 'random_state' in model.get_params():
        model.set_params(random_state=SEED)
    grid_model = GridSearchCV(estimator=model, param_grid=models[m_i], cv=5, scoring='r2', n_jobs=-1)
    grid_model.fit(X_train, y_train)
    print(" Results from Grid Search for:",m_i )
    print("\n The best estimator across ALL searched params:\n",grid_model.best_estimator_)
    print("\n The best score across ALL searched params:\n",grid_model.best_score_)
    print("\n The best parameters across ALL searched params:\n",grid_model.best_params_)
    