## Random Forest with 3-fold CV, StandardScaler and optuna on dataset4.0
Dataset: 11 features, subsampled dataset with 11+7 new features

Date: 2022-09-30

In [1]:
import pandas as pd
import numpy as np
from numpy import random
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import optuna as opt
import warnings
warnings.filterwarnings("ignore")
import time
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer

In [2]:
# Load the train and test splits from their snappy-compressed parquet files.
train_df, test_df = map(
    pd.read_parquet,
    ("train_main.parquet.snappy", "test_main.parquet.snappy"),
)

In [3]:
# Separate predictors from the target: 'r_useful' is the value to predict and
# 'r_id' is dropped from the feature matrix alongside it.
X_train, y_train = train_df.drop(columns=['r_id', 'r_useful']), train_df['r_useful']
X_test, y_test = test_df.drop(columns=['r_id', 'r_useful']), test_df['r_useful']

In [4]:
def objective(trial):
    """Optuna objective: 3-fold cross-validated RMSE of the random-forest pipeline.

    Samples a set of random-forest hyperparameters from ``trial``, wraps the
    forest in an imputer -> scaler -> forest pipeline and scores it with
    3-fold cross-validation on the module-level ``X_train`` / ``y_train``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to draw the hyperparameter values.

    Returns
    -------
    float
        Square root of the mean of the per-fold mean squared errors
        (lower is better).
    """
    start = time.perf_counter()  # monotonic clock; only the elapsed time is reported
    params = {
        # `step` is passed by keyword because recent Optuna releases treat it as
        # keyword-only. The upper bound is written as 451 because, with low=1 and
        # step=50, the sampled grid is 1, 51, ..., 451; a bound of 500 is never
        # reachable and is silently truncated by Optuna.
        "n_estimators": trial.suggest_int('n_estimators', 1, 451, step=50),
        "max_depth": trial.suggest_int('max_depth', 1, 451, step=50),
        "min_samples_split": trial.suggest_int('min_samples_split', 2, 20),
        "max_features": trial.suggest_categorical("max_features", ['sqrt', 'log2']),
        "min_samples_leaf": trial.suggest_int('min_samples_leaf', 1, 20),
        # Fixed (not tuned): seed for the forest and use of all CPU cores.
        "random_state": 760,
        "n_jobs": -1
    }
    print("Currently running with:")
    print(params)

    # Imputer and scaler live inside the pipeline so they are re-fitted on the
    # training part of every CV fold.
    pipe = Pipeline([('imputer', SimpleImputer()), ('scaler', StandardScaler()), ('rf', RandomForestRegressor(**params))])

    # The scorer returns negated MSE per fold; flip the sign of the mean and take
    # the square root to obtain an RMSE.
    fold_scores = cross_val_score(pipe, X_train, y_train, scoring="neg_mean_squared_error", cv=3)
    cvscore = np.sqrt(-np.mean(fold_scores))
    print("Time cost:", time.perf_counter() - start)
    return cvscore

In [5]:
# Run a 50-trial search that minimises the cross-validated RMSE returned by
# `objective`. The sampler is seeded (same seed as the forest's random_state)
# so that a fresh Restart & Run All draws the same sequence of trials;
# TPESampler is the sampler Optuna uses by default for single-objective studies.
study = opt.create_study(direction='minimize', sampler=opt.samplers.TPESampler(seed=760))
study.optimize(objective, n_trials=50)
study.best_params

[32m[I 2022-09-30 17:17:32,377][0m A new study created in memory with name: no-name-e7a48934-695e-49a5-a64f-4fe6708e695c[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'sqrt', 'min_samples_leaf': 15, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:17:37,672][0m Trial 0 finished with value: 4.3887554965048725 and parameters: {'n_estimators': 151, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'sqrt', 'min_samples_leaf': 15}. Best is trial 0 with value: 4.3887554965048725.[0m


Time cost: 5.291033506393433
Currently running with:
{'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:17:57,670][0m Trial 1 finished with value: 3.640257007526359 and parameters: {'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 1 with value: 3.640257007526359.[0m


Time cost: 19.997899055480957
Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 20, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:18:57,267][0m Trial 2 finished with value: 3.7912822780104105 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 20}. Best is trial 1 with value: 3.640257007526359.[0m


Time cost: 59.59423089027405
Currently running with:
{'n_estimators': 51, 'max_depth': 151, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:19:19,241][0m Trial 3 finished with value: 3.7384653697731633 and parameters: {'n_estimators': 51, 'max_depth': 151, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 10}. Best is trial 1 with value: 3.640257007526359.[0m


Time cost: 21.973971128463745
Currently running with:
{'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 17, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:20:16,884][0m Trial 4 finished with value: 3.7789115868413616 and parameters: {'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 17}. Best is trial 1 with value: 3.640257007526359.[0m


Time cost: 57.6410129070282
Currently running with:
{'n_estimators': 51, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'sqrt', 'min_samples_leaf': 19, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:20:19,936][0m Trial 5 finished with value: 4.380695456781327 and parameters: {'n_estimators': 51, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'sqrt', 'min_samples_leaf': 19}. Best is trial 1 with value: 3.640257007526359.[0m


Time cost: 3.051701545715332
Currently running with:
{'n_estimators': 51, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:20:48,808][0m Trial 6 finished with value: 3.5619498962636627 and parameters: {'n_estimators': 51, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 28.871169328689575
Currently running with:
{'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:21:55,876][0m Trial 7 finished with value: 3.635339055535706 and parameters: {'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 67.066903591156
Currently running with:
{'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 17, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:22:51,643][0m Trial 8 finished with value: 3.7789115868413616 and parameters: {'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 17}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 55.76619076728821
Currently running with:
{'n_estimators': 401, 'max_depth': 351, 'min_samples_split': 16, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:25:28,840][0m Trial 9 finished with value: 3.693135128351646 and parameters: {'n_estimators': 401, 'max_depth': 351, 'min_samples_split': 16, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 157.1973259449005
Currently running with:
{'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:27:28,302][0m Trial 10 finished with value: 3.7009596728596614 and parameters: {'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 7}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 119.46035885810852
Currently running with:
{'n_estimators': 1, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:27:31,767][0m Trial 11 finished with value: 4.603967778020403 and parameters: {'n_estimators': 1, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 3.4643654823303223
Currently running with:
{'n_estimators': 251, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:29:17,457][0m Trial 12 finished with value: 3.65638269162474 and parameters: {'n_estimators': 251, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 105.6882598400116
Currently running with:
{'n_estimators': 251, 'max_depth': 301, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:30:52,459][0m Trial 13 finished with value: 3.729428353358576 and parameters: {'n_estimators': 251, 'max_depth': 301, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 10}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 94.99983501434326
Currently running with:
{'n_estimators': 101, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:31:36,994][0m Trial 14 finished with value: 3.667894882853864 and parameters: {'n_estimators': 101, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 44.5343177318573
Currently running with:
{'n_estimators': 1, 'max_depth': 301, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:31:40,671][0m Trial 15 finished with value: 4.791932890075713 and parameters: {'n_estimators': 1, 'max_depth': 301, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 3.675612211227417
Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:33:58,855][0m Trial 16 finished with value: 3.701209178421419 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 7}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 138.1835606098175
Currently running with:
{'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:35:23,700][0m Trial 17 finished with value: 3.6388214896226647 and parameters: {'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 84.84266376495361
Currently running with:
{'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 13, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:36:01,885][0m Trial 18 finished with value: 3.75580502800491 and parameters: {'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 13}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 38.18272089958191
Currently running with:
{'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:39:06,760][0m Trial 19 finished with value: 3.6703863048684084 and parameters: {'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 184.87398719787598
Currently running with:
{'n_estimators': 201, 'max_depth': 301, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:40:25,043][0m Trial 20 finished with value: 3.715335234170097 and parameters: {'n_estimators': 201, 'max_depth': 301, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 78.2827250957489
Currently running with:
{'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:41:49,867][0m Trial 21 finished with value: 3.6388214896226647 and parameters: {'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 84.82232213020325
Currently running with:
{'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:42:35,543][0m Trial 22 finished with value: 3.625478474049642 and parameters: {'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 45.674355030059814
Currently running with:
{'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:43:21,465][0m Trial 23 finished with value: 3.625478474049642 and parameters: {'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 45.92189645767212
Currently running with:
{'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:44:05,788][0m Trial 24 finished with value: 3.6529873117159344 and parameters: {'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 44.322327852249146
Currently running with:
{'n_estimators': 1, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:44:09,351][0m Trial 25 finished with value: 4.812482434144907 and parameters: {'n_estimators': 1, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 3.5613346099853516
Currently running with:
{'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:44:31,863][0m Trial 26 finished with value: 3.6863727596720657 and parameters: {'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 22.511359930038452
Currently running with:
{'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:45:10,859][0m Trial 27 finished with value: 3.7483720712362354 and parameters: {'n_estimators': 101, 'max_depth': 451, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 12}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 38.99513053894043
Currently running with:
{'n_estimators': 51, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:45:32,173][0m Trial 28 finished with value: 3.724681870126247 and parameters: {'n_estimators': 51, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 21.313100814819336
Currently running with:
{'n_estimators': 1, 'max_depth': 401, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:45:35,295][0m Trial 29 finished with value: 4.396579548908816 and parameters: {'n_estimators': 1, 'max_depth': 401, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 3.120727777481079
Currently running with:
{'n_estimators': 101, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:46:18,419][0m Trial 30 finished with value: 3.6719394324759294 and parameters: {'n_estimators': 101, 'max_depth': 351, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 43.123520612716675
Currently running with:
{'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:47:22,025][0m Trial 31 finished with value: 3.62217382266857 and parameters: {'n_estimators': 151, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 63.60486960411072
Currently running with:
{'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:48:04,125][0m Trial 32 finished with value: 3.6616359692358396 and parameters: {'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 42.09942150115967
Currently running with:
{'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:49:09,242][0m Trial 33 finished with value: 3.632741871400152 and parameters: {'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 65.11550831794739
Currently running with:
{'n_estimators': 51, 'max_depth': 351, 'min_samples_split': 18, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:49:32,020][0m Trial 34 finished with value: 3.6742367990198357 and parameters: {'n_estimators': 51, 'max_depth': 351, 'min_samples_split': 18, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 22.777767419815063
Currently running with:
{'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:50:55,214][0m Trial 35 finished with value: 3.6669468192396377 and parameters: {'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 83.19177603721619
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:52:10,768][0m Trial 36 finished with value: 3.606503988528642 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 75.55301260948181
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:53:20,602][0m Trial 37 finished with value: 3.6177026856051095 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 69.83230686187744
Currently running with:
{'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:55:08,975][0m Trial 38 finished with value: 3.6316327396822863 and parameters: {'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 108.3712420463562
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:56:10,252][0m Trial 39 finished with value: 3.6940737054580195 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 61.27563285827637
Currently running with:
{'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 17:58:20,413][0m Trial 40 finished with value: 3.6125939807372394 and parameters: {'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 130.16055488586426
Currently running with:
{'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:00:31,652][0m Trial 41 finished with value: 3.6125939807372394 and parameters: {'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 131.2383735179901
Currently running with:
{'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:02:49,755][0m Trial 42 finished with value: 3.6125939807372394 and parameters: {'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 138.10226154327393
Currently running with:
{'n_estimators': 301, 'max_depth': 201, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 15, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:04:56,158][0m Trial 43 finished with value: 3.7633657783077656 and parameters: {'n_estimators': 301, 'max_depth': 201, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 15}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 126.40071082115173
Currently running with:
{'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:07:21,224][0m Trial 44 finished with value: 3.655544123847469 and parameters: {'n_estimators': 301, 'max_depth': 251, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 145.0642945766449
Currently running with:
{'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:10:15,940][0m Trial 45 finished with value: 3.5983005190129806 and parameters: {'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 174.71354269981384
Currently running with:
{'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:12:50,349][0m Trial 46 finished with value: 3.686656380109868 and parameters: {'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 154.4075677394867
Currently running with:
{'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:15:34,800][0m Trial 47 finished with value: 3.6538746663514705 and parameters: {'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 164.4486780166626
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 19, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:18:05,940][0m Trial 48 finished with value: 3.781315849834383 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 19}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 151.1390039920807
Currently running with:
{'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 18:21:12,145][0m Trial 49 finished with value: 3.631465698450852 and parameters: {'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 6 with value: 3.5619498962636627.[0m


Time cost: 186.20365023612976


{'n_estimators': 51,
 'max_depth': 451,
 'min_samples_split': 4,
 'max_features': 'log2',
 'min_samples_leaf': 1}

In [8]:
# Final model: the same imputer -> scaler -> random-forest pipeline, configured
# with the hyperparameters of the best trial found by the search:
# {'n_estimators': 51, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1}

omodel = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('scaler', StandardScaler()),
    ('rf', RandomForestRegressor(
        n_estimators=51,
        max_depth=451,
        min_samples_split=4,
        min_samples_leaf=1,
        max_features='log2',
        n_jobs=-1,
        random_state=760,
    )),
])

# Fit on the full training set.
omodel.fit(X_train, y_train)

In [9]:
# Score the fitted pipeline on the training set and on the held-out test set.
# RMSE is computed as sqrt(MSE) instead of `mean_squared_error(..., squared=False)`:
# the `squared` argument is deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas sqrt(MSE) gives the same value on every version.

#predict train set results
y_pred = omodel.predict(X_train)
rmse = np.sqrt(mean_squared_error(y_train, y_pred))
mae = mean_absolute_error(y_train, y_pred)
print("\n model train score -> RMSE:", rmse, "MAE:", mae)


#predict test set results
y_pred_test = omodel.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
mae = mean_absolute_error(y_test, y_pred_test)
print("\n model test score -> RMSE:", rmse, "MAE:", mae)


 model train score -> RMSE: 1.6654315096909602 MAE: 0.6075989165752549

 model test score -> RMSE: 2.9961124885530652 MAE: 1.359543549646976
