## Random Forest with 3-fold CV, StandardScaler and optuna on dataset3.0
Dataset: 11 features, subsampled.

Date: 2022-09-26

In [2]:
import pandas as pd
import numpy as np
from numpy import random
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import optuna as opt
import warnings
warnings.filterwarnings("ignore")
import time
from sklearn.model_selection import cross_val_score

In [3]:
# Read the pre-split train / test tables from their parquet files.
train_df, test_df = map(
    pd.read_parquet,
    ("train_main.parquet.snappy", "test_main.parquet.snappy"),
)

In [18]:
# Separate the target ('r_useful') from the predictors; 'r_id' is dropped
# together with the target so neither ends up in the feature matrix.
non_feature_cols = ['r_id', 'r_useful']
y_train, y_test = train_df['r_useful'], test_df['r_useful']
X_train = train_df.drop(columns=non_feature_cols)
X_test = test_df.drop(columns=non_feature_cols)

# Standardise the features: the scaler learns its statistics from the
# training split only and the same transform is then applied to both splits.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)

In [4]:
def objective(trial):
    """Optuna objective: 3-fold cross-validated RMSE of a random forest.

    Samples one hyperparameter configuration from ``trial``, scores a
    ``RandomForestRegressor`` built from it with 3-fold cross-validation on
    the notebook-level ``X_train`` / ``y_train``, and returns the value the
    study minimises.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Square root of the mean of the per-fold mean squared errors.
    """
    start = time.time()
    params = {
        # `step` must be passed by keyword: Optuna declares it keyword-only and
        # the positional form is deprecated. The upper bound is written as 451
        # (= 1 + 9 * 50), the largest value on the step grid; the previous
        # bound of 500 was silently clamped to this same value.
        "n_estimators": trial.suggest_int('n_estimators', 1, 451, step=50),
        "max_depth": trial.suggest_int('max_depth', 1, 451, step=50),
        "min_samples_split": trial.suggest_int('min_samples_split', 2, 20),
        "max_features" : trial.suggest_categorical("max_features", ['sqrt', 'log2']),
        "min_samples_leaf": trial.suggest_int('min_samples_leaf', 1, 20),
        "random_state": 760,
        "n_jobs": -1
    }
    print("Currently running with:")
    print(params)

    model = RandomForestRegressor(**params)

    # cross_val_score returns the *negated* MSE for each fold, so flip the sign
    # of the mean before taking the square root to obtain an RMSE.
    fold_neg_mse = cross_val_score(
        model, X_train, y_train, scoring="neg_mean_squared_error", cv=3
    )
    cvscore = np.sqrt(-np.mean(fold_neg_mse))
    print("Time cost:", time.time()-start)
    return cvscore

In [5]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# between runs. TPESampler is the sampler create_study uses by default, so only
# the seeding changes. The seed value mirrors the forest's random_state.
study = opt.create_study(
    direction = 'minimize',
    sampler = opt.samplers.TPESampler(seed = 760),
)
study.optimize(objective, n_trials = 50)
study.best_params

[32m[I 2022-09-27 03:37:50,258][0m A new study created in memory with name: no-name-cb859c65-3fd3-44f6-a15e-bd0cd41c6c10[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:38:02,088][0m Trial 0 finished with value: 3.607386181203533 and parameters: {'n_estimators': 51, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 10}. Best is trial 0 with value: 3.607386181203533.[0m


Time cost: 11.827013969421387
Currently running with:
{'n_estimators': 251, 'max_depth': 51, 'min_samples_split': 12, 'max_features': 'sqrt', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:39:22,836][0m Trial 1 finished with value: 3.5871712605659782 and parameters: {'n_estimators': 251, 'max_depth': 51, 'min_samples_split': 12, 'max_features': 'sqrt', 'min_samples_leaf': 8}. Best is trial 1 with value: 3.5871712605659782.[0m


Time cost: 80.74872422218323
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:41:28,003][0m Trial 2 finished with value: 3.572176007070439 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 5}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 125.16711783409119
Currently running with:
{'n_estimators': 1, 'max_depth': 251, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 19, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:41:30,143][0m Trial 3 finished with value: 3.93777713183662 and parameters: {'n_estimators': 1, 'max_depth': 251, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 19}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 2.1392438411712646
Currently running with:
{'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 14, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:41:46,548][0m Trial 4 finished with value: 3.9613842179511067 and parameters: {'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 14}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 16.40562891960144
Currently running with:
{'n_estimators': 251, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:43:03,076][0m Trial 5 finished with value: 3.580940966406305 and parameters: {'n_estimators': 251, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 7}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 76.52750015258789
Currently running with:
{'n_estimators': 101, 'max_depth': 251, 'min_samples_split': 19, 'max_features': 'log2', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:43:35,614][0m Trial 6 finished with value: 3.5900176553212506 and parameters: {'n_estimators': 101, 'max_depth': 251, 'min_samples_split': 19, 'max_features': 'log2', 'min_samples_leaf': 6}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 32.53167533874512
Currently running with:
{'n_estimators': 1, 'max_depth': 151, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:43:37,823][0m Trial 7 finished with value: 4.0399857618621935 and parameters: {'n_estimators': 1, 'max_depth': 151, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 12}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 2.2007627487182617
Currently running with:
{'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 9, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:45:42,356][0m Trial 8 finished with value: 3.592786482594953 and parameters: {'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 9}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 124.53281998634338
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:46:31,848][0m Trial 9 finished with value: 3.578120149302244 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 2 with value: 3.572176007070439.[0m


Time cost: 49.49263572692871
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:48:26,322][0m Trial 10 finished with value: 3.567739424587229 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 10 with value: 3.567739424587229.[0m


Time cost: 114.47299671173096
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:50:32,188][0m Trial 11 finished with value: 3.55912514463729 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 125.86395072937012
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:52:33,287][0m Trial 12 finished with value: 3.5623166035291356 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 121.09751296043396
Currently running with:
{'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:53:55,702][0m Trial 13 finished with value: 3.5623412283330267 and parameters: {'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 82.4152364730835
Currently running with:
{'n_estimators': 351, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:54:06,488][0m Trial 14 finished with value: 3.96148808546378 and parameters: {'n_estimators': 351, 'max_depth': 1, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 10.785862445831299
Currently running with:
{'n_estimators': 201, 'max_depth': 101, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:54:58,539][0m Trial 15 finished with value: 3.5710656456753878 and parameters: {'n_estimators': 201, 'max_depth': 101, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 52.05134129524231
Currently running with:
{'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 18, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:57:00,240][0m Trial 16 finished with value: 3.563567054228524 and parameters: {'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 18, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 121.70104598999023
Currently running with:
{'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 16, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:58:09,944][0m Trial 17 finished with value: 3.6168135308438605 and parameters: {'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 16}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 69.70356798171997
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 03:59:44,935][0m Trial 18 finished with value: 3.561316072495803 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 94.99118328094482
Currently running with:
{'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:00:40,001][0m Trial 19 finished with value: 3.563712178910977 and parameters: {'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 55.06561756134033
Currently running with:
{'n_estimators': 301, 'max_depth': 201, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:01:50,756][0m Trial 20 finished with value: 3.604085513868575 and parameters: {'n_estimators': 301, 'max_depth': 201, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 12}. Best is trial 11 with value: 3.55912514463729.[0m


Time cost: 70.75526022911072
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:03:34,918][0m Trial 21 finished with value: 3.550118386726815 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 104.16209578514099
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:05:28,051][0m Trial 22 finished with value: 3.5535030724063543 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 113.1330955028534
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:07:17,407][0m Trial 23 finished with value: 3.5554866432670154 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 109.35558152198792
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:09:09,161][0m Trial 24 finished with value: 3.5554866432670154 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 111.75445532798767
Currently running with:
{'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:09:22,833][0m Trial 25 finished with value: 3.9613842179511067 and parameters: {'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 6}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 13.672052145004272
Currently running with:
{'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 20, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:10:51,972][0m Trial 26 finished with value: 3.627610513754922 and parameters: {'n_estimators': 401, 'max_depth': 51, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 20}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 89.13894295692444
Currently running with:
{'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:11:05,475][0m Trial 27 finished with value: 3.9613842179511067 and parameters: {'n_estimators': 451, 'max_depth': 1, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 13.502822160720825
Currently running with:
{'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:12:23,992][0m Trial 28 finished with value: 3.571142076857727 and parameters: {'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 78.5174970626831
Currently running with:
{'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:13:59,521][0m Trial 29 finished with value: 3.5962525072119376 and parameters: {'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 10}. Best is trial 21 with value: 3.550118386726815.[0m


Time cost: 95.5287139415741
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:15:13,579][0m Trial 30 finished with value: 3.549789758925833 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 74.05802512168884
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:16:26,722][0m Trial 31 finished with value: 3.549789758925833 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 73.14272379875183
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:17:39,161][0m Trial 32 finished with value: 3.549789758925833 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 72.43899488449097
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:18:40,419][0m Trial 33 finished with value: 3.5871712605659782 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 61.25781989097595
Currently running with:
{'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:19:24,192][0m Trial 34 finished with value: 3.555625218150807 and parameters: {'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 43.771947145462036
Currently running with:
{'n_estimators': 201, 'max_depth': 151, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:20:20,513][0m Trial 35 finished with value: 3.560121023772295 and parameters: {'n_estimators': 201, 'max_depth': 151, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 56.32056188583374
Currently running with:
{'n_estimators': 251, 'max_depth': 251, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:21:28,017][0m Trial 36 finished with value: 3.5706889192368867 and parameters: {'n_estimators': 251, 'max_depth': 251, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 67.50422143936157
Currently running with:
{'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 16, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:22:03,368][0m Trial 37 finished with value: 3.618379810510208 and parameters: {'n_estimators': 151, 'max_depth': 201, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 16}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 35.350539684295654
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:23:06,044][0m Trial 38 finished with value: 3.580940966406305 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 7}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 62.67566156387329
Currently running with:
{'n_estimators': 101, 'max_depth': 201, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:23:35,206][0m Trial 39 finished with value: 3.558606944491809 and parameters: {'n_estimators': 101, 'max_depth': 201, 'min_samples_split': 9, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 29.161722660064697
Currently running with:
{'n_estimators': 201, 'max_depth': 301, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:24:28,605][0m Trial 40 finished with value: 3.5703696165891197 and parameters: {'n_estimators': 201, 'max_depth': 301, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 30 with value: 3.549789758925833.[0m


Time cost: 53.3978545665741
Currently running with:
{'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:25:57,742][0m Trial 41 finished with value: 3.5474646810677157 and parameters: {'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 41 with value: 3.5474646810677157.[0m


Time cost: 89.13703083992004
Currently running with:
{'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:27:28,115][0m Trial 42 finished with value: 3.5474646810677153 and parameters: {'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 90.37242317199707
Currently running with:
{'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:28:54,947][0m Trial 43 finished with value: 3.5487148903797694 and parameters: {'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 86.83271908760071
Currently running with:
{'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:30:16,410][0m Trial 44 finished with value: 3.5612079878970864 and parameters: {'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 81.46231746673584
Currently running with:
{'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:31:42,838][0m Trial 45 finished with value: 3.5522170392669365 and parameters: {'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 86.42827701568604
Currently running with:
{'n_estimators': 251, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:32:56,677][0m Trial 46 finished with value: 3.549789758925833 and parameters: {'n_estimators': 251, 'max_depth': 101, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 73.83917427062988
Currently running with:
{'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:34:14,572][0m Trial 47 finished with value: 3.5806216113253853 and parameters: {'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 7}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 77.89499521255493
Currently running with:
{'n_estimators': 251, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:35:30,930][0m Trial 48 finished with value: 3.5566728884756427 and parameters: {'n_estimators': 251, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 76.34194278717041
Currently running with:
{'n_estimators': 201, 'max_depth': 51, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-27 04:36:44,521][0m Trial 49 finished with value: 3.549025828527312 and parameters: {'n_estimators': 201, 'max_depth': 51, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 42 with value: 3.5474646810677153.[0m


Time cost: 73.58997440338135


{'n_estimators': 301,
 'max_depth': 101,
 'min_samples_split': 7,
 'max_features': 'log2',
 'min_samples_leaf': 1}

In [5]:
# Final model: refit on the full training set using the hyperparameters of the
# best trial reported by the study
# ('n_estimators': 301, 'max_depth': 101, 'min_samples_split': 7,
#  'max_features': 'log2', 'min_samples_leaf': 1).
best_rf_params = {
    'n_estimators': 301,
    'max_depth': 101,
    'min_samples_leaf': 1,
    'min_samples_split': 7,
    'max_features': 'log2',
}
omodel = RandomForestRegressor(**best_rf_params, random_state=760, n_jobs=-1)


omodel.fit(X_train, y_train)

In [6]:
def _rmse_and_mae(y_true, y_hat):
    """Return ``(RMSE, MAE)`` of predictions ``y_hat`` against ``y_true``.

    RMSE is computed as ``sqrt(MSE)`` rather than via
    ``mean_squared_error(..., squared=False)``: the ``squared`` argument is
    deprecated in scikit-learn 1.4 and removed in 1.6, whereas the explicit
    square root works on every version.
    """
    return (
        np.sqrt(mean_squared_error(y_true, y_hat)),
        mean_absolute_error(y_true, y_hat),
    )


#predict train set results
y_pred = omodel.predict(X_train)
rmse, mae = _rmse_and_mae(y_train, y_pred)
print("\n model train score -> RMSE:", rmse, "MAE:", mae)


#predict test set results
y_pred_test = omodel.predict(X_test)
rmse, mae = _rmse_and_mae(y_test, y_pred_test)
print("\n model test score -> RMSE:", rmse, "MAE:", mae)


 model train score -> RMSE: 2.0704855821728825 MAE: 0.8826368503630423

 model test score -> RMSE: 4.102792258442667 MAE: 1.6248397899885325
