## Random Forest with StandardScaler and optuna on dataset2.0
dataset: 11 features, normalisation of useful vote removed

20220926

In [1]:
import pandas as pd
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import re
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
import optuna as opt
import warnings
warnings.filterwarnings("ignore")
import time

In [2]:
# Load the train / test / validation tables from their snappy-compressed
# parquet files (read in that order, one frame per file).
train_df, test_df, val_df = map(
    pd.read_parquet,
    (
        "train_main.parquet.snappy",
        "test_main.parquet.snappy",
        "val_main.parquet.snappy",
    ),
)

In [3]:
# Separate the predictors from the target column `r_useful`; `r_id` is
# removed from the predictors as well.
non_feature_cols = ['r_id', 'r_useful']
X_train, y_train = train_df.drop(columns=non_feature_cols), train_df['r_useful']
X_test, y_test = test_df.drop(columns=non_feature_cols), test_df['r_useful']
X_val, y_val = val_df.drop(columns=non_feature_cols), val_df['r_useful']

# Standardise the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to all three splits.
ss = StandardScaler()
ss.fit(X_train)
X_train, X_val, X_test = (ss.transform(part) for part in (X_train, X_val, X_test))

In [6]:
def objective(trial):
    """Optuna objective: fit one random forest and score it on the validation split.

    Samples a hyperparameter configuration from ``trial``, fits a
    ``RandomForestRegressor`` on the notebook-level ``X_train`` / ``y_train``
    and evaluates its predictions on ``X_val`` / ``y_val``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Root mean squared error on the validation split (lower is better).
    """
    # perf_counter is a monotonic clock intended for measuring elapsed time.
    start = time.perf_counter()
    params = {
        # `step` is passed by keyword: passing it positionally is deprecated in
        # Optuna 3.x. The upper bound is written as 451 because the grid
        # 1, 51, ..., 451 never reaches 500 -- this is the same effective
        # search space as the previous (1, 500, 50) call.
        # NOTE(review): the grid includes n_estimators=1 and max_depth=1;
        # consider a grid such as 50..500 if single-tree / stump models are
        # not worth a trial.
        "n_estimators": trial.suggest_int('n_estimators', 1, 451, step=50),
        "max_depth": trial.suggest_int('max_depth', 1, 451, step=50),
        "min_samples_split": trial.suggest_int('min_samples_split', 2, 20),
        "max_features": trial.suggest_categorical("max_features", ['sqrt', 'log2']),
        "min_samples_leaf": trial.suggest_int('min_samples_leaf', 1, 20),
        # Fixed seed so that a given configuration always yields the same forest.
        "random_state": 760,
        "n_jobs": -1,
    }
    print("Currently running with:")
    print(params)

    model = RandomForestRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    # The reported time covers fitting and validation-set prediction.
    print("Time cost:", time.perf_counter() - start)

    # RMSE as sqrt(MSE): the `squared=False` argument of mean_squared_error is
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    return np.sqrt(mean_squared_error(y_val, y_pred))

In [7]:
# Seed the sampler so the hyperparameter search is reproducible from a fresh
# kernel. TPESampler is the sampler Optuna uses by default; only the seed is new.
study = opt.create_study(
    direction='minimize',
    sampler=opt.samplers.TPESampler(seed=760),
)
study.optimize(objective, n_trials=50)
study.best_params

[32m[I 2022-09-26 17:07:54,407][0m A new study created in memory with name: no-name-792aad30-3e29-4e5a-a262-fd6fb6e389f9[0m


Currently running with:
{'n_estimators': 1, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:07:59,122][0m Trial 0 finished with value: 3.9679400799878497 and parameters: {'n_estimators': 1, 'max_depth': 151, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 0 with value: 3.9679400799878497.[0m


Time cost: 4.710000276565552
Currently running with:
{'n_estimators': 401, 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:08:33,939][0m Trial 1 finished with value: 3.961959658873423 and parameters: {'n_estimators': 401, 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 1 with value: 3.961959658873423.[0m


Time cost: 34.81500029563904
Currently running with:
{'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:10:33,731][0m Trial 2 finished with value: 3.329658456414616 and parameters: {'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 12, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 2 with value: 3.329658456414616.[0m


Time cost: 119.66299939155579
Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 9, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:15:02,450][0m Trial 3 finished with value: 3.367904964982319 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 9}. Best is trial 2 with value: 3.329658456414616.[0m


Time cost: 268.55871057510376
Currently running with:
{'n_estimators': 101, 'max_depth': 301, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:16:25,028][0m Trial 4 finished with value: 3.310562884123113 and parameters: {'n_estimators': 101, 'max_depth': 301, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 82.42200064659119
Currently running with:
{'n_estimators': 251, 'max_depth': 301, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 15, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:19:34,321][0m Trial 5 finished with value: 3.4077617930921402 and parameters: {'n_estimators': 251, 'max_depth': 301, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 15}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 189.21600127220154
Currently running with:
{'n_estimators': 1, 'max_depth': 251, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:19:39,146][0m Trial 6 finished with value: 4.406359771972848 and parameters: {'n_estimators': 1, 'max_depth': 251, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 4.822999000549316
Currently running with:
{'n_estimators': 1, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:19:43,767][0m Trial 7 finished with value: 4.331251769391874 and parameters: {'n_estimators': 1, 'max_depth': 101, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 7}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 4.610999345779419
Currently running with:
{'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:21:03,658][0m Trial 8 finished with value: 3.335731380774115 and parameters: {'n_estimators': 101, 'max_depth': 401, 'min_samples_split': 16, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 79.82359838485718
Currently running with:
{'n_estimators': 451, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 15, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:26:57,915][0m Trial 9 finished with value: 3.4089002866503573 and parameters: {'n_estimators': 451, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 15}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 354.0706994533539
Currently running with:
{'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 19, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:30:27,729][0m Trial 10 finished with value: 3.418398025235414 and parameters: {'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 10, 'max_features': 'log2', 'min_samples_leaf': 19}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 209.75668597221375
Currently running with:
{'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:32:41,571][0m Trial 11 finished with value: 3.3404377770838494 and parameters: {'n_estimators': 151, 'max_depth': 451, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 133.70075130462646
Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:34:56,362][0m Trial 12 finished with value: 3.3134467003884707 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 134.63265585899353
Currently running with:
{'n_estimators': 101, 'max_depth': 301, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:36:25,539][0m Trial 13 finished with value: 3.39058935777441 and parameters: {'n_estimators': 101, 'max_depth': 301, 'min_samples_split': 13, 'max_features': 'sqrt', 'min_samples_leaf': 12}. Best is trial 4 with value: 3.310562884123113.[0m


Time cost: 89.13480877876282
Currently running with:
{'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 176.23971509933472


[32m[I 2022-09-26 17:39:22,242][0m Trial 14 finished with value: 3.2974346916736867 and parameters: {'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 14 with value: 3.2974346916736867.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}
Time cost: 284.7501368522644


[32m[I 2022-09-26 17:44:07,267][0m Trial 15 finished with value: 3.3391676236652543 and parameters: {'n_estimators': 301, 'max_depth': 301, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 14 with value: 3.2974346916736867.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 201, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:47:13,818][0m Trial 16 finished with value: 3.390661411740683 and parameters: {'n_estimators': 201, 'max_depth': 201, 'min_samples_split': 8, 'max_features': 'sqrt', 'min_samples_leaf': 12}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 186.46903586387634
Currently running with:
{'n_estimators': 101, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:48:49,215][0m Trial 17 finished with value: 3.3236696722714396 and parameters: {'n_estimators': 101, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 95.27032518386841
Currently running with:
{'n_estimators': 51, 'max_depth': 251, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:49:31,490][0m Trial 18 finished with value: 3.382906776411591 and parameters: {'n_estimators': 51, 'max_depth': 251, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 10}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 42.25160455703735
Currently running with:
{'n_estimators': 201, 'max_depth': 51, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 20, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:52:14,733][0m Trial 19 finished with value: 3.4290743788183953 and parameters: {'n_estimators': 201, 'max_depth': 51, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 20}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 163.1960961818695
Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}
Time cost: 263.8040416240692


[32m[I 2022-09-26 17:56:38,860][0m Trial 20 finished with value: 3.3391676236652543 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 14 with value: 3.2974346916736867.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 17:58:54,383][0m Trial 21 finished with value: 3.311576853933687 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 135.3823037147522
Currently running with:
{'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 18:01:51,093][0m Trial 22 finished with value: 3.330198978357088 and parameters: {'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 17, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 176.55797147750854
Currently running with:
{'n_estimators': 51, 'max_depth': 301, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 18:02:32,662][0m Trial 23 finished with value: 3.318040689063954 and parameters: {'n_estimators': 51, 'max_depth': 301, 'min_samples_split': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 41.51500129699707
Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 18:04:30,226][0m Trial 24 finished with value: 3.324477534508913 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 14, 'max_features': 'sqrt', 'min_samples_leaf': 3}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 117.44179344177246
Currently running with:
{'n_estimators': 51, 'max_depth': 251, 'min_samples_split': 11, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 18:05:09,010][0m Trial 25 finished with value: 3.337047169420452 and parameters: {'n_estimators': 51, 'max_depth': 251, 'min_samples_split': 11, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 14 with value: 3.2974346916736867.[0m


Time cost: 38.737374782562256
Currently running with:
{'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 192.87590956687927


[32m[I 2022-09-26 18:08:22,361][0m Trial 26 finished with value: 3.2999357298899876 and parameters: {'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 14 with value: 3.2974346916736867.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 231.8336317539215


[32m[I 2022-09-26 18:12:14,981][0m Trial 27 finished with value: 3.2937019739271634 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 27 with value: 3.2937019739271634.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}
Time cost: 225.02555966377258


[32m[I 2022-09-26 18:16:00,236][0m Trial 28 finished with value: 3.3391676236652543 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 6}. Best is trial 27 with value: 3.2937019739271634.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}
Time cost: 259.3686158657074


[32m[I 2022-09-26 18:20:19,819][0m Trial 29 finished with value: 3.3641964083289557 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 27 with value: 3.2937019739271634.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 195.66731882095337


[32m[I 2022-09-26 18:23:36,273][0m Trial 30 finished with value: 3.2849702774778318 and parameters: {'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 194.54951333999634


[32m[I 2022-09-26 18:26:51,460][0m Trial 31 finished with value: 3.2849702774778318 and parameters: {'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 270.6945266723633


[32m[I 2022-09-26 18:31:23,095][0m Trial 32 finished with value: 3.2855996771908256 and parameters: {'n_estimators': 351, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}
Time cost: 267.00911688804626


[32m[I 2022-09-26 18:35:50,749][0m Trial 33 finished with value: 3.303472884773958 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}
Time cost: 300.15660309791565


[32m[I 2022-09-26 18:40:51,228][0m Trial 34 finished with value: 3.3342443030454163 and parameters: {'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 309.57565093040466


[32m[I 2022-09-26 18:46:02,087][0m Trial 35 finished with value: 3.289258508177192 and parameters: {'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 349.2734613418579


[32m[I 2022-09-26 18:51:52,859][0m Trial 36 finished with value: 3.290291050329043 and parameters: {'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}
Time cost: 303.9979431629181


[32m[I 2022-09-26 18:56:57,444][0m Trial 37 finished with value: 3.3021344527146477 and parameters: {'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 1, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 18:57:30,189][0m Trial 38 finished with value: 3.9619596588734236 and parameters: {'n_estimators': 401, 'max_depth': 1, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 30 with value: 3.2849702774778318.[0m


Time cost: 32.740076541900635
Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 276.28396224975586


[32m[I 2022-09-26 19:02:07,257][0m Trial 39 finished with value: 3.2944226207513236 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 401, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 19:07:40,594][0m Trial 40 finished with value: 3.363800596739312 and parameters: {'n_estimators': 451, 'max_depth': 401, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 30 with value: 3.2849702774778318.[0m


Time cost: 333.14928817749023
Currently running with:
{'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 347.02111625671387


[32m[I 2022-09-26 19:13:28,768][0m Trial 41 finished with value: 3.290291050329043 and parameters: {'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}
Time cost: 338.7918076515198


[32m[I 2022-09-26 19:19:08,237][0m Trial 42 finished with value: 3.318998720613918 and parameters: {'n_estimators': 451, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 610.4492099285126


[32m[I 2022-09-26 19:29:21,223][0m Trial 43 finished with value: 3.289258508177192 and parameters: {'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}
Time cost: 361.1850004196167


[32m[I 2022-09-26 19:35:23,918][0m Trial 44 finished with value: 3.2975673803842747 and parameters: {'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 17, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-26 19:38:26,268][0m Trial 45 finished with value: 3.416888082955115 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 17}. Best is trial 30 with value: 3.2849702774778318.[0m


Time cost: 182.27100038528442
Currently running with:
{'n_estimators': 351, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 290.01796555519104


[32m[I 2022-09-26 19:43:17,543][0m Trial 46 finished with value: 3.289407764946518 and parameters: {'n_estimators': 351, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}
Time cost: 279.7081706523895


[32m[I 2022-09-26 19:47:57,883][0m Trial 47 finished with value: 3.320207616981503 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}
Time cost: 305.42399883270264


[32m[I 2022-09-26 19:53:04,055][0m Trial 48 finished with value: 3.3021344527146477 and parameters: {'n_estimators': 401, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 30 with value: 3.2849702774778318.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}
Time cost: 231.83333611488342


[32m[I 2022-09-26 19:56:56,337][0m Trial 49 finished with value: 3.288576658416398 and parameters: {'n_estimators': 301, 'max_depth': 351, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 30 with value: 3.2849702774778318.[0m


{'n_estimators': 251,
 'max_depth': 401,
 'min_samples_split': 5,
 'max_features': 'log2',
 'min_samples_leaf': 1}

In [8]:
# Optimal model: refit with the best hyperparameters reported by the study
# (n_estimators=251, max_depth=401, min_samples_split=5, max_features='log2',
# min_samples_leaf=1).
omodel = RandomForestRegressor(n_estimators=251,
                               max_depth=401,
                               min_samples_leaf=1,
                               min_samples_split=5,
                               max_features='log2',
                               random_state=760,
                               n_jobs=-1)
omodel.fit(X_train, y_train)

# Predictions for the train, validation and test splits.
y_pred = omodel.predict(X_train)
y_pred_val = omodel.predict(X_val)
y_pred_test = omodel.predict(X_test)

# Report RMSE and MAE per split. RMSE is computed as sqrt(MSE) because the
# `squared=False` argument of mean_squared_error is deprecated in
# scikit-learn 1.4 and removed in 1.6.
for split_name, y_true, y_hat in (
    ("train", y_train, y_pred),
    ("val", y_val, y_pred_val),
    ("test", y_test, y_pred_test),
):
    rmse = np.sqrt(mean_squared_error(y_true, y_hat))
    mae = mean_absolute_error(y_true, y_hat)
    print(f"\n model {split_name} score -> RMSE:", rmse, "MAE:", mae)


 model train score -> RMSE: 2.1576167544353106 MAE: 0.7695524561956715

 model val score -> RMSE: 3.2849702774778318 MAE: 1.5578832094537869

 model test score -> RMSE: 3.60868735954618 MAE: 1.570931916082599
