## Random Forest with 3-fold CV, StandardScaler and optuna on dataset3.0
dataset: 11 features,  subsampled dataset

20220926

In [1]:
import pandas as pd
import numpy as np
from numpy import random
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import optuna as opt
import warnings
warnings.filterwarnings("ignore")
import time
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

In [2]:
#import data
train_df = pd.read_parquet("train_main.parquet.snappy")
test_df = pd.read_parquet("test_main.parquet.snappy")
test_df

Unnamed: 0,r_id,r_stars,r_stars_square,r_length,u_friends_count,u_review_count,u_month_age,b_stars,b_review_count,r_sen,r_sub,r_rea,r_useful
0,4195320,4,16,86,163,40,27.298941,4.0,445,0.117031,0.361875,86.10,1
1,4406379,5,25,28,18,115,0.001011,4.0,636,-0.140000,0.133333,96.48,3
2,1172072,5,25,76,69,1,0.000002,2.5,15,0.256723,0.355398,69.07,1
3,1949778,1,1,186,23,6,23.288288,3.5,185,-0.209252,0.378994,82.54,1
4,4377517,5,25,57,1,25,48.728723,4.5,227,0.295040,0.454762,91.11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,5373252,5,25,25,288,2,24.026242,3.5,117,0.775000,0.662500,64.04,1
99996,4500278,5,25,76,21,59,44.444338,4.0,532,0.325664,0.561008,71.48,1
99997,4981139,5,25,65,236,143,4.868817,4.0,7568,0.097821,0.502692,83.66,2
99998,3032513,1,1,33,2,28,0.002720,3.0,80,0.300000,0.306250,100.04,3


In [3]:
X_train = train_df.drop(['r_id','r_useful'],axis=1)
y_train = train_df['r_useful']
X_test = test_df.drop(['r_id','r_useful'],axis=1)
y_test = test_df['r_useful']

In [4]:
def objective(trial):
    start = time.time()
    params = {
        "n_estimators": trial.suggest_int('n_estimators', 1, 500, 50),
        "max_depth": trial.suggest_int('max_depth', 1, 500, 50),
        "min_samples_split": trial.suggest_int('min_samples_split', 2, 20),
        "max_features" : trial.suggest_categorical("max_features", ['sqrt', 'log2']),
        "min_samples_leaf": trial.suggest_int('min_samples_leaf', 1, 20), 
        "random_state": 760, 
        "n_jobs": -1
    }
    print("Currently running with:")
    print(params)
    
    model = RandomForestRegressor(**params)
    pipe = make_pipeline(StandardScaler(), model)
    
    # model.fit(X_train, y_train)
    cvscore = np.sqrt(-np.mean(cross_val_score(pipe, X_train, y_train, scoring = "neg_mean_squared_error", cv=3)))
    # y_pred = model.predict(X_val)
    print("Time cost:", time.time()-start)
    # return mean_squared_error(y_val, y_pred, squared=False)
    return cvscore

In [5]:
study = opt.create_study(direction = 'minimize')
study.optimize(objective, n_trials = 50)
study.best_params

[32m[I 2022-09-30 19:15:41,413][0m A new study created in memory with name: no-name-b5677a4c-2564-4612-9bd3-ca450afe6287[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 16, 'max_features': 'sqrt', 'min_samples_leaf': 13, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:17:01,620][0m Trial 0 finished with value: 3.607568401544493 and parameters: {'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 16, 'max_features': 'sqrt', 'min_samples_leaf': 13}. Best is trial 0 with value: 3.607568401544493.[0m


Time cost: 80.20488619804382
Currently running with:
{'n_estimators': 201, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 13, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:17:58,940][0m Trial 1 finished with value: 3.6090340879103766 and parameters: {'n_estimators': 201, 'max_depth': 451, 'min_samples_split': 2, 'max_features': 'sqrt', 'min_samples_leaf': 13}. Best is trial 0 with value: 3.607568401544493.[0m


Time cost: 57.319971799850464
Currently running with:
{'n_estimators': 401, 'max_depth': 101, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:19:57,255][0m Trial 2 finished with value: 3.5958370952186245 and parameters: {'n_estimators': 401, 'max_depth': 101, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 10}. Best is trial 2 with value: 3.5958370952186245.[0m


Time cost: 118.31376600265503
Currently running with:
{'n_estimators': 51, 'max_depth': 201, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:20:14,379][0m Trial 3 finished with value: 3.584640621355147 and parameters: {'n_estimators': 51, 'max_depth': 201, 'min_samples_split': 14, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 3 with value: 3.584640621355147.[0m


Time cost: 17.123340606689453
Currently running with:
{'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 14, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:21:25,903][0m Trial 4 finished with value: 3.6099652845295798 and parameters: {'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 19, 'max_features': 'sqrt', 'min_samples_leaf': 14}. Best is trial 3 with value: 3.584640621355147.[0m


Time cost: 71.52079963684082
Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:22:25,313][0m Trial 5 finished with value: 3.550290475281455 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 4, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 5 with value: 3.550290475281455.[0m


Time cost: 59.4099645614624
Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:24:17,021][0m Trial 6 finished with value: 3.5453469985541295 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 111.70739650726318
Currently running with:
{'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:25:39,934][0m Trial 7 finished with value: 3.5626699644646163 and parameters: {'n_estimators': 251, 'max_depth': 201, 'min_samples_split': 6, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 82.91049361228943
Currently running with:
{'n_estimators': 1, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:25:42,687][0m Trial 8 finished with value: 5.0644104285156795 and parameters: {'n_estimators': 1, 'max_depth': 401, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 2.7516000270843506
Currently running with:
{'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 19, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:25:57,242][0m Trial 9 finished with value: 3.6282770077450266 and parameters: {'n_estimators': 51, 'max_depth': 401, 'min_samples_split': 20, 'max_features': 'log2', 'min_samples_leaf': 19}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 14.553575992584229
Currently running with:
{'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 8, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:27:56,615][0m Trial 10 finished with value: 3.585875639147302 and parameters: {'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 8}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 119.37079334259033
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:28:59,214][0m Trial 11 finished with value: 3.548414062733149 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 62.59716320037842
Currently running with:
{'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:29:47,500][0m Trial 12 finished with value: 3.5715576185680886 and parameters: {'n_estimators': 151, 'max_depth': 301, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 6 with value: 3.5453469985541295.[0m


Time cost: 48.28532600402832
Currently running with:
{'n_estimators': 351, 'max_depth': 251, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:32:10,221][0m Trial 13 finished with value: 3.5417859570521797 and parameters: {'n_estimators': 351, 'max_depth': 251, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 142.71875643730164
Currently running with:
{'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:33:56,784][0m Trial 14 finished with value: 3.5809190161064293 and parameters: {'n_estimators': 351, 'max_depth': 151, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 7}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 106.56282305717468
Currently running with:
{'n_estimators': 351, 'max_depth': 1, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:34:08,015][0m Trial 15 finished with value: 3.9614880962832992 and parameters: {'n_estimators': 351, 'max_depth': 1, 'min_samples_split': 12, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 11.229851722717285
Currently running with:
{'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 20, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:36:06,513][0m Trial 16 finished with value: 3.6277431027490463 and parameters: {'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 20}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 118.49709010124207
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 7, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:37:37,623][0m Trial 17 finished with value: 3.580306869702815 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 8, 'max_features': 'log2', 'min_samples_leaf': 7}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 91.10865807533264
Currently running with:
{'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:40:04,673][0m Trial 18 finished with value: 3.560451481565352 and parameters: {'n_estimators': 451, 'max_depth': 251, 'min_samples_split': 11, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 147.0500946044922
Currently running with:
{'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 18, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:41:24,918][0m Trial 19 finished with value: 3.6201918123627386 and parameters: {'n_estimators': 301, 'max_depth': 151, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 18}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 80.24346804618835
Currently running with:
{'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 9, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:43:20,770][0m Trial 20 finished with value: 3.590488380291316 and parameters: {'n_estimators': 401, 'max_depth': 301, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 9}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 115.8501968383789
Currently running with:
{'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:44:24,187][0m Trial 21 finished with value: 3.548414062733149 and parameters: {'n_estimators': 151, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 63.41607689857483
Currently running with:
{'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:45:34,492][0m Trial 22 finished with value: 3.5555243606659146 and parameters: {'n_estimators': 201, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 70.30282759666443
Currently running with:
{'n_estimators': 101, 'max_depth': 251, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:46:14,506][0m Trial 23 finished with value: 3.5580680419454516 and parameters: {'n_estimators': 101, 'max_depth': 251, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 1}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 40.01275086402893
Currently running with:
{'n_estimators': 351, 'max_depth': 301, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:48:05,269][0m Trial 24 finished with value: 3.5701485955136874 and parameters: {'n_estimators': 351, 'max_depth': 301, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 110.76160001754761
Currently running with:
{'n_estimators': 251, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:49:32,621][0m Trial 25 finished with value: 3.555237691997688 and parameters: {'n_estimators': 251, 'max_depth': 351, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 87.3515784740448
Currently running with:
{'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:50:35,747][0m Trial 26 finished with value: 3.5776084814689764 and parameters: {'n_estimators': 201, 'max_depth': 401, 'min_samples_split': 7, 'max_features': 'log2', 'min_samples_leaf': 6}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 63.12612247467041
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:52:27,564][0m Trial 27 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 111.81507897377014
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 16, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:53:49,223][0m Trial 28 finished with value: 3.616211879740658 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 9, 'max_features': 'log2', 'min_samples_leaf': 16}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 81.65788531303406
Currently running with:
{'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 11, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:55:28,877][0m Trial 29 finished with value: 3.5572707942739696 and parameters: {'n_estimators': 301, 'max_depth': 51, 'min_samples_split': 11, 'max_features': 'sqrt', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 99.65383291244507
Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 12, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:57:07,693][0m Trial 30 finished with value: 3.60300688252504 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 15, 'max_features': 'log2', 'min_samples_leaf': 12}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 98.81453847885132
Currently running with:
{'n_estimators': 251, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 19:58:40,422][0m Trial 31 finished with value: 3.5459192560481347 and parameters: {'n_estimators': 251, 'max_depth': 351, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 92.72649359703064
Currently running with:
{'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:00:02,511][0m Trial 32 finished with value: 3.5626699644646163 and parameters: {'n_estimators': 251, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 82.08928394317627
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:01:53,705][0m Trial 33 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 111.19326877593994
Currently running with:
{'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:04:06,247][0m Trial 34 finished with value: 3.563004534811721 and parameters: {'n_estimators': 401, 'max_depth': 451, 'min_samples_split': 5, 'max_features': 'sqrt', 'min_samples_leaf': 4}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 132.5402057170868
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:05:56,471][0m Trial 35 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 110.22459053993225
Currently running with:
{'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:07:44,911][0m Trial 36 finished with value: 3.5756540769658898 and parameters: {'n_estimators': 351, 'max_depth': 451, 'min_samples_split': 3, 'max_features': 'sqrt', 'min_samples_leaf': 6}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 108.43843626976013
Currently running with:
{'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:09:36,359][0m Trial 37 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 101, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 111.44758868217468
Currently running with:
{'n_estimators': 401, 'max_depth': 101, 'min_samples_split': 18, 'max_features': 'sqrt', 'min_samples_leaf': 11, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:11:29,551][0m Trial 38 finished with value: 3.600228288850084 and parameters: {'n_estimators': 401, 'max_depth': 101, 'min_samples_split': 18, 'max_features': 'sqrt', 'min_samples_leaf': 11}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 113.18977308273315
Currently running with:
{'n_estimators': 201, 'max_depth': 1, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 14, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:11:36,266][0m Trial 39 finished with value: 3.961136368698568 and parameters: {'n_estimators': 201, 'max_depth': 1, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 14}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 6.714385509490967
Currently running with:
{'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:13:31,396][0m Trial 40 finished with value: 3.563238689671704 and parameters: {'n_estimators': 351, 'max_depth': 101, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 115.12846207618713
Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:15:22,941][0m Trial 41 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 111.54319858551025
Currently running with:
{'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:17:13,828][0m Trial 42 finished with value: 3.54465635401176 and parameters: {'n_estimators': 301, 'max_depth': 451, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 110.88569927215576
Currently running with:
{'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:18:46,099][0m Trial 43 finished with value: 3.5459192560481347 and parameters: {'n_estimators': 251, 'max_depth': 151, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 92.27036952972412
Currently running with:
{'n_estimators': 351, 'max_depth': 201, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:20:45,157][0m Trial 44 finished with value: 3.563238689671704 and parameters: {'n_estimators': 351, 'max_depth': 201, 'min_samples_split': 3, 'max_features': 'log2', 'min_samples_leaf': 4}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 119.05631613731384
Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 6, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:22:19,161][0m Trial 45 finished with value: 3.5755996271462305 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 4, 'max_features': 'log2', 'min_samples_leaf': 6}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 94.00256752967834
Currently running with:
{'n_estimators': 251, 'max_depth': 51, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:23:48,408][0m Trial 46 finished with value: 3.5478160315998846 and parameters: {'n_estimators': 251, 'max_depth': 51, 'min_samples_split': 7, 'max_features': 'sqrt', 'min_samples_leaf': 1}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 89.24545407295227
Currently running with:
{'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:26:05,558][0m Trial 47 finished with value: 3.5543510250040256 and parameters: {'n_estimators': 401, 'max_depth': 201, 'min_samples_split': 5, 'max_features': 'log2', 'min_samples_leaf': 3}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 137.1489679813385
Currently running with:
{'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 5, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:27:39,126][0m Trial 48 finished with value: 3.5726656847514624 and parameters: {'n_estimators': 301, 'max_depth': 401, 'min_samples_split': 13, 'max_features': 'log2', 'min_samples_leaf': 5}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 93.5672357082367
Currently running with:
{'n_estimators': 351, 'max_depth': 401, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 2, 'random_state': 760, 'n_jobs': -1}


[32m[I 2022-09-30 20:29:41,894][0m Trial 49 finished with value: 3.5507249860295413 and parameters: {'n_estimators': 351, 'max_depth': 401, 'min_samples_split': 6, 'max_features': 'log2', 'min_samples_leaf': 2}. Best is trial 13 with value: 3.5417859570521797.[0m


Time cost: 122.76592135429382


{'n_estimators': 351,
 'max_depth': 251,
 'min_samples_split': 2,
 'max_features': 'log2',
 'min_samples_leaf': 1}

In [8]:
#optimal model
# {'n_estimators': 351, 'max_depth': 251, 'min_samples_split': 2, 'max_features': 'log2', 'min_samples_leaf': 1}
omodel = make_pipeline(StandardScaler(), RandomForestRegressor(n_estimators = 351, 
                               max_depth = 251, 
                               min_samples_leaf= 1,
                               min_samples_split= 2,
                               max_features= 'log2', random_state = 760, n_jobs=-1))


omodel.fit(X_train, y_train)

In [9]:
#predict train set results
y_pred = omodel.predict(X_train)
rmse = mean_squared_error(y_train, y_pred, squared=False)
mae = mean_absolute_error(y_train, y_pred) 
print(f"\n model train score -> RMSE:", rmse, "MAE:", mae)


#predict test set results
y_pred_test = omodel.predict(X_test)
rmse = mean_squared_error(y_test, y_pred_test, squared=False)
mae = mean_absolute_error(y_test, y_pred_test) 
print(f"\n model test score -> RMSE:", rmse, "MAE:", mae)


 model train score -> RMSE: 1.2902994094058773 MAE: 0.5922162862201784

 model test score -> RMSE: 4.086867282916606 MAE: 1.6254888126088403
