## Perform XGBoost using 3 fold CV with Optuna(50 trials) and StandardScaler on the Different Linguistic data calculation

In [1]:
!nvidia-smi

Sat Oct  1 23:13:50 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install --quiet optuna

[K     |████████████████████████████████| 348 kB 4.8 MB/s 
[K     |████████████████████████████████| 209 kB 70.7 MB/s 
[K     |████████████████████████████████| 81 kB 11.8 MB/s 
[K     |████████████████████████████████| 78 kB 7.4 MB/s 
[K     |████████████████████████████████| 147 kB 67.5 MB/s 
[K     |████████████████████████████████| 112 kB 70.6 MB/s 
[K     |████████████████████████████████| 49 kB 7.4 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [3]:
import numpy as np
import pandas as pd
import optuna as opt
import xgboost as xgb
from optuna.samplers import TPESampler
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from pathlib import Path
import gc
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
RANDOM_SEED = 2
DATA_DIR = Path("/content/drive/MyDrive/CS760")

In [6]:
Combined = pd.read_parquet(DATA_DIR/"Combined.parquet.snappy")

In [7]:
Combined.shape

(500000, 21)

In [8]:
Combined.columns

Index(['r_stars', 'r_stars_square', 'r_length', 'u_friends_count',
       'u_review_count', 'u_month_age', 'b_stars', 'b_review_count', 'r_sen',
       'r_sub', 'r_rea', 'r_useful', 'r_text', 'processed_txt', 'textblob_sen',
       'vadar_sen', 'textblob_sub', 'textstat_read', 'flesch_read',
       'dale_read', 'cli_read'],
      dtype='object')

### Model 1 : Using textblob sentiment, textblob subjectivity and  flesch kincaid grade as the predictors

In [9]:
x1 = Combined[['textblob_sen','textblob_sub','flesch_read']]
y1 = Combined['r_useful']

X_train, X_test, y_train, y_test = train_test_split(x1, y1, test_size=100000, random_state=RANDOM_SEED)


def objective(trial):
  """ Function to tune parameters """
  gc.collect()
  params = {
      "n_estimators":trial.suggest_int('n_estimators', 1, 1001, step=50),
      "max_depth" : trial.suggest_int("max_depth", 2, 20),
      "learning_rate" : trial.suggest_float('learning_rate', 0.001, 0.5, log=True),
      "subsample ": trial.suggest_float("subsample", 0.4, 1, step=0.1),
      "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1, step=0.1),
      "gamma": trial.suggest_float("gamma", 0, 1),
      "min_child_weight": trial.suggest_int("min_child_weight", 1, 31, step=2)
  }

  print("Currently running with:")
  print(params)

  model = xgb.XGBRegressor(objective="reg:squarederror",
                            n_jobs=-1,
                            grow_policy='lossguide',
                            tree_method="gpu_hist",
                            predictor="gpu_predictor",
                            booster='gbtree',
                            sampling_method='gradient_based',
                            eval_metrics=['rmse'],
                            random_state=RANDOM_SEED,
                            enable_categorical=False,
                            **params)
  pipe = make_pipeline(StandardScaler(), model)

  scores = cross_val_score(pipe, X_train, y_train,
    scoring="neg_root_mean_squared_error", cv=KFold(3))

  return -scores.mean()

study = opt.create_study(direction='minimize', sampler=TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=50)
print("Final best parameters:")
study.best_params


[32m[I 2022-10-01 23:14:31,454][0m A new study created in memory with name: no-name-74e87297-09db-4544-bf6a-19bad1db2174[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}


[32m[I 2022-10-01 23:14:37,845][0m Trial 0 finished with value: 4.170444491313877 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}


[32m[I 2022-10-01 23:14:57,700][0m Trial 1 finished with value: 4.172208411509144 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}


[32m[I 2022-10-01 23:15:36,375][0m Trial 2 finished with value: 4.376305300880307 and parameters: {'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample ': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}


[32m[I 2022-10-01 23:15:43,716][0m Trial 3 finished with value: 4.634175749914935 and parameters: {'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}


[32m[I 2022-10-01 23:15:52,654][0m Trial 4 finished with value: 4.175374433854889 and parameters: {'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}


[32m[I 2022-10-01 23:18:57,412][0m Trial 5 finished with value: 4.324862940100353 and parameters: {'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}


[32m[I 2022-10-01 23:19:26,838][0m Trial 6 finished with value: 4.203696990019583 and parameters: {'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}


[32m[I 2022-10-01 23:25:25,730][0m Trial 7 finished with value: 4.217377775848014 and parameters: {'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}


[32m[I 2022-10-01 23:25:28,995][0m Trial 8 finished with value: 4.174616260109104 and parameters: {'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}


[32m[I 2022-10-01 23:25:33,428][0m Trial 9 finished with value: 4.17187497757536 and parameters: {'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}


[32m[I 2022-10-01 23:25:40,094][0m Trial 10 finished with value: 4.171218048492162 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}


[32m[I 2022-10-01 23:25:46,777][0m Trial 11 finished with value: 4.171014089483798 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}


[32m[I 2022-10-01 23:25:52,830][0m Trial 12 finished with value: 4.171499217422595 and parameters: {'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}


[32m[I 2022-10-01 23:26:04,454][0m Trial 13 finished with value: 4.170760520354706 and parameters: {'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}


[32m[I 2022-10-01 23:26:16,816][0m Trial 14 finished with value: 4.170812720042893 and parameters: {'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}


[32m[I 2022-10-01 23:27:32,613][0m Trial 15 finished with value: 4.172564103573619 and parameters: {'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}


[32m[I 2022-10-01 23:27:39,074][0m Trial 16 finished with value: 4.210582114151369 and parameters: {'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}


[32m[I 2022-10-01 23:28:16,993][0m Trial 17 finished with value: 4.172587023097107 and parameters: {'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample ': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}


[32m[I 2022-10-01 23:29:02,582][0m Trial 18 finished with value: 4.249576439502321 and parameters: {'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}


[32m[I 2022-10-01 23:29:13,469][0m Trial 19 finished with value: 4.170920400203008 and parameters: {'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}


[32m[I 2022-10-01 23:30:19,444][0m Trial 20 finished with value: 4.196096746069008 and parameters: {'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.008010388181835304, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11421692700161701, 'min_child_weight': 11}


[32m[I 2022-10-01 23:30:29,881][0m Trial 21 finished with value: 4.17062889960726 and parameters: {'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.008010388181835304, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11421692700161701, 'min_child_weight': 11}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.012295126389199834, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.17087415500083808, 'min_child_weight': 7}


[32m[I 2022-10-01 23:30:39,101][0m Trial 22 finished with value: 4.170481433422971 and parameters: {'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.012295126389199834, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.17087415500083808, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.011859386089452119, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1652711305100874, 'min_child_weight': 5}


[32m[I 2022-10-01 23:30:45,140][0m Trial 23 finished with value: 4.17070300363554 and parameters: {'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.011859386089452119, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1652711305100874, 'min_child_weight': 5}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 2, 'learning_rate': 0.0010319936285112766, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.36547344664067244, 'min_child_weight': 21}


[32m[I 2022-10-01 23:30:52,345][0m Trial 24 finished with value: 4.332326906277059 and parameters: {'n_estimators': 651, 'max_depth': 2, 'learning_rate': 0.0010319936285112766, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.36547344664067244, 'min_child_weight': 21}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 4, 'learning_rate': 0.056086176723193625, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.25987437534504965, 'min_child_weight': 13}


[32m[I 2022-10-01 23:31:03,935][0m Trial 25 finished with value: 4.171633303794807 and parameters: {'n_estimators': 901, 'max_depth': 4, 'learning_rate': 0.056086176723193625, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.25987437534504965, 'min_child_weight': 13}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.021345889163581308, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11935505942048355, 'min_child_weight': 5}


[32m[I 2022-10-01 23:31:16,752][0m Trial 26 finished with value: 4.170804056343559 and parameters: {'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.021345889163581308, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11935505942048355, 'min_child_weight': 5}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 6, 'learning_rate': 0.004600521421110267, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.22885374612419082, 'min_child_weight': 9}


[32m[I 2022-10-01 23:31:23,889][0m Trial 27 finished with value: 4.192874582267041 and parameters: {'n_estimators': 351, 'max_depth': 6, 'learning_rate': 0.004600521421110267, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.22885374612419082, 'min_child_weight': 9}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 3, 'learning_rate': 0.056883989188700565, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.42533341186971163, 'min_child_weight': 13}


[32m[I 2022-10-01 23:31:33,231][0m Trial 28 finished with value: 4.171062116744174 and parameters: {'n_estimators': 801, 'max_depth': 3, 'learning_rate': 0.056883989188700565, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.42533341186971163, 'min_child_weight': 13}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.00573694294284507, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.14560240440368483, 'min_child_weight': 3}


[32m[I 2022-10-01 23:31:51,521][0m Trial 29 finished with value: 4.177345818509505 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.00573694294284507, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.14560240440368483, 'min_child_weight': 3}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 14, 'learning_rate': 0.03461052337553825, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2920733042834885, 'min_child_weight': 7}


[32m[I 2022-10-01 23:32:33,478][0m Trial 30 finished with value: 4.173553320149385 and parameters: {'n_estimators': 701, 'max_depth': 14, 'learning_rate': 0.03461052337553825, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2920733042834885, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.012233472694992215, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.15379164367331033, 'min_child_weight': 5}


[32m[I 2022-10-01 23:32:39,520][0m Trial 31 finished with value: 4.170659498835262 and parameters: {'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.012233472694992215, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.15379164367331033, 'min_child_weight': 5}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 2, 'learning_rate': 0.009583539382103486, 'subsample ': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.08880582482159256, 'min_child_weight': 3}


[32m[I 2022-10-01 23:32:45,704][0m Trial 32 finished with value: 4.17138959436119 and parameters: {'n_estimators': 551, 'max_depth': 2, 'learning_rate': 0.009583539382103486, 'subsample': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.08880582482159256, 'min_child_weight': 3}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.017428756859167723, 'subsample ': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.2002116101063922, 'min_child_weight': 11}


[32m[I 2022-10-01 23:32:55,563][0m Trial 33 finished with value: 4.1709797092509 and parameters: {'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.017428756859167723, 'subsample': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.2002116101063922, 'min_child_weight': 11}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.005765840468106001, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.1114783381821334, 'min_child_weight': 7}


[32m[I 2022-10-01 23:33:01,606][0m Trial 34 finished with value: 4.1770495765387645 and parameters: {'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.005765840468106001, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.1114783381821334, 'min_child_weight': 7}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.024760912232754388, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.21899392152351313, 'min_child_weight': 5}


[32m[I 2022-10-01 23:33:11,526][0m Trial 35 finished with value: 4.170545595984097 and parameters: {'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.024760912232754388, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.21899392152351313, 'min_child_weight': 5}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 9, 'learning_rate': 0.02598395874580684, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.23484746890974081, 'min_child_weight': 9}


[32m[I 2022-10-01 23:33:54,706][0m Trial 36 finished with value: 4.221432634935833 and parameters: {'n_estimators': 851, 'max_depth': 9, 'learning_rate': 0.02598395874580684, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.23484746890974081, 'min_child_weight': 9}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 1, 'max_depth': 8, 'learning_rate': 0.07765528873443159, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.38082705311890364, 'min_child_weight': 1}


[32m[I 2022-10-01 23:33:55,263][0m Trial 37 finished with value: 4.644989884235623 and parameters: {'n_estimators': 1, 'max_depth': 8, 'learning_rate': 0.07765528873443159, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.38082705311890364, 'min_child_weight': 1}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 6, 'learning_rate': 0.04387085985878907, 'subsample ': 0.4, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5194121274622292, 'min_child_weight': 15}


[32m[I 2022-10-01 23:34:12,860][0m Trial 38 finished with value: 4.188614779888647 and parameters: {'n_estimators': 951, 'max_depth': 6, 'learning_rate': 0.04387085985878907, 'subsample': 0.4, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5194121274622292, 'min_child_weight': 15}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 17, 'learning_rate': 0.2316667183755985, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.06100289927765788, 'min_child_weight': 19}


[32m[I 2022-10-01 23:34:33,151][0m Trial 39 finished with value: 4.17360334441439 and parameters: {'n_estimators': 851, 'max_depth': 17, 'learning_rate': 0.2316667183755985, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.06100289927765788, 'min_child_weight': 19}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.0025082381972447534, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3233708768778338, 'min_child_weight': 3}


[32m[I 2022-10-01 23:34:41,385][0m Trial 40 finished with value: 4.194584299847189 and parameters: {'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.0025082381972447534, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3233708768778338, 'min_child_weight': 3}. Best is trial 0 with value: 4.170444491313877.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 3, 'learning_rate': 0.018092609849236857, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.17368404706429208, 'min_child_weight': 5}


[32m[I 2022-10-01 23:34:48,521][0m Trial 41 finished with value: 4.170432807048196 and parameters: {'n_estimators': 601, 'max_depth': 3, 'learning_rate': 0.018092609849236857, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.17368404706429208, 'min_child_weight': 5}. Best is trial 41 with value: 4.170432807048196.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.02840272934427539, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.19204471483988733, 'min_child_weight': 7}


[32m[I 2022-10-01 23:34:55,180][0m Trial 42 finished with value: 4.1704319840269735 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.02840272934427539, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.19204471483988733, 'min_child_weight': 7}. Best is trial 42 with value: 4.1704319840269735.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 2, 'learning_rate': 0.027570952029817863, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2748792806697422, 'min_child_weight': 5}


[32m[I 2022-10-01 23:35:01,361][0m Trial 43 finished with value: 4.17042856733853 and parameters: {'n_estimators': 551, 'max_depth': 2, 'learning_rate': 0.027570952029817863, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2748792806697422, 'min_child_weight': 5}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.01796155868651728, 'subsample ': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.28187967821694787, 'min_child_weight': 7}


[32m[I 2022-10-01 23:35:08,027][0m Trial 44 finished with value: 4.170464030651249 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.01796155868651728, 'subsample': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.28187967821694787, 'min_child_weight': 7}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.03387862076389237, 'subsample ': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.4287034705571962, 'min_child_weight': 3}


[32m[I 2022-10-01 23:35:14,684][0m Trial 45 finished with value: 4.170441238716656 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.03387862076389237, 'subsample': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.4287034705571962, 'min_child_weight': 3}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.03251206941728072, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.42165304768868883, 'min_child_weight': 3}


[32m[I 2022-10-01 23:35:19,278][0m Trial 46 finished with value: 4.170455620378326 and parameters: {'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.03251206941728072, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.42165304768868883, 'min_child_weight': 3}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 5, 'learning_rate': 0.04311484163217578, 'subsample ': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.5325782164634559, 'min_child_weight': 1}


[32m[I 2022-10-01 23:35:27,596][0m Trial 47 finished with value: 4.171349581582413 and parameters: {'n_estimators': 551, 'max_depth': 5, 'learning_rate': 0.04311484163217578, 'subsample': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.5325782164634559, 'min_child_weight': 1}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.08229885714914471, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.4616596787607629, 'min_child_weight': 3}


[32m[I 2022-10-01 23:35:32,692][0m Trial 48 finished with value: 4.170617953066028 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.08229885714914471, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.4616596787607629, 'min_child_weight': 3}. Best is trial 43 with value: 4.17042856733853.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 3, 'learning_rate': 0.029852887495159474, 'subsample ': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.35613656752290174, 'min_child_weight': 31}


[32m[I 2022-10-01 23:35:40,389][0m Trial 49 finished with value: 4.170502153821777 and parameters: {'n_estimators': 651, 'max_depth': 3, 'learning_rate': 0.029852887495159474, 'subsample': 0.8, 'colsample_bytree': 0.4, 'gamma': 0.35613656752290174, 'min_child_weight': 31}. Best is trial 43 with value: 4.17042856733853.[0m


Final best parameters:


{'n_estimators': 551,
 'max_depth': 2,
 'learning_rate': 0.027570952029817863,
 'subsample': 0.7000000000000001,
 'colsample_bytree': 0.4,
 'gamma': 0.2748792806697422,
 'min_child_weight': 5}

In [10]:
best_params = study.best_params.copy()

model = xgb.XGBRegressor(objective="reg:squarederror",
                          n_jobs=-1,
                          grow_policy='lossguide',
                          tree_method="gpu_hist",
                          predictor="gpu_predictor",
                          booster='gbtree',
                          sampling_method='gradient_based',
                          eval_metrics=['rmse'],
                          random_state=RANDOM_SEED,
                          enable_categorical=False,
                          **best_params)
pipe = make_pipeline(StandardScaler(), model)
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('xgbregressor',
                 XGBRegressor(colsample_bytree=0.4, enable_categorical=False,
                              eval_metrics=['rmse'], gamma=0.2748792806697422,
                              grow_policy='lossguide',
                              learning_rate=0.027570952029817863, max_depth=2,
                              min_child_weight=5, n_estimators=551, n_jobs=-1,
                              objective='reg:squarederror',
                              predictor='gpu_predictor', random_state=2,
                              sampling_method='gradient_based',
                              subsample=0.7000000000000001,
                              tree_method='gpu_hist'))])

In [11]:
train_pred = pipe.predict(X_train)
test_pred = pipe.predict(X_test)
print(f"train results - RMSE: {mean_squared_error(y_train, train_pred, squared=False)}, MAE: {mean_absolute_error(y_train, train_pred)}")
print(f"test results - RMSE: {mean_squared_error(y_test, test_pred, squared=False)}, MAE: {mean_absolute_error(y_test, test_pred)}")

train results - RMSE: 4.169373151675538, MAE: 1.8406951278670132
test results - RMSE: 4.205674601785969, MAE: 1.844242683005929


### Model 2 : Using vadar sentiment, textblob subjectivity and  dale chall readability as the predictors

In [12]:
x2 = Combined[['vadar_sen','textblob_sub','dale_read']]
y2 = Combined['r_useful']

X_train, X_test, y_train, y_test = train_test_split(x2, y2, test_size=100000, random_state=RANDOM_SEED)

study = opt.create_study(direction='minimize', sampler=TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=50)
print("Final best parameters:")
study.best_params

[32m[I 2022-10-01 23:35:42,738][0m A new study created in memory with name: no-name-382787ff-ef7c-401d-8a53-744fed63d930[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}


[32m[I 2022-10-01 23:35:48,530][0m Trial 0 finished with value: 4.184410267963064 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}


[32m[I 2022-10-01 23:36:07,238][0m Trial 1 finished with value: 4.185628690131138 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}


[32m[I 2022-10-01 23:36:46,250][0m Trial 2 finished with value: 4.390257648353219 and parameters: {'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample ': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}


[32m[I 2022-10-01 23:36:52,685][0m Trial 3 finished with value: 4.636094697558039 and parameters: {'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}


[32m[I 2022-10-01 23:37:02,140][0m Trial 4 finished with value: 4.188184157650932 and parameters: {'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}


[32m[I 2022-10-01 23:39:35,526][0m Trial 5 finished with value: 4.320785643727068 and parameters: {'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}


[32m[I 2022-10-01 23:40:06,726][0m Trial 6 finished with value: 4.221742356804713 and parameters: {'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}


[32m[I 2022-10-01 23:45:38,921][0m Trial 7 finished with value: 4.228745591406989 and parameters: {'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}


[32m[I 2022-10-01 23:45:42,555][0m Trial 8 finished with value: 4.191898233957155 and parameters: {'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}


[32m[I 2022-10-01 23:45:47,040][0m Trial 9 finished with value: 4.185503585258076 and parameters: {'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}


[32m[I 2022-10-01 23:45:54,636][0m Trial 10 finished with value: 4.185154309992303 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}


[32m[I 2022-10-01 23:46:02,208][0m Trial 11 finished with value: 4.18490326464192 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}


[32m[I 2022-10-01 23:46:08,980][0m Trial 12 finished with value: 4.185461138900243 and parameters: {'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}


[32m[I 2022-10-01 23:46:21,703][0m Trial 13 finished with value: 4.184616671061473 and parameters: {'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}


[32m[I 2022-10-01 23:46:35,273][0m Trial 14 finished with value: 4.184631190416563 and parameters: {'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}


[32m[I 2022-10-01 23:48:00,215][0m Trial 15 finished with value: 4.18666260589011 and parameters: {'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}


[32m[I 2022-10-01 23:48:07,287][0m Trial 16 finished with value: 4.2207608517154585 and parameters: {'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}


[32m[I 2022-10-01 23:48:49,032][0m Trial 17 finished with value: 4.18674685569347 and parameters: {'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample ': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}


[32m[I 2022-10-01 23:49:35,466][0m Trial 18 finished with value: 4.259209901271743 and parameters: {'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}


[32m[I 2022-10-01 23:49:47,045][0m Trial 19 finished with value: 4.184792790101917 and parameters: {'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}


[32m[I 2022-10-01 23:50:51,232][0m Trial 20 finished with value: 4.198287204623548 and parameters: {'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.008010388181835304, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11421692700161701, 'min_child_weight': 11}


[32m[I 2022-10-01 23:51:02,874][0m Trial 21 finished with value: 4.184526360496847 and parameters: {'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.008010388181835304, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11421692700161701, 'min_child_weight': 11}. Best is trial 0 with value: 4.184410267963064.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.012295126389199834, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.17087415500083808, 'min_child_weight': 7}


[32m[I 2022-10-01 23:51:13,131][0m Trial 22 finished with value: 4.184377254017597 and parameters: {'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.012295126389199834, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.17087415500083808, 'min_child_weight': 7}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.011859386089452119, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1652711305100874, 'min_child_weight': 5}


[32m[I 2022-10-01 23:51:19,911][0m Trial 23 finished with value: 4.184657625010016 and parameters: {'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.011859386089452119, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1652711305100874, 'min_child_weight': 5}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 2, 'learning_rate': 0.0010319936285112766, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.36547344664067244, 'min_child_weight': 21}


[32m[I 2022-10-01 23:51:28,087][0m Trial 24 finished with value: 4.337776174758993 and parameters: {'n_estimators': 651, 'max_depth': 2, 'learning_rate': 0.0010319936285112766, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.36547344664067244, 'min_child_weight': 21}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 4, 'learning_rate': 0.056086176723193625, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.25987437534504965, 'min_child_weight': 13}


[32m[I 2022-10-01 23:51:41,089][0m Trial 25 finished with value: 4.185656963138397 and parameters: {'n_estimators': 901, 'max_depth': 4, 'learning_rate': 0.056086176723193625, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.25987437534504965, 'min_child_weight': 13}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.021345889163581308, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11935505942048355, 'min_child_weight': 5}


[32m[I 2022-10-01 23:51:55,490][0m Trial 26 finished with value: 4.184581173273938 and parameters: {'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.021345889163581308, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.11935505942048355, 'min_child_weight': 5}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 6, 'learning_rate': 0.004600521421110267, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.22885374612419082, 'min_child_weight': 9}


[32m[I 2022-10-01 23:52:03,114][0m Trial 27 finished with value: 4.206398299329062 and parameters: {'n_estimators': 351, 'max_depth': 6, 'learning_rate': 0.004600521421110267, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.22885374612419082, 'min_child_weight': 9}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 3, 'learning_rate': 0.056883989188700565, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.42533341186971163, 'min_child_weight': 13}


[32m[I 2022-10-01 23:52:13,685][0m Trial 28 finished with value: 4.184917116171226 and parameters: {'n_estimators': 801, 'max_depth': 3, 'learning_rate': 0.056883989188700565, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.42533341186971163, 'min_child_weight': 13}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.00573694294284507, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.14560240440368483, 'min_child_weight': 3}


[32m[I 2022-10-01 23:52:32,606][0m Trial 29 finished with value: 4.1879339165115566 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.00573694294284507, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.14560240440368483, 'min_child_weight': 3}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 14, 'learning_rate': 0.03461052337553825, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2920733042834885, 'min_child_weight': 7}


[32m[I 2022-10-01 23:53:20,698][0m Trial 30 finished with value: 4.188009057262705 and parameters: {'n_estimators': 701, 'max_depth': 14, 'learning_rate': 0.03461052337553825, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.2920733042834885, 'min_child_weight': 7}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.01996181984303479, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.10056342184652142, 'min_child_weight': 5}


[32m[I 2022-10-01 23:53:35,119][0m Trial 31 finished with value: 4.184476331862344 and parameters: {'n_estimators': 1001, 'max_depth': 4, 'learning_rate': 0.01996181984303479, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.10056342184652142, 'min_child_weight': 5}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.017703866961107716, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.09399809125540032, 'min_child_weight': 3}


[32m[I 2022-10-01 23:53:51,870][0m Trial 32 finished with value: 4.185119403282804 and parameters: {'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.017703866961107716, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.09399809125540032, 'min_child_weight': 3}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 2, 'learning_rate': 0.012279183687168207, 'subsample ': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.16696400691141483, 'min_child_weight': 11}


[32m[I 2022-10-01 23:54:03,599][0m Trial 33 finished with value: 4.1844483503311025 and parameters: {'n_estimators': 951, 'max_depth': 2, 'learning_rate': 0.012279183687168207, 'subsample': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.16696400691141483, 'min_child_weight': 11}. Best is trial 22 with value: 4.184377254017597.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 3, 'learning_rate': 0.024904057503886284, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.19534665236418927, 'min_child_weight': 7}


[32m[I 2022-10-01 23:54:16,052][0m Trial 34 finished with value: 4.184353300767969 and parameters: {'n_estimators': 951, 'max_depth': 3, 'learning_rate': 0.024904057503886284, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.19534665236418927, 'min_child_weight': 7}. Best is trial 34 with value: 4.184353300767969.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 2, 'learning_rate': 0.07941752912738308, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1974322348766473, 'min_child_weight': 9}


[32m[I 2022-10-01 23:54:27,753][0m Trial 35 finished with value: 4.1848977607962565 and parameters: {'n_estimators': 951, 'max_depth': 2, 'learning_rate': 0.07941752912738308, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.1974322348766473, 'min_child_weight': 9}. Best is trial 34 with value: 4.184353300767969.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 3, 'learning_rate': 0.027512043365897736, 'subsample ': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.23612365178456846, 'min_child_weight': 15}


[32m[I 2022-10-01 23:54:35,162][0m Trial 36 finished with value: 4.18430339183535 and parameters: {'n_estimators': 551, 'max_depth': 3, 'learning_rate': 0.027512043365897736, 'subsample': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.23612365178456846, 'min_child_weight': 15}. Best is trial 36 with value: 4.18430339183535.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 8, 'learning_rate': 0.03072353994340349, 'subsample ': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.39598860424194093, 'min_child_weight': 19}


[32m[I 2022-10-01 23:54:49,829][0m Trial 37 finished with value: 4.186175911136306 and parameters: {'n_estimators': 501, 'max_depth': 8, 'learning_rate': 0.03072353994340349, 'subsample': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.39598860424194093, 'min_child_weight': 19}. Best is trial 36 with value: 4.18430339183535.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 16, 'learning_rate': 0.04756957094452986, 'subsample ': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.5194121274622292, 'min_child_weight': 15}


[32m[I 2022-10-01 23:55:33,813][0m Trial 38 finished with value: 4.1880696334486265 and parameters: {'n_estimators': 551, 'max_depth': 16, 'learning_rate': 0.04756957094452986, 'subsample': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.5194121274622292, 'min_child_weight': 15}. Best is trial 36 with value: 4.18430339183535.[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.3876774185713443, 'subsample ': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.24393719922472687, 'min_child_weight': 1}


[32m[I 2022-10-01 23:55:36,269][0m Trial 39 finished with value: 4.18759447459652 and parameters: {'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.3876774185713443, 'subsample': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.24393719922472687, 'min_child_weight': 1}. Best is trial 36 with value: 4.18430339183535.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.04292579537535174, 'subsample ': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.3233708768778338, 'min_child_weight': 27}


[32m[I 2022-10-01 23:55:41,804][0m Trial 40 finished with value: 4.18431563564802 and parameters: {'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.04292579537535174, 'subsample': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.3233708768778338, 'min_child_weight': 27}. Best is trial 36 with value: 4.18430339183535.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.04264736899360023, 'subsample ': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.32771049088968796, 'min_child_weight': 31}


[32m[I 2022-10-01 23:55:47,332][0m Trial 41 finished with value: 4.184294978544556 and parameters: {'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.04264736899360023, 'subsample': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.32771049088968796, 'min_child_weight': 31}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.026334068115748455, 'subsample ': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.33156477655469224, 'min_child_weight': 31}


[32m[I 2022-10-01 23:55:52,865][0m Trial 42 finished with value: 4.184357100830186 and parameters: {'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.026334068115748455, 'subsample': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.33156477655469224, 'min_child_weight': 31}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 3, 'learning_rate': 0.043382146985568644, 'subsample ': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.3431486225569692, 'min_child_weight': 31}


[32m[I 2022-10-01 23:55:57,776][0m Trial 43 finished with value: 4.184307994723188 and parameters: {'n_estimators': 351, 'max_depth': 3, 'learning_rate': 0.043382146985568644, 'subsample': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.3431486225569692, 'min_child_weight': 31}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.04182070317681725, 'subsample ': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.3793718081569719, 'min_child_weight': 27}


[32m[I 2022-10-01 23:56:01,444][0m Trial 44 finished with value: 4.1843860865419265 and parameters: {'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.04182070317681725, 'subsample': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.3793718081569719, 'min_child_weight': 27}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 5, 'learning_rate': 0.08128582898377772, 'subsample ': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.5182414585566966, 'min_child_weight': 27}


[32m[I 2022-10-01 23:56:06,698][0m Trial 45 finished with value: 4.185211027503537 and parameters: {'n_estimators': 301, 'max_depth': 5, 'learning_rate': 0.08128582898377772, 'subsample': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.5182414585566966, 'min_child_weight': 27}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 6, 'learning_rate': 0.16434930082205848, 'subsample ': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.2928571071723984, 'min_child_weight': 29}


[32m[I 2022-10-01 23:56:15,080][0m Trial 46 finished with value: 4.187490477507732 and parameters: {'n_estimators': 451, 'max_depth': 6, 'learning_rate': 0.16434930082205848, 'subsample': 0.5, 'colsample_bytree': 0.4, 'gamma': 0.2928571071723984, 'min_child_weight': 29}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 3, 'learning_rate': 0.10367113279183794, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.4382733649892141, 'min_child_weight': 29}


[32m[I 2022-10-01 23:56:17,455][0m Trial 47 finished with value: 4.184376363554878 and parameters: {'n_estimators': 151, 'max_depth': 3, 'learning_rate': 0.10367113279183794, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.4382733649892141, 'min_child_weight': 29}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.06743163986970109, 'subsample ': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.3533514176127199, 'min_child_weight': 25}


[32m[I 2022-10-01 23:56:22,076][0m Trial 48 finished with value: 4.184452292928646 and parameters: {'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.06743163986970109, 'subsample': 0.4, 'colsample_bytree': 0.4, 'gamma': 0.3533514176127199, 'min_child_weight': 25}. Best is trial 41 with value: 4.184294978544556.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 7, 'learning_rate': 0.026645345469657165, 'subsample ': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.5619614850253246, 'min_child_weight': 29}


[32m[I 2022-10-01 23:56:29,069][0m Trial 49 finished with value: 4.18502031814434 and parameters: {'n_estimators': 251, 'max_depth': 7, 'learning_rate': 0.026645345469657165, 'subsample': 0.5, 'colsample_bytree': 0.5, 'gamma': 0.5619614850253246, 'min_child_weight': 29}. Best is trial 41 with value: 4.184294978544556.[0m


Final best parameters:


{'n_estimators': 401,
 'max_depth': 3,
 'learning_rate': 0.04264736899360023,
 'subsample': 0.5,
 'colsample_bytree': 0.5,
 'gamma': 0.32771049088968796,
 'min_child_weight': 31}

In [13]:
best_params = study.best_params.copy()

model = xgb.XGBRegressor(objective="reg:squarederror",
                          n_jobs=-1,
                          grow_policy='lossguide',
                          tree_method="gpu_hist",
                          predictor="gpu_predictor",
                          booster='gbtree',
                          sampling_method='gradient_based',
                          eval_metrics=['rmse'],
                          random_state=RANDOM_SEED,
                          enable_categorical=False,
                          **best_params)
pipe = make_pipeline(StandardScaler(), model)
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('xgbregressor',
                 XGBRegressor(colsample_bytree=0.5, enable_categorical=False,
                              eval_metrics=['rmse'], gamma=0.32771049088968796,
                              grow_policy='lossguide',
                              learning_rate=0.04264736899360023,
                              min_child_weight=31, n_estimators=401, n_jobs=-1,
                              objective='reg:squarederror',
                              predictor='gpu_predictor', random_state=2,
                              sampling_method='gradient_based', subsample=0.5,
                              tree_method='gpu_hist'))])

In [14]:
train_pred = pipe.predict(X_train)
test_pred = pipe.predict(X_test)
print(f"train results - RMSE: {mean_squared_error(y_train, train_pred, squared=False)}, MAE: {mean_absolute_error(y_train, train_pred)}")
print(f"test results - RMSE: {mean_squared_error(y_test, test_pred, squared=False)}, MAE: {mean_absolute_error(y_test, test_pred)}")

train results - RMSE: 4.181972227079587, MAE: 1.8525715696616472
test results - RMSE: 4.217486070702622, MAE: 1.8575606261485815


### Model 3 : Using vadar sentiment, textblob subjectivity and coleman liau index as the predictors

In [15]:
x3 = Combined[['vadar_sen','textblob_sub','cli_read']]
y3 = Combined['r_useful']

X_train, X_test, y_train, y_test = train_test_split(x3, y3, test_size=100000, random_state=RANDOM_SEED)

study = opt.create_study(direction='minimize', sampler=TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=50)
print("Final best parameters:")
study.best_params

[32m[I 2022-10-01 23:56:31,179][0m A new study created in memory with name: no-name-fccc6172-4eab-40f3-a346-fa2e7eaaa9ae[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}


[32m[I 2022-10-01 23:56:36,995][0m Trial 0 finished with value: 4.179194592068344 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}


[32m[I 2022-10-01 23:56:55,944][0m Trial 1 finished with value: 4.181253151286108 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}


[32m[I 2022-10-01 23:57:33,446][0m Trial 2 finished with value: 4.389364551345759 and parameters: {'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample ': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}


[32m[I 2022-10-01 23:57:40,115][0m Trial 3 finished with value: 4.635623369605753 and parameters: {'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}


[32m[I 2022-10-01 23:57:49,058][0m Trial 4 finished with value: 4.183930074505818 and parameters: {'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}


[32m[I 2022-10-02 00:00:30,370][0m Trial 5 finished with value: 4.324422046033247 and parameters: {'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}


[32m[I 2022-10-02 00:01:01,632][0m Trial 6 finished with value: 4.213593635745285 and parameters: {'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}


[32m[I 2022-10-02 00:06:55,031][0m Trial 7 finished with value: 4.234645026611073 and parameters: {'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}


[32m[I 2022-10-02 00:06:58,682][0m Trial 8 finished with value: 4.184054100154342 and parameters: {'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}


[32m[I 2022-10-02 00:07:03,322][0m Trial 9 finished with value: 4.180423517009305 and parameters: {'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}


[32m[I 2022-10-02 00:07:10,949][0m Trial 10 finished with value: 4.1792832509765665 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}. Best is trial 0 with value: 4.179194592068344.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}


[32m[I 2022-10-02 00:07:18,586][0m Trial 11 finished with value: 4.179189719339554 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}. Best is trial 11 with value: 4.179189719339554.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}


[32m[I 2022-10-02 00:07:25,372][0m Trial 12 finished with value: 4.178974131434575 and parameters: {'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.123972595010698, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.7022599309203897, 'min_child_weight': 9}


[32m[I 2022-10-02 00:07:37,530][0m Trial 13 finished with value: 4.180412416776805 and parameters: {'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.123972595010698, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.7022599309203897, 'min_child_weight': 9}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 5, 'learning_rate': 0.11051923151008129, 'subsample ': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.4890690974641637, 'min_child_weight': 11}


[32m[I 2022-10-02 00:07:43,503][0m Trial 14 finished with value: 4.179242585894881 and parameters: {'n_estimators': 351, 'max_depth': 5, 'learning_rate': 0.11051923151008129, 'subsample': 1.0, 'colsample_bytree': 0.5, 'gamma': 0.4890690974641637, 'min_child_weight': 11}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 20, 'learning_rate': 0.3691316534982592, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.02292380820403933, 'min_child_weight': 31}


[32m[I 2022-10-02 00:08:07,932][0m Trial 15 finished with value: 4.1812139652243525 and parameters: {'n_estimators': 551, 'max_depth': 20, 'learning_rate': 0.3691316534982592, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.02292380820403933, 'min_child_weight': 31}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 4, 'learning_rate': 0.07819141554345434, 'subsample ': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.18938509982428958, 'min_child_weight': 11}


[32m[I 2022-10-02 00:08:18,891][0m Trial 16 finished with value: 4.17929207668053 and parameters: {'n_estimators': 751, 'max_depth': 4, 'learning_rate': 0.07819141554345434, 'subsample': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.18938509982428958, 'min_child_weight': 11}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 13, 'learning_rate': 0.010596553667569213, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3571469000849381, 'min_child_weight': 5}


[32m[I 2022-10-02 00:09:15,397][0m Trial 17 finished with value: 4.180152114861267 and parameters: {'n_estimators': 751, 'max_depth': 13, 'learning_rate': 0.010596553667569213, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3571469000849381, 'min_child_weight': 5}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 10, 'learning_rate': 0.0697901831571362, 'subsample ': 1.0, 'colsample_bytree': 0.4, 'gamma': 0.5865542160019308, 'min_child_weight': 13}


[32m[I 2022-10-02 00:09:28,465][0m Trial 18 finished with value: 4.180684398609542 and parameters: {'n_estimators': 351, 'max_depth': 10, 'learning_rate': 0.0697901831571362, 'subsample': 1.0, 'colsample_bytree': 0.4, 'gamma': 0.5865542160019308, 'min_child_weight': 13}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.21242943886052557, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.29661169050180924, 'min_child_weight': 23}


[32m[I 2022-10-02 00:09:37,503][0m Trial 19 finished with value: 4.180906587131859 and parameters: {'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.21242943886052557, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.5, 'gamma': 0.29661169050180924, 'min_child_weight': 23}. Best is trial 12 with value: 4.178974131434575.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 4, 'learning_rate': 0.05312275326140315, 'subsample ': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.41348607477210364, 'min_child_weight': 1}


[32m[I 2022-10-02 00:09:47,117][0m Trial 20 finished with value: 4.178786336908467 and parameters: {'n_estimators': 651, 'max_depth': 4, 'learning_rate': 0.05312275326140315, 'subsample': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.41348607477210364, 'min_child_weight': 1}. Best is trial 20 with value: 4.178786336908467.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 4, 'learning_rate': 0.06549166404588064, 'subsample ': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.41091111237348843, 'min_child_weight': 3}


[32m[I 2022-10-02 00:09:56,687][0m Trial 21 finished with value: 4.178926163891651 and parameters: {'n_estimators': 651, 'max_depth': 4, 'learning_rate': 0.06549166404588064, 'subsample': 0.9, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.41091111237348843, 'min_child_weight': 3}. Best is trial 20 with value: 4.178786336908467.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 5, 'learning_rate': 0.06276731389624311, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.40937140557531354, 'min_child_weight': 1}


[32m[I 2022-10-02 00:10:10,484][0m Trial 22 finished with value: 4.17963798542424 and parameters: {'n_estimators': 851, 'max_depth': 5, 'learning_rate': 0.06276731389624311, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.40937140557531354, 'min_child_weight': 1}. Best is trial 20 with value: 4.178786336908467.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.014298237141647706, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6098120124360216, 'min_child_weight': 3}


[32m[I 2022-10-02 00:10:19,848][0m Trial 23 finished with value: 4.176112338334519 and parameters: {'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.014298237141647706, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6098120124360216, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 9, 'learning_rate': 0.012605030685140349, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5999952899947919, 'min_child_weight': 3}


[32m[I 2022-10-02 00:11:04,046][0m Trial 24 finished with value: 4.221817523633262 and parameters: {'n_estimators': 701, 'max_depth': 9, 'learning_rate': 0.012605030685140349, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5999952899947919, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 4, 'learning_rate': 0.005709539579827443, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.7307985863243737, 'min_child_weight': 3}


[32m[I 2022-10-02 00:11:16,302][0m Trial 25 finished with value: 4.176113728465143 and parameters: {'n_estimators': 851, 'max_depth': 4, 'learning_rate': 0.005709539579827443, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.7307985863243737, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.004566955323826143, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.7701588872835062, 'min_child_weight': 1}


[32m[I 2022-10-02 00:11:34,039][0m Trial 26 finished with value: 4.183506111046189 and parameters: {'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.004566955323826143, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.7701588872835062, 'min_child_weight': 1}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.005760545447429321, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6953596733214178, 'min_child_weight': 5}


[32m[I 2022-10-02 00:11:47,053][0m Trial 27 finished with value: 4.1762941650780485 and parameters: {'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.005760545447429321, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6953596733214178, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.004591225547602008, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.877450621799819, 'min_child_weight': 5}


[32m[I 2022-10-02 00:12:00,078][0m Trial 28 finished with value: 4.1767234838708 and parameters: {'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.004591225547602008, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.877450621799819, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.0026327220422306813, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6940804638774589, 'min_child_weight': 5}


[32m[I 2022-10-02 00:12:11,210][0m Trial 29 finished with value: 4.186237650177428 and parameters: {'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.0026327220422306813, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6940804638774589, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 14, 'learning_rate': 0.007991932972716833, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8426413122808062, 'min_child_weight': 9}


[32m[I 2022-10-02 00:18:24,072][0m Trial 30 finished with value: 4.274611824672747 and parameters: {'n_estimators': 951, 'max_depth': 14, 'learning_rate': 0.007991932972716833, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8426413122808062, 'min_child_weight': 9}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.0035748784024822195, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.8939775087506765, 'min_child_weight': 5}


[32m[I 2022-10-02 00:18:37,110][0m Trial 31 finished with value: 4.177666641847996 and parameters: {'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.0035748784024822195, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.8939775087506765, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.006724570602896908, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6628528838356941, 'min_child_weight': 5}


[32m[I 2022-10-02 00:18:48,272][0m Trial 32 finished with value: 4.176296698908259 and parameters: {'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.006724570602896908, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.6628528838356941, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 6, 'learning_rate': 0.007442571864203811, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6686353802733426, 'min_child_weight': 21}


[32m[I 2022-10-02 00:19:05,029][0m Trial 33 finished with value: 4.178716466575623 and parameters: {'n_estimators': 801, 'max_depth': 6, 'learning_rate': 0.007442571864203811, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6686353802733426, 'min_child_weight': 21}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.015755756175167494, 'subsample ': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.5531783826945744, 'min_child_weight': 3}


[32m[I 2022-10-02 00:19:16,118][0m Trial 34 finished with value: 4.1765042643429835 and parameters: {'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.015755756175167494, 'subsample': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.5531783826945744, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 7, 'learning_rate': 0.007012864501891255, 'subsample ': 1.0, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7693345627163048, 'min_child_weight': 7}


[32m[I 2022-10-02 00:19:39,166][0m Trial 35 finished with value: 4.185612720491391 and parameters: {'n_estimators': 801, 'max_depth': 7, 'learning_rate': 0.007012864501891255, 'subsample': 1.0, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7693345627163048, 'min_child_weight': 7}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.0027036069583011788, 'subsample ': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.6541479653005803, 'min_child_weight': 11}


[32m[I 2022-10-02 00:19:50,433][0m Trial 36 finished with value: 4.185024981489607 and parameters: {'n_estimators': 851, 'max_depth': 3, 'learning_rate': 0.0027036069583011788, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.6541479653005803, 'min_child_weight': 11}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 5, 'learning_rate': 0.001080997266389098, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.5294153411806469, 'min_child_weight': 5}


[32m[I 2022-10-02 00:20:06,300][0m Trial 37 finished with value: 4.253953838330119 and parameters: {'n_estimators': 951, 'max_depth': 5, 'learning_rate': 0.001080997266389098, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.5294153411806469, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 8, 'learning_rate': 0.02105713662264059, 'subsample ': 0.4, 'colsample_bytree': 1.0, 'gamma': 0.7384284394019128, 'min_child_weight': 3}


[32m[I 2022-10-02 00:20:43,006][0m Trial 38 finished with value: 4.246905115618431 and parameters: {'n_estimators': 951, 'max_depth': 8, 'learning_rate': 0.02105713662264059, 'subsample': 0.4, 'colsample_bytree': 1.0, 'gamma': 0.7384284394019128, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 17, 'learning_rate': 0.01030621557072489, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8059591814278385, 'min_child_weight': 7}


[32m[I 2022-10-02 00:33:34,055][0m Trial 39 finished with value: 4.386026219887744 and parameters: {'n_estimators': 801, 'max_depth': 17, 'learning_rate': 0.01030621557072489, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8059591814278385, 'min_child_weight': 7}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 1, 'max_depth': 3, 'learning_rate': 0.006553363014738421, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.6345767294435145, 'min_child_weight': 13}


[32m[I 2022-10-02 00:33:34,495][0m Trial 40 finished with value: 4.7157224473800605 and parameters: {'n_estimators': 1, 'max_depth': 3, 'learning_rate': 0.006553363014738421, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.6345767294435145, 'min_child_weight': 13}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.01693919339998827, 'subsample ': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.5433154576946828, 'min_child_weight': 3}


[32m[I 2022-10-02 00:33:45,600][0m Trial 41 finished with value: 4.176469438028423 and parameters: {'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.01693919339998827, 'subsample': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.5433154576946828, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.015753601622013783, 'subsample ': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.727974658369849, 'min_child_weight': 3}


[32m[I 2022-10-02 00:33:56,694][0m Trial 42 finished with value: 4.176493046070219 and parameters: {'n_estimators': 901, 'max_depth': 2, 'learning_rate': 0.015753601622013783, 'subsample': 0.9, 'colsample_bytree': 1.0, 'gamma': 0.727974658369849, 'min_child_weight': 3}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.03627461661160093, 'subsample ': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.5506748051621423, 'min_child_weight': 1}


[32m[I 2022-10-02 00:34:06,911][0m Trial 43 finished with value: 4.183611139498345 and parameters: {'n_estimators': 701, 'max_depth': 4, 'learning_rate': 0.03627461661160093, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.5506748051621423, 'min_child_weight': 1}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.0033286765427124492, 'subsample ': 0.5, 'colsample_bytree': 0.9, 'gamma': 0.4632542871539193, 'min_child_weight': 5}


[32m[I 2022-10-02 00:34:20,051][0m Trial 44 finished with value: 4.17850037345401 and parameters: {'n_estimators': 1001, 'max_depth': 3, 'learning_rate': 0.0033286765427124492, 'subsample': 0.5, 'colsample_bytree': 0.9, 'gamma': 0.4632542871539193, 'min_child_weight': 5}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 2, 'learning_rate': 0.025652116599233388, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.6195836842416972, 'min_child_weight': 7}


[32m[I 2022-10-02 00:34:30,642][0m Trial 45 finished with value: 4.17624406155283 and parameters: {'n_estimators': 851, 'max_depth': 2, 'learning_rate': 0.025652116599233388, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.6195836842416972, 'min_child_weight': 7}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.02738837834918351, 'subsample ': 1.0, 'colsample_bytree': 0.8, 'gamma': 0.6289801947815473, 'min_child_weight': 9}


[32m[I 2022-10-02 00:34:48,137][0m Trial 46 finished with value: 4.192117419513212 and parameters: {'n_estimators': 851, 'max_depth': 6, 'learning_rate': 0.02738837834918351, 'subsample': 1.0, 'colsample_bytree': 0.8, 'gamma': 0.6289801947815473, 'min_child_weight': 9}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 5, 'learning_rate': 0.005539643012828344, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.9289093798282513, 'min_child_weight': 7}


[32m[I 2022-10-02 00:34:59,989][0m Trial 47 finished with value: 4.177254263194014 and parameters: {'n_estimators': 701, 'max_depth': 5, 'learning_rate': 0.005539643012828344, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.9289093798282513, 'min_child_weight': 7}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 951, 'max_depth': 7, 'learning_rate': 0.00937934460488247, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.7098362343694936, 'min_child_weight': 7}


[32m[I 2022-10-02 00:35:26,581][0m Trial 48 finished with value: 4.190438116724972 and parameters: {'n_estimators': 951, 'max_depth': 7, 'learning_rate': 0.00937934460488247, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.7098362343694936, 'min_child_weight': 7}. Best is trial 23 with value: 4.176112338334519.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.00195581865378845, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8422767918251292, 'min_child_weight': 19}


[32m[I 2022-10-02 00:35:38,181][0m Trial 49 finished with value: 4.204909109559069 and parameters: {'n_estimators': 801, 'max_depth': 4, 'learning_rate': 0.00195581865378845, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.8422767918251292, 'min_child_weight': 19}. Best is trial 23 with value: 4.176112338334519.[0m


Final best parameters:


{'n_estimators': 701,
 'max_depth': 3,
 'learning_rate': 0.014298237141647706,
 'subsample': 0.9,
 'colsample_bytree': 0.7000000000000001,
 'gamma': 0.6098120124360216,
 'min_child_weight': 3}

In [16]:
best_params = study.best_params.copy()

model = xgb.XGBRegressor(objective="reg:squarederror",
                          n_jobs=-1,
                          grow_policy='lossguide',
                          tree_method="gpu_hist",
                          predictor="gpu_predictor",
                          booster='gbtree',
                          sampling_method='gradient_based',
                          eval_metrics=['rmse'],
                          random_state=RANDOM_SEED,
                          enable_categorical=False,
                          **best_params)
pipe = make_pipeline(StandardScaler(), model)
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('xgbregressor',
                 XGBRegressor(colsample_bytree=0.7000000000000001,
                              enable_categorical=False, eval_metrics=['rmse'],
                              gamma=0.6098120124360216, grow_policy='lossguide',
                              learning_rate=0.014298237141647706,
                              min_child_weight=3, n_estimators=701, n_jobs=-1,
                              objective='reg:squarederror',
                              predictor='gpu_predictor', random_state=2,
                              sampling_method='gradient_based', subsample=0.9,
                              tree_method='gpu_hist'))])

In [17]:
train_pred = pipe.predict(X_train)
test_pred = pipe.predict(X_test)
print(f"train results - RMSE: {mean_squared_error(y_train, train_pred, squared=False)}, MAE: {mean_absolute_error(y_train, train_pred)}")
print(f"test results - RMSE: {mean_squared_error(y_test, test_pred, squared=False)}, MAE: {mean_absolute_error(y_test, test_pred)}")

train results - RMSE: 4.173407799819726, MAE: 1.8459321267873048
test results - RMSE: 4.210057745049822, MAE: 1.8520084429883956


### Model 4 : Using  vadar sentiment, textblob subjectivity and  flesch reading ease core as the predictors

In [18]:
x4 = Combined[['vadar_sen','textblob_sub','textstat_read']]
y4 = Combined['r_useful']

X_train, X_test, y_train, y_test = train_test_split(x4, y4, test_size=100000, random_state=RANDOM_SEED)

study = opt.create_study(direction='minimize', sampler=TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=50)
print("Final best parameters:")
study.best_params

[32m[I 2022-10-02 00:35:41,662][0m A new study created in memory with name: no-name-d01497b9-ab77-4ae9-bd33-083ead7d2f2c[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}


[32m[I 2022-10-02 00:35:47,485][0m Trial 0 finished with value: 4.177273863292622 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.030445460008040798, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.3303348210038741, 'min_child_weight': 7}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}


[32m[I 2022-10-02 00:36:06,420][0m Trial 1 finished with value: 4.178436439079513 and parameters: {'n_estimators': 651, 'max_depth': 7, 'learning_rate': 0.005249994057689375, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.13457994534493356, 'min_child_weight': 17}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}


[32m[I 2022-10-02 00:36:43,740][0m Trial 2 finished with value: 4.389191049073662 and parameters: {'n_estimators': 151, 'max_depth': 16, 'learning_rate': 0.20176865513948422, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.079645477009061, 'min_child_weight': 17}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample ': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}


[32m[I 2022-10-02 00:36:50,444][0m Trial 3 finished with value: 4.635439433274933 and parameters: {'n_estimators': 51, 'max_depth': 10, 'learning_rate': 0.001821939895716244, 'subsample': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.22601200060423587, 'min_child_weight': 3}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}


[32m[I 2022-10-02 00:36:59,405][0m Trial 4 finished with value: 4.181066341330893 and parameters: {'n_estimators': 201, 'max_depth': 8, 'learning_rate': 0.01830393181458461, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.48306983555175165, 'min_child_weight': 17}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}


[32m[I 2022-10-02 00:39:40,280][0m Trial 5 finished with value: 4.325800406232126 and parameters: {'n_estimators': 401, 'max_depth': 17, 'learning_rate': 0.036763248613166925, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.9645510800892552, 'min_child_weight': 17}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}


[32m[I 2022-10-02 00:40:11,432][0m Trial 6 finished with value: 4.211701083595501 and parameters: {'n_estimators': 901, 'max_depth': 8, 'learning_rate': 0.0339394647270345, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.7765591849971003, 'min_child_weight': 17}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}


[32m[I 2022-10-02 00:45:58,295][0m Trial 7 finished with value: 4.225993890244549 and parameters: {'n_estimators': 1001, 'max_depth': 12, 'learning_rate': 0.0016656039140499094, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.4062750430479508, 'min_child_weight': 1}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample ': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}


[32m[I 2022-10-02 00:46:01,951][0m Trial 8 finished with value: 4.181335730471545 and parameters: {'n_estimators': 251, 'max_depth': 3, 'learning_rate': 0.4812566765285445, 'subsample': 1.0, 'colsample_bytree': 0.9, 'gamma': 0.6018171214054674, 'min_child_weight': 25}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}


[32m[I 2022-10-02 00:46:06,577][0m Trial 9 finished with value: 4.17868854098539 and parameters: {'n_estimators': 151, 'max_depth': 7, 'learning_rate': 0.025968135983975103, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.4, 'gamma': 0.9831534453572127, 'min_child_weight': 15}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}


[32m[I 2022-10-02 00:46:14,219][0m Trial 10 finished with value: 4.177480772992163 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.14878592150762665, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.26755914576442896, 'min_child_weight': 7}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}


[32m[I 2022-10-02 00:46:21,870][0m Trial 11 finished with value: 4.177416385022274 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.12108186268688394, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.31583084883117196, 'min_child_weight': 7}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample ': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}


[32m[I 2022-10-02 00:46:28,652][0m Trial 12 finished with value: 4.177585305344988 and parameters: {'n_estimators': 451, 'max_depth': 4, 'learning_rate': 0.09587471064335992, 'subsample': 0.8, 'colsample_bytree': 0.5, 'gamma': 0.33588433426083936, 'min_child_weight': 9}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}


[32m[I 2022-10-02 00:46:41,458][0m Trial 13 finished with value: 4.177290654520502 and parameters: {'n_estimators': 751, 'max_depth': 5, 'learning_rate': 0.009001032133778415, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.019505792608465855, 'min_child_weight': 9}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}


[32m[I 2022-10-02 00:46:55,085][0m Trial 14 finished with value: 4.177351969082526 and parameters: {'n_estimators': 801, 'max_depth': 5, 'learning_rate': 0.007978647512233143, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.07028037368872338, 'min_child_weight': 11}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample ': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}


[32m[I 2022-10-02 00:48:21,583][0m Trial 15 finished with value: 4.178831810022722 and parameters: {'n_estimators': 751, 'max_depth': 20, 'learning_rate': 0.007389560769159681, 'subsample': 0.8, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.0071124309037759945, 'min_child_weight': 31}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}


[32m[I 2022-10-02 00:48:28,663][0m Trial 16 finished with value: 4.21686336976459 and parameters: {'n_estimators': 401, 'max_depth': 5, 'learning_rate': 0.003595457255658072, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.5888538067189393, 'min_child_weight': 11}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample ': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}


[32m[I 2022-10-02 00:49:11,739][0m Trial 17 finished with value: 4.178859340546471 and parameters: {'n_estimators': 751, 'max_depth': 11, 'learning_rate': 0.01373936101740699, 'subsample': 0.9, 'colsample_bytree': 0.4, 'gamma': 0.183151607307064, 'min_child_weight': 5}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample ': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}


[32m[I 2022-10-02 00:49:59,306][0m Trial 18 finished with value: 4.258415697912388 and parameters: {'n_estimators': 301, 'max_depth': 13, 'learning_rate': 0.055301630698558396, 'subsample': 0.7000000000000001, 'colsample_bytree': 1.0, 'gamma': 0.7259843261121932, 'min_child_weight': 13}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}


[32m[I 2022-10-02 00:50:10,982][0m Trial 19 finished with value: 4.177320045955384 and parameters: {'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.01405695940879355, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.46763005173157735, 'min_child_weight': 23}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}


[32m[I 2022-10-02 00:51:16,576][0m Trial 20 finished with value: 4.196940667185124 and parameters: {'n_estimators': 901, 'max_depth': 9, 'learning_rate': 0.0031787525141038185, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.017858553472507695, 'min_child_weight': 1}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 5, 'learning_rate': 0.012294028190805084, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.4120974037461369, 'min_child_weight': 23}


[32m[I 2022-10-02 00:51:25,336][0m Trial 21 finished with value: 4.177415911053665 and parameters: {'n_estimators': 501, 'max_depth': 5, 'learning_rate': 0.012294028190805084, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.4120974037461369, 'min_child_weight': 23}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.06699332402220884, 'subsample ': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.5093015715487756, 'min_child_weight': 23}


[32m[I 2022-10-02 00:51:35,714][0m Trial 22 finished with value: 4.178320637454417 and parameters: {'n_estimators': 551, 'max_depth': 6, 'learning_rate': 0.06699332402220884, 'subsample': 0.9, 'colsample_bytree': 0.5, 'gamma': 0.5093015715487756, 'min_child_weight': 23}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.010657197233623367, 'subsample ': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.6939830411792742, 'min_child_weight': 27}


[32m[I 2022-10-02 00:51:45,095][0m Trial 23 finished with value: 4.177367733433382 and parameters: {'n_estimators': 701, 'max_depth': 3, 'learning_rate': 0.010657197233623367, 'subsample': 1.0, 'colsample_bytree': 0.6000000000000001, 'gamma': 0.6939830411792742, 'min_child_weight': 27}. Best is trial 0 with value: 4.177273863292622.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 4, 'learning_rate': 0.020233330171073996, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.4051062484575695, 'min_child_weight': 21}


[32m[I 2022-10-02 00:51:50,478][0m Trial 24 finished with value: 4.173910423275615 and parameters: {'n_estimators': 351, 'max_depth': 4, 'learning_rate': 0.020233330171073996, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.4051062484575695, 'min_child_weight': 21}. Best is trial 24 with value: 4.173910423275615.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 3, 'learning_rate': 0.0229125302226888, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.3141964054109838, 'min_child_weight': 13}


[32m[I 2022-10-02 00:51:55,397][0m Trial 25 finished with value: 4.173923353793158 and parameters: {'n_estimators': 351, 'max_depth': 3, 'learning_rate': 0.0229125302226888, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.3141964054109838, 'min_child_weight': 13}. Best is trial 24 with value: 4.173910423275615.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.023268243404161684, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.36325088442526166, 'min_child_weight': 13}


[32m[I 2022-10-02 00:52:00,025][0m Trial 26 finished with value: 4.175030313234248 and parameters: {'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.023268243404161684, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.36325088442526166, 'min_child_weight': 13}. Best is trial 24 with value: 4.173910423275615.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 3, 'learning_rate': 0.0553913964489368, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.3996798008809596, 'min_child_weight': 21}


[32m[I 2022-10-02 00:52:04,312][0m Trial 27 finished with value: 4.17346378448078 and parameters: {'n_estimators': 301, 'max_depth': 3, 'learning_rate': 0.0553913964489368, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.3996798008809596, 'min_child_weight': 21}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 1, 'max_depth': 4, 'learning_rate': 0.06286433646416714, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.5373909978341727, 'min_child_weight': 21}


[32m[I 2022-10-02 00:52:04,772][0m Trial 28 finished with value: 4.66008902370447 and parameters: {'n_estimators': 1, 'max_depth': 4, 'learning_rate': 0.06286433646416714, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.5373909978341727, 'min_child_weight': 21}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 14, 'learning_rate': 0.04448723091019811, 'subsample ': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.2759503969716559, 'min_child_weight': 21}


[32m[I 2022-10-02 00:53:07,726][0m Trial 29 finished with value: 4.255966801078 and parameters: {'n_estimators': 301, 'max_depth': 14, 'learning_rate': 0.04448723091019811, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.2759503969716559, 'min_child_weight': 21}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.2512938119116657, 'subsample ': 0.8, 'colsample_bytree': 1.0, 'gamma': 0.41109267650688974, 'min_child_weight': 29}


[32m[I 2022-10-02 00:53:09,471][0m Trial 30 finished with value: 4.174117121601237 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.2512938119116657, 'subsample': 0.8, 'colsample_bytree': 1.0, 'gamma': 0.41109267650688974, 'min_child_weight': 29}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.32874858920068817, 'subsample ': 0.8, 'colsample_bytree': 1.0, 'gamma': 0.3838238899448096, 'min_child_weight': 31}


[32m[I 2022-10-02 00:53:11,226][0m Trial 31 finished with value: 4.1751802425250215 and parameters: {'n_estimators': 101, 'max_depth': 3, 'learning_rate': 0.32874858920068817, 'subsample': 0.8, 'colsample_bytree': 1.0, 'gamma': 0.3838238899448096, 'min_child_weight': 31}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 4, 'learning_rate': 0.21726466537009098, 'subsample ': 0.9, 'colsample_bytree': 0.9, 'gamma': 0.18304013598755914, 'min_child_weight': 27}


[32m[I 2022-10-02 00:53:15,237][0m Trial 32 finished with value: 4.182622759857451 and parameters: {'n_estimators': 251, 'max_depth': 4, 'learning_rate': 0.21726466537009098, 'subsample': 0.9, 'colsample_bytree': 0.9, 'gamma': 0.18304013598755914, 'min_child_weight': 27}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.08904186506228078, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.4280844835601525, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:19,885][0m Trial 33 finished with value: 4.173541469054217 and parameters: {'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.08904186506228078, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.4280844835601525, 'min_child_weight': 19}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.08470970587900829, 'subsample ': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5759415537141229, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:24,536][0m Trial 34 finished with value: 4.1736033673505935 and parameters: {'n_estimators': 351, 'max_depth': 2, 'learning_rate': 0.08470970587900829, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5759415537141229, 'min_child_weight': 19}. Best is trial 27 with value: 4.17346378448078.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.12239175848022592, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6069447045537512, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:30,373][0m Trial 35 finished with value: 4.173339327163398 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.12239175848022592, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6069447045537512, 'min_child_weight': 19}. Best is trial 35 with value: 4.173339327163398.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.0957161867643007, 'subsample ': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.6155909387711848, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:36,207][0m Trial 36 finished with value: 4.173334161802724 and parameters: {'n_estimators': 451, 'max_depth': 2, 'learning_rate': 0.0957161867643007, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.6155909387711848, 'min_child_weight': 19}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 7, 'learning_rate': 0.13419355164293065, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.6558998760017779, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:47,954][0m Trial 37 finished with value: 4.224189192776184 and parameters: {'n_estimators': 451, 'max_depth': 7, 'learning_rate': 0.13419355164293065, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.6558998760017779, 'min_child_weight': 19}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 201, 'max_depth': 2, 'learning_rate': 0.08077747409081547, 'subsample ': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.8434679446239894, 'min_child_weight': 19}


[32m[I 2022-10-02 00:53:50,805][0m Trial 38 finished with value: 4.1742880096152435 and parameters: {'n_estimators': 201, 'max_depth': 2, 'learning_rate': 0.08077747409081547, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.8434679446239894, 'min_child_weight': 19}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.0010480285112053228, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.8277835494940824, 'min_child_weight': 15}


[32m[I 2022-10-02 00:54:01,564][0m Trial 39 finished with value: 4.377802561952401 and parameters: {'n_estimators': 501, 'max_depth': 6, 'learning_rate': 0.0010480285112053228, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.8277835494940824, 'min_child_weight': 15}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 451, 'max_depth': 8, 'learning_rate': 0.04067526088523761, 'subsample ': 0.6000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.4664661834977319, 'min_child_weight': 19}


[32m[I 2022-10-02 00:54:17,887][0m Trial 40 finished with value: 4.196940108153901 and parameters: {'n_estimators': 451, 'max_depth': 8, 'learning_rate': 0.04067526088523761, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.4664661834977319, 'min_child_weight': 19}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.09906322835559164, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5728132687194095, 'min_child_weight': 19}


[32m[I 2022-10-02 00:54:23,131][0m Trial 41 finished with value: 4.17336415125132 and parameters: {'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.09906322835559164, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.5728132687194095, 'min_child_weight': 19}. Best is trial 36 with value: 4.173334161802724.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.1775648688213594, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6349553401244622, 'min_child_weight': 17}


[32m[I 2022-10-02 00:54:28,387][0m Trial 42 finished with value: 4.173262952548937 and parameters: {'n_estimators': 401, 'max_depth': 2, 'learning_rate': 0.1775648688213594, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6349553401244622, 'min_child_weight': 17}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.16639201020769395, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.624948658896535, 'min_child_weight': 15}


[32m[I 2022-10-02 00:54:35,229][0m Trial 43 finished with value: 4.1769167784524335 and parameters: {'n_estimators': 501, 'max_depth': 3, 'learning_rate': 0.16639201020769395, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.624948658896535, 'min_child_weight': 15}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 4, 'learning_rate': 0.32838367259669177, 'subsample ': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.5402203961818479, 'min_child_weight': 17}


[32m[I 2022-10-02 00:54:41,311][0m Trial 44 finished with value: 4.2030434843405855 and parameters: {'n_estimators': 401, 'max_depth': 4, 'learning_rate': 0.32838367259669177, 'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.5402203961818479, 'min_child_weight': 17}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 251, 'max_depth': 16, 'learning_rate': 0.12008094985742847, 'subsample ': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.6768859933209209, 'min_child_weight': 25}


[32m[I 2022-10-02 00:55:31,617][0m Trial 45 finished with value: 4.330990883357067 and parameters: {'n_estimators': 251, 'max_depth': 16, 'learning_rate': 0.12008094985742847, 'subsample': 0.4, 'colsample_bytree': 0.8, 'gamma': 0.6768859933209209, 'min_child_weight': 25}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.19118343420526712, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.7491061444681048, 'min_child_weight': 17}


[32m[I 2022-10-02 00:55:39,276][0m Trial 46 finished with value: 4.173537966017844 and parameters: {'n_estimators': 601, 'max_depth': 2, 'learning_rate': 0.19118343420526712, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.8, 'gamma': 0.7491061444681048, 'min_child_weight': 17}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.29203731516612985, 'subsample ': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6366451305436707, 'min_child_weight': 21}


[32m[I 2022-10-02 00:55:44,872][0m Trial 47 finished with value: 4.179586065256707 and parameters: {'n_estimators': 401, 'max_depth': 3, 'learning_rate': 0.29203731516612985, 'subsample': 0.8, 'colsample_bytree': 0.7000000000000001, 'gamma': 0.6366451305436707, 'min_child_weight': 21}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 551, 'max_depth': 5, 'learning_rate': 0.051119387554078195, 'subsample ': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.790762773163753, 'min_child_weight': 25}


[32m[I 2022-10-02 00:55:54,300][0m Trial 48 finished with value: 4.18038993332992 and parameters: {'n_estimators': 551, 'max_depth': 5, 'learning_rate': 0.051119387554078195, 'subsample': 0.5, 'colsample_bytree': 0.8, 'gamma': 0.790762773163753, 'min_child_weight': 25}. Best is trial 42 with value: 4.173262952548937.[0m


Currently running with:
{'n_estimators': 301, 'max_depth': 4, 'learning_rate': 0.1115528917652135, 'subsample ': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.5619614850253246, 'min_child_weight': 17}


[32m[I 2022-10-02 00:55:59,005][0m Trial 49 finished with value: 4.177367826263378 and parameters: {'n_estimators': 301, 'max_depth': 4, 'learning_rate': 0.1115528917652135, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.9, 'gamma': 0.5619614850253246, 'min_child_weight': 17}. Best is trial 42 with value: 4.173262952548937.[0m


Final best parameters:


{'n_estimators': 401,
 'max_depth': 2,
 'learning_rate': 0.1775648688213594,
 'subsample': 0.8,
 'colsample_bytree': 0.7000000000000001,
 'gamma': 0.6349553401244622,
 'min_child_weight': 17}

In [19]:
best_params = study.best_params.copy()

model = xgb.XGBRegressor(objective="reg:squarederror",
                          n_jobs=-1,
                          grow_policy='lossguide',
                          tree_method="gpu_hist",
                          predictor="gpu_predictor",
                          booster='gbtree',
                          sampling_method='gradient_based',
                          eval_metrics=['rmse'],
                          random_state=RANDOM_SEED,
                          enable_categorical=False,
                          **best_params)
pipe = make_pipeline(StandardScaler(), model)
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('xgbregressor',
                 XGBRegressor(colsample_bytree=0.7000000000000001,
                              enable_categorical=False, eval_metrics=['rmse'],
                              gamma=0.6349553401244622, grow_policy='lossguide',
                              learning_rate=0.1775648688213594, max_depth=2,
                              min_child_weight=17, n_estimators=401, n_jobs=-1,
                              objective='reg:squarederror',
                              predictor='gpu_predictor', random_state=2,
                              sampling_method='gradient_based', subsample=0.8,
                              tree_method='gpu_hist'))])

In [20]:
train_pred = pipe.predict(X_train)
test_pred = pipe.predict(X_test)
print(f"train results - RMSE: {mean_squared_error(y_train, train_pred, squared=False)}, MAE: {mean_absolute_error(y_train, train_pred)}")
print(f"test results - RMSE: {mean_squared_error(y_test, test_pred, squared=False)}, MAE: {mean_absolute_error(y_test, test_pred)}")

train results - RMSE: 4.16872315760192, MAE: 1.8424664445900918
test results - RMSE: 4.207492987194956, MAE: 1.848834966430664
