In [1]:
!pip install --quiet optuna

[K     |████████████████████████████████| 348 kB 7.2 MB/s 
[K     |████████████████████████████████| 81 kB 6.4 MB/s 
[K     |████████████████████████████████| 209 kB 15.5 MB/s 
[K     |████████████████████████████████| 78 kB 6.7 MB/s 
[K     |████████████████████████████████| 112 kB 16.2 MB/s 
[K     |████████████████████████████████| 49 kB 6.0 MB/s 
[K     |████████████████████████████████| 147 kB 15.6 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [2]:
import numpy as np
import pandas as pd
import optuna as opt
import xgboost as xgb
from optuna.samplers import TPESampler
from sklearn.preprocessing import StandardScaler
from pathlib import Path
import gc

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
RANDOM_SEED = 2
DATA_DIR = Path("/content/drive/MyDrive/CS760")

In [5]:
df_train = pd.read_parquet(DATA_DIR/"train_main.parquet.snappy")
df_val = pd.read_parquet(DATA_DIR/"val_main.parquet.snappy")
df_test = pd.read_parquet(DATA_DIR/"test_main.parquet.snappy")

print(f"Shape of the training data : {df_train.shape}")
print(f"Shape of the validation data : {df_val.shape}")
print(f"Shape of the test data : {df_test.shape}")

Shape of the training data : (2060626, 13)
Shape of the validation data : (257578, 13)
Shape of the test data : (257579, 13)


In [6]:
df_train.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
r_id,2060626.0,3522730.0,2016756.0,4.0,1803866.0,3483320.0,5288280.0,6990278.0
r_stars,2060626.0,3.607754,1.538412,1.0,2.0,4.0,5.0,5.0
r_stars_square,2060626.0,15.3826,9.617924,1.0,4.0,16.0,25.0,25.0
r_length,2060626.0,133.2717,114.9773,0.0,57.0,101.0,171.0,1061.0
u_friends_count,2060626.0,188.8371,566.3421,1.0,2.0,40.0,168.0,14995.0
u_review_count,2060626.0,191.1808,496.6023,0.0,11.0,44.0,176.0,17473.0
u_month_age,2060626.0,42.74377,34.5464,3.802649e-07,13.64517,36.44688,65.15015,201.8375
b_stars,2060626.0,3.742639,0.7950195,1.0,3.5,4.0,4.5,5.0
b_review_count,2060626.0,337.1921,666.8911,5.0,40.0,125.0,341.0,7568.0
r_sen,2060626.0,0.1823936,0.1926882,-1.0,0.060078,0.1808695,0.3004768,1.0


In [7]:
X_train, y_train = df_train.drop(['r_useful', 'r_id'], axis=1).values, df_train['r_useful'].values
X_val, y_val = df_val.drop(['r_useful', 'r_id'], axis=1).values, df_val['r_useful'].values
X_test, y_test = df_test.drop(['r_useful', 'r_id'], axis=1).values, df_test['r_useful'].values

# standardise the features
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_val = ss.transform(X_val)
X_test = ss.transform(X_test)

In [8]:
def objective(trial):
  """ Function to tune parameters """
  gc.collect()
  params = {
      "n_estimators":trial.suggest_int('n_estimators', 1, 1000),
      "max_depth" : trial.suggest_int("max_depth", 2, 20),
      "learning_rate" : trial.suggest_float('lr', 1e-2, 3e-1),
  }

  print("Currently running with:")
  print(params)

  model = xgb.XGBRegressor(objective="reg:squarederror",
                            n_jobs=-1,
                            grow_policy='lossguide',
                            tree_method="gpu_hist",
                            predictor="gpu_predictor",
                            booster='gbtree',
                            sampling_method='gradient_based',
                            eval_metrics=['rmse'], 
                            random_state=RANDOM_SEED,
                            enable_categorical=False,
                            early_stopping_rounds=100,
                            **params)
  
  model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
  y_pred = model.predict(X_val)

  return mean_squared_error(y_val, y_pred, squared=False)

study = opt.create_study(direction='minimize', sampler=TPESampler(seed=RANDOM_SEED))
study.optimize(objective, n_trials=50)
study.best_params

[32m[I 2022-09-23 07:15:17,825][0m A new study created in memory with name: no-name-90d42e77-4305-4de6-b4c5-8d1c1d4f8566[0m


Currently running with:
{'n_estimators': 436, 'max_depth': 2, 'learning_rate': 0.16940211858482565}


[32m[I 2022-09-23 07:15:22,515][0m Trial 0 finished with value: 3.6350085188926378 and parameters: {'n_estimators': 436, 'max_depth': 2, 'lr': 0.16940211858482565}. Best is trial 0 with value: 3.6350085188926378.[0m


Currently running with:
{'n_estimators': 436, 'max_depth': 9, 'learning_rate': 0.10579709809112349}


[32m[I 2022-09-23 07:15:38,687][0m Trial 1 finished with value: 3.3277683600149843 and parameters: {'n_estimators': 436, 'max_depth': 9, 'lr': 0.10579709809112349}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 205, 'max_depth': 13, 'learning_rate': 0.0968998553656117}


[32m[I 2022-09-23 07:16:22,149][0m Trial 2 finished with value: 3.34230659649308 and parameters: {'n_estimators': 205, 'max_depth': 13, 'lr': 0.0968998553656117}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 267, 'max_depth': 13, 'learning_rate': 0.16345120734034133}


[32m[I 2022-09-23 07:17:20,640][0m Trial 3 finished with value: 3.391456058153894 and parameters: {'n_estimators': 267, 'max_depth': 13, 'lr': 0.16345120734034133}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 135, 'max_depth': 11, 'learning_rate': 0.06348756103760543}


[32m[I 2022-09-23 07:17:33,264][0m Trial 4 finished with value: 3.3684186301683274 and parameters: {'n_estimators': 135, 'max_depth': 11, 'lr': 0.06348756103760543}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 786, 'max_depth': 18, 'learning_rate': 0.15332868284075907}


[32m[I 2022-09-23 07:45:59,438][0m Trial 5 finished with value: 3.5141601703824286 and parameters: {'n_estimators': 786, 'max_depth': 18, 'lr': 0.15332868284075907}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 847, 'max_depth': 3, 'learning_rate': 0.15652136613529416}


[32m[I 2022-09-23 07:46:05,780][0m Trial 6 finished with value: 3.499169956159859 and parameters: {'n_estimators': 847, 'max_depth': 3, 'lr': 0.15652136613529416}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 66, 'max_depth': 10, 'learning_rate': 0.03799396554157764}


[32m[I 2022-09-23 07:46:10,515][0m Trial 7 finished with value: 3.4308284369808155 and parameters: {'n_estimators': 66, 'max_depth': 10, 'lr': 0.03799396554157764}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 128, 'max_depth': 13, 'learning_rate': 0.0755434801752284}


[32m[I 2022-09-23 07:46:41,748][0m Trial 8 finished with value: 3.3504941432064594 and parameters: {'n_estimators': 128, 'max_depth': 13, 'lr': 0.0755434801752284}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 107, 'max_depth': 6, 'learning_rate': 0.11144962265095677}


[32m[I 2022-09-23 07:46:43,619][0m Trial 9 finished with value: 3.454732536357315 and parameters: {'n_estimators': 107, 'max_depth': 6, 'lr': 0.11144962265095677}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 577, 'max_depth': 19, 'learning_rate': 0.2603014690260087}


[32m[I 2022-09-23 08:12:33,227][0m Trial 10 finished with value: 3.6603909238994516 and parameters: {'n_estimators': 577, 'max_depth': 19, 'lr': 0.2603014690260087}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 415, 'max_depth': 8, 'learning_rate': 0.2195736690437795}


[32m[I 2022-09-23 08:12:42,587][0m Trial 11 finished with value: 3.364696934489284 and parameters: {'n_estimators': 415, 'max_depth': 8, 'lr': 0.2195736690437795}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 646, 'max_depth': 15, 'learning_rate': 0.11131754261140306}


[32m[I 2022-09-23 08:18:37,045][0m Trial 12 finished with value: 3.397808910436627 and parameters: {'n_estimators': 646, 'max_depth': 15, 'lr': 0.11131754261140306}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 310, 'max_depth': 7, 'learning_rate': 0.10485038842696312}


[32m[I 2022-09-23 08:18:42,350][0m Trial 13 finished with value: 3.3698784810431 and parameters: {'n_estimators': 310, 'max_depth': 7, 'lr': 0.10485038842696312}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 295, 'max_depth': 15, 'learning_rate': 0.013669498217078649}


[32m[I 2022-09-23 08:22:17,867][0m Trial 14 finished with value: 3.3664032461488436 and parameters: {'n_estimators': 295, 'max_depth': 15, 'lr': 0.013669498217078649}. Best is trial 1 with value: 3.3277683600149843.[0m


Currently running with:
{'n_estimators': 981, 'max_depth': 9, 'learning_rate': 0.07100824475949939}


[32m[I 2022-09-23 08:22:49,203][0m Trial 15 finished with value: 3.32537625882591 and parameters: {'n_estimators': 981, 'max_depth': 9, 'lr': 0.07100824475949939}. Best is trial 15 with value: 3.32537625882591.[0m


Currently running with:
{'n_estimators': 998, 'max_depth': 5, 'learning_rate': 0.040930601838291086}


[32m[I 2022-09-23 08:22:59,281][0m Trial 16 finished with value: 3.435962460823117 and parameters: {'n_estimators': 998, 'max_depth': 5, 'lr': 0.040930601838291086}. Best is trial 15 with value: 3.32537625882591.[0m


Currently running with:
{'n_estimators': 697, 'max_depth': 9, 'learning_rate': 0.2984089242587392}


[32m[I 2022-09-23 08:23:22,388][0m Trial 17 finished with value: 3.4567032260345307 and parameters: {'n_estimators': 697, 'max_depth': 9, 'lr': 0.2984089242587392}. Best is trial 15 with value: 3.32537625882591.[0m


Currently running with:
{'n_estimators': 981, 'max_depth': 4, 'learning_rate': 0.13670638758171347}


[32m[I 2022-09-23 08:23:30,666][0m Trial 18 finished with value: 3.4426105106691005 and parameters: {'n_estimators': 981, 'max_depth': 4, 'lr': 0.13670638758171347}. Best is trial 15 with value: 3.32537625882591.[0m


Currently running with:
{'n_estimators': 514, 'max_depth': 8, 'learning_rate': 0.20033881178213184}


[32m[I 2022-09-23 08:23:42,063][0m Trial 19 finished with value: 3.413979926586566 and parameters: {'n_estimators': 514, 'max_depth': 8, 'lr': 0.20033881178213184}. Best is trial 15 with value: 3.32537625882591.[0m


Currently running with:
{'n_estimators': 863, 'max_depth': 10, 'learning_rate': 0.06876978180883794}


[32m[I 2022-09-23 08:24:25,613][0m Trial 20 finished with value: 3.316253850550612 and parameters: {'n_estimators': 863, 'max_depth': 10, 'lr': 0.06876978180883794}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 880, 'max_depth': 11, 'learning_rate': 0.07021137898851215}


[32m[I 2022-09-23 08:25:35,827][0m Trial 21 finished with value: 3.3174511167309366 and parameters: {'n_estimators': 880, 'max_depth': 11, 'lr': 0.07021137898851215}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 865, 'max_depth': 11, 'learning_rate': 0.06475352294226047}


[32m[I 2022-09-23 08:26:45,127][0m Trial 22 finished with value: 3.321953460342684 and parameters: {'n_estimators': 865, 'max_depth': 11, 'lr': 0.06475352294226047}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 858, 'max_depth': 11, 'learning_rate': 0.028479918084633224}


[32m[I 2022-09-23 08:27:53,877][0m Trial 23 finished with value: 3.325320405189942 and parameters: {'n_estimators': 858, 'max_depth': 11, 'lr': 0.028479918084633224}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 882, 'max_depth': 15, 'learning_rate': 0.057920571154149225}


[32m[I 2022-09-23 08:35:25,240][0m Trial 24 finished with value: 3.34977317548069 and parameters: {'n_estimators': 882, 'max_depth': 15, 'lr': 0.057920571154149225}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 786, 'max_depth': 12, 'learning_rate': 0.0907557917779474}


[32m[I 2022-09-23 08:37:07,725][0m Trial 25 finished with value: 3.3396755305086825 and parameters: {'n_estimators': 786, 'max_depth': 12, 'lr': 0.0907557917779474}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 685, 'max_depth': 17, 'learning_rate': 0.04900193360742126}


[32m[I 2022-09-23 08:51:18,052][0m Trial 26 finished with value: 3.4250808362403307 and parameters: {'n_estimators': 685, 'max_depth': 17, 'lr': 0.04900193360742126}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 760, 'max_depth': 11, 'learning_rate': 0.1363155479197914}


[32m[I 2022-09-23 08:52:19,961][0m Trial 27 finished with value: 3.369633010592686 and parameters: {'n_estimators': 760, 'max_depth': 11, 'lr': 0.1363155479197914}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 901, 'max_depth': 14, 'learning_rate': 0.01744111564450889}


[32m[I 2022-09-23 08:57:35,162][0m Trial 28 finished with value: 3.33802331165121 and parameters: {'n_estimators': 901, 'max_depth': 14, 'lr': 0.01744111564450889}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 914, 'max_depth': 2, 'learning_rate': 0.1862152616739493}


[32m[I 2022-09-23 08:57:41,284][0m Trial 29 finished with value: 3.5987255526039266 and parameters: {'n_estimators': 914, 'max_depth': 2, 'lr': 0.1862152616739493}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 583, 'max_depth': 10, 'learning_rate': 0.08392201424654089}


[32m[I 2022-09-23 08:58:10,522][0m Trial 30 finished with value: 3.333505384870623 and parameters: {'n_estimators': 583, 'max_depth': 10, 'lr': 0.08392201424654089}. Best is trial 20 with value: 3.316253850550612.[0m


Currently running with:
{'n_estimators': 835, 'max_depth': 12, 'learning_rate': 0.04226055006926836}


[32m[I 2022-09-23 08:59:56,925][0m Trial 31 finished with value: 3.3150649508003984 and parameters: {'n_estimators': 835, 'max_depth': 12, 'lr': 0.04226055006926836}. Best is trial 31 with value: 3.3150649508003984.[0m


Currently running with:
{'n_estimators': 750, 'max_depth': 12, 'learning_rate': 0.051938708328612346}


[32m[I 2022-09-23 09:01:31,491][0m Trial 32 finished with value: 3.3183330248003693 and parameters: {'n_estimators': 750, 'max_depth': 12, 'lr': 0.051938708328612346}. Best is trial 31 with value: 3.3150649508003984.[0m


Currently running with:
{'n_estimators': 735, 'max_depth': 12, 'learning_rate': 0.02806710440388869}


[32m[I 2022-09-23 09:03:07,450][0m Trial 33 finished with value: 3.313289183689388 and parameters: {'n_estimators': 735, 'max_depth': 12, 'lr': 0.02806710440388869}. Best is trial 33 with value: 3.313289183689388.[0m


Currently running with:
{'n_estimators': 813, 'max_depth': 16, 'learning_rate': 0.02524668065414425}


[32m[I 2022-09-23 09:14:28,113][0m Trial 34 finished with value: 3.3718300075355976 and parameters: {'n_estimators': 813, 'max_depth': 16, 'lr': 0.02524668065414425}. Best is trial 33 with value: 3.313289183689388.[0m


Currently running with:
{'n_estimators': 933, 'max_depth': 13, 'learning_rate': 0.12461749673147965}


[32m[I 2022-09-23 09:17:49,959][0m Trial 35 finished with value: 3.3791169464906954 and parameters: {'n_estimators': 933, 'max_depth': 13, 'lr': 0.12461749673147965}. Best is trial 33 with value: 3.313289183689388.[0m


Currently running with:
{'n_estimators': 729, 'max_depth': 12, 'learning_rate': 0.035057564389710194}


[32m[I 2022-09-23 09:19:24,260][0m Trial 36 finished with value: 3.3032036377549625 and parameters: {'n_estimators': 729, 'max_depth': 12, 'lr': 0.035057564389710194}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 634, 'max_depth': 14, 'learning_rate': 0.03335870984268673}


[32m[I 2022-09-23 09:22:51,423][0m Trial 37 finished with value: 3.340201052852342 and parameters: {'n_estimators': 634, 'max_depth': 14, 'lr': 0.03335870984268673}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 684, 'max_depth': 12, 'learning_rate': 0.010422576153280145}


[32m[I 2022-09-23 09:24:36,194][0m Trial 38 finished with value: 3.337440321115541 and parameters: {'n_estimators': 684, 'max_depth': 12, 'lr': 0.010422576153280145}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 738, 'max_depth': 10, 'learning_rate': 0.04603868957685672}


[32m[I 2022-09-23 09:25:13,372][0m Trial 39 finished with value: 3.327681143517943 and parameters: {'n_estimators': 738, 'max_depth': 10, 'lr': 0.04603868957685672}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 485, 'max_depth': 14, 'learning_rate': 0.08973363214720242}


[32m[I 2022-09-23 09:27:57,836][0m Trial 40 finished with value: 3.359159474295807 and parameters: {'n_estimators': 485, 'max_depth': 14, 'lr': 0.08973363214720242}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 813, 'max_depth': 12, 'learning_rate': 0.07528882500178732}


[32m[I 2022-09-23 09:29:44,838][0m Trial 41 finished with value: 3.329910520890744 and parameters: {'n_estimators': 813, 'max_depth': 12, 'lr': 0.07528882500178732}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 831, 'max_depth': 10, 'learning_rate': 0.05640741670520527}


[32m[I 2022-09-23 09:30:25,990][0m Trial 42 finished with value: 3.3150104838744165 and parameters: {'n_estimators': 831, 'max_depth': 10, 'lr': 0.05640741670520527}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 810, 'max_depth': 10, 'learning_rate': 0.039827078243577875}


[32m[I 2022-09-23 09:31:06,621][0m Trial 43 finished with value: 3.325714485812639 and parameters: {'n_estimators': 810, 'max_depth': 10, 'lr': 0.039827078243577875}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 940, 'max_depth': 8, 'learning_rate': 0.02824271177876824}


[32m[I 2022-09-23 09:31:27,817][0m Trial 44 finished with value: 3.366241010746222 and parameters: {'n_estimators': 940, 'max_depth': 8, 'lr': 0.02824271177876824}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 613, 'max_depth': 9, 'learning_rate': 0.05238720664040551}


[32m[I 2022-09-23 09:31:48,268][0m Trial 45 finished with value: 3.323649703048148 and parameters: {'n_estimators': 613, 'max_depth': 9, 'lr': 0.05238720664040551}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 727, 'max_depth': 7, 'learning_rate': 0.06037034548556225}


[32m[I 2022-09-23 09:31:59,937][0m Trial 46 finished with value: 3.3550973663782457 and parameters: {'n_estimators': 727, 'max_depth': 7, 'lr': 0.06037034548556225}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 841, 'max_depth': 20, 'learning_rate': 0.019954307418179544}


[32m[I 2022-09-23 10:26:06,956][0m Trial 47 finished with value: 3.565825137106448 and parameters: {'n_estimators': 841, 'max_depth': 20, 'lr': 0.019954307418179544}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 361, 'max_depth': 13, 'learning_rate': 0.08306189615616857}


[32m[I 2022-09-23 10:27:22,860][0m Trial 48 finished with value: 3.3451023834888325 and parameters: {'n_estimators': 361, 'max_depth': 13, 'lr': 0.08306189615616857}. Best is trial 36 with value: 3.3032036377549625.[0m


Currently running with:
{'n_estimators': 772, 'max_depth': 10, 'learning_rate': 0.03990043913833198}


[32m[I 2022-09-23 10:28:02,263][0m Trial 49 finished with value: 3.3306879761483645 and parameters: {'n_estimators': 772, 'max_depth': 10, 'lr': 0.03990043913833198}. Best is trial 36 with value: 3.3032036377549625.[0m


{'n_estimators': 729, 'max_depth': 12, 'lr': 0.035057564389710194}

In [10]:
model = xgb.XGBRegressor(objective="reg:squarederror",
                          n_jobs=-1,
                          grow_policy='lossguide',
                          tree_method="gpu_hist",
                          predictor="gpu_predictor",
                          booster='gbtree',
                          sampling_method='gradient_based',
                          eval_metrics=['rmse'],
                          random_state=RANDOM_SEED,
                          enable_categorical=False,
                          early_stopping_rounds=100,
                          **study.best_params)
model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

XGBRegressor(early_stopping_rounds=100, enable_categorical=False,
             eval_metrics=['rmse'], grow_policy='lossguide',
             lr=0.035057564389710194, max_depth=12, n_estimators=729, n_jobs=-1,
             objective='reg:squarederror', predictor='gpu_predictor',
             random_state=2, sampling_method='gradient_based',
             tree_method='gpu_hist')

In [11]:
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)
print(f"train results - RMSE: {mean_squared_error(y_train, train_pred, squared=False)}, MAE: {mean_absolute_error(y_train, train_pred)}")
print(f"test results - RMSE: {mean_squared_error(y_test, test_pred, squared=False)}, MAE: {mean_absolute_error(y_test, test_pred)}")

train results - RMSE: 1.7048513955227107, MAE: 1.0215062964365695
test results - RMSE: 3.7631706447347137, MAE: 1.5170065674559365


In [12]:
mean_train = np.mean(y_train)
print(f"Baseline mean model - test RMSE: {np.sqrt(np.mean((y_test - mean_train)**2))}")
print(f"Baseline mean model - test MAE: {np.mean(np.abs(y_test - mean_train))}")

Baseline mean model - test RMSE: 4.312942842075539
Baseline mean model - test MAE: 1.9075376592590827
