In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor
)
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,r2_score

from sklearn.decomposition import PCA

In [4]:
df = pd.read_csv('../../../data/processed/advance/gurgaon_properties_post_feature_selection_v2.csv')

In [5]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,0.0,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,0.0,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,0.0,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,1.0,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,0.0,High,Mid Floor


In [6]:
df['furnishing_type'].value_counts()

furnishing_type
0.0    2349
1.0    1018
2.0     187
Name: count, dtype: int64

In [7]:
# 0 -> unfurnished
# 1 -> semifurnished
# 2 -> furnished
df['furnishing_type'] = df['furnishing_type'].replace({0.0:'unfurnished',1.0:'semifurnished',2.0:'furnished'})

In [8]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnished,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnished,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnished,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semifurnished,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnished,High,Mid Floor


In [9]:
X = df.drop(columns=['price'])
y = df['price']

In [10]:
# Applying the log1p transformation to the target variable
y_transformed = np.log1p(y)

### Ordinal Encoding

In [11]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [14]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode)
    ], 
    remainder='passthrough'
)

In [15]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [16]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [17]:
scores.mean(),scores.std()

(np.float64(0.7363096633436828), np.float64(0.03238005754429938))

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

In [19]:
pipeline.fit(X_train,y_train)

In [20]:
y_pred = pipeline.predict(X_test)

In [21]:
y_pred = np.expm1(y_pred)

In [22]:
mean_absolute_error(np.expm1(y_test),y_pred)

np.float64(0.9463822160089355)

In [23]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output
    

In [24]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [25]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [26]:
model_output

[['linear_reg',
  np.float64(0.7363096633436828),
  np.float64(0.9463822160089355)],
 ['svr', np.float64(0.7642012011196353), np.float64(0.8472636473483927)],
 ['ridge', np.float64(0.7363125343993554), np.float64(0.9463387741853388)],
 ['LASSO', np.float64(0.05943378064493572), np.float64(1.528905986892753)],
 ['decision tree',
  np.float64(0.7673085026151244),
  np.float64(0.7442615607081917)],
 ['random forest',
  np.float64(0.8821227799147175),
  np.float64(0.5328741406130693)],
 ['extra trees',
  np.float64(0.8669776195842113),
  np.float64(0.5484517439895076)],
 ['gradient boosting',
  np.float64(0.8726504776846961),
  np.float64(0.5759827420192102)],
 ['adaboost', np.float64(0.7538773267354032), np.float64(0.8319593806608996)],
 ['mlp', np.float64(0.8090576243465157), np.float64(0.7177366996634695)],
 ['xgboost', np.float64(0.8894876835260124), np.float64(0.5040475141482346)]]

In [27]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [28]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.889488,0.504048
5,random forest,0.882123,0.532874
6,extra trees,0.866978,0.548452
7,gradient boosting,0.87265,0.575983
9,mlp,0.809058,0.717737
4,decision tree,0.767309,0.744262
8,adaboost,0.753877,0.831959
1,svr,0.764201,0.847264
2,ridge,0.736313,0.946339
0,linear_reg,0.73631,0.946382


### OneHotEncoding

In [29]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first'),['sector','agePossession','furnishing_type'])
    ], 
    remainder='passthrough'
)

In [30]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [31]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [32]:
scores.mean()

np.float64(0.8546054073648314)

In [33]:
scores.std()

np.float64(0.01599847663314007)

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

In [35]:
pipeline.fit(X_train,y_train)

In [36]:
y_pred = pipeline.predict(X_test)

In [37]:
y_pred = np.expm1(y_pred)

In [38]:
mean_absolute_error(np.expm1(y_test),y_pred)

np.float64(0.6497382874070646)

In [39]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output
    

In [40]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [41]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [42]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [43]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
6,extra trees,0.894422,0.466914
10,xgboost,0.89585,0.493456
5,random forest,0.890771,0.500038
9,mlp,0.874392,0.543562
7,gradient boosting,0.876294,0.570925
0,linear_reg,0.854605,0.649738
2,ridge,0.854678,0.652914
4,decision tree,0.807073,0.698884
1,svr,0.769741,0.834124
8,adaboost,0.751862,0.846813


### OneHotEncoding With PCA

In [47]:
# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['sector','agePossession'])
    ], 
    remainder='passthrough'
)

In [48]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca', PCA(n_components=1)),
    ('regressor', LinearRegression())
])

In [49]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [19]:
scores.mean()

np.float64(0.06225201431451136)

In [20]:
scores.std()

np.float64(0.01986059407164015)

In [21]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components=0.95)),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output
    

In [22]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [23]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [24]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [25]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
5,random forest,0.762022,0.656978
6,extra trees,0.74042,0.695043
4,decision tree,0.696442,0.761509
10,xgboost,0.622205,0.967581
7,gradient boosting,0.610623,0.987906
1,svr,0.218073,1.361163
8,adaboost,0.305643,1.403254
9,mlp,0.20823,1.450977
2,ridge,0.062252,1.526707
0,linear_reg,0.062252,1.526707


In [66]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca', PCA()),
    ('regressor', LinearRegression())
])

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
pipeline.fit(X_train,y_train)

In [68]:
y_pred = pipeline.predict(X_test)

In [69]:
y_pred = np.expm1(y_pred)

In [70]:
mean_absolute_error(np.expm1(y_test),y_pred)

np.float64(0.6491718597838264)

In [72]:
pipeline.named_steps['pca'].explained_variance_ratio_

array([9.89749744e-01, 2.90220572e-03, 1.68816536e-03, 9.31298980e-04,
       8.44026292e-04, 6.36740619e-04, 5.92022782e-04, 4.54962884e-04,
       4.17327781e-04, 2.82788570e-04, 1.87814840e-04, 1.64976967e-04,
       1.24921517e-04, 8.39921059e-05, 6.66087886e-05, 3.85820012e-05,
       2.76359720e-05, 2.58337502e-05, 2.43649092e-05, 2.42017556e-05,
       2.31068295e-05, 2.25533114e-05, 2.10023668e-05, 2.01997122e-05,
       1.87820810e-05, 1.74002386e-05, 1.69050844e-05, 1.63566237e-05,
       1.60336100e-05, 1.57538036e-05, 1.51270828e-05, 1.50071309e-05,
       1.49639547e-05, 1.42120409e-05, 1.41009031e-05, 1.38577795e-05,
       1.37957491e-05, 1.37128078e-05, 1.32341498e-05, 1.29527938e-05,
       1.28342844e-05, 1.22982822e-05, 1.21536402e-05, 1.16408767e-05,
       1.14468537e-05, 1.13839928e-05, 1.10374731e-05, 1.04931985e-05,
       1.04822944e-05, 1.01475504e-05, 9.93834155e-06, 9.76002650e-06,
       9.70271556e-06, 9.38192944e-06, 8.71443785e-06, 8.10680149e-06,
      

### Target Encoder

In [None]:
!pip install category_encoders




[notice] A new release of pip available: 22.3 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import category_encoders as ce

In [55]:


columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
)

In [56]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [57]:
# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [58]:
scores.mean(),scores.std()

(np.float64(0.829521918225536), np.float64(0.01838446337912282))

In [59]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output
    

In [60]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [61]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [62]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])

In [63]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.904798,0.447518
5,random forest,0.901154,0.452289
6,extra trees,0.901255,0.457758
7,gradient boosting,0.888952,0.50787
4,decision tree,0.827177,0.558826
9,mlp,0.849907,0.595438
8,adaboost,0.817599,0.707958
0,linear_reg,0.829522,0.713011
2,ridge,0.829536,0.713523
1,svr,0.782917,0.818851


### Hyperparameter Tuning

In [64]:
from sklearn.model_selection import GridSearchCV

In [65]:
param_grid = {
    'regressor__n_estimators': [50, 100, 200, 300],
    'regressor__max_depth': [None, 10, 20, 30],
    'regressor__max_samples':[0.1, 0.25, 0.5, 1.0],
    'regressor__max_features': ['auto', 'sqrt']
}

In [66]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
)

In [67]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor())
])

In [68]:
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

In [69]:
search = GridSearchCV(pipeline, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)

In [71]:
search.fit(X, y_transformed)

Fitting 10 folds for each of 128 candidates, totalling 1280 fits


640 fits failed out of a total of 1280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
302 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\Estate-Radar\env\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\Estate-Radar\env\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\Estate-Radar\env\Lib\site-packages\sklearn\pipeline.py", line 473, in fit

In [72]:
final_pipe = search.best_estimator_

In [73]:
search.best_params_

{'regressor__max_depth': 30,
 'regressor__max_features': 'sqrt',
 'regressor__max_samples': 1.0,
 'regressor__n_estimators': 300}

In [74]:
search.best_score_

np.float64(0.9026121231171291)

In [75]:
final_pipe.fit(X,y_transformed)

### Exporting the model

In [76]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['sector','agePossession'])
    ], 
    remainder='passthrough'
)

In [77]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=500))
])

In [78]:
pipeline.fit(X,y_transformed)

In [80]:
import pickle

with open('../../../models/pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

In [89]:
with open('../../../models/df.pkl', 'wb') as file:
    pickle.dump(X, file)

In [84]:
X

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnished,Low,Low Floor
1,flat,sector 89,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnished,Low,Mid Floor
2,flat,sohna road,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnished,Low,High Floor
3,flat,sector 92,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semifurnished,High,Mid Floor
4,flat,sector 102,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnished,High,Mid Floor
...,...,...,...,...,...,...,...,...,...,...,...,...
3549,flat,sector 84,2.0,2.0,1,Relatively New,532.0,0.0,0.0,unfurnished,Medium,Mid Floor
3550,house,sector 109,5.0,5.0,3+,Relatively New,6228.0,1.0,1.0,unfurnished,High,Low Floor
3551,flat,sector 2,1.0,1.0,1,Moderately Old,665.0,0.0,0.0,semifurnished,Medium,Mid Floor
3552,house,sector 43,5.0,6.0,3,Moderately Old,5490.0,1.0,1.0,unfurnished,Medium,Mid Floor


### Trying out the predictions

In [85]:
X.columns

Index(['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category'],
      dtype='object')

In [86]:
X.iloc[0].values

array(['flat', 'sector 36', np.float64(3.0), np.float64(2.0), '2',
       'New Property', np.float64(850.0), np.float64(0.0),
       np.float64(0.0), 'unfurnished', 'Low', 'Low Floor'], dtype=object)

In [87]:
data = [['house', 'sector 102', 4, 3, '3+', 'New Property', 2750, 0, 0, 'unfurnished', 'Low', 'Low Floor']]
columns = ['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category']

# Convert to DataFrame
one_df = pd.DataFrame(data, columns=columns)

one_df


Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,house,sector 102,4,3,3+,New Property,2750,0,0,unfurnished,Low,Low Floor


In [88]:
np.expm1(pipeline.predict(one_df))

array([3.19052593])

In [307]:
X.dtypes

property_type       object
sector              object
bedRoom            float64
bathroom           float64
balcony             object
agePossession       object
built_up_area      float64
servant room       float64
store room         float64
furnishing_type     object
luxury_category     object
floor_category      object
dtype: object

In [306]:
sorted(X['sector'].unique().tolist())

['dwarka expressway',
 'gwal pahari',
 'manesar',
 'sector 1',
 'sector 10',
 'sector 102',
 'sector 103',
 'sector 104',
 'sector 105',
 'sector 106',
 'sector 107',
 'sector 108',
 'sector 109',
 'sector 11',
 'sector 110',
 'sector 111',
 'sector 112',
 'sector 113',
 'sector 12',
 'sector 13',
 'sector 14',
 'sector 15',
 'sector 17',
 'sector 2',
 'sector 21',
 'sector 22',
 'sector 23',
 'sector 24',
 'sector 25',
 'sector 26',
 'sector 27',
 'sector 28',
 'sector 3',
 'sector 30',
 'sector 31',
 'sector 33',
 'sector 36',
 'sector 37',
 'sector 37d',
 'sector 38',
 'sector 39',
 'sector 4',
 'sector 40',
 'sector 41',
 'sector 43',
 'sector 45',
 'sector 46',
 'sector 47',
 'sector 48',
 'sector 49',
 'sector 5',
 'sector 50',
 'sector 51',
 'sector 52',
 'sector 53',
 'sector 54',
 'sector 55',
 'sector 56',
 'sector 57',
 'sector 58',
 'sector 59',
 'sector 6',
 'sector 60',
 'sector 61',
 'sector 62',
 'sector 63',
 'sector 63a',
 'sector 65',
 'sector 66',
 'sector 67',
 'se

### Optuna

In [11]:
# Importing the required libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
import optuna
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
! pip install optuna-integration[xgboost]

In [None]:
def objective(trial):
    # Define the preprocessor
    columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 
                        'furnishing_type', 'luxury_category', 'floor_category']
    
    preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
    )

    # Choose the algorithm to tune
    regressor_name = trial.suggest_categorical('regressor', ['RandomForest', 'GradientBoosting', 'XGBoost'])

    if regressor_name == 'RandomForest':
        model = RandomForestRegressor(
            n_estimators=trial.suggest_int('n_estimators', 100, 500),
            max_depth=trial.suggest_int('max_depth', 5, 30),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
            min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10),
            random_state=42
        )

    elif regressor_name == 'GradientBoosting':
        model = GradientBoostingRegressor(
            n_estimators=trial.suggest_int('n_estimators', 100, 500),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            max_depth=trial.suggest_int('max_depth', 3, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
            min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10),
            random_state=42
        )

    else:  # XGBoost
        model = XGBRegressor(
            n_estimators=trial.suggest_int('n_estimators', 100, 500),
            max_depth=trial.suggest_int('max_depth', 3, 20),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            min_child_weight=trial.suggest_int('min_child_weight', 1, 7),
            subsample=trial.suggest_float('subsample', 0.6, 1.0),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
            random_state=42
        )

    # Create pipeline
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    try:
        # Custom scorer for MAE on original scale
        def mae_scorer(estimator, X, y):
            y_pred = estimator.predict(X)
            return -mean_absolute_error(np.expm1(y), np.expm1(y_pred))  # Negative because we want to maximize
        
        # Perform cross-validation
        kfold = KFold(n_splits=5, shuffle=True, random_state=42)
        
        # Get both R2 and MAE scores
        r2_scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
        mae_scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring=mae_scorer)
        
        mean_r2 = r2_scores.mean()
        mean_mae = -mae_scores.mean()  # Convert back to positive
        
        print(f"\nTrial Results:")
        print(f"Mean R2 Score: {mean_r2:.4f} (+/- {r2_scores.std() * 2:.4f})")
        print(f"Mean MAE: {mean_mae:.4f} (+/- {mae_scores.std() * 2:.4f})")
        
        return mean_mae
        
    except Exception as e:
        print(f"Error during evaluation: {str(e)}")
        return float('inf')

# Create and run the study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, n_jobs=-1)

# Print the best results
print("\nBest trial:")
print("  MAE: ", study.best_trial.value)
print("  Params: ")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

In [19]:
# First, do the train-test split outside the objective function
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)

def objective(trial):
    # Define the preprocessor
    columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 
                        'furnishing_type', 'luxury_category', 'floor_category']
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
            ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
            ('cat1', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), ['agePossession']),
            ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
    )

    # XGBoost hyperparameters - expanded search space
    model = XGBRegressor(
        # Tree-specific parameters
        n_estimators=trial.suggest_int('n_estimators', 100, 1000),
        max_depth=trial.suggest_int('max_depth', 3, 25),
        min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
        
        # Learning rate and regularization
        learning_rate=trial.suggest_float('learning_rate', 0.001, 0.3, log=True),
        gamma=trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        reg_alpha=trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        reg_lambda=trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
        
        # Sampling parameters
        subsample=trial.suggest_float('subsample', 0.5, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1.0),
        colsample_bylevel=trial.suggest_float('colsample_bylevel', 0.5, 1.0),
        colsample_bynode=trial.suggest_float('colsample_bynode', 0.5, 1.0),
        
        # Other parameters
        max_delta_step=trial.suggest_int('max_delta_step', 0, 10),
        scale_pos_weight=trial.suggest_float('scale_pos_weight', 0.8, 1.2),
        
        # Fixed parameters
        random_state=42,
        n_jobs=-1,
        tree_method='hist'
    )

    # Create pipeline
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    try:
        # Use K-fold CV on training data only
        kfold = KFold(n_splits=5, shuffle=True, random_state=42)
        
        # Get cross-validation scores on training data
        cv_scores_r2 = []
        cv_scores_mae = []
        
        for train_idx, val_idx in kfold.split(X_train):
            # Split the training data into train and validation
            X_train_fold, X_val_fold = X_train.iloc[train_idx], X_train.iloc[val_idx]
            y_train_fold, y_val_fold = y_train.iloc[train_idx], y_train.iloc[val_idx]
            
            # Fit the pipeline on training fold
            pipeline.fit(X_train_fold, y_train_fold)
            
            # Make predictions on validation fold
            y_pred_fold = pipeline.predict(X_val_fold)
            
            # Calculate metrics
            r2 = r2_score(y_val_fold, y_pred_fold)
            mae = mean_absolute_error(np.expm1(y_val_fold), np.expm1(y_pred_fold))
            
            cv_scores_r2.append(r2)
            cv_scores_mae.append(mae)
        
        mean_r2 = np.mean(cv_scores_r2)
        mean_mae = np.mean(cv_scores_mae)
        
        print(f"\nTrial Results:")
        print(f"Mean R2 Score: {mean_r2:.4f} (+/- {np.std(cv_scores_r2) * 2:.4f})")
        print(f"Mean MAE: {mean_mae:.4f} (+/- {np.std(cv_scores_mae) * 2:.4f})")
        
        return mean_mae
        
    except Exception as e:
        print(f"Error during evaluation: {str(e)}")
        return float('inf')

# Create and run the study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100, n_jobs=-1)

# Get the best pipeline
best_params = study.best_trial.params
best_pipeline = Pipeline([
    ('preprocessor', ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
            ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), columns_to_encode),
            ('cat1', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), ['agePossession'])
        ], 
        remainder='passthrough'
    )),
    ('regressor', XGBRegressor(**best_params, random_state=42, n_jobs=-1, tree_method='hist'))
])

# Fit the best pipeline on the full training data
best_pipeline.fit(X_train, y_train)

# Evaluate on test set
y_pred_test = best_pipeline.predict(X_test)
final_r2 = r2_score(y_test, y_pred_test)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(y_pred_test))

print("\nBest trial:")
print("  MAE: ", study.best_trial.value)
print("\nBest hyperparameters:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

print("\nFinal Test Set Performance:")
print(f"R2 Score: {final_r2:.4f}")
print(f"MAE: {final_mae:.4f}")

# Plot optimization history
try:
    optuna.visualization.plot_optimization_history(study)
    optuna.visualization.plot_param_importances(study)
except:
    print("Visualization requires plotly to be installed")

[I 2024-12-19 17:53:28,280] A new study created in memory with name: no-name-af39cc82-28a3-4692-be75-c57c3626b2ed
[I 2024-12-19 17:53:38,571] Trial 2 finished with value: 0.5210861573772798 and parameters: {'n_estimators': 668, 'max_depth': 13, 'min_child_weight': 9, 'learning_rate': 0.11066622389083881, 'gamma': 0.03715246886984261, 'reg_alpha': 0.02791000086457536, 'reg_lambda': 0.0006982515207901851, 'subsample': 0.7505168621972407, 'colsample_bytree': 0.575841481611983, 'colsample_bylevel': 0.5366001890133405, 'colsample_bynode': 0.9470682401238679, 'max_delta_step': 1, 'scale_pos_weight': 1.0794545856347113}. Best is trial 2 with value: 0.5210861573772798.



Trial Results:
Mean R2 Score: 0.8934 (+/- 0.0145)
Mean MAE: 0.5211 (+/- 0.0779)


[I 2024-12-19 17:53:40,622] Trial 6 finished with value: 0.5393645798834242 and parameters: {'n_estimators': 632, 'max_depth': 3, 'min_child_weight': 3, 'learning_rate': 0.02023902150981271, 'gamma': 0.000539739680341382, 'reg_alpha': 1.3232361013323386e-07, 'reg_lambda': 0.19423287299196917, 'subsample': 0.5615983104864994, 'colsample_bytree': 0.6062295717737727, 'colsample_bylevel': 0.8454719962796674, 'colsample_bynode': 0.799011081354126, 'max_delta_step': 6, 'scale_pos_weight': 1.1112846087871042}. Best is trial 2 with value: 0.5210861573772798.



Trial Results:
Mean R2 Score: 0.8884 (+/- 0.0216)
Mean MAE: 0.5394 (+/- 0.0876)


[I 2024-12-19 17:53:51,478] Trial 5 finished with value: 0.5186115845229715 and parameters: {'n_estimators': 481, 'max_depth': 6, 'min_child_weight': 8, 'learning_rate': 0.15481798102127978, 'gamma': 0.00011036137695867412, 'reg_alpha': 0.4361073118688641, 'reg_lambda': 4.15083856753968e-06, 'subsample': 0.580846146488123, 'colsample_bytree': 0.572302904536752, 'colsample_bylevel': 0.5646934493267055, 'colsample_bynode': 0.6846302772120175, 'max_delta_step': 4, 'scale_pos_weight': 1.1363892744312902}. Best is trial 5 with value: 0.5186115845229715.



Trial Results:
Mean R2 Score: 0.8941 (+/- 0.0171)
Mean MAE: 0.5186 (+/- 0.0905)


[I 2024-12-19 17:53:55,111] Trial 8 finished with value: 0.5268981552533789 and parameters: {'n_estimators': 248, 'max_depth': 5, 'min_child_weight': 10, 'learning_rate': 0.02429984324747994, 'gamma': 1.0687130367251141e-05, 'reg_alpha': 0.09658382869578974, 'reg_lambda': 0.012557746211026287, 'subsample': 0.8752104729470198, 'colsample_bytree': 0.6529735459140613, 'colsample_bylevel': 0.7358126084053302, 'colsample_bynode': 0.7964839667608405, 'max_delta_step': 2, 'scale_pos_weight': 1.064272392165283}. Best is trial 5 with value: 0.5186115845229715.



Trial Results:
Mean R2 Score: 0.8931 (+/- 0.0171)
Mean MAE: 0.5269 (+/- 0.0838)


[I 2024-12-19 17:54:18,686] Trial 10 finished with value: 0.49959251932021065 and parameters: {'n_estimators': 404, 'max_depth': 5, 'min_child_weight': 6, 'learning_rate': 0.04372354341597201, 'gamma': 4.95256640997652e-06, 'reg_alpha': 2.5414576262193328e-06, 'reg_lambda': 0.00496115472754545, 'subsample': 0.9540243942917178, 'colsample_bytree': 0.9830415008144193, 'colsample_bylevel': 0.7205924896550969, 'colsample_bynode': 0.9020967424085542, 'max_delta_step': 5, 'scale_pos_weight': 0.902444691605863}. Best is trial 10 with value: 0.49959251932021065.



Trial Results:
Mean R2 Score: 0.9002 (+/- 0.0172)
Mean MAE: 0.4996 (+/- 0.0760)


[I 2024-12-19 17:54:30,338] Trial 1 finished with value: 0.4964343700267193 and parameters: {'n_estimators': 653, 'max_depth': 15, 'min_child_weight': 9, 'learning_rate': 0.04293376045858388, 'gamma': 0.0002691172127053114, 'reg_alpha': 0.00925345396437726, 'reg_lambda': 0.007148651303082217, 'subsample': 0.9972330521488503, 'colsample_bytree': 0.6611879075613051, 'colsample_bylevel': 0.5642736695288597, 'colsample_bynode': 0.9249595668184183, 'max_delta_step': 6, 'scale_pos_weight': 1.0565585680120908}. Best is trial 1 with value: 0.4964343700267193.



Trial Results:
Mean R2 Score: 0.9010 (+/- 0.0178)
Mean MAE: 0.4964 (+/- 0.0827)


[I 2024-12-19 17:54:41,436] Trial 12 finished with value: 0.4954479312007627 and parameters: {'n_estimators': 274, 'max_depth': 10, 'min_child_weight': 5, 'learning_rate': 0.04718717471786624, 'gamma': 0.008031714222816925, 'reg_alpha': 0.00010368869185058572, 'reg_lambda': 0.0016638533308604507, 'subsample': 0.8400273076945513, 'colsample_bytree': 0.8450212919122044, 'colsample_bylevel': 0.6366328906830936, 'colsample_bynode': 0.939572848044073, 'max_delta_step': 6, 'scale_pos_weight': 1.1669893355751113}. Best is trial 12 with value: 0.4954479312007627.



Trial Results:
Mean R2 Score: 0.8984 (+/- 0.0182)
Mean MAE: 0.4954 (+/- 0.0702)


[I 2024-12-19 17:54:56,956] Trial 13 finished with value: 0.7456972061626751 and parameters: {'n_estimators': 182, 'max_depth': 11, 'min_child_weight': 10, 'learning_rate': 0.007304183908508203, 'gamma': 0.02447953149253578, 'reg_alpha': 0.0001119569869903991, 'reg_lambda': 8.673300917597987e-06, 'subsample': 0.9281919861768163, 'colsample_bytree': 0.8525924758944756, 'colsample_bylevel': 0.783514609088853, 'colsample_bynode': 0.899760870629664, 'max_delta_step': 5, 'scale_pos_weight': 1.1252029578977543}. Best is trial 12 with value: 0.4954479312007627.



Trial Results:
Mean R2 Score: 0.8168 (+/- 0.0138)
Mean MAE: 0.7457 (+/- 0.1502)


[I 2024-12-19 17:55:03,221] Trial 14 finished with value: 0.6762035780026447 and parameters: {'n_estimators': 255, 'max_depth': 6, 'min_child_weight': 1, 'learning_rate': 0.007102570183019564, 'gamma': 0.2911189956897252, 'reg_alpha': 0.019383866855597138, 'reg_lambda': 9.998505799397162e-06, 'subsample': 0.8539092251699516, 'colsample_bytree': 0.7210291414533851, 'colsample_bylevel': 0.8215815835171736, 'colsample_bynode': 0.9454075810734142, 'max_delta_step': 9, 'scale_pos_weight': 1.1024593397170819}. Best is trial 12 with value: 0.4954479312007627.



Trial Results:
Mean R2 Score: 0.8468 (+/- 0.0136)
Mean MAE: 0.6762 (+/- 0.1261)


[I 2024-12-19 17:55:07,507] Trial 3 finished with value: 0.4906567815093249 and parameters: {'n_estimators': 591, 'max_depth': 25, 'min_child_weight': 8, 'learning_rate': 0.028749465844697734, 'gamma': 0.0009654374133106989, 'reg_alpha': 1.711069623740637e-06, 'reg_lambda': 0.895103115019426, 'subsample': 0.8594958031064237, 'colsample_bytree': 0.655358129638383, 'colsample_bylevel': 0.6004017976350803, 'colsample_bynode': 0.6949001557369577, 'max_delta_step': 1, 'scale_pos_weight': 1.095344261154644}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.9033 (+/- 0.0173)
Mean MAE: 0.4907 (+/- 0.0833)


[I 2024-12-19 17:55:08,193] Trial 0 finished with value: 0.8041337240120571 and parameters: {'n_estimators': 769, 'max_depth': 11, 'min_child_weight': 3, 'learning_rate': 0.001789164354259545, 'gamma': 1.6105142643359658e-06, 'reg_alpha': 0.00022078070450775716, 'reg_lambda': 0.004882245155985365, 'subsample': 0.680418034998055, 'colsample_bytree': 0.5224167615745474, 'colsample_bylevel': 0.5175681650492526, 'colsample_bynode': 0.8041150754186237, 'max_delta_step': 8, 'scale_pos_weight': 0.9111112671459584}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.7860 (+/- 0.0149)
Mean MAE: 0.8041 (+/- 0.1575)


[I 2024-12-19 17:55:17,445] Trial 16 finished with value: 1.2193307015804369 and parameters: {'n_estimators': 178, 'max_depth': 11, 'min_child_weight': 4, 'learning_rate': 0.0021292390128575967, 'gamma': 1.3329228841226093e-07, 'reg_alpha': 2.7990211624455366e-08, 'reg_lambda': 6.202307916878096e-07, 'subsample': 0.9390670807352159, 'colsample_bytree': 0.8553117932421432, 'colsample_bylevel': 0.6211250828695347, 'colsample_bynode': 0.7995819148343086, 'max_delta_step': 2, 'scale_pos_weight': 1.1985692825064385}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.4545 (+/- 0.0105)
Mean MAE: 1.2193 (+/- 0.2116)


[I 2024-12-19 17:55:20,596] Trial 7 finished with value: 0.7718490939978494 and parameters: {'n_estimators': 538, 'max_depth': 16, 'min_child_weight': 1, 'learning_rate': 0.0023499680227238723, 'gamma': 0.0003888151737542825, 'reg_alpha': 0.05473303467859488, 'reg_lambda': 0.038733832145712806, 'subsample': 0.8030249422773283, 'colsample_bytree': 0.9330272516526095, 'colsample_bylevel': 0.6302525943343833, 'colsample_bynode': 0.9901224574689083, 'max_delta_step': 8, 'scale_pos_weight': 0.9762702132623632}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8044 (+/- 0.0153)
Mean MAE: 0.7718 (+/- 0.1512)


[I 2024-12-19 17:55:23,023] Trial 9 finished with value: 0.5186360236898759 and parameters: {'n_estimators': 952, 'max_depth': 13, 'min_child_weight': 8, 'learning_rate': 0.08445175339580531, 'gamma': 2.893018308750864e-05, 'reg_alpha': 1.1495840591922096e-07, 'reg_lambda': 0.006549167041049108, 'subsample': 0.8647245830775294, 'colsample_bytree': 0.5245472138053239, 'colsample_bylevel': 0.5267881655202993, 'colsample_bynode': 0.6696592283509748, 'max_delta_step': 4, 'scale_pos_weight': 0.974063262970639}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8945 (+/- 0.0132)
Mean MAE: 0.5186 (+/- 0.1098)


[I 2024-12-19 17:55:27,455] Trial 15 finished with value: 0.5841603975020828 and parameters: {'n_estimators': 210, 'max_depth': 25, 'min_child_weight': 2, 'learning_rate': 0.011088277663741609, 'gamma': 5.262272135086478e-06, 'reg_alpha': 1.4825658791545988e-06, 'reg_lambda': 0.00036643574517845916, 'subsample': 0.790143973031344, 'colsample_bytree': 0.6651454386159157, 'colsample_bylevel': 0.5348089743358564, 'colsample_bynode': 0.9908990897947674, 'max_delta_step': 4, 'scale_pos_weight': 0.8127393530570347}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8764 (+/- 0.0140)
Mean MAE: 0.5842 (+/- 0.1104)


[I 2024-12-19 17:55:29,010] Trial 20 finished with value: 0.5500848531641601 and parameters: {'n_estimators': 859, 'max_depth': 24, 'min_child_weight': 6, 'learning_rate': 0.2955441042058913, 'gamma': 0.013854928470342436, 'reg_alpha': 6.613563257124254e-06, 'reg_lambda': 1.1027779712538929e-08, 'subsample': 0.7036404815656162, 'colsample_bytree': 0.7840707902046966, 'colsample_bylevel': 0.9643627542713337, 'colsample_bynode': 0.5230332388106869, 'max_delta_step': 0, 'scale_pos_weight': 1.1979682502390117}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8842 (+/- 0.0208)
Mean MAE: 0.5501 (+/- 0.0956)


[I 2024-12-19 17:55:30,307] Trial 21 finished with value: 0.5527238222686567 and parameters: {'n_estimators': 396, 'max_depth': 24, 'min_child_weight': 6, 'learning_rate': 0.2579740417332097, 'gamma': 0.005289995056579696, 'reg_alpha': 4.721908360650701e-06, 'reg_lambda': 0.7011688919146315, 'subsample': 0.7078830544948946, 'colsample_bytree': 0.8199310697690831, 'colsample_bylevel': 0.9407560796988317, 'colsample_bynode': 0.5600207870161191, 'max_delta_step': 0, 'scale_pos_weight': 1.198658491311497}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8826 (+/- 0.0174)
Mean MAE: 0.5527 (+/- 0.1149)


[I 2024-12-19 17:55:33,469] Trial 24 finished with value: 0.5792472487052744 and parameters: {'n_estimators': 337, 'max_depth': 19, 'min_child_weight': 5, 'learning_rate': 0.05251794608682627, 'gamma': 0.5866217313828123, 'reg_alpha': 0.0009198344820787221, 'reg_lambda': 0.9002963033660544, 'subsample': 0.8034110368881985, 'colsample_bytree': 0.72029790220475, 'colsample_bylevel': 0.6687996895069879, 'colsample_bynode': 0.6853924035580199, 'max_delta_step': 10, 'scale_pos_weight': 1.0175253489511147}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8756 (+/- 0.0244)
Mean MAE: 0.5792 (+/- 0.0935)


[I 2024-12-19 17:55:38,161] Trial 19 finished with value: 0.49892678810162955 and parameters: {'n_estimators': 973, 'max_depth': 25, 'min_child_weight': 6, 'learning_rate': 0.05338599987673623, 'gamma': 0.00514728073519989, 'reg_alpha': 1.6420972497238904e-06, 'reg_lambda': 0.8237708216060275, 'subsample': 0.7783669825371458, 'colsample_bytree': 0.811626286993371, 'colsample_bylevel': 0.9683136083105741, 'colsample_bynode': 0.5203188774816603, 'max_delta_step': 0, 'scale_pos_weight': 0.9926100531497404}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.9014 (+/- 0.0209)
Mean MAE: 0.4989 (+/- 0.0875)


[I 2024-12-19 17:55:40,321] Trial 11 finished with value: 0.6431076308876056 and parameters: {'n_estimators': 1000, 'max_depth': 12, 'min_child_weight': 4, 'learning_rate': 0.0017038883318149084, 'gamma': 0.00028813616430662845, 'reg_alpha': 3.532335832221147e-08, 'reg_lambda': 6.671061290338312e-05, 'subsample': 0.7185585943727185, 'colsample_bytree': 0.7789510047847777, 'colsample_bylevel': 0.7402469559347691, 'colsample_bynode': 0.9987599443412276, 'max_delta_step': 3, 'scale_pos_weight': 0.8189182922820581}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8585 (+/- 0.0139)
Mean MAE: 0.6431 (+/- 0.1233)


[I 2024-12-19 17:55:41,893] Trial 22 finished with value: 0.49580481354386857 and parameters: {'n_estimators': 371, 'max_depth': 25, 'min_child_weight': 6, 'learning_rate': 0.05400920644867416, 'gamma': 0.005571812918338269, 'reg_alpha': 8.767270233495661e-06, 'reg_lambda': 0.8866404637217797, 'subsample': 0.6890734102764084, 'colsample_bytree': 0.7884257799328394, 'colsample_bylevel': 0.9235390757141071, 'colsample_bynode': 0.524419666592207, 'max_delta_step': 0, 'scale_pos_weight': 1.1990779383396977}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8996 (+/- 0.0207)
Mean MAE: 0.4958 (+/- 0.0850)


[I 2024-12-19 17:55:42,222] Trial 18 finished with value: 0.5065053202763072 and parameters: {'n_estimators': 397, 'max_depth': 24, 'min_child_weight': 6, 'learning_rate': 0.08696517269609862, 'gamma': 2.9500534695074453e-08, 'reg_alpha': 1.9423269156773167e-06, 'reg_lambda': 1.0941768198618041e-08, 'subsample': 0.7997719339387932, 'colsample_bytree': 0.8327098884661384, 'colsample_bylevel': 0.6705306416890041, 'colsample_bynode': 0.529942599165228, 'max_delta_step': 0, 'scale_pos_weight': 1.1744057339737015}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8964 (+/- 0.0187)
Mean MAE: 0.5065 (+/- 0.0917)


[I 2024-12-19 17:55:44,904] Trial 26 finished with value: 1.1100165102340736 and parameters: {'n_estimators': 118, 'max_depth': 20, 'min_child_weight': 7, 'learning_rate': 0.004631889593260138, 'gamma': 0.0019173480474307051, 'reg_alpha': 2.216122851243136e-05, 'reg_lambda': 0.00010386860165828454, 'subsample': 0.6228031239558185, 'colsample_bytree': 0.9155013254210614, 'colsample_bylevel': 0.6743103508250102, 'colsample_bynode': 0.6188988132343343, 'max_delta_step': 7, 'scale_pos_weight': 1.151755811499956}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.5654 (+/- 0.0149)
Mean MAE: 1.1100 (+/- 0.2027)


[I 2024-12-19 17:55:46,868] Trial 23 finished with value: 0.5007256725734863 and parameters: {'n_estimators': 373, 'max_depth': 20, 'min_child_weight': 5, 'learning_rate': 0.04562491756417005, 'gamma': 0.002832729035728147, 'reg_alpha': 0.0008272903255921889, 'reg_lambda': 0.9390323279988139, 'subsample': 0.6392066348118279, 'colsample_bytree': 0.7656326091235507, 'colsample_bylevel': 0.663344450008206, 'colsample_bynode': 0.5832859071794122, 'max_delta_step': 10, 'scale_pos_weight': 1.162429026340233}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8995 (+/- 0.0144)
Mean MAE: 0.5007 (+/- 0.0859)


[I 2024-12-19 17:55:47,924] Trial 4 finished with value: 0.7269909623134945 and parameters: {'n_estimators': 913, 'max_depth': 19, 'min_child_weight': 3, 'learning_rate': 0.0014613743977411012, 'gamma': 0.004628855506232109, 'reg_alpha': 0.0006949824090776954, 'reg_lambda': 0.00027348736939217667, 'subsample': 0.7655064550485347, 'colsample_bytree': 0.8146059093631673, 'colsample_bylevel': 0.9817956905735788, 'colsample_bynode': 0.9953334997677594, 'max_delta_step': 6, 'scale_pos_weight': 0.9932703163192649}. Best is trial 3 with value: 0.4906567815093249.
[I 2024-12-19 17:55:47,935] Trial 28 finished with value: 0.5723812916755666 and parameters: {'n_estimators': 102, 'max_depth': 21, 'min_child_weight': 7, 'learning_rate': 0.022248145037773025, 'gamma': 0.002025211408213175, 'reg_alpha': 2.7405765012572936e-05, 'reg_lambda': 0.07299962290768795, 'subsample': 0.6416014125464374, 'colsample_bytree': 0.9006217820358253, 'colsample_bylevel': 0.9000340249048333, 'colsample_bynode': 0.6096


Trial Results:
Mean R2 Score: 0.8247 (+/- 0.0133)
Mean MAE: 0.7270 (+/- 0.1416)

Trial Results:
Mean R2 Score: 0.8801 (+/- 0.0170)
Mean MAE: 0.5724 (+/- 0.1090)


[I 2024-12-19 17:55:50,916] Trial 30 finished with value: 0.5395402045219553 and parameters: {'n_estimators': 314, 'max_depth': 21, 'min_child_weight': 7, 'learning_rate': 0.02746417324830505, 'gamma': 0.15192882537384608, 'reg_alpha': 0.0013742378053447708, 'reg_lambda': 0.09164464877254501, 'subsample': 0.6561606761761598, 'colsample_bytree': 0.7339670908460728, 'colsample_bylevel': 0.8794147889806694, 'colsample_bynode': 0.599861915639529, 'max_delta_step': 2, 'scale_pos_weight': 1.1564405571607226}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8888 (+/- 0.0177)
Mean MAE: 0.5395 (+/- 0.0863)


[I 2024-12-19 17:55:56,170] Trial 31 finished with value: 0.5093240834502138 and parameters: {'n_estimators': 519, 'max_depth': 22, 'min_child_weight': 7, 'learning_rate': 0.028044282270339196, 'gamma': 0.06745060704243161, 'reg_alpha': 2.4040150792092663e-05, 'reg_lambda': 0.08689534403026573, 'subsample': 0.8928164892649795, 'colsample_bytree': 0.7177155215695994, 'colsample_bylevel': 0.8772205821085745, 'colsample_bynode': 0.7336513496734873, 'max_delta_step': 2, 'scale_pos_weight': 1.15990605164812}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8980 (+/- 0.0205)
Mean MAE: 0.5093 (+/- 0.0742)


[I 2024-12-19 17:55:56,869] Trial 34 finished with value: 0.5281843353921557 and parameters: {'n_estimators': 476, 'max_depth': 22, 'min_child_weight': 8, 'learning_rate': 0.15113555628736827, 'gamma': 0.07443959649342534, 'reg_alpha': 4.1441765137930207e-07, 'reg_lambda': 0.0012638020181523898, 'subsample': 0.5156031461742097, 'colsample_bytree': 0.6772135468512257, 'colsample_bylevel': 0.6034962250615191, 'colsample_bynode': 0.7490821675738718, 'max_delta_step': 1, 'scale_pos_weight': 1.0340378224585514}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8923 (+/- 0.0183)
Mean MAE: 0.5282 (+/- 0.0746)


[I 2024-12-19 17:55:59,511] Trial 33 finished with value: 0.5484427977168537 and parameters: {'n_estimators': 488, 'max_depth': 9, 'min_child_weight': 7, 'learning_rate': 0.0127638680224245, 'gamma': 0.22635869636818454, 'reg_alpha': 2.37385304706608e-07, 'reg_lambda': 0.13145686638023915, 'subsample': 0.8516278988338029, 'colsample_bytree': 0.7248395959199762, 'colsample_bylevel': 0.6071716602117903, 'colsample_bynode': 0.7358410767910024, 'max_delta_step': 1, 'scale_pos_weight': 1.101446871219227}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8863 (+/- 0.0210)
Mean MAE: 0.5484 (+/- 0.0885)


[I 2024-12-19 17:56:00,881] Trial 32 finished with value: 0.5331682662050478 and parameters: {'n_estimators': 519, 'max_depth': 8, 'min_child_weight': 7, 'learning_rate': 0.012624250275996384, 'gamma': 0.13726948034826947, 'reg_alpha': 3.4654754778113176e-07, 'reg_lambda': 0.10489718030945774, 'subsample': 0.9006840289266954, 'colsample_bytree': 0.7218781985966128, 'colsample_bylevel': 0.5955297923123499, 'colsample_bynode': 0.7375286541693024, 'max_delta_step': 1, 'scale_pos_weight': 1.1093025726116066}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8912 (+/- 0.0209)
Mean MAE: 0.5332 (+/- 0.0853)


[I 2024-12-19 17:56:02,676] Trial 17 finished with value: 0.7867580330035864 and parameters: {'n_estimators': 924, 'max_depth': 25, 'min_child_weight': 6, 'learning_rate': 0.001602195115232181, 'gamma': 1.6374814021358703e-08, 'reg_alpha': 1.3703777791070766e-08, 'reg_lambda': 6.725153866190531e-08, 'subsample': 0.7120031178603141, 'colsample_bytree': 0.5068936079422122, 'colsample_bylevel': 0.9231780861143771, 'colsample_bynode': 0.5235702823260531, 'max_delta_step': 0, 'scale_pos_weight': 0.9553709769387859}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.7948 (+/- 0.0202)
Mean MAE: 0.7868 (+/- 0.1625)


[I 2024-12-19 17:56:03,629] Trial 25 finished with value: 0.6047908082637331 and parameters: {'n_estimators': 532, 'max_depth': 20, 'min_child_weight': 8, 'learning_rate': 0.004031711828580314, 'gamma': 0.0024035815085183664, 'reg_alpha': 5.174933031299818e-05, 'reg_lambda': 0.0839165038914516, 'subsample': 0.6251537247595589, 'colsample_bytree': 0.9046516585600559, 'colsample_bylevel': 0.6567878693060278, 'colsample_bynode': 0.629428808419048, 'max_delta_step': 7, 'scale_pos_weight': 1.1569224768835038}. Best is trial 3 with value: 0.4906567815093249.



Trial Results:
Mean R2 Score: 0.8711 (+/- 0.0158)
Mean MAE: 0.6048 (+/- 0.1106)


[I 2024-12-19 17:56:04,938] Trial 29 finished with value: 0.4905038609984488 and parameters: {'n_estimators': 314, 'max_depth': 21, 'min_child_weight': 7, 'learning_rate': 0.026531391122763332, 'gamma': 0.0025128755787333043, 'reg_alpha': 2.4032015049261387e-05, 'reg_lambda': 0.10283057564200286, 'subsample': 0.6340338467335831, 'colsample_bytree': 0.9020804010123896, 'colsample_bylevel': 0.8656149317643109, 'colsample_bynode': 0.6000006140823133, 'max_delta_step': 2, 'scale_pos_weight': 1.1473878200908723}. Best is trial 29 with value: 0.4905038609984488.



Trial Results:
Mean R2 Score: 0.9014 (+/- 0.0185)
Mean MAE: 0.4905 (+/- 0.0802)


[I 2024-12-19 17:56:13,554] Trial 27 finished with value: 0.49461296794301113 and parameters: {'n_estimators': 488, 'max_depth': 21, 'min_child_weight': 7, 'learning_rate': 0.013097042999907445, 'gamma': 0.0028607152734083756, 'reg_alpha': 2.5569513554179707e-05, 'reg_lambda': 0.05603803103754843, 'subsample': 0.6251244992062053, 'colsample_bytree': 0.9241287644782503, 'colsample_bylevel': 0.6738035546982202, 'colsample_bynode': 0.6147726052070871, 'max_delta_step': 7, 'scale_pos_weight': 1.15603541589564}. Best is trial 29 with value: 0.4905038609984488.



Trial Results:
Mean R2 Score: 0.9015 (+/- 0.0194)
Mean MAE: 0.4946 (+/- 0.0817)


[I 2024-12-19 17:56:16,757] Trial 35 finished with value: 0.4965908691446582 and parameters: {'n_estimators': 454, 'max_depth': 9, 'min_child_weight': 8, 'learning_rate': 0.011826847865087612, 'gamma': 4.275327281665162e-05, 'reg_alpha': 2.115763584457908e-07, 'reg_lambda': 0.022218342388491276, 'subsample': 0.8338188370654196, 'colsample_bytree': 0.8847478643095097, 'colsample_bylevel': 0.6024283909845212, 'colsample_bynode': 0.8667387994319177, 'max_delta_step': 1, 'scale_pos_weight': 1.032688179501544}. Best is trial 29 with value: 0.4905038609984488.



Trial Results:
Mean R2 Score: 0.9003 (+/- 0.0195)
Mean MAE: 0.4966 (+/- 0.0827)


[I 2024-12-19 17:56:25,855] Trial 36 finished with value: 0.4887838861732515 and parameters: {'n_estimators': 751, 'max_depth': 8, 'min_child_weight': 4, 'learning_rate': 0.013143742768462556, 'gamma': 5.4558812399181674e-05, 'reg_alpha': 0.00018146754869045433, 'reg_lambda': 0.02336273341510333, 'subsample': 0.8283672208708563, 'colsample_bytree': 0.8843051802349811, 'colsample_bylevel': 0.5014147107364173, 'colsample_bynode': 0.869495626141501, 'max_delta_step': 1, 'scale_pos_weight': 1.097055264228363}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9026 (+/- 0.0191)
Mean MAE: 0.4888 (+/- 0.0792)


[I 2024-12-19 17:56:28,390] Trial 37 finished with value: 0.5008902825797499 and parameters: {'n_estimators': 729, 'max_depth': 9, 'min_child_weight': 4, 'learning_rate': 0.0723549276740104, 'gamma': 5.0102421384763884e-05, 'reg_alpha': 0.00012785819375085836, 'reg_lambda': 0.021587604613771297, 'subsample': 0.8326052772308848, 'colsample_bytree': 0.9794213895150384, 'colsample_bylevel': 0.7868680106401356, 'colsample_bynode': 0.8553084762557328, 'max_delta_step': 7, 'scale_pos_weight': 0.9341430620253413}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8957 (+/- 0.0183)
Mean MAE: 0.5009 (+/- 0.0665)


[I 2024-12-19 17:56:32,807] Trial 41 finished with value: 0.5081031709976869 and parameters: {'n_estimators': 733, 'max_depth': 16, 'min_child_weight': 9, 'learning_rate': 0.06739398930485349, 'gamma': 0.0007854309601978713, 'reg_alpha': 0.00022595106260634893, 'reg_lambda': 0.02285680414774456, 'subsample': 0.8333704685042405, 'colsample_bytree': 0.8737642036183656, 'colsample_bylevel': 0.7987392484981787, 'colsample_bynode': 0.8658360950176425, 'max_delta_step': 1, 'scale_pos_weight': 1.0775348836313143}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8960 (+/- 0.0211)
Mean MAE: 0.5081 (+/- 0.0844)


[I 2024-12-19 17:56:37,208] Trial 40 finished with value: 0.4950759101340152 and parameters: {'n_estimators': 636, 'max_depth': 15, 'min_child_weight': 9, 'learning_rate': 0.035356400135426555, 'gamma': 6.726830702737888e-05, 'reg_alpha': 0.0031440644282942016, 'reg_lambda': 0.022003936685162744, 'subsample': 0.9992841350050402, 'colsample_bytree': 0.6373310111533378, 'colsample_bylevel': 0.5674207420533439, 'colsample_bynode': 0.8338584044956526, 'max_delta_step': 6, 'scale_pos_weight': 1.068032738835945}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9031 (+/- 0.0185)
Mean MAE: 0.4951 (+/- 0.0912)


[I 2024-12-19 17:56:39,168] Trial 39 finished with value: 0.5007419772489214 and parameters: {'n_estimators': 632, 'max_depth': 15, 'min_child_weight': 9, 'learning_rate': 0.033944632919102206, 'gamma': 6.696400793293583e-05, 'reg_alpha': 0.004627993828798327, 'reg_lambda': 0.020806864747518913, 'subsample': 0.9707416442041259, 'colsample_bytree': 0.6132044612884358, 'colsample_bylevel': 0.5679372095755258, 'colsample_bynode': 0.8641345982423517, 'max_delta_step': 7, 'scale_pos_weight': 1.0615820464531283}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9013 (+/- 0.0153)
Mean MAE: 0.5007 (+/- 0.0816)


[I 2024-12-19 17:56:42,140] Trial 38 finished with value: 0.4957485231147496 and parameters: {'n_estimators': 681, 'max_depth': 16, 'min_child_weight': 9, 'learning_rate': 0.03307277514484208, 'gamma': 4.9167373496386256e-05, 'reg_alpha': 0.00031291474993758796, 'reg_lambda': 0.026674906306129, 'subsample': 0.9894215515408956, 'colsample_bytree': 0.615733790317275, 'colsample_bylevel': 0.7849143525090322, 'colsample_bynode': 0.8572682116365972, 'max_delta_step': 7, 'scale_pos_weight': 1.059243052119012}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9023 (+/- 0.0186)
Mean MAE: 0.4957 (+/- 0.0818)


[I 2024-12-19 17:56:48,013] Trial 42 finished with value: 0.4999028938723046 and parameters: {'n_estimators': 620, 'max_depth': 16, 'min_child_weight': 9, 'learning_rate': 0.032233223024443154, 'gamma': 0.00011176176973735682, 'reg_alpha': 0.00014491364374603108, 'reg_lambda': 0.0013959891904331543, 'subsample': 0.5866494060543882, 'colsample_bytree': 0.973352250784826, 'colsample_bylevel': 0.7079991273507317, 'colsample_bynode': 0.6525506634101355, 'max_delta_step': 7, 'scale_pos_weight': 1.077084737376116}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8999 (+/- 0.0209)
Mean MAE: 0.4999 (+/- 0.0891)


[I 2024-12-19 17:56:49,004] Trial 43 finished with value: 0.4992408197253083 and parameters: {'n_estimators': 616, 'max_depth': 15, 'min_child_weight': 9, 'learning_rate': 0.03474721542735047, 'gamma': 0.0001032508320272883, 'reg_alpha': 0.00019136633153303965, 'reg_lambda': 0.0015953115931418805, 'subsample': 0.5876881007510915, 'colsample_bytree': 0.9605343755790475, 'colsample_bylevel': 0.5654879884002459, 'colsample_bynode': 0.6543430205090504, 'max_delta_step': 8, 'scale_pos_weight': 1.0832161172143244}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8998 (+/- 0.0184)
Mean MAE: 0.4992 (+/- 0.0903)


[I 2024-12-19 17:57:02,304] Trial 45 finished with value: 0.4926219831761715 and parameters: {'n_estimators': 614, 'max_depth': 17, 'min_child_weight': 9, 'learning_rate': 0.017136379005372595, 'gamma': 0.0007262006564412931, 'reg_alpha': 0.002296210908374593, 'reg_lambda': 0.27180679410851627, 'subsample': 0.7424930000108977, 'colsample_bytree': 0.9577305878267278, 'colsample_bylevel': 0.5007407500683688, 'colsample_bynode': 0.6525097348283709, 'max_delta_step': 3, 'scale_pos_weight': 1.0789567669404563}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9017 (+/- 0.0187)
Mean MAE: 0.4926 (+/- 0.0910)


[I 2024-12-19 17:57:03,936] Trial 44 finished with value: 0.4954265005974655 and parameters: {'n_estimators': 698, 'max_depth': 17, 'min_child_weight': 9, 'learning_rate': 0.016224323460427164, 'gamma': 0.00015014457571325138, 'reg_alpha': 0.006068165511139507, 'reg_lambda': 0.3597506882509981, 'subsample': 0.598733386906001, 'colsample_bytree': 0.9652144051045064, 'colsample_bylevel': 0.5043970101194977, 'colsample_bynode': 0.6524751256116573, 'max_delta_step': 3, 'scale_pos_weight': 1.0670322560089676}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9016 (+/- 0.0178)
Mean MAE: 0.4954 (+/- 0.0962)


[I 2024-12-19 17:57:07,023] Trial 46 finished with value: 0.4916361292326565 and parameters: {'n_estimators': 611, 'max_depth': 18, 'min_child_weight': 9, 'learning_rate': 0.01692790924662749, 'gamma': 0.00012002062164856264, 'reg_alpha': 0.0022626453549954204, 'reg_lambda': 0.3339617747228317, 'subsample': 0.5770800862057296, 'colsample_bytree': 0.9264033711247062, 'colsample_bylevel': 0.7116118119180315, 'colsample_bynode': 0.6415526961430749, 'max_delta_step': 3, 'scale_pos_weight': 1.128466941945324}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9025 (+/- 0.0180)
Mean MAE: 0.4916 (+/- 0.0858)


[I 2024-12-19 17:57:08,327] Trial 49 finished with value: 0.5030325330905461 and parameters: {'n_estimators': 592, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.0188406670477553, 'gamma': 0.0001757988201405489, 'reg_alpha': 0.7081140313890648, 'reg_lambda': 0.25940803650582833, 'subsample': 0.7572747715499049, 'colsample_bytree': 0.963768895029989, 'colsample_bylevel': 0.5031049654371963, 'colsample_bynode': 0.6486309725228888, 'max_delta_step': 3, 'scale_pos_weight': 1.1264819692022476}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9002 (+/- 0.0187)
Mean MAE: 0.5030 (+/- 0.0871)


[I 2024-12-19 17:57:13,453] Trial 48 finished with value: 0.4922605819736514 and parameters: {'n_estimators': 600, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.016656788316736233, 'gamma': 0.0001879967162141297, 'reg_alpha': 0.0002665819026162922, 'reg_lambda': 0.30269249783797514, 'subsample': 0.9963398100290692, 'colsample_bytree': 0.9365049753050999, 'colsample_bylevel': 0.5161497099184932, 'colsample_bynode': 0.6588683350838727, 'max_delta_step': 3, 'scale_pos_weight': 1.1282587921884406}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9023 (+/- 0.0154)
Mean MAE: 0.4923 (+/- 0.0833)


[I 2024-12-19 17:57:19,476] Trial 50 finished with value: 0.4922862915785882 and parameters: {'n_estimators': 581, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.015673527853553873, 'gamma': 0.0008421903020860448, 'reg_alpha': 0.002261487526558825, 'reg_lambda': 0.35222942631733445, 'subsample': 0.7336753964512844, 'colsample_bytree': 0.9443917426402103, 'colsample_bylevel': 0.5678936096999551, 'colsample_bynode': 0.8261936718139347, 'max_delta_step': 3, 'scale_pos_weight': 1.1272640003892493}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9023 (+/- 0.0211)
Mean MAE: 0.4923 (+/- 0.0904)


[I 2024-12-19 17:57:25,978] Trial 47 finished with value: 0.9453567603351356 and parameters: {'n_estimators': 834, 'max_depth': 18, 'min_child_weight': 3, 'learning_rate': 0.001026989897554625, 'gamma': 0.0008483696908403564, 'reg_alpha': 0.005460603773037161, 'reg_lambda': 0.30531875307790013, 'subsample': 0.5932404333960392, 'colsample_bytree': 0.9582353612775456, 'colsample_bylevel': 0.5024942324727931, 'colsample_bynode': 0.6466890267858081, 'max_delta_step': 3, 'scale_pos_weight': 1.092301736928049}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.6992 (+/- 0.0155)
Mean MAE: 0.9454 (+/- 0.1838)


[I 2024-12-19 17:57:28,619] Trial 51 finished with value: 0.4983070304957666 and parameters: {'n_estimators': 837, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.016292695756121467, 'gamma': 0.0010314913493842887, 'reg_alpha': 0.0026980874353271333, 'reg_lambda': 0.23589416592141702, 'subsample': 0.543392948477132, 'colsample_bytree': 0.9395735759393301, 'colsample_bylevel': 0.5009806181052263, 'colsample_bynode': 0.7019853566477514, 'max_delta_step': 3, 'scale_pos_weight': 1.1247832639512503}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9008 (+/- 0.0155)
Mean MAE: 0.4983 (+/- 0.0855)


[I 2024-12-19 17:57:32,536] Trial 52 finished with value: 0.4906269527390036 and parameters: {'n_estimators': 576, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.016887072627565364, 'gamma': 0.0007775977597187298, 'reg_alpha': 3.574318529157175e-05, 'reg_lambda': 0.27436751651806895, 'subsample': 0.7321388525017029, 'colsample_bytree': 0.9452657874025304, 'colsample_bylevel': 0.520072400739358, 'colsample_bynode': 0.6874424609520586, 'max_delta_step': 3, 'scale_pos_weight': 1.126984953408897}. Best is trial 36 with value: 0.4887838861732515.
[I 2024-12-19 17:57:32,638] Trial 58 finished with value: 0.5897799747699886 and parameters: {'n_estimators': 573, 'max_depth': 3, 'min_child_weight': 10, 'learning_rate': 0.008269720757748086, 'gamma': 0.00041590236427176304, 'reg_alpha': 0.012907290662584397, 'reg_lambda': 0.006671833928929056, 'subsample': 0.5600818961065178, 'colsample_bytree': 0.9354965450368907, 'colsample_bylevel': 0.5436466819260116, 'colsample_bynode': 0.70115


Trial Results:
Mean R2 Score: 0.9028 (+/- 0.0191)
Mean MAE: 0.4906 (+/- 0.0796)

Trial Results:
Mean R2 Score: 0.8720 (+/- 0.0249)
Mean MAE: 0.5898 (+/- 0.0899)


[I 2024-12-19 17:57:35,646] Trial 59 finished with value: 0.6086957503095796 and parameters: {'n_estimators': 574, 'max_depth': 3, 'min_child_weight': 10, 'learning_rate': 0.006295760700821316, 'gamma': 1.6748823891542358e-05, 'reg_alpha': 0.01624225678653267, 'reg_lambda': 0.006692337041747882, 'subsample': 0.7382076289544434, 'colsample_bytree': 0.9981484691068128, 'colsample_bylevel': 0.5475898060226042, 'colsample_bynode': 0.780396668728508, 'max_delta_step': 4, 'scale_pos_weight': 1.1380715631879628}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8663 (+/- 0.0237)
Mean MAE: 0.6087 (+/- 0.0987)


[I 2024-12-19 17:57:37,026] Trial 54 finished with value: 0.5055028633214935 and parameters: {'n_estimators': 841, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.018214654544867377, 'gamma': 0.0008412818525419577, 'reg_alpha': 0.9464807214795948, 'reg_lambda': 0.27629698461040125, 'subsample': 0.5454587920138827, 'colsample_bytree': 0.9434958771758445, 'colsample_bylevel': 0.5083739728033448, 'colsample_bynode': 0.7103433906985391, 'max_delta_step': 3, 'scale_pos_weight': 1.1307418763621395}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8982 (+/- 0.0199)
Mean MAE: 0.5055 (+/- 0.0823)


[I 2024-12-19 17:57:41,308] Trial 55 finished with value: 0.5025982862897738 and parameters: {'n_estimators': 834, 'max_depth': 13, 'min_child_weight': 10, 'learning_rate': 0.007887489787955828, 'gamma': 0.000781777126654537, 'reg_alpha': 0.015414643322125455, 'reg_lambda': 0.21318598411894082, 'subsample': 0.541472241106367, 'colsample_bytree': 0.998865669306628, 'colsample_bylevel': 0.5395518248058476, 'colsample_bynode': 0.7014208315062591, 'max_delta_step': 3, 'scale_pos_weight': 1.1215394723958645}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8997 (+/- 0.0194)
Mean MAE: 0.5026 (+/- 0.0869)


[I 2024-12-19 17:57:41,682] Trial 60 finished with value: 0.5679185309670178 and parameters: {'n_estimators': 767, 'max_depth': 3, 'min_child_weight': 8, 'learning_rate': 0.008610304881421872, 'gamma': 0.0002991331225204987, 'reg_alpha': 0.016585477184696624, 'reg_lambda': 0.46470282251621403, 'subsample': 0.5410213574037619, 'colsample_bytree': 0.9940209656973586, 'colsample_bylevel': 0.5424573066296556, 'colsample_bynode': 0.7084070357457404, 'max_delta_step': 4, 'scale_pos_weight': 1.1419407044910146}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8777 (+/- 0.0279)
Mean MAE: 0.5679 (+/- 0.0855)


[I 2024-12-19 17:57:43,563] Trial 53 finished with value: 0.49432681832299463 and parameters: {'n_estimators': 816, 'max_depth': 18, 'min_child_weight': 10, 'learning_rate': 0.016505783510361846, 'gamma': 0.0007300175824814108, 'reg_alpha': 0.30647947836819944, 'reg_lambda': 0.2604076895417614, 'subsample': 0.7315451618102279, 'colsample_bytree': 0.943421890446652, 'colsample_bylevel': 0.5012025330866376, 'colsample_bynode': 0.6982079664717561, 'max_delta_step': 3, 'scale_pos_weight': 1.1293890908231963}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9020 (+/- 0.0189)
Mean MAE: 0.4943 (+/- 0.0890)


[I 2024-12-19 17:57:49,765] Trial 56 finished with value: 0.4971489013868773 and parameters: {'n_estimators': 856, 'max_depth': 13, 'min_child_weight': 10, 'learning_rate': 0.009359216180183767, 'gamma': 1.7376851804945492e-05, 'reg_alpha': 0.015576343436563304, 'reg_lambda': 0.0072870187043549434, 'subsample': 0.544535109603004, 'colsample_bytree': 0.9985639493576692, 'colsample_bylevel': 0.5412259634489455, 'colsample_bynode': 0.7017259445365526, 'max_delta_step': 4, 'scale_pos_weight': 1.1341414477625416}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9006 (+/- 0.0194)
Mean MAE: 0.4971 (+/- 0.0893)


[I 2024-12-19 17:57:51,858] Trial 57 finished with value: 0.5294885463397994 and parameters: {'n_estimators': 801, 'max_depth': 13, 'min_child_weight': 3, 'learning_rate': 0.007979587981576499, 'gamma': 1.9250654675848127e-05, 'reg_alpha': 0.0003793060354386337, 'reg_lambda': 0.19593732344217774, 'subsample': 0.5058016245933228, 'colsample_bytree': 0.5733433584076786, 'colsample_bylevel': 0.5439344491185013, 'colsample_bynode': 0.7150529639119623, 'max_delta_step': 4, 'scale_pos_weight': 0.8657000968734289}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8918 (+/- 0.0151)
Mean MAE: 0.5295 (+/- 0.0966)


[I 2024-12-19 17:58:02,959] Trial 65 finished with value: 0.5259539496402138 and parameters: {'n_estimators': 439, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.010076604978572884, 'gamma': 2.542247360388763e-06, 'reg_alpha': 5.104106784037279e-05, 'reg_lambda': 0.05273127966032153, 'subsample': 0.6709863834906291, 'colsample_bytree': 0.8743264065751258, 'colsample_bylevel': 0.5292467980489639, 'colsample_bynode': 0.5631449761148906, 'max_delta_step': 2, 'scale_pos_weight': 0.8337689637804827}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8934 (+/- 0.0159)
Mean MAE: 0.5260 (+/- 0.0848)


[I 2024-12-19 17:58:04,142] Trial 62 finished with value: 0.5269833217467299 and parameters: {'n_estimators': 780, 'max_depth': 13, 'min_child_weight': 8, 'learning_rate': 0.010087346681736411, 'gamma': 1.8653157309175604e-06, 'reg_alpha': 0.00037581009578000596, 'reg_lambda': 0.003170790177499268, 'subsample': 0.670051701190281, 'colsample_bytree': 0.5686901419662802, 'colsample_bylevel': 0.5226628689482387, 'colsample_bynode': 0.7133981495538597, 'max_delta_step': 2, 'scale_pos_weight': 1.114213753255509}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8932 (+/- 0.0128)
Mean MAE: 0.5270 (+/- 0.0825)


[I 2024-12-19 17:58:10,709] Trial 66 finished with value: 0.5115037518855603 and parameters: {'n_estimators': 671, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.024887926075787298, 'gamma': 4.549438594385647e-06, 'reg_alpha': 0.000362970281369099, 'reg_lambda': 0.04243529227297275, 'subsample': 0.6699694271546051, 'colsample_bytree': 0.575851635061406, 'colsample_bylevel': 0.5235330939113653, 'colsample_bynode': 0.5626785078043021, 'max_delta_step': 2, 'scale_pos_weight': 1.1765144969447316}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8984 (+/- 0.0134)
Mean MAE: 0.5115 (+/- 0.0854)


[I 2024-12-19 17:58:14,026] Trial 61 finished with value: 0.49283848453149953 and parameters: {'n_estimators': 750, 'max_depth': 22, 'min_child_weight': 8, 'learning_rate': 0.008539964591582095, 'gamma': 1.5605061406386392e-05, 'reg_alpha': 5.301917084469678e-05, 'reg_lambda': 0.010458749182507871, 'subsample': 0.6773930509089329, 'colsample_bytree': 0.9997702245883626, 'colsample_bylevel': 0.5434480444091774, 'colsample_bynode': 0.782087591918134, 'max_delta_step': 2, 'scale_pos_weight': 1.1798945924919713}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9020 (+/- 0.0191)
Mean MAE: 0.4928 (+/- 0.0816)


[I 2024-12-19 17:58:16,339] Trial 67 finished with value: 0.5091752151396133 and parameters: {'n_estimators': 663, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.02279203403158006, 'gamma': 1.861086486439363e-06, 'reg_alpha': 0.0004674665618741046, 'reg_lambda': 0.04265085010706916, 'subsample': 0.6823827432527108, 'colsample_bytree': 0.5401053556627888, 'colsample_bylevel': 0.5847112670296861, 'colsample_bynode': 0.5700502115138252, 'max_delta_step': 2, 'scale_pos_weight': 1.1782053720864152}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8975 (+/- 0.0149)
Mean MAE: 0.5092 (+/- 0.0892)


[I 2024-12-19 17:58:17,983] Trial 64 finished with value: 0.5076509161849706 and parameters: {'n_estimators': 786, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.00919252679439768, 'gamma': 3.5508723129830843e-06, 'reg_alpha': 0.00035480086167749916, 'reg_lambda': 0.48654180684075327, 'subsample': 0.6670053875493466, 'colsample_bytree': 0.5652766849527733, 'colsample_bylevel': 0.7199884325825828, 'colsample_bynode': 0.5629398189384288, 'max_delta_step': 2, 'scale_pos_weight': 1.179612509070416}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8991 (+/- 0.0152)
Mean MAE: 0.5077 (+/- 0.0897)


[I 2024-12-19 17:58:18,826] Trial 70 finished with value: 0.495696530381462 and parameters: {'n_estimators': 664, 'max_depth': 19, 'min_child_weight': 2, 'learning_rate': 0.022787018061575074, 'gamma': 0.014156408916447004, 'reg_alpha': 0.04408830103690426, 'reg_lambda': 0.5176361294500346, 'subsample': 0.7742944594040286, 'colsample_bytree': 0.8990283122200835, 'colsample_bylevel': 0.7522230783254066, 'colsample_bynode': 0.6772274635952557, 'max_delta_step': 2, 'scale_pos_weight': 1.1702895559438502}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8995 (+/- 0.0184)
Mean MAE: 0.4957 (+/- 0.0850)


[I 2024-12-19 17:58:22,092] Trial 63 finished with value: 0.4913308308442387 and parameters: {'n_estimators': 768, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.009004059480601186, 'gamma': 5.678065510947803e-07, 'reg_alpha': 0.00038300898318820257, 'reg_lambda': 0.5430428815566491, 'subsample': 0.9258546585085943, 'colsample_bytree': 0.8789130892873718, 'colsample_bylevel': 0.5346066577384138, 'colsample_bynode': 0.5722722353157372, 'max_delta_step': 2, 'scale_pos_weight': 0.8702775653688282}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9020 (+/- 0.0165)
Mean MAE: 0.4913 (+/- 0.0813)


[I 2024-12-19 17:58:27,909] Trial 72 finished with value: 0.504478751668298 and parameters: {'n_estimators': 554, 'max_depth': 19, 'min_child_weight': 2, 'learning_rate': 0.02357066776923926, 'gamma': 0.015999385355596727, 'reg_alpha': 0.04676223057659159, 'reg_lambda': 0.5033106476931468, 'subsample': 0.9139209520027448, 'colsample_bytree': 0.8980306551658317, 'colsample_bylevel': 0.5856306675048226, 'colsample_bynode': 0.6763129762742698, 'max_delta_step': 5, 'scale_pos_weight': 1.096753468766625}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8973 (+/- 0.0170)
Mean MAE: 0.5045 (+/- 0.0863)


[I 2024-12-19 17:58:31,084] Trial 71 finished with value: 0.4955643723972855 and parameters: {'n_estimators': 569, 'max_depth': 19, 'min_child_weight': 9, 'learning_rate': 0.023248675857485416, 'gamma': 0.009277711700637804, 'reg_alpha': 7.11085730708235e-05, 'reg_lambda': 0.5199193319494249, 'subsample': 0.7747721139185021, 'colsample_bytree': 0.9009606845097479, 'colsample_bylevel': 0.5807918883966056, 'colsample_bynode': 0.675735810382827, 'max_delta_step': 5, 'scale_pos_weight': 1.1768991644262454}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9026 (+/- 0.0190)
Mean MAE: 0.4956 (+/- 0.0875)


[I 2024-12-19 17:58:31,482] Trial 68 finished with value: 0.48969530796458577 and parameters: {'n_estimators': 669, 'max_depth': 23, 'min_child_weight': 8, 'learning_rate': 0.021671155425681713, 'gamma': 1.481450950449047e-06, 'reg_alpha': 5.9447286900058854e-05, 'reg_lambda': 0.49876145766003804, 'subsample': 0.6725967301154686, 'colsample_bytree': 0.8764549081342204, 'colsample_bylevel': 0.5834032063148181, 'colsample_bynode': 0.7737612703134842, 'max_delta_step': 2, 'scale_pos_weight': 1.180262412511397}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9022 (+/- 0.0183)
Mean MAE: 0.4897 (+/- 0.0790)


[I 2024-12-19 17:58:36,843] Trial 69 finished with value: 0.49320078769006115 and parameters: {'n_estimators': 547, 'max_depth': 19, 'min_child_weight': 9, 'learning_rate': 0.02408906442906377, 'gamma': 0.0002195921266371329, 'reg_alpha': 0.0005712044833762638, 'reg_lambda': 0.5587167721867885, 'subsample': 0.9279349952496356, 'colsample_bytree': 0.8966909528866995, 'colsample_bylevel': 0.5857977531916663, 'colsample_bynode': 0.7846587225779605, 'max_delta_step': 2, 'scale_pos_weight': 1.1723052370263571}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9023 (+/- 0.0198)
Mean MAE: 0.4932 (+/- 0.0846)


[I 2024-12-19 17:58:50,660] Trial 73 finished with value: 0.517912853953949 and parameters: {'n_estimators': 555, 'max_depth': 19, 'min_child_weight': 2, 'learning_rate': 0.005744777830787351, 'gamma': 0.015087722193282306, 'reg_alpha': 1.1554766757828091e-05, 'reg_lambda': 0.5056869992388976, 'subsample': 0.7756776281706365, 'colsample_bytree': 0.9004714296934209, 'colsample_bylevel': 0.7578004547687759, 'colsample_bynode': 0.6799944287292142, 'max_delta_step': 1, 'scale_pos_weight': 1.097119113625201}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8949 (+/- 0.0148)
Mean MAE: 0.5179 (+/- 0.0882)


[I 2024-12-19 17:58:55,371] Trial 74 finished with value: 0.5185944741023472 and parameters: {'n_estimators': 561, 'max_depth': 19, 'min_child_weight': 2, 'learning_rate': 0.005302710208287945, 'gamma': 0.011437188810401589, 'reg_alpha': 8.856703298065042e-06, 'reg_lambda': 0.1425528389327586, 'subsample': 0.9205469487943895, 'colsample_bytree': 0.9062469214058547, 'colsample_bylevel': 0.6381630996560882, 'colsample_bynode': 0.6822231995370127, 'max_delta_step': 5, 'scale_pos_weight': 1.1117511720974171}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8941 (+/- 0.0170)
Mean MAE: 0.5186 (+/- 0.0885)


[I 2024-12-19 17:59:05,793] Trial 75 finished with value: 0.49570561155250525 and parameters: {'n_estimators': 567, 'max_depth': 19, 'min_child_weight': 4, 'learning_rate': 0.006155241157328408, 'gamma': 0.0014847066081467403, 'reg_alpha': 3.2748291543819298e-06, 'reg_lambda': 0.12939813054862206, 'subsample': 0.9216613292929942, 'colsample_bytree': 0.9138462807520529, 'colsample_bylevel': 0.8276854662608119, 'colsample_bynode': 0.8272277809048781, 'max_delta_step': 5, 'scale_pos_weight': 1.0960363346422135}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8978 (+/- 0.0192)
Mean MAE: 0.4957 (+/- 0.0739)


[I 2024-12-19 17:59:14,727] Trial 78 finished with value: 0.5206925463944156 and parameters: {'n_estimators': 707, 'max_depth': 17, 'min_child_weight': 10, 'learning_rate': 0.005829352223525017, 'gamma': 1.0491996106127328e-07, 'reg_alpha': 0.0010480406549924885, 'reg_lambda': 0.9695347837836333, 'subsample': 0.8740774646236285, 'colsample_bytree': 0.8643285527738664, 'colsample_bylevel': 0.6368426955973425, 'colsample_bynode': 0.5448222322654681, 'max_delta_step': 1, 'scale_pos_weight': 1.0455352040025112}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8950 (+/- 0.0165)
Mean MAE: 0.5207 (+/- 0.0904)


[I 2024-12-19 17:59:22,210] Trial 76 finished with value: 0.5011215041017972 and parameters: {'n_estimators': 711, 'max_depth': 24, 'min_child_weight': 5, 'learning_rate': 0.005006283749006648, 'gamma': 3.4084406140108486e-07, 'reg_alpha': 1.1936282539368716e-05, 'reg_lambda': 0.13721756437289143, 'subsample': 0.9279760284629825, 'colsample_bytree': 0.8383744648457637, 'colsample_bylevel': 0.8413242962124462, 'colsample_bynode': 0.589725485919532, 'max_delta_step': 1, 'scale_pos_weight': 1.1002858829611695}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8985 (+/- 0.0157)
Mean MAE: 0.5011 (+/- 0.0809)


[I 2024-12-19 17:59:36,312] Trial 79 finished with value: 0.49222132377988165 and parameters: {'n_estimators': 710, 'max_depth': 25, 'min_child_weight': 5, 'learning_rate': 0.005605949023455056, 'gamma': 4.241652173595003e-07, 'reg_alpha': 1.4834243339207146e-05, 'reg_lambda': 0.11545888663722198, 'subsample': 0.9455908423021222, 'colsample_bytree': 0.8421226322155357, 'colsample_bylevel': 0.6379357939556183, 'colsample_bynode': 0.9743213341010422, 'max_delta_step': 1, 'scale_pos_weight': 1.1118245694117221}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9005 (+/- 0.0170)
Mean MAE: 0.4922 (+/- 0.0767)


[I 2024-12-19 17:59:44,527] Trial 80 finished with value: 0.4915119124791351 and parameters: {'n_estimators': 704, 'max_depth': 24, 'min_child_weight': 5, 'learning_rate': 0.014412663137263129, 'gamma': 6.922254597513798e-07, 'reg_alpha': 9.772942044091725e-06, 'reg_lambda': 0.13254808600373197, 'subsample': 0.9636699833380167, 'colsample_bytree': 0.9208331727429254, 'colsample_bylevel': 0.6321537317846678, 'colsample_bynode': 0.5449881451077327, 'max_delta_step': 1, 'scale_pos_weight': 0.8966182248804873}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9012 (+/- 0.0166)
Mean MAE: 0.4915 (+/- 0.0803)


[I 2024-12-19 17:59:47,403] Trial 77 finished with value: 0.4915706379868909 and parameters: {'n_estimators': 898, 'max_depth': 24, 'min_child_weight': 5, 'learning_rate': 0.005937632354097461, 'gamma': 2.7565107310862254e-07, 'reg_alpha': 9.274645675207509e-06, 'reg_lambda': 0.11740286093631169, 'subsample': 0.9289974835261617, 'colsample_bytree': 0.9186152236712788, 'colsample_bylevel': 0.6365282418060562, 'colsample_bynode': 0.5987715449313388, 'max_delta_step': 1, 'scale_pos_weight': 0.8907645781970209}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9008 (+/- 0.0171)
Mean MAE: 0.4916 (+/- 0.0807)


[I 2024-12-19 17:59:50,410] Trial 82 finished with value: 0.5917060547052653 and parameters: {'n_estimators': 684, 'max_depth': 21, 'min_child_weight': 5, 'learning_rate': 0.003132797434425725, 'gamma': 9.268909016499072e-06, 'reg_alpha': 1.1099430300925417e-06, 'reg_lambda': 0.12868541475284256, 'subsample': 0.8748853390039071, 'colsample_bytree': 0.8593075750962951, 'colsample_bylevel': 0.848300763074346, 'colsample_bynode': 0.5430630701856931, 'max_delta_step': 1, 'scale_pos_weight': 0.9064602826228395}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8754 (+/- 0.0132)
Mean MAE: 0.5917 (+/- 0.1090)


[I 2024-12-19 17:59:59,811] Trial 81 finished with value: 0.4974995288372813 and parameters: {'n_estimators': 717, 'max_depth': 24, 'min_child_weight': 4, 'learning_rate': 0.040758966406802766, 'gamma': 5.345914416866859e-07, 'reg_alpha': 1.2593502994188755e-05, 'reg_lambda': 0.14113825700457638, 'subsample': 0.9540966401674665, 'colsample_bytree': 0.8616595137712257, 'colsample_bylevel': 0.6331635979179493, 'colsample_bynode': 0.587445894061923, 'max_delta_step': 1, 'scale_pos_weight': 0.9067820279981097}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8985 (+/- 0.0176)
Mean MAE: 0.4975 (+/- 0.0785)


[I 2024-12-19 18:00:08,205] Trial 84 finished with value: 0.5889150570785707 and parameters: {'n_estimators': 704, 'max_depth': 24, 'min_child_weight': 7, 'learning_rate': 0.003184503954686403, 'gamma': 2.8169523096644505e-07, 'reg_alpha': 1.0951275188061356e-06, 'reg_lambda': 2.3818651454444607e-05, 'subsample': 0.9741372170557725, 'colsample_bytree': 0.8819895891668654, 'colsample_bylevel': 0.5551475225025346, 'colsample_bynode': 0.5905441693323845, 'max_delta_step': 0, 'scale_pos_weight': 1.1427558956510069}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8758 (+/- 0.0154)
Mean MAE: 0.5889 (+/- 0.1131)


[I 2024-12-19 18:00:10,711] Trial 83 finished with value: 0.4899330224523486 and parameters: {'n_estimators': 720, 'max_depth': 24, 'min_child_weight': 7, 'learning_rate': 0.013315830777304995, 'gamma': 6.122918739907592e-07, 'reg_alpha': 0.0011443145688248032, 'reg_lambda': 0.9211546558464804, 'subsample': 0.9597184630045221, 'colsample_bytree': 0.8401433986496325, 'colsample_bylevel': 0.9916344729374332, 'colsample_bynode': 0.5889579301407731, 'max_delta_step': 1, 'scale_pos_weight': 1.1468942874878445}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9018 (+/- 0.0199)
Mean MAE: 0.4899 (+/- 0.0760)


[I 2024-12-19 18:00:15,144] Trial 85 finished with value: 0.49059520350797453 and parameters: {'n_estimators': 600, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.014101669502067602, 'gamma': 7.608512710370752e-06, 'reg_alpha': 9.661679951213599e-05, 'reg_lambda': 1.2349803009978859e-06, 'subsample': 0.9634672596168173, 'colsample_bytree': 0.8198430981645374, 'colsample_bylevel': 0.6135138280556713, 'colsample_bynode': 0.63064580037718, 'max_delta_step': 0, 'scale_pos_weight': 1.1484459768444477}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9026 (+/- 0.0162)
Mean MAE: 0.4906 (+/- 0.0863)


[I 2024-12-19 18:00:54,207] Trial 90 finished with value: 0.4927090169482472 and parameters: {'n_estimators': 638, 'max_depth': 22, 'min_child_weight': 6, 'learning_rate': 0.0129657204537339, 'gamma': 7.419289885387664e-07, 'reg_alpha': 9.274553828315509e-07, 'reg_lambda': 5.544564879564176e-07, 'subsample': 0.8944612951697297, 'colsample_bytree': 0.9245318403883519, 'colsample_bylevel': 0.6084901775945051, 'colsample_bynode': 0.5059049466181625, 'max_delta_step': 0, 'scale_pos_weight': 0.8810417191858367}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9014 (+/- 0.0157)
Mean MAE: 0.4927 (+/- 0.0882)


[I 2024-12-19 18:00:56,328] Trial 86 finished with value: 0.5473925501331411 and parameters: {'n_estimators': 877, 'max_depth': 25, 'min_child_weight': 4, 'learning_rate': 0.0026911195402945496, 'gamma': 6.483124208089504e-07, 'reg_alpha': 1.2818684182820327e-06, 'reg_lambda': 0.06665122495471722, 'subsample': 0.9520068892691936, 'colsample_bytree': 0.7989364477759865, 'colsample_bylevel': 0.693415163474181, 'colsample_bynode': 0.955024464495388, 'max_delta_step': 0, 'scale_pos_weight': 1.1485857597873865}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8870 (+/- 0.0154)
Mean MAE: 0.5474 (+/- 0.0952)


[I 2024-12-19 18:01:11,012] Trial 93 finished with value: 0.49007174786994623 and parameters: {'n_estimators': 649, 'max_depth': 25, 'min_child_weight': 6, 'learning_rate': 0.014047362552703991, 'gamma': 1.0010325533138442e-06, 'reg_alpha': 3.1938974197741674e-05, 'reg_lambda': 2.0932344879179364e-06, 'subsample': 0.9057082403241302, 'colsample_bytree': 0.8079088931182046, 'colsample_bylevel': 0.6116133786940356, 'colsample_bynode': 0.6306401651407226, 'max_delta_step': 0, 'scale_pos_weight': 0.8638069342649549}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.9022 (+/- 0.0166)
Mean MAE: 0.4901 (+/- 0.0801)


[I 2024-12-19 18:01:12,912] Trial 87 finished with value: 0.4943224764366619 and parameters: {'n_estimators': 897, 'max_depth': 24, 'min_child_weight': 4, 'learning_rate': 0.014177838395311787, 'gamma': 8.464814205561308e-07, 'reg_alpha': 7.380377256752437e-07, 'reg_lambda': 0.06720993849955355, 'subsample': 0.9796177187285309, 'colsample_bytree': 0.9217571347031756, 'colsample_bylevel': 0.6118681565703996, 'colsample_bynode': 0.5046765278584956, 'max_delta_step': 0, 'scale_pos_weight': 0.891635563140001}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8994 (+/- 0.0148)
Mean MAE: 0.4943 (+/- 0.0805)


[I 2024-12-19 18:01:13,228] Trial 88 finished with value: 0.49310501290276554 and parameters: {'n_estimators': 892, 'max_depth': 24, 'min_child_weight': 4, 'learning_rate': 0.014332148638234626, 'gamma': 8.637858259305386e-07, 'reg_alpha': 4.932821497918839e-06, 'reg_lambda': 0.06843574724096003, 'subsample': 0.9642172162794154, 'colsample_bytree': 0.9228664662823292, 'colsample_bylevel': 0.6091945884013379, 'colsample_bynode': 0.5473707361020935, 'max_delta_step': 0, 'scale_pos_weight': 0.8983991262945716}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8994 (+/- 0.0150)
Mean MAE: 0.4931 (+/- 0.0825)


[I 2024-12-19 18:01:18,674] Trial 89 finished with value: 0.4930521731568601 and parameters: {'n_estimators': 896, 'max_depth': 24, 'min_child_weight': 4, 'learning_rate': 0.013771453109843353, 'gamma': 5.034364897600272e-07, 'reg_alpha': 5.399639651107608e-06, 'reg_lambda': 0.06863462162144858, 'subsample': 0.9575240054782921, 'colsample_bytree': 0.8830970145639129, 'colsample_bylevel': 0.6203710632592951, 'colsample_bynode': 0.6336169261276969, 'max_delta_step': 0, 'scale_pos_weight': 0.8874505506216454}. Best is trial 36 with value: 0.4887838861732515.



Trial Results:
Mean R2 Score: 0.8990 (+/- 0.0168)
Mean MAE: 0.4931 (+/- 0.0795)


[I 2024-12-19 18:01:25,938] Trial 92 finished with value: 0.48599784767341714 and parameters: {'n_estimators': 890, 'max_depth': 22, 'min_child_weight': 6, 'learning_rate': 0.013894939430122396, 'gamma': 8.850284671073111e-07, 'reg_alpha': 4.702724569459745e-06, 'reg_lambda': 0.06820048029803732, 'subsample': 0.8940609379239972, 'colsample_bytree': 0.8102952066278133, 'colsample_bylevel': 0.6912472726243479, 'colsample_bynode': 0.627282584000448, 'max_delta_step': 0, 'scale_pos_weight': 0.8874979346935884}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9027 (+/- 0.0163)
Mean MAE: 0.4860 (+/- 0.0840)


[I 2024-12-19 18:01:29,045] Trial 91 finished with value: 0.4888159521001148 and parameters: {'n_estimators': 877, 'max_depth': 22, 'min_child_weight': 4, 'learning_rate': 0.01426558465888668, 'gamma': 7.467382105397527e-07, 'reg_alpha': 5.889720363185237e-06, 'reg_lambda': 0.06683734452388373, 'subsample': 0.8069163187462959, 'colsample_bytree': 0.9202819645367144, 'colsample_bylevel': 0.6948527581830672, 'colsample_bynode': 0.6208597814384725, 'max_delta_step': 0, 'scale_pos_weight': 0.8807512554462493}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9010 (+/- 0.0162)
Mean MAE: 0.4888 (+/- 0.0754)


[I 2024-12-19 18:01:49,012] Trial 94 finished with value: 0.4932597822926317 and parameters: {'n_estimators': 749, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.01392622031376096, 'gamma': 7.176049779355086e-08, 'reg_alpha': 3.561987939503499e-05, 'reg_lambda': 2.6556106623931698e-06, 'subsample': 0.9523662918962602, 'colsample_bytree': 0.8213825947313224, 'colsample_bylevel': 0.6184502967564329, 'colsample_bynode': 0.6237109796825199, 'max_delta_step': 0, 'scale_pos_weight': 0.9391878675451023}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9020 (+/- 0.0157)
Mean MAE: 0.4933 (+/- 0.0879)


[I 2024-12-19 18:01:52,312] Trial 95 finished with value: 0.4887865305038533 and parameters: {'n_estimators': 759, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.014415585024892872, 'gamma': 6.120272890542695e-08, 'reg_alpha': 7.441553243838692e-05, 'reg_lambda': 1.8987577553510863e-08, 'subsample': 0.970913700126093, 'colsample_bytree': 0.8158698413026867, 'colsample_bylevel': 0.9811411025916328, 'colsample_bynode': 0.6252568558192232, 'max_delta_step': 0, 'scale_pos_weight': 0.8403277478462001}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9017 (+/- 0.0191)
Mean MAE: 0.4888 (+/- 0.0758)


[I 2024-12-19 18:01:57,562] Trial 98 finished with value: 0.49180816850759984 and parameters: {'n_estimators': 680, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.019619907187057525, 'gamma': 1.2983010538281166e-06, 'reg_alpha': 3.757930349665017e-05, 'reg_lambda': 3.1928406073174863e-06, 'subsample': 0.9086553790615222, 'colsample_bytree': 0.8221215431658437, 'colsample_bylevel': 0.9477175025424281, 'colsample_bynode': 0.6207945045238263, 'max_delta_step': 0, 'scale_pos_weight': 0.8494305247330843}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9016 (+/- 0.0184)
Mean MAE: 0.4918 (+/- 0.0734)


[I 2024-12-19 18:01:57,846] Trial 96 finished with value: 0.4933215129630849 and parameters: {'n_estimators': 747, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.028830940330700838, 'gamma': 6.620142809653217e-06, 'reg_alpha': 7.506524351278843e-05, 'reg_lambda': 7.799480668382266e-07, 'subsample': 0.7017626849800546, 'colsample_bytree': 0.8167019350058207, 'colsample_bylevel': 0.9483396876470378, 'colsample_bynode': 0.6266455633688593, 'max_delta_step': 0, 'scale_pos_weight': 0.8529847390854057}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9008 (+/- 0.0177)
Mean MAE: 0.4933 (+/- 0.0821)


[I 2024-12-19 18:01:58,329] Trial 97 finished with value: 0.48876666795864765 and parameters: {'n_estimators': 738, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.011308454519366918, 'gamma': 1.236796847760128e-07, 'reg_alpha': 3.630404592799856e-05, 'reg_lambda': 1.2671882776238324e-06, 'subsample': 0.9062884841198149, 'colsample_bytree': 0.7535870016840406, 'colsample_bylevel': 0.936582508813245, 'colsample_bynode': 0.6286603358601679, 'max_delta_step': 0, 'scale_pos_weight': 0.9227318480515998}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9032 (+/- 0.0173)
Mean MAE: 0.4888 (+/- 0.0804)


[I 2024-12-19 18:02:00,312] Trial 99 finished with value: 0.4890745436722409 and parameters: {'n_estimators': 743, 'max_depth': 25, 'min_child_weight': 7, 'learning_rate': 0.011459734748772531, 'gamma': 1.439419158513737e-07, 'reg_alpha': 7.71174751101627e-05, 'reg_lambda': 1.1545324517572843e-06, 'subsample': 0.8212224370969629, 'colsample_bytree': 0.8181248457403625, 'colsample_bylevel': 0.9814972977749811, 'colsample_bynode': 0.6214880415773358, 'max_delta_step': 1, 'scale_pos_weight': 0.854235669584738}. Best is trial 92 with value: 0.48599784767341714.



Trial Results:
Mean R2 Score: 0.9026 (+/- 0.0190)
Mean MAE: 0.4891 (+/- 0.0803)

Best trial:
  MAE:  0.48599784767341714

Best hyperparameters:
    n_estimators: 890
    max_depth: 22
    min_child_weight: 6
    learning_rate: 0.013894939430122396
    gamma: 8.850284671073111e-07
    reg_alpha: 4.702724569459745e-06
    reg_lambda: 0.06820048029803732
    subsample: 0.8940609379239972
    colsample_bytree: 0.8102952066278133
    colsample_bylevel: 0.6912472726243479
    colsample_bynode: 0.627282584000448
    max_delta_step: 0
    scale_pos_weight: 0.8874979346935884

Final Test Set Performance:
R2 Score: 0.9111
MAE: 0.4934


In [21]:
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import Pipeline

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)

# Define preprocessing
numeric_features = ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']
categorical_features = ['property_type', 'sector', 'balcony', 'agePossession', 
                       'furnishing_type', 'luxury_category', 'floor_category']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), 
         categorical_features)
    ]
)

def objective(trial):
    # Hyperparameter search space
    param = {
        'verbosity': 0,
        'objective': 'reg:squarederror',
        'eval_metric': 'mae',
        'booster': 'gbtree',
        'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),
        'eta': trial.suggest_float('eta', 0.01, 0.3),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 25),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.4, 1.0),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.4, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
    }

    # Preprocess the data
    X_train_processed = preprocessor.fit_transform(X_train)
    X_test_processed = preprocessor.transform(X_test)
    
    # Create DMatrix for XGBoost
    dtrain = xgb.DMatrix(X_train_processed, label=y_train)
    dtest = xgb.DMatrix(X_test_processed, label=y_test)

    # Define pruning callback
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "eval-mae")

    # Train the model
    bst = xgb.train(
        param,
        dtrain,
        num_boost_round=param['n_estimators'],
        evals=[(dtrain, "train"), (dtest, "eval")],
        early_stopping_rounds=50,
        callbacks=[pruning_callback]
    )

    # Predict on test set
    preds = bst.predict(dtest)
    
    # Calculate metrics
    mae = mean_absolute_error(np.expm1(y_test), np.expm1(preds))
    r2 = r2_score(y_test, preds)
    
    print(f"\nTrial Results:")
    print(f"MAE: {mae:.4f}")
    print(f"R2 Score: {r2:.4f}")
    
    return mae

# Create and run study
study = optuna.create_study(
    direction='minimize',
    pruner=optuna.pruners.SuccessiveHalvingPruner()
)
study.optimize(objective, n_trials=100)

# Train final model with best parameters
best_params = study.best_trial.params
best_params['objective'] = 'reg:squarederror'
best_params['eval_metric'] = 'mae'

# Preprocess final data
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Create and train final model
dtrain = xgb.DMatrix(X_train_processed, label=y_train)
dtest = xgb.DMatrix(X_test_processed, label=y_test)
final_model = xgb.train(
    best_params,
    dtrain,
    num_boost_round=best_params['n_estimators'],
    early_stopping_rounds=50,
    evals=[(dtrain, "train"), (dtest, "eval")]
)

# Final evaluation
final_preds = final_model.predict(dtest)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(final_preds))
final_r2 = r2_score(y_test, final_preds)

print("\nBest trial:")
print("  MAE: ", study.best_trial.value)
print("\nBest hyperparameters:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

print("\nFinal Model Performance:")
print(f"MAE: {final_mae:.4f}")
print(f"R2 Score: {final_r2:.4f}")

# Visualization
try:
    import plotly
    fig1 = optuna.visualization.plot_optimization_history(study)
    fig2 = optuna.visualization.plot_param_importances(study)
    fig1.show()
    fig2.show()
except:
    print("Visualization requires plotly to be installed")

[I 2024-12-19 20:29:38,360] A new study created in memory with name: no-name-00228131-4dd5-4b35-a268-64a7a8ab193e


[0]	train-mae:0.38079	eval-mae:0.38673
[1]	train-mae:0.35410	eval-mae:0.36117
[2]	train-mae:0.31490	eval-mae:0.32599
[3]	train-mae:0.28561	eval-mae:0.29834
[4]	train-mae:0.25807	eval-mae:0.27260
[5]	train-mae:0.23939	eval-mae:0.25437
[6]	train-mae:0.21840	eval-mae:0.23728
[7]	train-mae:0.20000	eval-mae:0.22006
[8]	train-mae:0.18384	eval-mae:0.20548
[9]	train-mae:0.16832	eval-mae:0.19196
[10]	train-mae:0.15617	eval-mae:0.18319
[11]	train-mae:0.14424	eval-mae:0.17375
[12]	train-mae:0.13371	eval-mae:0.16617
[13]	train-mae:0.12406	eval-mae:0.16014
[14]	train-mae:0.11693	eval-mae:0.15603
[15]	train-mae:0.11327	eval-mae:0.15435
[16]	train-mae:0.10741	eval-mae:0.15162
[17]	train-mae:0.10147	eval-mae:0.14850
[18]	train-mae:0.09530	eval-mae:0.14562
[19]	train-mae:0.08933	eval-mae:0.14186
[20]	train-mae:0.08634	eval-mae:0.14015
[21]	train-mae:0.08302	eval-mae:0.13811
[22]	train-mae:0.07823	eval-mae:0.13544
[23]	train-mae:0.07610	eval-mae:0.13487
[24]	train-mae:0.07274	eval-mae:0.13379
[25]	train

[I 2024-12-19 20:29:39,715] Trial 0 finished with value: 0.5365749346991966 and parameters: {'lambda': 3.208553013867788e-05, 'alpha': 2.954581248925638e-06, 'eta': 0.12868103454866872, 'gamma': 1.72299590537457e-05, 'max_depth': 25, 'min_child_weight': 3, 'subsample': 0.9329615962263436, 'colsample_bytree': 0.697687903627443, 'colsample_bylevel': 0.440669602906547, 'colsample_bynode': 0.8397072686133679, 'n_estimators': 669}. Best is trial 0 with value: 0.5365749346991966.



Trial Results:
MAE: 0.5366
R2 Score: 0.8976
[0]	train-mae:0.33646	eval-mae:0.34922
[1]	train-mae:0.26862	eval-mae:0.28252
[2]	train-mae:0.22047	eval-mae:0.24125
[3]	train-mae:0.18068	eval-mae:0.20609
[4]	train-mae:0.15029	eval-mae:0.18008
[5]	train-mae:0.12739	eval-mae:0.16341
[6]	train-mae:0.11117	eval-mae:0.15213
[7]	train-mae:0.09788	eval-mae:0.14324
[8]	train-mae:0.08931	eval-mae:0.13937
[9]	train-mae:0.08164	eval-mae:0.13586
[10]	train-mae:0.07587	eval-mae:0.13377
[11]	train-mae:0.07035	eval-mae:0.13083
[12]	train-mae:0.06511	eval-mae:0.12855
[13]	train-mae:0.06154	eval-mae:0.12675
[14]	train-mae:0.05863	eval-mae:0.12532
[15]	train-mae:0.05590	eval-mae:0.12469
[16]	train-mae:0.05366	eval-mae:0.12471
[17]	train-mae:0.05154	eval-mae:0.12452
[18]	train-mae:0.04976	eval-mae:0.12427
[19]	train-mae:0.04796	eval-mae:0.12437
[20]	train-mae:0.04678	eval-mae:0.12445
[21]	train-mae:0.04493	eval-mae:0.12425
[22]	train-mae:0.04374	eval-mae:0.12440
[23]	train-mae:0.04243	eval-mae:0.12434
[24]	

[I 2024-12-19 20:29:40,563] Trial 1 finished with value: 0.5278477369280304 and parameters: {'lambda': 2.315389887198197e-08, 'alpha': 1.2486122404739032e-07, 'eta': 0.24328128641642952, 'gamma': 0.00013553911078582632, 'max_depth': 15, 'min_child_weight': 4, 'subsample': 0.7861633757353756, 'colsample_bytree': 0.9480942783433611, 'colsample_bylevel': 0.5429772823826569, 'colsample_bynode': 0.8584820253634963, 'n_estimators': 625}. Best is trial 1 with value: 0.5278477369280304.


[0]	train-mae:0.36443	eval-mae:0.36747
[1]	train-mae:0.32121	eval-mae:0.32160


[I 2024-12-19 20:29:40,608] Trial 2 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.40911	eval-mae:0.41552
[1]	train-mae:0.39240	eval-mae:0.40059


[I 2024-12-19 20:29:40,706] Trial 3 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.36821	eval-mae:0.37527


[I 2024-12-19 20:29:40,762] Trial 4 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.37325	eval-mae:0.37768


[I 2024-12-19 20:29:40,816] Trial 5 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.38600	eval-mae:0.38994


[I 2024-12-19 20:29:40,874] Trial 6 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.41869	eval-mae:0.42284


[I 2024-12-19 20:29:40,923] Trial 7 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.38329	eval-mae:0.39150
[1]	train-mae:0.34521	eval-mae:0.35529


[I 2024-12-19 20:29:41,003] Trial 8 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.42106	eval-mae:0.42585


[I 2024-12-19 20:29:41,046] Trial 9 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33596	eval-mae:0.34507
[1]	train-mae:0.28151	eval-mae:0.28789
[2]	train-mae:0.24912	eval-mae:0.25248
[3]	train-mae:0.21627	eval-mae:0.21798
[4]	train-mae:0.19868	eval-mae:0.20075


[I 2024-12-19 20:29:41,170] Trial 10 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.33963	eval-mae:0.34644
[1]	train-mae:0.27417	eval-mae:0.28526
[2]	train-mae:0.22309	eval-mae:0.23668
[3]	train-mae:0.18503	eval-mae:0.20699
[4]	train-mae:0.16265	eval-mae:0.18725


[I 2024-12-19 20:29:41,357] Trial 11 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.34008	eval-mae:0.34825
[1]	train-mae:0.27547	eval-mae:0.28383
[2]	train-mae:0.22998	eval-mae:0.24283
[3]	train-mae:0.19720	eval-mae:0.21523
[4]	train-mae:0.16687	eval-mae:0.18922


[I 2024-12-19 20:29:41,535] Trial 12 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.38869	eval-mae:0.39446


[I 2024-12-19 20:29:41,680] Trial 13 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35074	eval-mae:0.35717


[I 2024-12-19 20:29:41,790] Trial 14 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.40313	eval-mae:0.40748
[1]	train-mae:0.37685	eval-mae:0.38261


[I 2024-12-19 20:29:41,953] Trial 15 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.34646	eval-mae:0.35305


[I 2024-12-19 20:29:42,065] Trial 16 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35786	eval-mae:0.36294


[I 2024-12-19 20:29:42,190] Trial 17 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.42033	eval-mae:0.42568
[1]	train-mae:0.41348	eval-mae:0.41908


[I 2024-12-19 20:29:42,327] Trial 18 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35945	eval-mae:0.36445


[I 2024-12-19 20:29:42,452] Trial 19 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.37460	eval-mae:0.38023


[I 2024-12-19 20:29:42,590] Trial 20 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33523	eval-mae:0.34353
[1]	train-mae:0.27766	eval-mae:0.28929
[2]	train-mae:0.23395	eval-mae:0.24746
[3]	train-mae:0.19721	eval-mae:0.21488
[4]	train-mae:0.16799	eval-mae:0.18735


[I 2024-12-19 20:29:42,773] Trial 21 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.35057	eval-mae:0.35707


[I 2024-12-19 20:29:42,907] Trial 22 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33547	eval-mae:0.34350
[1]	train-mae:0.26471	eval-mae:0.27661
[2]	train-mae:0.21871	eval-mae:0.23844
[3]	train-mae:0.18592	eval-mae:0.20943


[I 2024-12-19 20:29:43,080] Trial 23 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.33494	eval-mae:0.34348
[1]	train-mae:0.28781	eval-mae:0.29869


[I 2024-12-19 20:29:43,210] Trial 24 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.39510	eval-mae:0.39945


[I 2024-12-19 20:29:43,390] Trial 25 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33878	eval-mae:0.33907


[I 2024-12-19 20:29:43,503] Trial 26 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.34842	eval-mae:0.35672


[I 2024-12-19 20:29:43,626] Trial 27 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.37705	eval-mae:0.38295
[1]	train-mae:0.33698	eval-mae:0.34316


[I 2024-12-19 20:29:43,739] Trial 28 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35224	eval-mae:0.35463
[1]	train-mae:0.29305	eval-mae:0.29790


[I 2024-12-19 20:29:43,890] Trial 29 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33268	eval-mae:0.34281
[1]	train-mae:0.26193	eval-mae:0.27671
[2]	train-mae:0.21023	eval-mae:0.22621
[3]	train-mae:0.17104	eval-mae:0.19110
[4]	train-mae:0.14110	eval-mae:0.16798
[5]	train-mae:0.11982	eval-mae:0.15436
[6]	train-mae:0.10394	eval-mae:0.14441
[7]	train-mae:0.09098	eval-mae:0.13698
[8]	train-mae:0.08055	eval-mae:0.13224
[9]	train-mae:0.07276	eval-mae:0.12874
[10]	train-mae:0.06710	eval-mae:0.12767
[11]	train-mae:0.06129	eval-mae:0.12606
[12]	train-mae:0.05700	eval-mae:0.12552
[13]	train-mae:0.05383	eval-mae:0.12486
[14]	train-mae:0.05097	eval-mae:0.12529
[15]	train-mae:0.04870	eval-mae:0.12532
[16]	train-mae:0.04589	eval-mae:0.12428
[17]	train-mae:0.04333	eval-mae:0.12354
[18]	train-mae:0.04096	eval-mae:0.12362
[19]	train-mae:0.03874	eval-mae:0.12309
[20]	train-mae:0.03746	eval-mae:0.12242
[21]	train-mae:0.03568	eval-mae:0.12292
[22]	train-mae:0.03454	eval-mae:0.12299
[23]	train-mae:0.03303	eval-mae:0.12286
[24]	train-mae:0.03153	eval-mae:0.12304
[25]	train

[I 2024-12-19 20:29:44,723] Trial 30 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.33992	eval-mae:0.34766
[1]	train-mae:0.27357	eval-mae:0.28468
[2]	train-mae:0.22328	eval-mae:0.23736
[3]	train-mae:0.18672	eval-mae:0.20419
[4]	train-mae:0.15952	eval-mae:0.17996
[5]	train-mae:0.13783	eval-mae:0.16310
[6]	train-mae:0.12465	eval-mae:0.15594
[7]	train-mae:0.11229	eval-mae:0.14890
[8]	train-mae:0.10355	eval-mae:0.14462
[9]	train-mae:0.09651	eval-mae:0.14101
[10]	train-mae:0.09125	eval-mae:0.13923
[11]	train-mae:0.08549	eval-mae:0.13662
[12]	train-mae:0.08161	eval-mae:0.13596
[13]	train-mae:0.07802	eval-mae:0.13599
[14]	train-mae:0.07452	eval-mae:0.13590
[15]	train-mae:0.07188	eval-mae:0.13593
[16]	train-mae:0.06919	eval-mae:0.13552


[I 2024-12-19 20:29:44,991] Trial 31 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.34044	eval-mae:0.34841
[1]	train-mae:0.27304	eval-mae:0.28483
[2]	train-mae:0.22821	eval-mae:0.24281
[3]	train-mae:0.18931	eval-mae:0.21002


[I 2024-12-19 20:29:45,174] Trial 32 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.36302	eval-mae:0.37009
[1]	train-mae:0.31172	eval-mae:0.32274


[I 2024-12-19 20:29:45,314] Trial 33 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.34063	eval-mae:0.34730
[1]	train-mae:0.27654	eval-mae:0.28437
[2]	train-mae:0.22755	eval-mae:0.23903
[3]	train-mae:0.19401	eval-mae:0.20798
[4]	train-mae:0.16566	eval-mae:0.18244


[I 2024-12-19 20:29:45,474] Trial 34 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.34268	eval-mae:0.35538
[1]	train-mae:0.28942	eval-mae:0.30731


[I 2024-12-19 20:29:45,660] Trial 35 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35684	eval-mae:0.36320
[1]	train-mae:0.30126	eval-mae:0.31039


[I 2024-12-19 20:29:45,833] Trial 36 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.36090	eval-mae:0.37016


[I 2024-12-19 20:29:45,970] Trial 37 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.37907	eval-mae:0.38439


[I 2024-12-19 20:29:46,090] Trial 38 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.36189	eval-mae:0.37126
[1]	train-mae:0.30720	eval-mae:0.32265


[I 2024-12-19 20:29:46,315] Trial 39 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.39035	eval-mae:0.39658
[1]	train-mae:0.35833	eval-mae:0.36618


[I 2024-12-19 20:29:46,455] Trial 40 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33984	eval-mae:0.34746
[1]	train-mae:0.27206	eval-mae:0.28123
[2]	train-mae:0.22233	eval-mae:0.23404
[3]	train-mae:0.18434	eval-mae:0.19895
[4]	train-mae:0.15727	eval-mae:0.17404
[5]	train-mae:0.13743	eval-mae:0.15847
[6]	train-mae:0.12311	eval-mae:0.14942
[7]	train-mae:0.11119	eval-mae:0.14208
[8]	train-mae:0.10339	eval-mae:0.13669
[9]	train-mae:0.09732	eval-mae:0.13360
[10]	train-mae:0.09243	eval-mae:0.13158
[11]	train-mae:0.08764	eval-mae:0.13058
[12]	train-mae:0.08368	eval-mae:0.12943
[13]	train-mae:0.08133	eval-mae:0.12823
[14]	train-mae:0.07876	eval-mae:0.12809
[15]	train-mae:0.07620	eval-mae:0.12784


[I 2024-12-19 20:29:46,740] Trial 41 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.34158	eval-mae:0.34509
[1]	train-mae:0.27545	eval-mae:0.28129
[2]	train-mae:0.23117	eval-mae:0.23834
[3]	train-mae:0.19301	eval-mae:0.20354


[I 2024-12-19 20:29:46,886] Trial 42 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.33060	eval-mae:0.33752
[1]	train-mae:0.25951	eval-mae:0.26737
[2]	train-mae:0.20936	eval-mae:0.22205
[3]	train-mae:0.17120	eval-mae:0.18701
[4]	train-mae:0.14699	eval-mae:0.16641
[5]	train-mae:0.12932	eval-mae:0.15440
[6]	train-mae:0.11681	eval-mae:0.14641
[7]	train-mae:0.10837	eval-mae:0.14273
[8]	train-mae:0.10188	eval-mae:0.14056
[9]	train-mae:0.09486	eval-mae:0.13618
[10]	train-mae:0.08955	eval-mae:0.13288
[11]	train-mae:0.08490	eval-mae:0.13227
[12]	train-mae:0.08123	eval-mae:0.13117
[13]	train-mae:0.07839	eval-mae:0.13048
[14]	train-mae:0.07515	eval-mae:0.12907
[15]	train-mae:0.07306	eval-mae:0.12975


[I 2024-12-19 20:29:47,223] Trial 43 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.33284	eval-mae:0.34049
[1]	train-mae:0.27144	eval-mae:0.28142
[2]	train-mae:0.22270	eval-mae:0.23316
[3]	train-mae:0.18655	eval-mae:0.19857


[I 2024-12-19 20:29:47,377] Trial 44 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32772	eval-mae:0.33168
[1]	train-mae:0.25822	eval-mae:0.26660
[2]	train-mae:0.21071	eval-mae:0.21872
[3]	train-mae:0.17531	eval-mae:0.18608
[4]	train-mae:0.15133	eval-mae:0.16528
[5]	train-mae:0.13374	eval-mae:0.15037
[6]	train-mae:0.12204	eval-mae:0.14326
[7]	train-mae:0.11258	eval-mae:0.13887
[8]	train-mae:0.10548	eval-mae:0.13473
[9]	train-mae:0.10033	eval-mae:0.13322
[10]	train-mae:0.09768	eval-mae:0.13344
[11]	train-mae:0.09403	eval-mae:0.13281
[12]	train-mae:0.09083	eval-mae:0.13198
[13]	train-mae:0.08760	eval-mae:0.13035
[14]	train-mae:0.08451	eval-mae:0.13008
[15]	train-mae:0.08177	eval-mae:0.12984
[16]	train-mae:0.07995	eval-mae:0.12987


[I 2024-12-19 20:29:47,619] Trial 45 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.38103	eval-mae:0.38569
[1]	train-mae:0.34163	eval-mae:0.34686


[I 2024-12-19 20:29:47,754] Trial 46 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32251	eval-mae:0.32560
[1]	train-mae:0.25289	eval-mae:0.25866
[2]	train-mae:0.20237	eval-mae:0.21005
[3]	train-mae:0.17002	eval-mae:0.18057
[4]	train-mae:0.14751	eval-mae:0.16028
[5]	train-mae:0.13157	eval-mae:0.14917
[6]	train-mae:0.12024	eval-mae:0.14174
[7]	train-mae:0.11205	eval-mae:0.13823
[8]	train-mae:0.10609	eval-mae:0.13531
[9]	train-mae:0.10166	eval-mae:0.13312
[10]	train-mae:0.09842	eval-mae:0.13189
[11]	train-mae:0.09473	eval-mae:0.13083
[12]	train-mae:0.09079	eval-mae:0.12952
[13]	train-mae:0.08844	eval-mae:0.12855
[14]	train-mae:0.08599	eval-mae:0.12848
[15]	train-mae:0.08464	eval-mae:0.12925


[I 2024-12-19 20:29:47,973] Trial 47 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.32341	eval-mae:0.33139
[1]	train-mae:0.25504	eval-mae:0.26411
[2]	train-mae:0.20730	eval-mae:0.21646
[3]	train-mae:0.17547	eval-mae:0.18361
[4]	train-mae:0.15303	eval-mae:0.16335
[5]	train-mae:0.13894	eval-mae:0.15032
[6]	train-mae:0.12981	eval-mae:0.14246
[7]	train-mae:0.12149	eval-mae:0.13686
[8]	train-mae:0.11549	eval-mae:0.13460
[9]	train-mae:0.11230	eval-mae:0.13284
[10]	train-mae:0.10869	eval-mae:0.13134
[11]	train-mae:0.10728	eval-mae:0.13091
[12]	train-mae:0.10468	eval-mae:0.13024
[13]	train-mae:0.10318	eval-mae:0.12972
[14]	train-mae:0.10232	eval-mae:0.12959
[15]	train-mae:0.10198	eval-mae:0.12957
[16]	train-mae:0.10093	eval-mae:0.12880


[I 2024-12-19 20:29:48,157] Trial 48 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.35221	eval-mae:0.35570
[1]	train-mae:0.30073	eval-mae:0.30301


[I 2024-12-19 20:29:48,291] Trial 49 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.35475	eval-mae:0.36134
[1]	train-mae:0.29780	eval-mae:0.30511


[I 2024-12-19 20:29:48,497] Trial 50 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32777	eval-mae:0.33358
[1]	train-mae:0.26333	eval-mae:0.27106
[2]	train-mae:0.21799	eval-mae:0.22842
[3]	train-mae:0.18028	eval-mae:0.19201


[I 2024-12-19 20:29:48,646] Trial 51 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.31812	eval-mae:0.32337
[1]	train-mae:0.24577	eval-mae:0.25389
[2]	train-mae:0.19958	eval-mae:0.21081
[3]	train-mae:0.16697	eval-mae:0.18035
[4]	train-mae:0.14695	eval-mae:0.16241
[5]	train-mae:0.13285	eval-mae:0.15205
[6]	train-mae:0.12422	eval-mae:0.14640
[7]	train-mae:0.11712	eval-mae:0.14132
[8]	train-mae:0.11309	eval-mae:0.13821
[9]	train-mae:0.11024	eval-mae:0.13667
[10]	train-mae:0.10832	eval-mae:0.13596
[11]	train-mae:0.10729	eval-mae:0.13485
[12]	train-mae:0.10520	eval-mae:0.13404
[13]	train-mae:0.10520	eval-mae:0.13404
[14]	train-mae:0.10442	eval-mae:0.13443
[15]	train-mae:0.10424	eval-mae:0.13466


[I 2024-12-19 20:29:48,840] Trial 52 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.33039	eval-mae:0.33432
[1]	train-mae:0.27378	eval-mae:0.27897
[2]	train-mae:0.23814	eval-mae:0.24307
[3]	train-mae:0.20542	eval-mae:0.21095
[4]	train-mae:0.18720	eval-mae:0.19359


[I 2024-12-19 20:29:48,981] Trial 53 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.33816	eval-mae:0.34418
[1]	train-mae:0.27470	eval-mae:0.28179
[2]	train-mae:0.22653	eval-mae:0.23605
[3]	train-mae:0.19072	eval-mae:0.20243
[4]	train-mae:0.16546	eval-mae:0.18029


[I 2024-12-19 20:29:49,140] Trial 54 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.33288	eval-mae:0.34012
[1]	train-mae:0.26666	eval-mae:0.27513
[2]	train-mae:0.21855	eval-mae:0.22889
[3]	train-mae:0.19198	eval-mae:0.20233
[4]	train-mae:0.16385	eval-mae:0.17816


[I 2024-12-19 20:29:49,297] Trial 55 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32085	eval-mae:0.32830
[1]	train-mae:0.24662	eval-mae:0.25624
[2]	train-mae:0.19565	eval-mae:0.20635
[3]	train-mae:0.16137	eval-mae:0.17469
[4]	train-mae:0.13697	eval-mae:0.15435
[5]	train-mae:0.12074	eval-mae:0.14225
[6]	train-mae:0.10909	eval-mae:0.13518
[7]	train-mae:0.10032	eval-mae:0.13044
[8]	train-mae:0.09396	eval-mae:0.12721
[9]	train-mae:0.08844	eval-mae:0.12633
[10]	train-mae:0.08447	eval-mae:0.12523
[11]	train-mae:0.08199	eval-mae:0.12426
[12]	train-mae:0.07942	eval-mae:0.12377
[13]	train-mae:0.07707	eval-mae:0.12361
[14]	train-mae:0.07518	eval-mae:0.12407
[15]	train-mae:0.07316	eval-mae:0.12404
[16]	train-mae:0.07083	eval-mae:0.12364
[17]	train-mae:0.06921	eval-mae:0.12340
[18]	train-mae:0.06830	eval-mae:0.12330
[19]	train-mae:0.06709	eval-mae:0.12216
[20]	train-mae:0.06592	eval-mae:0.12233
[21]	train-mae:0.06430	eval-mae:0.12269
[22]	train-mae:0.06289	eval-mae:0.12248
[23]	train-mae:0.06168	eval-mae:0.12197
[24]	train-mae:0.06076	eval-mae:0.12177
[25]	train

[I 2024-12-19 20:29:49,774] Trial 56 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.34020	eval-mae:0.34648


[I 2024-12-19 20:29:49,914] Trial 57 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33730	eval-mae:0.34292
[1]	train-mae:0.27463	eval-mae:0.28038
[2]	train-mae:0.22460	eval-mae:0.23176
[3]	train-mae:0.19158	eval-mae:0.20083


[I 2024-12-19 20:29:50,075] Trial 58 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32154	eval-mae:0.32695
[1]	train-mae:0.24987	eval-mae:0.25545
[2]	train-mae:0.20052	eval-mae:0.20932
[3]	train-mae:0.16895	eval-mae:0.18374
[4]	train-mae:0.14369	eval-mae:0.16245
[5]	train-mae:0.12668	eval-mae:0.14995
[6]	train-mae:0.11459	eval-mae:0.14086
[7]	train-mae:0.10517	eval-mae:0.13389
[8]	train-mae:0.09849	eval-mae:0.13186
[9]	train-mae:0.09436	eval-mae:0.13038
[10]	train-mae:0.09052	eval-mae:0.12952
[11]	train-mae:0.08671	eval-mae:0.12833
[12]	train-mae:0.08363	eval-mae:0.12786
[13]	train-mae:0.08161	eval-mae:0.12782
[14]	train-mae:0.07971	eval-mae:0.12774
[15]	train-mae:0.07723	eval-mae:0.12583


[I 2024-12-19 20:29:50,337] Trial 59 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.35253	eval-mae:0.35797
[1]	train-mae:0.29739	eval-mae:0.30549


[I 2024-12-19 20:29:50,491] Trial 60 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32383	eval-mae:0.33162
[1]	train-mae:0.25113	eval-mae:0.26175
[2]	train-mae:0.20146	eval-mae:0.21433
[3]	train-mae:0.16634	eval-mae:0.18051
[4]	train-mae:0.14536	eval-mae:0.16284
[5]	train-mae:0.12805	eval-mae:0.14773
[6]	train-mae:0.11603	eval-mae:0.13984
[7]	train-mae:0.10856	eval-mae:0.13431
[8]	train-mae:0.10335	eval-mae:0.13093
[9]	train-mae:0.09725	eval-mae:0.12631
[10]	train-mae:0.09223	eval-mae:0.12476
[11]	train-mae:0.08931	eval-mae:0.12434
[12]	train-mae:0.08593	eval-mae:0.12217
[13]	train-mae:0.08253	eval-mae:0.12098
[14]	train-mae:0.08125	eval-mae:0.12159
[15]	train-mae:0.07921	eval-mae:0.12112
[16]	train-mae:0.07784	eval-mae:0.12126
[17]	train-mae:0.07670	eval-mae:0.12187
[18]	train-mae:0.07583	eval-mae:0.12199
[19]	train-mae:0.07437	eval-mae:0.12152
[20]	train-mae:0.07241	eval-mae:0.12128
[21]	train-mae:0.07093	eval-mae:0.12144
[22]	train-mae:0.06946	eval-mae:0.12126
[23]	train-mae:0.06864	eval-mae:0.12147
[24]	train-mae:0.06682	eval-mae:0.12137
[25]	train

[I 2024-12-19 20:29:51,437] Trial 61 finished with value: 0.5452610507184108 and parameters: {'lambda': 8.211825030121477e-05, 'alpha': 9.030223475223533e-07, 'eta': 0.28513425976003826, 'gamma': 0.0002535843559218643, 'max_depth': 15, 'min_child_weight': 10, 'subsample': 0.7836800237165055, 'colsample_bytree': 0.9097658753086723, 'colsample_bylevel': 0.8181101540518346, 'colsample_bynode': 0.9648521130626588, 'n_estimators': 912}. Best is trial 1 with value: 0.5278477369280304.


[0]	train-mae:0.33009	eval-mae:0.33525
[1]	train-mae:0.25989	eval-mae:0.26696
[2]	train-mae:0.21460	eval-mae:0.22547
[3]	train-mae:0.17855	eval-mae:0.19402
[4]	train-mae:0.15658	eval-mae:0.17588


[I 2024-12-19 20:29:51,597] Trial 62 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32693	eval-mae:0.33467
[1]	train-mae:0.25797	eval-mae:0.26816
[2]	train-mae:0.20640	eval-mae:0.21805
[3]	train-mae:0.17086	eval-mae:0.18486
[4]	train-mae:0.14676	eval-mae:0.16239
[5]	train-mae:0.12922	eval-mae:0.14820
[6]	train-mae:0.11736	eval-mae:0.13879
[7]	train-mae:0.10887	eval-mae:0.13304
[8]	train-mae:0.10168	eval-mae:0.12771
[9]	train-mae:0.09683	eval-mae:0.12496
[10]	train-mae:0.09327	eval-mae:0.12386
[11]	train-mae:0.09032	eval-mae:0.12241
[12]	train-mae:0.08742	eval-mae:0.12086
[13]	train-mae:0.08577	eval-mae:0.12109
[14]	train-mae:0.08342	eval-mae:0.12020
[15]	train-mae:0.08155	eval-mae:0.12029
[16]	train-mae:0.08011	eval-mae:0.12033
[17]	train-mae:0.07865	eval-mae:0.12016
[18]	train-mae:0.07719	eval-mae:0.11950
[19]	train-mae:0.07652	eval-mae:0.11988
[20]	train-mae:0.07522	eval-mae:0.12002
[21]	train-mae:0.07390	eval-mae:0.11994
[22]	train-mae:0.07280	eval-mae:0.12036
[23]	train-mae:0.07198	eval-mae:0.11994
[24]	train-mae:0.07044	eval-mae:0.11962
[25]	train

[I 2024-12-19 20:29:52,131] Trial 63 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.32609	eval-mae:0.33225
[1]	train-mae:0.26045	eval-mae:0.26804
[2]	train-mae:0.20784	eval-mae:0.21911
[3]	train-mae:0.17338	eval-mae:0.18708
[4]	train-mae:0.14798	eval-mae:0.16354


[I 2024-12-19 20:29:52,282] Trial 64 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32511	eval-mae:0.33154
[1]	train-mae:0.25323	eval-mae:0.26170
[2]	train-mae:0.20537	eval-mae:0.21456
[3]	train-mae:0.17213	eval-mae:0.18337
[4]	train-mae:0.14785	eval-mae:0.16044
[5]	train-mae:0.13051	eval-mae:0.14691
[6]	train-mae:0.11987	eval-mae:0.14077
[7]	train-mae:0.11228	eval-mae:0.13607
[8]	train-mae:0.10597	eval-mae:0.13398
[9]	train-mae:0.10007	eval-mae:0.13066
[10]	train-mae:0.09655	eval-mae:0.12918
[11]	train-mae:0.09328	eval-mae:0.12877
[12]	train-mae:0.08905	eval-mae:0.12723
[13]	train-mae:0.08776	eval-mae:0.12670
[14]	train-mae:0.08582	eval-mae:0.12660
[15]	train-mae:0.08542	eval-mae:0.12698
[16]	train-mae:0.08339	eval-mae:0.12725


[I 2024-12-19 20:29:52,505] Trial 65 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.33243	eval-mae:0.33538
[1]	train-mae:0.26584	eval-mae:0.26653
[2]	train-mae:0.22090	eval-mae:0.22072
[3]	train-mae:0.19310	eval-mae:0.19481
[4]	train-mae:0.17497	eval-mae:0.17759


[I 2024-12-19 20:29:52,648] Trial 66 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32760	eval-mae:0.33253
[1]	train-mae:0.27453	eval-mae:0.28149


[I 2024-12-19 20:29:52,775] Trial 67 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.40279	eval-mae:0.40831
[1]	train-mae:0.38028	eval-mae:0.38591


[I 2024-12-19 20:29:52,921] Trial 68 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.38465	eval-mae:0.38991


[I 2024-12-19 20:29:53,039] Trial 69 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33933	eval-mae:0.34590


[I 2024-12-19 20:29:53,175] Trial 70 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32568	eval-mae:0.33261
[1]	train-mae:0.25959	eval-mae:0.26799
[2]	train-mae:0.20733	eval-mae:0.21764
[3]	train-mae:0.17534	eval-mae:0.18716
[4]	train-mae:0.14777	eval-mae:0.16216
[5]	train-mae:0.13114	eval-mae:0.14856
[6]	train-mae:0.11907	eval-mae:0.14000
[7]	train-mae:0.11112	eval-mae:0.13588
[8]	train-mae:0.10321	eval-mae:0.13076
[9]	train-mae:0.09838	eval-mae:0.12887
[10]	train-mae:0.09516	eval-mae:0.12771
[11]	train-mae:0.09128	eval-mae:0.12543
[12]	train-mae:0.08785	eval-mae:0.12319
[13]	train-mae:0.08649	eval-mae:0.12324
[14]	train-mae:0.08447	eval-mae:0.12226
[15]	train-mae:0.08335	eval-mae:0.12244
[16]	train-mae:0.08173	eval-mae:0.12238
[17]	train-mae:0.08048	eval-mae:0.12185
[18]	train-mae:0.07917	eval-mae:0.12172
[19]	train-mae:0.07802	eval-mae:0.12108
[20]	train-mae:0.07625	eval-mae:0.12084
[21]	train-mae:0.07535	eval-mae:0.12106
[22]	train-mae:0.07453	eval-mae:0.12111
[23]	train-mae:0.07324	eval-mae:0.12129
[24]	train-mae:0.07244	eval-mae:0.12142
[25]	train

[I 2024-12-19 20:29:53,657] Trial 71 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.32226	eval-mae:0.32885
[1]	train-mae:0.24994	eval-mae:0.25555
[2]	train-mae:0.20207	eval-mae:0.21083
[3]	train-mae:0.16684	eval-mae:0.17856
[4]	train-mae:0.14439	eval-mae:0.16077
[5]	train-mae:0.12978	eval-mae:0.15093
[6]	train-mae:0.12001	eval-mae:0.14441
[7]	train-mae:0.10994	eval-mae:0.13662
[8]	train-mae:0.10411	eval-mae:0.13301
[9]	train-mae:0.09819	eval-mae:0.13004
[10]	train-mae:0.09441	eval-mae:0.12747
[11]	train-mae:0.09216	eval-mae:0.12669
[12]	train-mae:0.08963	eval-mae:0.12511
[13]	train-mae:0.08820	eval-mae:0.12427
[14]	train-mae:0.08639	eval-mae:0.12381
[15]	train-mae:0.08437	eval-mae:0.12301
[16]	train-mae:0.08321	eval-mae:0.12218
[17]	train-mae:0.08242	eval-mae:0.12202
[18]	train-mae:0.08105	eval-mae:0.12113
[19]	train-mae:0.08001	eval-mae:0.12067
[20]	train-mae:0.07922	eval-mae:0.12116
[21]	train-mae:0.07816	eval-mae:0.12127
[22]	train-mae:0.07757	eval-mae:0.12102
[23]	train-mae:0.07621	eval-mae:0.12060
[24]	train-mae:0.07491	eval-mae:0.12016
[25]	train

[I 2024-12-19 20:29:54,096] Trial 72 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.32943	eval-mae:0.33570
[1]	train-mae:0.26370	eval-mae:0.27182
[2]	train-mae:0.21356	eval-mae:0.22317
[3]	train-mae:0.17970	eval-mae:0.19039
[4]	train-mae:0.15743	eval-mae:0.16901


[I 2024-12-19 20:29:54,251] Trial 73 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32949	eval-mae:0.33663
[1]	train-mae:0.25896	eval-mae:0.26922
[2]	train-mae:0.21176	eval-mae:0.22396
[3]	train-mae:0.17510	eval-mae:0.18964


[I 2024-12-19 20:29:54,398] Trial 74 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.37113	eval-mae:0.37546


[I 2024-12-19 20:29:54,529] Trial 75 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.33751	eval-mae:0.34018
[1]	train-mae:0.27358	eval-mae:0.27676


[I 2024-12-19 20:29:54,668] Trial 76 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32235	eval-mae:0.32611


[I 2024-12-19 20:29:54,808] Trial 77 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.42441	eval-mae:0.42926
[1]	train-mae:0.42224	eval-mae:0.42676


[I 2024-12-19 20:29:54,938] Trial 78 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.31635	eval-mae:0.32631
[1]	train-mae:0.24083	eval-mae:0.26101
[2]	train-mae:0.18162	eval-mae:0.21175
[3]	train-mae:0.14025	eval-mae:0.17912
[4]	train-mae:0.10938	eval-mae:0.15423
[5]	train-mae:0.08744	eval-mae:0.14114
[6]	train-mae:0.07189	eval-mae:0.13438
[7]	train-mae:0.06137	eval-mae:0.13137
[8]	train-mae:0.05289	eval-mae:0.12777
[9]	train-mae:0.04674	eval-mae:0.12619
[10]	train-mae:0.04191	eval-mae:0.12530
[11]	train-mae:0.03777	eval-mae:0.12410
[12]	train-mae:0.03391	eval-mae:0.12382
[13]	train-mae:0.03158	eval-mae:0.12419
[14]	train-mae:0.02909	eval-mae:0.12428
[15]	train-mae:0.02741	eval-mae:0.12437


[I 2024-12-19 20:29:55,334] Trial 79 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.38710	eval-mae:0.39186
[1]	train-mae:0.35505	eval-mae:0.36030


[I 2024-12-19 20:29:55,473] Trial 80 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.31318	eval-mae:0.32731
[1]	train-mae:0.23714	eval-mae:0.25936
[2]	train-mae:0.17823	eval-mae:0.20846
[3]	train-mae:0.13851	eval-mae:0.17888
[4]	train-mae:0.11012	eval-mae:0.16191
[5]	train-mae:0.08723	eval-mae:0.14749
[6]	train-mae:0.07076	eval-mae:0.13819
[7]	train-mae:0.05910	eval-mae:0.13380
[8]	train-mae:0.05161	eval-mae:0.13247
[9]	train-mae:0.04467	eval-mae:0.13156
[10]	train-mae:0.03994	eval-mae:0.12997
[11]	train-mae:0.03480	eval-mae:0.12888
[12]	train-mae:0.03096	eval-mae:0.12832
[13]	train-mae:0.02857	eval-mae:0.12825
[14]	train-mae:0.02622	eval-mae:0.12833
[15]	train-mae:0.02352	eval-mae:0.12800
[16]	train-mae:0.02165	eval-mae:0.12778


[I 2024-12-19 20:29:55,878] Trial 81 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.31838	eval-mae:0.33140
[1]	train-mae:0.24134	eval-mae:0.26237
[2]	train-mae:0.18884	eval-mae:0.21845
[3]	train-mae:0.14591	eval-mae:0.18366
[4]	train-mae:0.11648	eval-mae:0.16348


[I 2024-12-19 20:29:56,110] Trial 82 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32698	eval-mae:0.33256


[I 2024-12-19 20:29:56,262] Trial 83 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.31722	eval-mae:0.32988
[1]	train-mae:0.23741	eval-mae:0.25914
[2]	train-mae:0.17838	eval-mae:0.20939
[3]	train-mae:0.13475	eval-mae:0.17457
[4]	train-mae:0.10470	eval-mae:0.15274
[5]	train-mae:0.08145	eval-mae:0.13939
[6]	train-mae:0.06412	eval-mae:0.13046
[7]	train-mae:0.05084	eval-mae:0.12462
[8]	train-mae:0.04076	eval-mae:0.12136
[9]	train-mae:0.03339	eval-mae:0.11920
[10]	train-mae:0.02771	eval-mae:0.11825
[11]	train-mae:0.02344	eval-mae:0.11753
[12]	train-mae:0.01998	eval-mae:0.11709
[13]	train-mae:0.01754	eval-mae:0.11686
[14]	train-mae:0.01616	eval-mae:0.11671
[15]	train-mae:0.01488	eval-mae:0.11664
[16]	train-mae:0.01383	eval-mae:0.11649
[17]	train-mae:0.01325	eval-mae:0.11634
[18]	train-mae:0.01296	eval-mae:0.11630
[19]	train-mae:0.01256	eval-mae:0.11628
[20]	train-mae:0.01224	eval-mae:0.11630
[21]	train-mae:0.01210	eval-mae:0.11627
[22]	train-mae:0.01206	eval-mae:0.11631
[23]	train-mae:0.01173	eval-mae:0.11621
[24]	train-mae:0.01170	eval-mae:0.11617
[25]	train

[I 2024-12-19 20:29:56,979] Trial 84 finished with value: 0.5064962911882481 and parameters: {'lambda': 2.082444427792187e-05, 'alpha': 5.745967889776368e-06, 'eta': 0.26987172141344323, 'gamma': 0.0003236724186546382, 'max_depth': 24, 'min_child_weight': 2, 'subsample': 0.8913380102988381, 'colsample_bytree': 0.9021565933570709, 'colsample_bylevel': 0.7941136815562204, 'colsample_bynode': 0.9196565270400485, 'n_estimators': 735}. Best is trial 84 with value: 0.5064962911882481.


[0]	train-mae:0.31922	eval-mae:0.33366
[1]	train-mae:0.24025	eval-mae:0.26495
[2]	train-mae:0.18616	eval-mae:0.22064
[3]	train-mae:0.14157	eval-mae:0.18549


[I 2024-12-19 20:29:57,171] Trial 85 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.31759	eval-mae:0.33023
[1]	train-mae:0.24130	eval-mae:0.26364
[2]	train-mae:0.18157	eval-mae:0.21452
[3]	train-mae:0.13767	eval-mae:0.17880
[4]	train-mae:0.10751	eval-mae:0.15901
[5]	train-mae:0.08356	eval-mae:0.14322
[6]	train-mae:0.06561	eval-mae:0.13361
[7]	train-mae:0.05260	eval-mae:0.12784
[8]	train-mae:0.04297	eval-mae:0.12495
[9]	train-mae:0.03552	eval-mae:0.12250
[10]	train-mae:0.03039	eval-mae:0.12087
[11]	train-mae:0.02648	eval-mae:0.11968
[12]	train-mae:0.02267	eval-mae:0.11870
[13]	train-mae:0.02070	eval-mae:0.11859
[14]	train-mae:0.01890	eval-mae:0.11831
[15]	train-mae:0.01709	eval-mae:0.11799
[16]	train-mae:0.01585	eval-mae:0.11768
[17]	train-mae:0.01504	eval-mae:0.11749
[18]	train-mae:0.01470	eval-mae:0.11744
[19]	train-mae:0.01402	eval-mae:0.11733
[20]	train-mae:0.01380	eval-mae:0.11726
[21]	train-mae:0.01350	eval-mae:0.11718
[22]	train-mae:0.01318	eval-mae:0.11727
[23]	train-mae:0.01305	eval-mae:0.11729
[24]	train-mae:0.01305	eval-mae:0.11729
[25]	train

[I 2024-12-19 20:29:57,774] Trial 86 finished with value: 0.4957817641483245 and parameters: {'lambda': 1.864093533152719e-05, 'alpha': 7.363026320239259e-07, 'eta': 0.27030204644270245, 'gamma': 0.0003421508381159336, 'max_depth': 12, 'min_child_weight': 1, 'subsample': 0.9055019463947818, 'colsample_bytree': 0.928705375727398, 'colsample_bylevel': 0.7738609182641321, 'colsample_bynode': 0.8406472882356667, 'n_estimators': 645}. Best is trial 86 with value: 0.4957817641483245.


[0]	train-mae:0.33468	eval-mae:0.34966


[I 2024-12-19 20:29:57,924] Trial 87 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32519	eval-mae:0.34399
[1]	train-mae:0.24769	eval-mae:0.27566


[I 2024-12-19 20:29:58,200] Trial 88 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.31571	eval-mae:0.32935
[1]	train-mae:0.23523	eval-mae:0.25658
[2]	train-mae:0.17604	eval-mae:0.20825
[3]	train-mae:0.13306	eval-mae:0.17681
[4]	train-mae:0.10173	eval-mae:0.15461
[5]	train-mae:0.07847	eval-mae:0.14160
[6]	train-mae:0.06142	eval-mae:0.13443
[7]	train-mae:0.04865	eval-mae:0.12974
[8]	train-mae:0.03909	eval-mae:0.12630
[9]	train-mae:0.03211	eval-mae:0.12487
[10]	train-mae:0.02683	eval-mae:0.12437
[11]	train-mae:0.02320	eval-mae:0.12325
[12]	train-mae:0.02036	eval-mae:0.12298
[13]	train-mae:0.01869	eval-mae:0.12293
[14]	train-mae:0.01709	eval-mae:0.12253
[15]	train-mae:0.01628	eval-mae:0.12255


[I 2024-12-19 20:29:58,597] Trial 89 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.35763	eval-mae:0.36022


[I 2024-12-19 20:29:58,736] Trial 90 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.37405	eval-mae:0.38233
[1]	train-mae:0.33147	eval-mae:0.34442


[I 2024-12-19 20:29:58,914] Trial 91 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.38686	eval-mae:0.39532
[1]	train-mae:0.35077	eval-mae:0.36278


[I 2024-12-19 20:29:59,126] Trial 92 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.31891	eval-mae:0.32862
[1]	train-mae:0.24548	eval-mae:0.26202
[2]	train-mae:0.18953	eval-mae:0.21473
[3]	train-mae:0.15005	eval-mae:0.18440
[4]	train-mae:0.12562	eval-mae:0.16733


[I 2024-12-19 20:29:59,337] Trial 93 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32336	eval-mae:0.33344
[1]	train-mae:0.24738	eval-mae:0.26296
[2]	train-mae:0.19147	eval-mae:0.21197
[3]	train-mae:0.15462	eval-mae:0.18410
[4]	train-mae:0.12454	eval-mae:0.16082
[5]	train-mae:0.10273	eval-mae:0.14701
[6]	train-mae:0.08697	eval-mae:0.13720
[7]	train-mae:0.07520	eval-mae:0.13097
[8]	train-mae:0.06662	eval-mae:0.12714
[9]	train-mae:0.05901	eval-mae:0.12464
[10]	train-mae:0.05388	eval-mae:0.12350
[11]	train-mae:0.04984	eval-mae:0.12237
[12]	train-mae:0.04628	eval-mae:0.12144
[13]	train-mae:0.04468	eval-mae:0.12136
[14]	train-mae:0.04261	eval-mae:0.12153
[15]	train-mae:0.04073	eval-mae:0.12127
[16]	train-mae:0.03935	eval-mae:0.12131
[17]	train-mae:0.03717	eval-mae:0.12128
[18]	train-mae:0.03624	eval-mae:0.12103
[19]	train-mae:0.03549	eval-mae:0.12094
[20]	train-mae:0.03442	eval-mae:0.12105
[21]	train-mae:0.03353	eval-mae:0.12081
[22]	train-mae:0.03310	eval-mae:0.12075
[23]	train-mae:0.03210	eval-mae:0.12068
[24]	train-mae:0.03119	eval-mae:0.12025
[25]	train

[I 2024-12-19 20:29:59,867] Trial 94 pruned. Trial was pruned at iteration 64.


[0]	train-mae:0.32170	eval-mae:0.33276
[1]	train-mae:0.24556	eval-mae:0.26270
[2]	train-mae:0.19114	eval-mae:0.21239
[3]	train-mae:0.15083	eval-mae:0.17909
[4]	train-mae:0.12093	eval-mae:0.15644
[5]	train-mae:0.09850	eval-mae:0.14081
[6]	train-mae:0.08264	eval-mae:0.13097
[7]	train-mae:0.07116	eval-mae:0.12453
[8]	train-mae:0.06292	eval-mae:0.12148
[9]	train-mae:0.05594	eval-mae:0.11900
[10]	train-mae:0.05119	eval-mae:0.11767
[11]	train-mae:0.04711	eval-mae:0.11717
[12]	train-mae:0.04389	eval-mae:0.11650
[13]	train-mae:0.04138	eval-mae:0.11674
[14]	train-mae:0.03986	eval-mae:0.11669
[15]	train-mae:0.03825	eval-mae:0.11665
[16]	train-mae:0.03627	eval-mae:0.11627
[17]	train-mae:0.03524	eval-mae:0.11582
[18]	train-mae:0.03460	eval-mae:0.11557
[19]	train-mae:0.03352	eval-mae:0.11514
[20]	train-mae:0.03316	eval-mae:0.11537
[21]	train-mae:0.03238	eval-mae:0.11531
[22]	train-mae:0.03188	eval-mae:0.11546
[23]	train-mae:0.03162	eval-mae:0.11545
[24]	train-mae:0.03113	eval-mae:0.11530
[25]	train

[I 2024-12-19 20:30:00,594] Trial 95 finished with value: 0.4971351785104821 and parameters: {'lambda': 1.5005206537336019e-05, 'alpha': 4.119110117346343e-05, 'eta': 0.2699614076936671, 'gamma': 0.00031758169962938377, 'max_depth': 12, 'min_child_weight': 4, 'subsample': 0.9869480190973344, 'colsample_bytree': 0.9245338110090766, 'colsample_bylevel': 0.8681718149507384, 'colsample_bynode': 0.970585472386204, 'n_estimators': 736}. Best is trial 86 with value: 0.4957817641483245.


[0]	train-mae:0.32199	eval-mae:0.33282
[1]	train-mae:0.24754	eval-mae:0.26467
[2]	train-mae:0.19192	eval-mae:0.21746
[3]	train-mae:0.15215	eval-mae:0.18453


[I 2024-12-19 20:30:00,807] Trial 96 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.32495	eval-mae:0.33629


[I 2024-12-19 20:30:00,982] Trial 97 pruned. Trial was pruned at iteration 1.


[0]	train-mae:0.32141	eval-mae:0.33347
[1]	train-mae:0.25029	eval-mae:0.26584
[2]	train-mae:0.19412	eval-mae:0.21397
[3]	train-mae:0.15410	eval-mae:0.18127
[4]	train-mae:0.12571	eval-mae:0.15701
[5]	train-mae:0.10538	eval-mae:0.14299
[6]	train-mae:0.09114	eval-mae:0.13465
[7]	train-mae:0.08068	eval-mae:0.13019
[8]	train-mae:0.07254	eval-mae:0.12719
[9]	train-mae:0.06769	eval-mae:0.12596
[10]	train-mae:0.06327	eval-mae:0.12491
[11]	train-mae:0.05991	eval-mae:0.12386
[12]	train-mae:0.05758	eval-mae:0.12373
[13]	train-mae:0.05569	eval-mae:0.12332
[14]	train-mae:0.05422	eval-mae:0.12329
[15]	train-mae:0.05208	eval-mae:0.12328


[I 2024-12-19 20:30:01,266] Trial 98 pruned. Trial was pruned at iteration 16.


[0]	train-mae:0.32254	eval-mae:0.32692
[1]	train-mae:0.25683	eval-mae:0.26469
[2]	train-mae:0.21088	eval-mae:0.22293
[3]	train-mae:0.17460	eval-mae:0.19062


[I 2024-12-19 20:30:01,451] Trial 99 pruned. Trial was pruned at iteration 4.


[0]	train-mae:0.31759	eval-mae:0.33023
[1]	train-mae:0.24130	eval-mae:0.26364
[2]	train-mae:0.18157	eval-mae:0.21452
[3]	train-mae:0.13767	eval-mae:0.17880
[4]	train-mae:0.10751	eval-mae:0.15901
[5]	train-mae:0.08356	eval-mae:0.14322
[6]	train-mae:0.06561	eval-mae:0.13361
[7]	train-mae:0.05260	eval-mae:0.12784
[8]	train-mae:0.04297	eval-mae:0.12495
[9]	train-mae:0.03552	eval-mae:0.12250
[10]	train-mae:0.03039	eval-mae:0.12087
[11]	train-mae:0.02648	eval-mae:0.11968
[12]	train-mae:0.02267	eval-mae:0.11870
[13]	train-mae:0.02070	eval-mae:0.11859
[14]	train-mae:0.01890	eval-mae:0.11831
[15]	train-mae:0.01709	eval-mae:0.11799
[16]	train-mae:0.01585	eval-mae:0.11768
[17]	train-mae:0.01504	eval-mae:0.11749
[18]	train-mae:0.01470	eval-mae:0.11744
[19]	train-mae:0.01402	eval-mae:0.11733
[20]	train-mae:0.01380	eval-mae:0.11726
[21]	train-mae:0.01350	eval-mae:0.11718
[22]	train-mae:0.01318	eval-mae:0.11727
[23]	train-mae:0.01305	eval-mae:0.11729
[24]	train-mae:0.01305	eval-mae:0.11729
[25]	train

In [23]:
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
import category_encoders as ce

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)

def create_features(X):
    """Create additional features"""
    X = X.copy()
    
    # Area per bedroom
    X['area_per_bedroom'] = X['built_up_area'] / (X['bedRoom'] + 1)
    
    # Bathroom to bedroom ratio
    X['bath_bed_ratio'] = X['bathroom'] / (X['bedRoom'] + 1)
    
    # Total rooms
    X['total_rooms'] = X['bedRoom'] + X['bathroom'] + X['servant room'] + X['store room']
    
    return X

def preprocess_data(X_train, X_val, y_train):
    """Preprocess data with proper target encoding"""
    # Define features by type
    numeric_features = ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room', 
                       'area_per_bedroom', 'bath_bed_ratio', 'total_rooms']
    categorical_features = ['property_type', 'balcony', 'luxury_category', 'floor_category']
    target_encode_features = ['sector']
    onehot_features = ['agePossession', 'furnishing_type']
    
    # Scale numeric features
    scaler = StandardScaler()
    X_train_num = scaler.fit_transform(X_train[numeric_features])
    X_val_num = scaler.transform(X_val[numeric_features])
    
    # Ordinal encode basic categorical features
    ord_enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    X_train_cat = ord_enc.fit_transform(X_train[categorical_features])
    X_val_cat = ord_enc.transform(X_val[categorical_features])
    
    # Target encode high cardinality features
    target_enc = ce.TargetEncoder(smoothing=0.5)
    X_train_target = target_enc.fit_transform(X_train[target_encode_features], y_train)
    X_val_target = target_enc.transform(X_val[target_encode_features])
    
    # One-hot encode appropriate features
    onehot_enc = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
    X_train_onehot = onehot_enc.fit_transform(X_train[onehot_features])
    X_val_onehot = onehot_enc.transform(X_val[onehot_features])
    
    # Combine all features
    X_train_processed = np.hstack([X_train_num, X_train_cat, X_train_target, X_train_onehot])
    X_val_processed = np.hstack([X_val_num, X_val_cat, X_val_target, X_val_onehot])
    
    return X_train_processed, X_val_processed

def objective(trial):
    # Create features
    X_train_feat = create_features(X_train)
    X_test_feat = create_features(X_test)
    
    # Enhanced hyperparameter search space
    param = {
        'verbosity': 0,
        'objective': 'reg:squarederror',
        'eval_metric': 'mae',
        'booster': trial.suggest_categorical('booster', ['gbtree', 'dart']),
        'lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),
        'eta': trial.suggest_float('eta', 0.001, 0.5, log=True),
        'gamma': trial.suggest_float('gamma', 1e-8, 10.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.3, 1.0),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.3, 1.0),
        'max_delta_step': trial.suggest_int('max_delta_step', 0, 10),
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
    }
    
    if param['booster'] == 'dart':
        param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        param['rate_drop'] = trial.suggest_float('rate_drop', 0.0, 0.5)
        param['skip_drop'] = trial.suggest_float('skip_drop', 0.0, 0.5)
    
    # Implement k-fold cross validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mae_scores = []
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_feat)):
        X_fold_train = X_train_feat.iloc[train_idx]
        y_fold_train = y_train.iloc[train_idx]
        X_fold_val = X_train_feat.iloc[val_idx]
        y_fold_val = y_train.iloc[val_idx]
        
        # Preprocess the data
        X_fold_train_processed, X_fold_val_processed = preprocess_data(
            X_fold_train, X_fold_val, y_fold_train
        )
        
        # Create DMatrix
        dtrain = xgb.DMatrix(X_fold_train_processed, label=y_fold_train)
        dval = xgb.DMatrix(X_fold_val_processed, label=y_fold_val)
        
        # Train model
        model = xgb.train(
            param,
            dtrain,
            num_boost_round=param['n_estimators'],
            evals=[(dval, 'val')],
            early_stopping_rounds=50,
            verbose_eval=False
        )
        
        # Predict and calculate MAE
        preds = model.predict(dval)
        mae = mean_absolute_error(np.expm1(y_fold_val), np.expm1(preds))
        mae_scores.append(mae)
    
    mean_mae = np.mean(mae_scores)
    mae_std = np.std(mae_scores)
    final_score = mean_mae * (1 + mae_std)
    
    trial.set_user_attr('mae_std', mae_std)
    trial.set_user_attr('raw_mae', mean_mae)
    
    return final_score

# Create and run study
study = optuna.create_study(
    direction='minimize',
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=5,
        n_warmup_steps=5,
        interval_steps=3
    )
)
study.optimize(objective, n_trials=150, n_jobs=-1)

# Train final model with best parameters
best_params = study.best_trial.params
best_params['objective'] = 'reg:squarederror'
best_params['eval_metric'] = 'mae'

# Create final features and preprocess
X_train_feat = create_features(X_train)
X_test_feat = create_features(X_test)
X_train_processed, X_test_processed = preprocess_data(X_train_feat, X_test_feat, y_train)

# Train final model
dtrain = xgb.DMatrix(X_train_processed, label=y_train)
dtest = xgb.DMatrix(X_test_processed, label=y_test)
final_model = xgb.train(
    best_params,
    dtrain,
    num_boost_round=best_params['n_estimators'],
    early_stopping_rounds=50,
    evals=[(dtrain, "train"), (dtest, "eval")]
)

# Final evaluation
final_preds = final_model.predict(dtest)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(final_preds))
final_r2 = r2_score(y_test, final_preds)

print("\nBest trial:")
print("  MAE: ", study.best_trial.value)
print("  Raw MAE: ", study.best_trial.user_attrs['raw_mae'])
print("  MAE Std: ", study.best_trial.user_attrs['mae_std'])
print("\nBest hyperparameters:")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

print("\nFinal Model Performance:")
print(f"MAE: {final_mae:.4f}")
print(f"R2 Score: {final_r2:.4f}")

# Feature importance analysis
feature_importance = final_model.get_score(importance_type='gain')
sorted_importance = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
print("\nTop 10 Most Important Features:")
for feat, importance in sorted_importance[:10]:
    print(f"{feat}: {importance:.4f}")

# Visualization
try:
    import plotly
    fig1 = optuna.visualization.plot_optimization_history(study)
    fig2 = optuna.visualization.plot_param_importances(study)
    fig1.show()
    fig2.show()
except:
    print("Visualization requires plotly to be installed")

[I 2024-12-19 20:34:27,114] A new study created in memory with name: no-name-c064aa33-7e5a-451a-aba4-bc9bb3c180d2


[I 2024-12-19 20:34:32,351] Trial 0 finished with value: 0.8326912323154253 and parameters: {'booster': 'gbtree', 'lambda': 1.109207787001655e-06, 'alpha': 0.007464261249737968, 'eta': 0.014732531528239067, 'gamma': 7.4397005941871726, 'max_depth': 25, 'min_child_weight': 14, 'subsample': 0.5611315552240134, 'colsample_bytree': 0.6331288833127087, 'colsample_bylevel': 0.641447113322807, 'colsample_bynode': 0.7920628117065205, 'max_delta_step': 9, 'n_estimators': 232, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.8326912323154253.
[I 2024-12-19 20:34:40,354] Trial 1 finished with value: 0.5550641196726105 and parameters: {'booster': 'gbtree', 'lambda': 8.623053810891996e-05, 'alpha': 0.04093499007577015, 'eta': 0.049886074707448075, 'gamma': 0.008764101637395664, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.9188721168944667, 'colsample_bytree': 0.9254590100510736, 'colsample_bylevel': 0.7153834068139265, 'colsample_bynode': 0.7923041215316966, 'max_delta_step': 9,

[0]	train-mae:0.42304	eval-mae:0.42809
[1]	train-mae:0.42015	eval-mae:0.42520
[2]	train-mae:0.41686	eval-mae:0.42199
[3]	train-mae:0.41352	eval-mae:0.41862
[4]	train-mae:0.40988	eval-mae:0.41508
[5]	train-mae:0.40635	eval-mae:0.41160
[6]	train-mae:0.40307	eval-mae:0.40839
[7]	train-mae:0.39959	eval-mae:0.40498
[8]	train-mae:0.39619	eval-mae:0.40159
[9]	train-mae:0.39305	eval-mae:0.39847
[10]	train-mae:0.39000	eval-mae:0.39553
[11]	train-mae:0.38674	eval-mae:0.39222
[12]	train-mae:0.38347	eval-mae:0.38893
[13]	train-mae:0.38014	eval-mae:0.38572
[14]	train-mae:0.37724	eval-mae:0.38289
[15]	train-mae:0.37444	eval-mae:0.38012
[16]	train-mae:0.37195	eval-mae:0.37768
[17]	train-mae:0.36918	eval-mae:0.37505
[18]	train-mae:0.36605	eval-mae:0.37187
[19]	train-mae:0.36305	eval-mae:0.36894
[20]	train-mae:0.35996	eval-mae:0.36585
[21]	train-mae:0.35692	eval-mae:0.36291
[22]	train-mae:0.35412	eval-mae:0.36019
[23]	train-mae:0.35138	eval-mae:0.35753
[24]	train-mae:0.34916	eval-mae:0.35532
[25]	train

In [24]:
# Best MAE (includes variance penalty)
print("Best MAE (with variance penalty):", study.best_trial.value)

# Raw MAE (without variance penalty)
print("Raw MAE:", study.best_trial.user_attrs['raw_mae'])

# Standard deviation of MAE across folds
print("MAE Standard Deviation:", study.best_trial.user_attrs['mae_std'])

# Best parameters
print("\nBest parameters:")
print(study.best_params)

# Best trial number
print("\nBest trial number:", study.best_trial.number)

# All trials information
print("\nAll trials:")
trials_df = study.trials_dataframe()
print(trials_df.sort_values('value').head())  # Shows top 5 trials

Best MAE (with variance penalty): 0.5452307215638383
Raw MAE: 0.5200646370615968
MAE Standard Deviation: 0.04839030133721801

Best parameters:
{'booster': 'gbtree', 'lambda': 2.806519745479964e-05, 'alpha': 8.971705154637653e-06, 'eta': 0.010106928111265548, 'gamma': 3.587578552074615e-06, 'max_depth': 29, 'min_child_weight': 13, 'subsample': 0.9986782885481962, 'colsample_bytree': 0.6404365813951769, 'colsample_bylevel': 0.5814819838786656, 'colsample_bynode': 0.858396343604288, 'max_delta_step': 10, 'n_estimators': 1288, 'grow_policy': 'depthwise'}

Best trial number: 93

All trials:
     number     value             datetime_start          datetime_complete  \
93       93  0.545231 2024-12-19 21:09:27.456988 2024-12-19 21:10:39.642080   
73       73  0.545708 2024-12-19 20:58:07.259950 2024-12-19 20:59:16.675232   
144     144  0.545802 2024-12-19 21:19:56.176570 2024-12-19 21:20:54.533170   
142     142  0.546033 2024-12-19 21:19:44.109708 2024-12-19 21:21:02.843436   
143     143 

In [32]:
from xgboost import XGBRegressor

def objective(trial):
    # Create features
    X_train_feat = create_features(X_train)
    X_test_feat = create_features(X_test)
    
    # Define XGBoost parameters
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.5, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.3, 1.0),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.3, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'gamma': trial.suggest_float('gamma', 1e-8, 10.0, log=True),
        'max_delta_step': trial.suggest_int('max_delta_step', 0, 10),
        'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
        'booster': trial.suggest_categorical('booster', ['gbtree', 'dart']),
        'random_state': 42,
        'n_jobs': -1,
        'early_stopping_rounds': 50  # Moved here
    }
    
    if params['booster'] == 'dart':
        params['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        params['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        params['rate_drop'] = trial.suggest_float('rate_drop', 0.0, 0.5)
        params['skip_drop'] = trial.suggest_float('skip_drop', 0.0, 0.5)
    
    # Implement k-fold cross validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mae_scores = []
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_feat)):
        X_fold_train = X_train_feat.iloc[train_idx]
        y_fold_train = y_train.iloc[train_idx]
        X_fold_val = X_train_feat.iloc[val_idx]
        y_fold_val = y_train.iloc[val_idx]
        
        # Preprocess the data
        X_fold_train_processed, X_fold_val_processed = preprocess_data(
            X_fold_train, X_fold_val, y_fold_train
        )
        
        # Create and train model
        model = XGBRegressor(**params)
        model.fit(
            X_fold_train_processed, 
            y_fold_train,
            eval_set=[(X_fold_val_processed, y_fold_val)],
            verbose=False
        )
        
        # Predict and calculate MAE
        preds = model.predict(X_fold_val_processed)
        mae = mean_absolute_error(np.expm1(y_fold_val), np.expm1(preds))
        mae_scores.append(mae)
    
    mean_mae = np.mean(mae_scores)
    mae_std = np.std(mae_scores)
    final_score = mean_mae * (1 + mae_std)
    
    trial.set_user_attr('mae_std', mae_std)
    trial.set_user_attr('raw_mae', mean_mae)
    
    return final_score

# ... rest of the code remains same until final model training ...

# Train final model
best_params['early_stopping_rounds'] = 50  # Add early stopping to best params
final_model = XGBRegressor(**best_params, random_state=42, n_jobs=-1)
final_model.fit(
    X_train_processed,
    y_train,
    eval_set=[(X_test_processed, y_test)],
    verbose=False
)

# Final evaluation
final_preds = final_model.predict(X_test_processed)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(final_preds))
final_r2 = r2_score(y_test, final_preds)

# Feature importance analysis
feature_importance = final_model.feature_importances_
feature_names = [f"feature_{i}" for i in range(X_train_processed.shape[1])]
importance_dict = dict(zip(feature_names, feature_importance))
sorted_importance = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

In [35]:
final_mae,final_r2

(np.float64(0.510324515712915), 0.9017210523727519)

In [36]:
def objective(trial):
    # Create features
    X_train_feat = create_features(X_train)
    X_test_feat = create_features(X_test)
    
    # Define XGBoost parameters
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.5, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'random_state': 42,
        'n_jobs': -1,
        'early_stopping_rounds': 50
    }
    
    # Simplified cross-validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mae_scores = []
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_feat)):
        X_fold_train = X_train_feat.iloc[train_idx]
        y_fold_train = y_train.iloc[train_idx]
        X_fold_val = X_train_feat.iloc[val_idx]
        y_fold_val = y_train.iloc[val_idx]
        
        # Preprocess the data
        X_fold_train_processed, X_fold_val_processed = preprocess_data(
            X_fold_train, X_fold_val, y_fold_train
        )
        
        # Create and train model
        model = XGBRegressor(**params)
        model.fit(
            X_fold_train_processed, 
            y_fold_train,
            eval_set=[(X_fold_val_processed, y_fold_val)],
            verbose=False
        )
        
        # Predict and calculate MAE
        preds = model.predict(X_fold_val_processed)
        mae = mean_absolute_error(np.expm1(y_fold_val), np.expm1(preds))
        mae_scores.append(mae)
    
    return np.mean(mae_scores)  # Return just the mean MAE

# Create study with more trials
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200)  # Increased number of trials

# Print the best parameters
print("Best parameters:", study.best_params)
print("Best MAE:", study.best_value)

# Train final model with best parameters
best_params = study.best_params
best_params['early_stopping_rounds'] = 50
final_model = XGBRegressor(**best_params, random_state=42, n_jobs=-1)

# Create final features and preprocess
X_train_feat = create_features(X_train)
X_test_feat = create_features(X_test)
X_train_processed, X_test_processed = preprocess_data(X_train_feat, X_test_feat, y_train)

# Train and evaluate
final_model.fit(
    X_train_processed,
    y_train,
    eval_set=[(X_test_processed, y_test)],
    verbose=False
)

final_preds = final_model.predict(X_test_processed)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(final_preds))
print(f"Final MAE: {final_mae:.4f}")

[I 2024-12-22 16:33:57,363] A new study created in memory with name: no-name-89d3ec03-2843-4290-bcc8-e40396f5140c
[I 2024-12-22 16:34:02,196] Trial 0 finished with value: 0.5604361461782889 and parameters: {'n_estimators': 410, 'max_depth': 26, 'learning_rate': 0.14011894507581135, 'min_child_weight': 2, 'subsample': 0.6826774563596791, 'colsample_bytree': 0.5790888087240276, 'reg_alpha': 1.8089168189992698e-08, 'reg_lambda': 1.7911616055420272e-08}. Best is trial 0 with value: 0.5604361461782889.
[I 2024-12-22 16:34:03,666] Trial 1 finished with value: 0.535513141362441 and parameters: {'n_estimators': 1278, 'max_depth': 18, 'learning_rate': 0.13880132132659198, 'min_child_weight': 14, 'subsample': 0.6773880404690773, 'colsample_bytree': 0.5735246420424917, 'reg_alpha': 1.5451762235465848e-05, 'reg_lambda': 7.99103358199112e-07}. Best is trial 1 with value: 0.535513141362441.
[I 2024-12-22 16:34:12,486] Trial 2 finished with value: 0.9893437548837595 and parameters: {'n_estimators': 8

KeyboardInterrupt: 

In [27]:
import numpy as np
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

# Convert categorical columns to category dtype
categorical_cols = ['property_type', 'sector', 'balcony', 'agePossession', 
                   'furnishing_type', 'luxury_category', 'floor_category']
for col in categorical_cols:
    X[col] = X[col].astype('category')

# Define objective function for Optuna
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 0.5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 1),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 1)
    }
    
    X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)
    
    model = XGBRegressor(
        **params,
        random_state=42,
        enable_categorical=True,
        tree_method='hist',
        missing=np.nan
    )
    
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_test, y_test)],
        early_stopping_rounds=10,
        verbose=False
    )
    
    y_pred = model.predict(X_test)
    return mean_absolute_error(np.expm1(y_test), np.expm1(y_pred))

# Optimize hyperparameters
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Train final model with best parameters
best_params = study.best_params
final_model = XGBRegressor(
    **best_params,
    random_state=42,
    enable_categorical=True,
    tree_method='hist',
    missing=np.nan
)

X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)
final_model.fit(
    X_train, 
    y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=10,
    verbose=False
)

# Evaluate final model
y_pred = final_model.predict(X_test)
mae = mean_absolute_error(np.expm1(y_test), np.expm1(y_pred))
print(f'Final MAE: {mae}')

[I 2024-12-22 18:34:54,994] A new study created in memory with name: no-name-1fb3ee30-b6ed-4959-8af8-c64dc9f4480f
[W 2024-12-22 18:34:55,006] Trial 0 failed with parameters: {'n_estimators': 130, 'max_depth': 10, 'learning_rate': 0.06406196892704631, 'min_child_weight': 2, 'subsample': 0.9627888608712818, 'colsample_bytree': 0.9058027646212827, 'gamma': 0.008752130473296371, 'reg_alpha': 0.9963903280345212, 'reg_lambda': 0.9670074373669226} because of the following error: AttributeError('`np.NaN` was removed in the NumPy 2.0 release. Use `np.nan` instead.').
Traceback (most recent call last):
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\Estate-Radar\env\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\vijit_singh\AppData\Local\Temp\ipykernel_25736\4067707982.py", line 39, in objective
    model.fit(
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\Estate-R

AttributeError: `np.NaN` was removed in the NumPy 2.0 release. Use `np.nan` instead.

In [34]:
import optuna
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 800, 3000),
        'max_depth': trial.suggest_int('max_depth', 4, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.6, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 1.0)
    }
    
    model = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', XGBRegressor(
            **params,
            random_state=42,
            enable_categorical=True
        ))
    ])
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(
        model, X, y_transformed, 
        cv=kf,
        scoring='neg_mean_absolute_error'
    )
    
    return -scores.mean()

# Run optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=40)

# Train final model with best parameters
final_model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(
        **study.best_trial.params,
        random_state=42,
        enable_categorical=True
    ))
])

X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size=0.2, random_state=42)
final_model.fit(X_train, y_train)
y_pred = final_model.predict(X_test)
final_mae = mean_absolute_error(np.expm1(y_test), np.expm1(y_pred))
print(f"Final MAE: {final_mae}")

[I 2024-12-22 18:50:55,192] A new study created in memory with name: no-name-9281961e-e503-4800-beea-44a4e40e942d
[I 2024-12-22 18:51:04,283] Trial 0 finished with value: 0.11358788450891928 and parameters: {'n_estimators': 2772, 'max_depth': 4, 'learning_rate': 0.025527539329031387, 'min_child_weight': 6, 'subsample': 0.662403821071277, 'colsample_bytree': 0.8375612125258681, 'reg_lambda': 0.4519688816856576, 'reg_alpha': 0.39831423122598686}. Best is trial 0 with value: 0.11358788450891928.
[I 2024-12-22 18:51:16,288] Trial 1 finished with value: 0.11150526570374461 and parameters: {'n_estimators': 998, 'max_depth': 11, 'learning_rate': 0.0057584787511442675, 'min_child_weight': 6, 'subsample': 0.7658337167365619, 'colsample_bytree': 0.848459791239544, 'reg_lambda': 0.8095937037967035, 'reg_alpha': 0.21715816258337528}. Best is trial 1 with value: 0.11150526570374461.
[I 2024-12-22 18:51:26,968] Trial 2 finished with value: 0.11189448573184954 and parameters: {'n_estimators': 1193, '

Final MAE: 0.44413879753668095


In [28]:

columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
)

In [29]:
# def scorer(model_name, model):
    
# output = []

# output.append(model_name)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor())
])

# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

print(scores.mean())



    

0.9047983252719011


In [30]:
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

pipeline.fit(X_train,y_train)

y_pred = pipeline.predict(X_test)

y_pred = np.expm1(y_pred)

mean_absolute_error(np.expm1(y_test),y_pred)



np.float64(0.447518119423869)