In [None]:
import pandas as pd
import numpy as np

import utils2 as u

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae

from functools import partial
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Build the working frame with the project helpers from utils2: the output of
# prepare_data() is fed straight into feature_engeenering() (helper name is
# spelled this way in utils2). What each helper does is not visible here.
df_all = u.feature_engeenering(u.prepare_data())

In [None]:
# Split the full frame into train/test parts via utils2.split_data; the split
# criterion lives in that helper and is not visible in this notebook.
# df_train is the frame the hyperopt objective below reads from.
df_train, df_test = u.split_data(df_all)

##### CatBoost (selected features)

In [None]:
def ctb_objective_feats(space, split_seed=42):
    """Hyperopt objective: hold-out MAE of a CatBoost regressor on ``u.FEATS``.

    The model is fitted on 70% of ``df_train`` (restricted to the columns in
    ``u.FEATS``) and scored on the remaining 30%.

    Parameters
    ----------
    space : dict
        One sampled point of the search space with the keys
        ``'n_estimators'``, ``'max_depth'``, ``'learning_rate'`` and
        ``'l2_leaf_reg'``.
    split_seed : int or None, optional
        Seed passed to ``train_test_split``. A fixed seed makes every trial
        use the same train/validation partition, so the losses hyperopt
        compares differ because of the hyperparameters rather than because
        of the random split. Pass ``None`` to get a fresh random split on
        every call.

    Returns
    -------
    dict
        ``{'loss': <MAE on the hold-out part>, 'status': STATUS_OK}``.
    """
    X = u.get_X(df_train[u.FEATS])
    y = u.get_y(df_train)

    # Same partition for every trial (unless split_seed is None); without a
    # seed each evaluation would be measured on a different validation set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=split_seed
    )

    ctb_params = {
        # The quantised samples arrive as floats, so cast the count-like
        # parameters to int before handing them to the model.
        'n_estimators': int(space['n_estimators']),
        'max_depth': int(space['max_depth']),
        'learning_rate': space['learning_rate'],
        'l2_leaf_reg': space['l2_leaf_reg'],
    }

    model = u.ctb.CatBoostRegressor(**ctb_params, verbose=0)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    score = mae(y_test, y_pred)

    return {'loss': score, 'status': STATUS_OK}

# Search space for the CatBoost objective. The dict keys are what
# ctb_objective_feats reads; the string passed to each hp.* call is the
# hyperopt label. Only 'n_estimators' uses a label equal to its key; the
# other three carry an 'x_' prefix, which is why the printed best point is
# keyed by 'x_max_depth', 'x_learning_rate' and 'x_l2_leaf_reg'.
space = dict(
    n_estimators=hp.quniform('n_estimators', 100, 2000, 100),
    max_depth=hp.quniform('x_max_depth', 3, 15, 1),
    learning_rate=hp.uniform('x_learning_rate', 0.005, 0.4),
    l2_leaf_reg=hp.uniform('x_l2_leaf_reg', 0.2, 3.0),
)

# Run 50 evaluations of the objective with TPE; every trial is recorded in
# `trials`. The recorded run of this cell took about 8.5 hours.
trials = Trials()
best_params = fmin(
    fn=ctb_objective_feats,
    space=space,
    algo=partial(tpe.suggest, n_startup_jobs=1),
    max_evals=50,
    trials=trials,
)

print("The best params: ", best_params)

# Persist the raw trial records so the search history survives the kernel.
pd.DataFrame(trials.trials).to_csv('trials_ctb_feats.csv')

100%|███████████████████| 50/50 [8:27:21<00:00, 608.84s/trial, best loss: 0.43396084433346727]
The best params:  {'n_estimators': 1400.0, 'x_l2_leaf_reg': 2.009522652209374, 'x_learning_rate': 0.0583736347504234, 'x_max_depth': 11.0}
