In [36]:
from hyperopt import tpe, hp, fmin
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error as mae

import numpy as np

In [2]:
x, y = load_boston(return_X_y=True)

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [45]:
# def custom_asymmetric_train(y_true, y_pred):
#     residual = (y_true - y_pred).astype("float")
#     grad = np.where(residual>0, -2*100.0*residual, -2*residual)
#     hess = np.where(residual>0, 2*100.0, 2.0)
#     return grad, hess

# def custom_asymmetric_valid(y_true, y_pred):
#     residual = (y_true - y_pred).astype("float")
#     loss = np.where(residual > 0, (residual**2)*100.0, residual**2) 
#     return "custom_asymmetric_eval", np.mean(loss), False

def custom_asymmetric_train(y_pred, train_data):
    y_true = train_data.get_label()
    residual = (y_true - y_pred).astype("float")
    grad = np.where(residual>0, -2*100.0*residual, -2*residual)
    hess = np.where(residual>0, 2*100.0, 2.0)
    return grad, hess

def custom_asymmetric_valid(y_pred, train_data):
    y_true = train_data.get_label()
    residual = (y_true - y_pred).astype("float")
    loss = np.where(residual > 0, (residual**2)*100.0, residual**2) 
    return "custom_asymmetric_eval", np.mean(loss), False

In [15]:
def objective_func(args):

    estim = LGBMRegressor(
                  colsample_bytree=args['colsample_bytree'],
                  learning_rate=args['learning_rate'],
                  subsample=args['subsample'],
                  max_depth=int(args['max_depth']), # DON'T FORGET INT()
                  n_estimators=int(args['n_estimators']), # DON'T FORGET INT()
                  n_jobs=3,
                  objective='mse', # custom_asymmetric_train
                  metric='mse' # 'custom'
    )
    
    estim.fit(
        x_train,
        y_train,
        eval_set=[(x_test, y_test)],
        eval_metric='mse', # custom_asymmetric_valid
        verbose=False
    )
    
    y_pred = estim.predict(x_test)
    
    #_, loss, _ = mse(y_test, y_pred) #custom_asymmetric_valid(y_test, y_pred)
    loss = mse(y_test, y_pred).mean()
    #print(loss)
    
    #cv_score = cross_val_score(estimator=estim, X=x_train, y=y_train, cv=5, scoring=custom_scorer).mean()
    
    
    return loss

In [16]:
space = {
           'colsample_bytree' : hp.uniform('colsample_bytree', 0.2, 0.8),
           'learning_rate' : hp.uniform('learning_rate', 0.001, 0.5),
           'subsample' : hp.uniform('subsample', 0.2, 0.8),

           'max_depth' : hp.quniform('max_depth', 2, 9, 1), # 1 stands for q
           'n_estimators' : hp.quniform('n_estimators', 50, 500, 1)
        }

In [17]:
best_classifier = fmin(objective_func,
                       space,
                       algo=tpe.suggest,
                       max_evals=100, 
                       rstate=np.random.RandomState(0))

100%|██████████| 100/100 [00:04<00:00, 23.85it/s, best loss: 18.33216886900387]


In [18]:
best_classifier

{'colsample_bytree': 0.7807716215471612,
 'learning_rate': 0.18713591021572826,
 'max_depth': 3.0,
 'n_estimators': 177.0,
 'subsample': 0.798759582135546}

In [19]:
best_classifier['max_depth'] = int(best_classifier['max_depth'])
best_classifier['n_estimators'] = int(best_classifier['n_estimators'])

estim = LGBMRegressor(**best_classifier)

In [69]:
import lightgbm as lgb

d_train = lgb.Dataset(x_train, label=y_train)
d_test = lgb.Dataset(x_test, label=y_test)

params = {}
params['learning_rate'] = 0.003
params['boosting_type'] = 'gbdt'
params['objective'] = 'regression'
params['metric'] = 'custom'#'mae'#
params['sub_feature'] = 0.5
params['num_leaves'] = 10
params['min_data'] = 50
params['max_depth'] = 10


estim = lgb.train(params=params, 
                  train_set=d_train, 
                  valid_sets=d_test,
                  num_boost_round=100,
                  fobj=custom_asymmetric_train,
                  feval=custom_asymmetric_valid
                 )

[1]	valid_0's custom_asymmetric_eval: 57218.8
[2]	valid_0's custom_asymmetric_eval: 56895
[3]	valid_0's custom_asymmetric_eval: 56590.9
[4]	valid_0's custom_asymmetric_eval: 56269.3
[5]	valid_0's custom_asymmetric_eval: 55951.2
[6]	valid_0's custom_asymmetric_eval: 55652.1
[7]	valid_0's custom_asymmetric_eval: 55337
[8]	valid_0's custom_asymmetric_eval: 55045.1
[9]	valid_0's custom_asymmetric_eval: 54748.6
[10]	valid_0's custom_asymmetric_eval: 54442
[11]	valid_0's custom_asymmetric_eval: 54134.3
[12]	valid_0's custom_asymmetric_eval: 53849.6
[13]	valid_0's custom_asymmetric_eval: 53564.7
[14]	valid_0's custom_asymmetric_eval: 53287
[15]	valid_0's custom_asymmetric_eval: 53017.6
[16]	valid_0's custom_asymmetric_eval: 52749.8
[17]	valid_0's custom_asymmetric_eval: 52460.9
[18]	valid_0's custom_asymmetric_eval: 52183.2
[19]	valid_0's custom_asymmetric_eval: 51905.5
[20]	valid_0's custom_asymmetric_eval: 51622.6
[21]	valid_0's custom_asymmetric_eval: 51340.4
[22]	valid_0's custom_asymmetr

In [70]:
y_train_pred = estim.predict(x_train)
y_test_pred = estim.predict(x_test)

In [71]:
residuals_train = y_train - y_train_preds
residuals_test = y_test - y_test_preds

print('Train Data:')
print('Positive Residuals:\t', np.mean(residuals_train[np.where(residuals_train > 0)]))
print('Negative Residuals:\t', np.mean(residuals_train[np.where(residuals_train < 0)]))

print('\nTest Data:')
print('Positive Residuals:\t', np.mean(residuals_test[np.where(residuals_test > 0)]))
print('Negative Residuals:\t', np.mean(residuals_test[np.where(residuals_test < 0)]))

Train Data:
Positive Residuals:	 0.7671655120761167
Negative Residuals:	 -0.8304369989003749

Test Data:
Positive Residuals:	 3.2865039340325577
Negative Residuals:	 -2.164366440765035


In [72]:
print('MAE train/test')
print(mae(y_train, y_train_preds))
print(mae(y_test, y_test_preds))

MAE train/test
0.7975483547590526
2.6484257515863194


In [73]:
print('Custom train/test')
print(custom_asymmetric_valid(y_train_preds, d_train))
print(custom_asymmetric_valid(y_test_preds, d_test))

Custom train/test
('custom_asymmetric_eval', 57.37265266205813, False)
('custom_asymmetric_eval', 1346.8799423891437, False)


In [None]:
# estim = LGBMRegressor(
#               colsample_bytree=args['colsample_bytree'],
#               learning_rate=args['learning_rate'],
#               subsample=args['subsample'],
#               max_depth=int(args['max_depth']), # DON'T FORGET INT()
#               n_estimators=int(args['n_estimators']), # DON'T FORGET INT()
#               n_jobs=3,
#               objective='mse', # custom_asymmetric_train
#               metric='mse' # 'custom'
# )
    
# estim.fit(
#     x_train,
#     y_train,
#     eval_set=[(x_test, y_test)],
#     eval_metric='mse', # custom_asymmetric_valid
#     verbose=False
# )

In [20]:
estim.fit(x_train,y_train)

y_train_preds = estim.predict(x_train)
y_test_preds = estim.predict(x_test)

residuals_train = y_train - y_train_preds
residuals_test = y_test - y_test_preds

print('Train Data:')
print('Positive Residuals:\t', np.mean(residuals_train[np.where(residuals_train > 0)]))
print('Negative Residuals:\t', np.mean(residuals_train[np.where(residuals_train < 0)]))

print('\nTest Data:')
print('Positive Residuals:\t', np.mean(residuals_test[np.where(residuals_test > 0)]))
print('Negative Residuals:\t', np.mean(residuals_test[np.where(residuals_test < 0)]))

Train Data:
Positive Residuals:	 0.7671655120761167
Negative Residuals:	 -0.8304369989003749

Test Data:
Positive Residuals:	 3.2865039340325577
Negative Residuals:	 -2.164366440765035


In [23]:
mse(y_train, y_train_preds)

1.164886390567861

In [25]:
mse(y_test, y_test_preds)

18.33216886900387

In [21]:
custom_asymmetric_valid(y_train, y_train_preds)

('custom_asymmetric_eval', 57.37265266205813, False)

In [22]:
custom_asymmetric_valid(y_test, y_test_preds)

('custom_asymmetric_eval', 1346.8799423891437, False)

In [13]:
As you can see, negative residuals are closer to zero, than positive residuals

# compare with mse!!!

SyntaxError: invalid syntax (<ipython-input-13-17b09d3b27d0>, line 1)