In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgb

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

from sklearn.datasets import load_boston
# Load the dataset returned by load_boston and pull out features and target.
data = load_boston()
X_full, y_full = data.data, data.target

# An integer test_size requests that many test rows (100 here); the fixed
# random_state keeps the shuffle, and therefore the split, repeatable.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_full, y_full, test_size=100, random_state=241,
)

print('Starting training...')
# Fit a 20-tree regressor while tracking the L1 metric on the held-out split;
# training stops early once that metric fails to improve for 5 rounds.
# NOTE: the same held-out split is used for early stopping and for the score
# reported below.
gbm = lgb.LGBMRegressor(num_leaves=31, learning_rate=0.05, n_estimators=20)
gbm.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l1',
    early_stopping_rounds=5,
)

print('Starting predicting...')
# Predict with the iteration that early stopping recorded as best.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# Report RMSE, i.e. the square root of the mean squared error.
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)

# Per-feature importance values exposed by the fitted model.
print('Feature importances:', list(gbm.feature_importances_))


# Custom evaluation metric: Root Mean Squared Logarithmic Error (RMSLE).
# Expected shape of such a metric:
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool
def rmsle(y_true, y_pred):
    """Compute the Root Mean Squared Logarithmic Error between two arrays.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values.

    Returns
    -------
    tuple
        ``('RMSLE', score, False)``: the metric name, its value, and a flag
        saying that a higher value is not better.
    """
    # Differences are taken in log(1 + x) space.
    log_gap = np.log1p(y_pred) - np.log1p(y_true)
    score = np.sqrt(np.mean(np.power(log_gap, 2)))
    return 'RMSLE', score, False


print('Starting training with custom eval function...')
# Refit the same estimator, this time passing the rmsle callable as the
# evaluation metric used on the held-out split.
gbm.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    eval_metric=rmsle,
    early_stopping_rounds=5,
)

print('Starting predicting...')
# Predict with the iteration that early stopping recorded as best.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# rmsle returns (name, value, is_higher_better); index 1 is the numeric score.
print('The rmsle of prediction is:', rmsle(y_test, y_pred)[1])

# Use the regressor with other scikit-learn tooling: a 3-fold grid search
# over learning rate and number of trees.
estimator = lgb.LGBMRegressor(num_leaves=31)

param_grid = {'learning_rate': [0.01, 0.1, 1], 'n_estimators': [20, 40]}

# NOTE: this rebinds `gbm` from the regressor fitted above to the search object.
gbm = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=3)
gbm.fit(X_train, y_train)

print('Best parameters found by grid search are:', gbm.best_params_)

  return f(*args, **kwds)


Starting training...
[1]	valid_0's l1: 6.55382
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l1: 6.30303
[3]	valid_0's l1: 6.09183
[4]	valid_0's l1: 5.89075
[5]	valid_0's l1: 5.67574
[6]	valid_0's l1: 5.47115
[7]	valid_0's l1: 5.30476
[8]	valid_0's l1: 5.12275
[9]	valid_0's l1: 4.95154
[10]	valid_0's l1: 4.81269
[11]	valid_0's l1: 4.64065
[12]	valid_0's l1: 4.50641
[13]	valid_0's l1: 4.34918
[14]	valid_0's l1: 4.20543
[15]	valid_0's l1: 4.09954
[16]	valid_0's l1: 3.99217
[17]	valid_0's l1: 3.89986
[18]	valid_0's l1: 3.80253
[19]	valid_0's l1: 3.70917
[20]	valid_0's l1: 3.61276
Did not meet early stopping. Best iteration is:
[20]	valid_0's l1: 3.61276
Starting predicting...
The rmse of prediction is: 5.279422488260959
Feature importances: [35, 0, 6, 0, 20, 70, 21, 31, 3, 7, 15, 4, 83]
Starting training with custom eval function...
[1]	valid_0's l2: 76.8091	valid_0's RMSLE: 0.396023
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l2

In [2]:
# Predict on the held-out rows with whatever `gbm` currently refers to;
# when the cells run in order, that is the fitted grid-search object.
pred = gbm.predict(X=X_test)

In [3]:
# mean_squared_error is already imported in the first cell, which must have
# run for y_test to exist, so it is not imported again here.
# Mean squared error of the predictions in `pred` against the held-out targets.
mean_squared_error(y_test, pred)

12.655273826759585