In [28]:
import os

# Cap the OpenMP thread pool. The variable OpenMP runtimes read is OMP_NUM_THREADS
# (the previous key, NUM_OMP_THREADS, is not recognised and had no effect).
# It is set before the numeric libraries below are imported.
os.environ['OMP_NUM_THREADS'] = "4"

import warnings
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
import time
from sklearn.linear_model import HuberRegressor
import sklearn.ensemble as tree_model
# from tqdm import tqdm
import datetime
pd.set_option('display.max_column',100)
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2
from utils import make_dir, score, timer, kf_lgbm, kf_xgbm, kf_ctbm, kf_sklearn

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
def make_features(df):
    """Add the engineered feature columns and return them on a new DataFrame.

    The input frame is not modified: all work happens on a copy whose index is
    reset to 0..n-1 (row order is preserved).

    Columns added, in this order:
      * 'round_log1p<col>' for each app-usage counter: round(log1p(value)) as int.
      * '前五个月消费总费用': 6 * six-month average - current-month bill.
      * '前五个月消费平均费用': the value above divided by 5.
      * '当月费用/前五个月消费平均费用': current bill / (1 + previous-five-month average).
      * '当月费用-前五个月消费平均费用': current bill - previous-five-month average.
      * 'count_*': for each row, how many rows (with a non-null '用户编码') share
        the same value of the key column(s).
      * '是否998折': last payment amount is a non-zero exact multiple of 0.998.
      * '年龄_0_as_nan': '用户年龄' with 0 replaced by NaN (always float).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the raw columns referenced below, including '用户编码'.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the feature columns appended.
    """
    app_feature = [
        '当月网购类应用使用次数',
        '当月物流快递类应用使用次数',
        '当月金融理财类应用使用总次数',
        '当月视频播放类应用使用次数',
        '当月飞机类应用使用次数',
        '当月火车类应用使用次数',
        '当月旅游资讯类应用使用次数',
    ]

    # reset_index returns a new frame, so the caller's DataFrame is left untouched,
    # and the unique 0..n-1 index makes every column assignment below unambiguous
    # even when the input carries duplicate labels (e.g. after pd.concat).
    df = df.reset_index(drop=True)

    for f in app_feature:
        df['round_log1p'+f] = np.round(np.log1p(df[f])).astype(int)

    df['前五个月消费总费用'] = 6*df['用户近6个月平均消费值（元）'] - df['用户账单当月总费用（元）']
    df['前五个月消费平均费用'] = df['前五个月消费总费用'] / 5
    # The +1 in the denominator avoids dividing by zero when the average is 0.
    df['当月费用/前五个月消费平均费用'] = (df['用户账单当月总费用（元）']) \
                        / (1+df['前五个月消费平均费用'])
    df['当月费用-前五个月消费平均费用'] = df['用户账单当月总费用（元）'] - df['前五个月消费平均费用']

    def make_count_feature(df, col, fea_name):
        """Append ``fea_name``: per-row size of the ``col`` group (non-null '用户编码')."""
        # transform keeps one value per input row in the original order, so no
        # helper index column, merge or re-sort is needed and no row can be lost.
        # NOTE(review): rows whose key is NaN are expected to get NaN here rather
        # than being dropped as the old inner merge did — confirm on the pandas
        # version in use if the data can contain missing keys.
        df[fea_name] = df.groupby(col)['用户编码'].transform('count')
        return df

    df = make_count_feature(df, '缴费用户最近一次缴费金额（元）','count_缴费')
    df = make_count_feature(df, '用户账单当月总费用（元）','count_当月费用')
    df = make_count_feature(df, '前五个月消费总费用', 'count_总费用')
    df = make_count_feature(df, '当月费用-前五个月消费平均费用', 'count_费用差')
    df = make_count_feature(df, '用户近6个月平均消费值（元）', 'count_平均费用')
    df = make_count_feature(df, ['用户账单当月总费用（元）','用户近6个月平均消费值（元）'],
                            'count_当月费用_平均费用')

    arr = df['缴费用户最近一次缴费金额（元）']
    # NOTE(review): this is an exact floating-point test; it is kept unchanged so
    # the feature values stay identical to earlier runs.
    df['是否998折'] = ((arr/0.998)%1==0)&(arr!=0)

    # A scalar NaN keeps the result float64 regardless of whether any age is 0.
    df['年龄_0_as_nan'] = np.where(df['用户年龄']==0, np.nan, df['用户年龄'])

    return df
    
def load_df_and_make_features(
        train_path='C:/Users/yue/yuekangwei/credit-competition/train_dataset/train_dataset.csv',
        test_path='C:/Users/yue/yuekangwei/credit-competition/test_dataset/test_dataset.csv'):
    """Read the train and test CSV files, stack them and add the engineered features.

    Features are computed on the stacked frame, so the count features produced
    by ``make_features`` are taken over train and test rows together.

    Parameters
    ----------
    train_path, test_path : str
        Locations of the two CSV files. The defaults are the original author's
        local paths; pass your own to run the notebook elsewhere.

    Returns
    -------
    pandas.DataFrame
        Train rows followed by test rows, with a 'train' column set to 1 for
        training rows and 0 for test rows, plus the columns added by
        ``make_features``.
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    # Flag the origin of every row so the stacked frame can be split again later.
    train_df['train'] = 1
    test_df['train'] = 0
    df = pd.concat([train_df,test_df])
    df = make_features(df)
    return df

In [30]:
# The six feature sets differ in only three ways, so they are assembled from
# shared pieces instead of being written out six times:
#   * the age column: raw '用户年龄' or '年龄_0_as_nan';
#   * the app-usage columns: raw counters or their 'round_log1p' versions;
#   * whether the engineered total/count/998 columns are included.

# Account columns present in every set, placed right after the age column.
_ACCOUNT_COLS = [
    '用户网龄（月）',
    '用户实名制是否通过核实',
    '是否大学生客户',
    '是否4G不健康客户',
    '用户最近一次缴费距今时长（月）',
    '缴费用户最近一次缴费金额（元）',
    '用户近6个月平均消费值（元）',
    '用户账单当月总费用（元）',
    '用户当月账户余额（元）',
    '用户话费敏感度',
    '当月费用-前五个月消费平均费用',
]

# Engineered columns used by sets 1-4 only.
_ENGINEERED_COLS = [
    '前五个月消费总费用',
    'count_缴费',
    'count_当月费用',
    'count_费用差',
    'count_平均费用',
    'count_当月费用_平均费用',
    '是否998折',
]

# Columns that sit between the account block and the app-usage block.
_ACTIVITY_COLS = [
    '当月通话交往圈人数',
    '近三个月月均商场出现次数',
]

# Raw app-usage counters, and the matching columns created by make_features.
_APP_USAGE_COLS = [
    '当月网购类应用使用次数',
    '当月物流快递类应用使用次数',
    '当月金融理财类应用使用总次数',
    '当月视频播放类应用使用次数',
    '当月飞机类应用使用次数',
    '当月火车类应用使用次数',
    '当月旅游资讯类应用使用次数',
]
_LOG_APP_USAGE_COLS = ['round_log1p' + col for col in _APP_USAGE_COLS]

# Indicator columns that close every set.
_FLAG_COLS = [
    '当月是否逛过福州仓山万达',
    '当月是否到过福州山姆会员店',
    '当月是否看电影',
    '当月是否景点游览',
    '当月是否体育场馆消费',
    '是否经常逛商场的人',
    '是否黑名单客户',
    '缴费用户当前是否欠费缴费',
]


def _feature_set(age_col, app_cols, with_engineered=True):
    """Return a new list of column names in the fixed order used by every set."""
    engineered = _ENGINEERED_COLS if with_engineered else []
    return ([age_col] + _ACCOUNT_COLS + engineered + _ACTIVITY_COLS
            + list(app_cols) + _FLAG_COLS)


# Sets 1/2: raw app counters plus engineered columns (raw age / age with 0 as NaN).
feature_name1 = _feature_set('用户年龄', _APP_USAGE_COLS)
feature_name2 = _feature_set('年龄_0_as_nan', _APP_USAGE_COLS)

# Sets 3/4: same as 1/2 but with the round_log1p app counters.
feature_name3 = _feature_set('用户年龄', _LOG_APP_USAGE_COLS)
feature_name4 = _feature_set('年龄_0_as_nan', _LOG_APP_USAGE_COLS)

# Sets 5/6: raw app counters without the engineered total/count/998 columns.
feature_name5 = _feature_set('用户年龄', _APP_USAGE_COLS, with_engineered=False)
feature_name6 = _feature_set('年龄_0_as_nan', _APP_USAGE_COLS, with_engineered=False)

In [31]:
# Build the stacked train+test feature table once, then split it back apart
# using the 'train' flag written by load_df_and_make_features.
df = load_df_and_make_features()
train_df = df.loc[df['train'] == 1]
test_df = df.loc[df['train'] != 1]


In [32]:
# Directory passed to kf_lgbm, which saves the out-of-fold and test prediction arrays there.
output_dir = './stacking_files/'

In [33]:
# Show the first rows to check the raw and engineered columns side by side.
df.head(5)

Unnamed: 0,train,信用分,当月旅游资讯类应用使用次数,当月是否体育场馆消费,当月是否到过福州山姆会员店,当月是否景点游览,当月是否看电影,当月是否逛过福州仓山万达,当月火车类应用使用次数,当月物流快递类应用使用次数,当月网购类应用使用次数,当月视频播放类应用使用次数,当月通话交往圈人数,当月金融理财类应用使用总次数,当月飞机类应用使用次数,是否4G不健康客户,是否大学生客户,是否经常逛商场的人,是否黑名单客户,用户实名制是否通过核实,用户年龄,用户当月账户余额（元）,用户最近一次缴费距今时长（月）,用户编码,用户网龄（月）,用户话费敏感度,用户账单当月总费用（元）,用户近6个月平均消费值（元）,缴费用户当前是否欠费缴费,缴费用户最近一次缴费金额（元）,近三个月月均商场出现次数,round_log1p当月网购类应用使用次数,round_log1p当月物流快递类应用使用次数,round_log1p当月金融理财类应用使用总次数,round_log1p当月视频播放类应用使用次数,round_log1p当月飞机类应用使用次数,round_log1p当月火车类应用使用次数,round_log1p当月旅游资讯类应用使用次数,前五个月消费总费用,前五个月消费平均费用,当月费用/前五个月消费平均费用,当月费用-前五个月消费平均费用,count_缴费,count_当月费用,count_总费用,count_费用差,count_平均费用,count_当月费用_平均费用,是否998折,年龄_0_as_nan
0,1,664.0,30,1,0,1,0,0,0,0,713,7145,83,2740,0,0,0,1,0,1,44,180,1,a4651f98c82948b186bdcdc8108381b4,186,3,159.2,163.86,0,99.8,75,7,0,8,9,0,0,3,823.96,164.792,0.960239,-5.592,22284,18,1,4,2,1,True,44.0
1,1,530.0,0,0,0,0,0,0,0,0,414,44862,21,2731,0,1,0,1,0,1,18,110,1,aeb10247db4e4d67b2550bbc42ff9827,5,3,145.1,153.28,0,29.94,16,6,0,8,11,0,0,0,774.58,154.916,0.930629,-9.816,11740,38,1,7,5,1,True,18.0
2,1,643.0,1,0,0,0,0,0,0,0,3391,4804,59,0,0,0,0,0,0,1,47,70,1,5af23a1e0e77410abb25e9a7eee510aa,145,1,120.2,109.64,0,49.9,1,8,0,0,8,0,0,1,537.64,107.528,1.107548,12.672,21066,28,7,3,9,1,True,47.0
3,1,649.0,5,1,0,1,0,0,0,0,500,3141,78,1931,0,0,0,1,0,1,55,90,1,43c64379d3c24a15b8478851b22049e4,234,3,167.42,92.97,0,99.8,26,6,0,8,8,0,0,2,390.4,78.08,2.117097,89.34,22284,4,3,2,9,1,True,55.0
4,1,648.0,0,0,0,1,0,0,0,0,522,59,70,64,0,0,0,1,0,1,40,80,1,f1687f3b8a6f4910bd0b13eb634056e2,76,3,101.0,95.47,0,49.9,44,6,0,4,4,0,0,0,471.82,94.364,1.0591,6.636,21066,475,3,4,7,2,True,40.0


In [34]:
# Disabled cell: the same kf_lgbm call, with identical arguments, is executed
# further down in this notebook after kf_lgbm is redefined locally.
# x, y = train_df[feature_name1], train_df['信用分'].values
# x_test = test_df[feature_name1]

# model = kf_lgbm(x=x,y=y,x_test=x_test,learning_rate=0.01, 
#                 stratify=True,
#                 min_split_gain=1,
#                 categorical_feature=['用户话费敏感度'],
#                 boosting_type='gbdt',
#                 early_stopping_rounds=80,
#                 fair_c=25, 
#                 huber_delta=2,
#                 max_cat_to_onehot=4,
#                 objective="mae_fair",
#                 eval_metric="mae",
#                 subsample_freq=2,
#                 min_child_samples=20,
#                 num_leaves=31,
#                 bagging_fraction=0.8,
#                 feature_fraction=0.5,
#                 max_depth=5,
#                 output_dir=output_dir,
#                 name='gotcha_lgb1',
#                 n_estimators=8000)

In [41]:
def fn_identity(x):
    """Return the argument unchanged.

    Used as the default ``fn_reverse_transform`` of ``kf_lgbm``, i.e. the
    "no target transformation" case.
    """
    return x

def _stratify_bin_labels(y, bin_size=20):
    """Turn a continuous target into bin labels usable by StratifiedKFold.

    Samples are ranked by target value and every run of ``bin_size`` consecutive
    ranks shares one label. When ``len(y)`` is not a multiple of ``bin_size``
    the leftover samples join the last full bin, so the result always has one
    label per sample.
    """
    n = len(y)
    order = np.asarray(y.argsort())
    last_bin = max(n // bin_size - 1, 0)
    labels = np.empty(n, dtype=np.int32)
    # order[k] is the position of the k-th smallest target; give it bin k // bin_size.
    labels[order] = np.minimum(np.arange(n) // bin_size, last_bin)
    return labels


def kf_lgbm(x, y, x_test, output_dir, name="mae_fair30",
            n_folds=10, stratify=True, split_seed=8888,
            fn_reverse_transform=fn_identity,
            boosting_type="gbdt", base_score=None, sample_weight=None,
            objective="mae_fair", eval_metric="mae_fair",
            fair_c=30, huber_delta=20, n_estimators=3000, learning_rate=0.01,
            num_leaves=31, max_depth=5, max_bin=255, reg_alpha=2.0,
            reg_lambda=5.0, colsample_bytree=0.5, subsample=0.8,
            subsample_freq=2, min_child_samples=20, min_split_gain=1,
            categorical_feature=['用户话费敏感度'], early_stopping_rounds=80, verbose=200,
            **kwargs):
    """Train one LGBMRegressor per cross-validation fold and save stacking predictions.

    For every fold the model is fitted with a constant initial score (the
    training-fold median unless ``base_score`` is given; 0 for ``'rf'``), the
    held-out rows are predicted to fill the out-of-fold array, and the test set
    prediction is accumulated as the average over folds. Both arrays are saved
    as ``val.<name>.npy`` and ``test.<name>.npy`` inside ``output_dir`` and a
    summary of the per-fold scores is printed.

    Parameters
    ----------
    x, x_test : pandas.DataFrame or array
        Training and test features. DataFrames are sliced with ``.iloc``,
        anything else with plain indexing.
    y : numpy.ndarray
        Training target, indexed positionally.
    output_dir, name : str
        Destination directory and tag used in the saved file names.
    n_folds, stratify, split_seed
        Fold count, whether to stratify on rank bins of ``y`` (20 samples per
        bin), and the shuffle seed.
    fn_reverse_transform : callable
        Applied to predictions (and to targets before scoring) to map them
        back to the original scale.
    base_score : float or None
        Fixed initial score; ``None`` means use the training-fold median.
    sample_weight : array or None
        Optional per-sample weights, sliced to the training fold.
    objective : str or callable
        ``"mae_fair"``, ``"mae_huber"`` or ``"fair_huber"`` select a custom
        loss parameterised by ``fair_c`` / ``huber_delta``; any other value is
        passed to LightGBM unchanged.
        NOTE(review): only ``mae_fair_loss`` is defined in the visible part of
        this notebook — the other two need ``mae_huber_loss`` /
        ``fair_huber_loss`` to exist elsewhere.
    eval_metric : str or callable
        ``"mae_fair"``, ``"mae_huber"``, ``"fair_huber"`` or ``"mae"`` select a
        metric evaluated on the original scale; any other value is passed to
        LightGBM unchanged.
        NOTE(review): only ``mae_metric`` is defined in the visible part of
        this notebook, so the default ``"mae_fair"`` relies on
        ``mae_fair_metric`` being defined elsewhere.
    categorical_feature, early_stopping_rounds, verbose
        Forwarded to ``LGBMRegressor.fit``.
    **kwargs
        Forwarded to the ``LGBMRegressor`` constructor together with the
        remaining named hyper-parameters.

    Returns
    -------
    lightgbm.LGBMRegressor or None
        The model fitted on the last fold (``None`` if no fold was run).
    """
    # Imported here so the function does not depend on a later notebook cell
    # having brought these names into scope.
    from sklearn.model_selection import StratifiedKFold, KFold

    if objective == "mae_fair":
        def fn_objective(y_true, y_pred):
            return mae_fair_loss(y_true, y_pred, fair_c)
    elif objective == "mae_huber":
        def fn_objective(y_true, y_pred):
            return mae_huber_loss(y_true, y_pred, huber_delta)
    elif objective == "fair_huber":
        def fn_objective(y_true, y_pred):
            return fair_huber_loss(y_true, y_pred, fair_c, huber_delta)
    else:
        fn_objective = objective
    objective = fn_objective

    num_training_samples = x.shape[0]
    num_testing_samples = x_test.shape[0]

    test_pred = np.zeros(num_testing_samples)
    oof_train_pred = np.zeros(num_training_samples)
    scores = []

    if stratify:
        kf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=split_seed)
        # One label per sample, whatever the sample count (the previous
        # construction produced too few labels unless it was a multiple of 20).
        splits = kf.split(x, _stratify_bin_labels(y))
    else:
        kf = KFold(n_splits=n_folds, shuffle=True, random_state=split_seed)
        splits = kf.split(x)

    model = None
    for fold_idx, (train_idx, valid_idx) in enumerate(splits, start=1):
        print()
        print("=" * 50, "Fold %d" % fold_idx, "=" * 50)
        if not isinstance(x, pd.DataFrame):
            x_train, x_valid = x[train_idx], x[valid_idx]
        else:
            x_train, x_valid = x.iloc[train_idx], x.iloc[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]
        if sample_weight is not None:
            sample_weight_train = sample_weight[train_idx]
        else:
            sample_weight_train = None

        if base_score is not None:
            init_score = base_score
        else:
            init_score = np.median(y_train)

        if boosting_type == 'rf':
            print('boosting_type is rf, ignore base_score')
            init_score = 0

        # The evaluation labels passed to fit() are shifted by -init_score, so
        # the metric adds it back before mapping to the original scale.
        # The metric is resolved into a per-fold local; the `eval_metric`
        # argument itself is left untouched between folds.
        if eval_metric == "mae_fair":
            def fold_metric(y_true, y_pred):
                y_true = fn_reverse_transform(y_true + init_score)
                y_pred = fn_reverse_transform(y_pred + init_score)
                return mae_fair_metric(y_true, y_pred, fair_c)
        elif eval_metric == "mae_huber":
            def fold_metric(y_true, y_pred):
                y_true = fn_reverse_transform(y_true + init_score)
                y_pred = fn_reverse_transform(y_pred + init_score)
                return mae_huber_metric(y_true, y_pred, huber_delta)
        elif eval_metric == "fair_huber":
            def fold_metric(y_true, y_pred):
                y_true = fn_reverse_transform(y_true + init_score)
                y_pred = fn_reverse_transform(y_pred + init_score)
                return fair_huber_metric(y_true, y_pred, fair_c, huber_delta)
        elif eval_metric == "mae":
            def fold_metric(y_true, y_pred):
                y_true = fn_reverse_transform(y_true + init_score)
                y_pred = fn_reverse_transform(y_pred + init_score)
                return mae_metric(y_true, y_pred)
        else:
            fold_metric = eval_metric

        model = lgb.LGBMRegressor(boosting_type=boosting_type,
                                  learning_rate=learning_rate,
                                  num_leaves=num_leaves,
                                  max_depth=max_depth,
                                  n_estimators=n_estimators,
                                  max_bin=max_bin,
                                  objective=objective,
                                  reg_alpha=reg_alpha,
                                  reg_lambda=reg_lambda,
                                  colsample_bytree=colsample_bytree,
                                  subsample=subsample,
                                  subsample_freq=subsample_freq,
                                  min_child_samples=min_child_samples,
                                  min_split_gain=min_split_gain,
                                  metric=['mae'], **kwargs)

        # Random-forest mode is trained without an initial score.
        init_score_ = None if boosting_type == 'rf' else np.ones_like(y_train) * init_score

        model.fit(x_train, y_train, eval_set=[(x_train, y_train - init_score),
                                              (x_valid, y_valid - init_score)],
                  eval_names=['train', 'test'],
                  sample_weight=sample_weight_train,
                  eval_metric=fold_metric,
                  verbose=verbose, early_stopping_rounds=early_stopping_rounds,
                  categorical_feature=categorical_feature,
                  init_score=init_score_)

        # predict() does not include the training init_score, so add it back.
        val_pred = model.predict(x_valid, num_iteration=model.best_iteration_) + init_score
        val_pred = fn_reverse_transform(val_pred)
        oof_train_pred[valid_idx] = val_pred

        test_pred_fold = model.predict(x_test, num_iteration=model.best_iteration_) + init_score
        test_pred_fold = fn_reverse_transform(test_pred_fold)
        test_pred += test_pred_fold / n_folds

        scores.append(score(val_pred, fn_reverse_transform(y_valid)))

    make_dir(output_dir + '/')
    np.save(os.path.join(output_dir, 'val.%s.npy' % name), oof_train_pred)
    np.save(os.path.join(output_dir, 'test.%s.npy' % name), test_pred)

    print("=" * 100)
    print('\t'.join(map(str, scores)))
    print('min score: %.6f' % np.min(scores))
    print('max score: %.6f' % np.max(scores))
    print('median score: %.6f' % np.median(scores))
    print('mean score: %.6f' % np.mean(scores))
    print(test_pred[:10])
    return model

def mae_metric(y_true, y_pred):
    """Mean absolute error as a (name, value, is_higher_better) triple.

    The last element is always False, i.e. lower values are better.
    """
    abs_error = np.abs(y_true - y_pred)
    return 'mae', np.mean(abs_error), False

def mae_loss(y_true, y_pred):
    """Gradient and hessian of the absolute-error loss w.r.t. the prediction.

    The gradient is the sign of the residual (prediction minus target); the
    hessian is returned as all zeros.
    """
    residual = y_pred - y_true
    return np.sign(residual), np.zeros_like(residual)

def fair_loss(y_true, y_pred, faic_c):
    """Gradient and hessian of the Fair loss w.r.t. the prediction.

    With residual r = y_pred - y_true and constant c = ``faic_c``:
    grad = c * r / (|r| + c) and hess = c**2 / (|r| + c)**2.
    """
    residual = y_pred - y_true
    scale = np.abs(residual) + faic_c
    grad = faic_c * residual / scale
    hess = faic_c ** 2 / scale ** 2
    return grad, hess



def mae_fair_loss(y_true, y_pred, fair_c):
    """Equal-weight blend of the absolute-error and Fair losses.

    Returns the (grad, hess) pair obtained by averaging, with weights 0.5 and
    0.5, the outputs of ``mae_loss`` and ``fair_loss`` (the latter with
    constant ``fair_c``).
    """
    mae_terms = mae_loss(y_true, y_pred)
    fair_terms = fair_loss(y_true, y_pred, fair_c)
    grad, hess = (0.5 * mae_part + 0.5 * fair_part
                  for mae_part, fair_part in zip(mae_terms, fair_terms))
    return grad, hess

In [42]:
from sklearn.model_selection import StratifiedKFold, KFold

# First LightGBM run: feature set 1, blended MAE/Fair objective, scored with MAE.
x = train_df[feature_name1]
y = train_df['信用分'].values
x_test = test_df[feature_name1]

model = kf_lgbm(
    # data and output location
    x=x, y=y, x_test=x_test,
    output_dir=output_dir,
    name='gotcha_lgb1',
    # cross-validation
    stratify=True,
    # loss / metric
    objective="mae_fair",
    eval_metric="mae",
    fair_c=25,
    huber_delta=2,
    # boosting schedule
    boosting_type='gbdt',
    learning_rate=0.01,
    n_estimators=8000,
    early_stopping_rounds=80,
    # tree shape and sampling
    num_leaves=31,
    max_depth=5,
    min_child_samples=20,
    min_split_gain=1,
    subsample_freq=2,
    categorical_feature=['用户话费敏感度'],
    # extra LightGBM parameters forwarded through **kwargs
    max_cat_to_onehot=4,
    bagging_fraction=0.8,
    feature_fraction=0.5,
)


Training until validation scores don't improve for 80 rounds
[200]	train's l1: 15.6042	train's mae: 15.6042	test's l1: 15.8841	test's mae: 15.8841
[400]	train's l1: 14.6353	train's mae: 14.6353	test's l1: 15.0806	test's mae: 15.0806
[600]	train's l1: 14.3671	train's mae: 14.3671	test's l1: 14.9206	test's mae: 14.9206
[800]	train's l1: 14.2151	train's mae: 14.2151	test's l1: 14.8455	test's mae: 14.8455
[1000]	train's l1: 14.0922	train's mae: 14.0922	test's l1: 14.7962	test's mae: 14.7962
[1200]	train's l1: 13.9884	train's mae: 13.9884	test's l1: 14.7662	test's mae: 14.7662
[1400]	train's l1: 13.8912	train's mae: 13.8912	test's l1: 14.7357	test's mae: 14.7357
[1600]	train's l1: 13.8007	train's mae: 13.8007	test's l1: 14.7117	test's mae: 14.7117
[1800]	train's l1: 13.7108	train's mae: 13.7108	test's l1: 14.6917	test's mae: 14.6917
[2000]	train's l1: 13.6281	train's mae: 13.6281	test's l1: 14.6744	test's mae: 14.6744
[2200]	train's l1: 13.552	train's mae: 13.552	test's l1: 14.6612	test's 

[800]	train's l1: 14.2307	train's mae: 14.2307	test's l1: 14.5953	test's mae: 14.5953
[1000]	train's l1: 14.1135	train's mae: 14.1135	test's l1: 14.55	test's mae: 14.55
[1200]	train's l1: 14.0042	train's mae: 14.0042	test's l1: 14.5163	test's mae: 14.5163
[1400]	train's l1: 13.9018	train's mae: 13.9018	test's l1: 14.4901	test's mae: 14.4901
[1600]	train's l1: 13.8113	train's mae: 13.8113	test's l1: 14.4698	test's mae: 14.4698
[1800]	train's l1: 13.7226	train's mae: 13.7226	test's l1: 14.4552	test's mae: 14.4552
[2000]	train's l1: 13.6417	train's mae: 13.6417	test's l1: 14.4443	test's mae: 14.4443
[2200]	train's l1: 13.5589	train's mae: 13.5589	test's l1: 14.4352	test's mae: 14.4352
[2400]	train's l1: 13.485	train's mae: 13.485	test's l1: 14.4265	test's mae: 14.4265
[2600]	train's l1: 13.4096	train's mae: 13.4096	test's l1: 14.4208	test's mae: 14.4208
[2800]	train's l1: 13.3402	train's mae: 13.3402	test's l1: 14.4181	test's mae: 14.4181
[3000]	train's l1: 13.2678	train's mae: 13.2678	te

[2200]	train's l1: 13.5437	train's mae: 13.5437	test's l1: 14.6466	test's mae: 14.6466
[2400]	train's l1: 13.4668	train's mae: 13.4668	test's l1: 14.6379	test's mae: 14.6379
[2600]	train's l1: 13.3917	train's mae: 13.3917	test's l1: 14.6286	test's mae: 14.6286
[2800]	train's l1: 13.3198	train's mae: 13.3198	test's l1: 14.6232	test's mae: 14.6232
[3000]	train's l1: 13.2464	train's mae: 13.2464	test's l1: 14.6139	test's mae: 14.6139
[3200]	train's l1: 13.1793	train's mae: 13.1793	test's l1: 14.6097	test's mae: 14.6097
Early stopping, best iteration is:
[3224]	train's l1: 13.1701	train's mae: 13.1701	test's l1: 14.6083	test's mae: 14.6083

Training until validation scores don't improve for 80 rounds
[200]	train's l1: 15.623	train's mae: 15.623	test's l1: 15.6827	test's mae: 15.6827
[400]	train's l1: 14.6676	train's mae: 14.6676	test's l1: 14.8113	test's mae: 14.8113
[600]	train's l1: 14.4047	train's mae: 14.4047	test's l1: 14.6118	test's mae: 14.6118
[800]	train's l1: 14.2525	train's mae: