## LGBM Baseline with more Feature Engineering

- source
> - https://www.kaggle.com/tiger1026/lgbm-baseline-with-more-feature-engineering
> - this one was copied from another notebook: 
>   - https://www.kaggle.com/ragnar123/optiver-realized-volatility-lgbm-baseline/notebook



**!!!!this notebook is copied from the above source!!!!**

In [1]:
import os
import glob
from joblib import Parallel, delayed
import pandas as pd
import numpy as np
import scipy as sc
from sklearn.model_selection import KFold
import lightgbm as lgb
import warnings
# Silence library warnings so they do not clutter the long training logs
warnings.filterwarnings('ignore')
# Use the fully qualified option name: the short pattern 'max_columns' is
# ambiguous on pandas releases that also define 'styler.render.max_columns'
# and makes set_option raise OptionError there.
pd.set_option('display.max_columns', 300)

In [15]:
# Directory holding the competition data, relative to this notebook.
# The trailing slash matters: preprocessor() builds the parquet paths by
# plain string concatenation onto this value.
data_dir = '../../data/'

In [3]:
# Function to calculate first WAP
def calc_wap1(df):
    """Return the weighted average price (WAP) of the first book level.

    Each side's price is weighted by the size quoted on the opposite side:
    (bid_price1 * ask_size1 + ask_price1 * bid_size1) / (bid_size1 + ask_size1).

    df must provide 'bid_price1', 'ask_price1', 'bid_size1' and 'ask_size1'.
    """
    bid_price, ask_price = df['bid_price1'], df['ask_price1']
    bid_size, ask_size = df['bid_size1'], df['ask_size1']
    cross_weighted = bid_price * ask_size + ask_price * bid_size
    total_size = bid_size + ask_size
    return cross_weighted / total_size

# Function to calculate second WAP
def calc_wap2(df):
    """Return the weighted average price (WAP) of the second book level.

    Each side's price is weighted by the size quoted on the opposite side:
    (bid_price2 * ask_size2 + ask_price2 * bid_size2) / (bid_size2 + ask_size2).

    df must provide 'bid_price2', 'ask_price2', 'bid_size2' and 'ask_size2'.
    """
    bid_price, ask_price = df['bid_price2'], df['ask_price2']
    bid_size, ask_size = df['bid_size2'], df['ask_size2']
    cross_weighted = bid_price * ask_size + ask_price * bid_size
    total_size = bid_size + ask_size
    return cross_weighted / total_size

In [4]:
# Function to calculate the log of the return
# Remember that logb(x / y) = logb(x) - logb(y)
def log_return(series):
    """Return the log returns of a price series.

    Computed as the first difference of the logs, since
    log(p_t / p_{t-1}) = log(p_t) - log(p_{t-1}); the first element has no
    predecessor and is therefore NaN.
    """
    log_prices = np.log(series)
    return log_prices.diff()

# Calculate the realized volatility
def realized_volatility(series):
    """Return the square root of the sum of squared values of `series`.

    Applied to log returns this is the realized volatility of the window.
    """
    squared = series ** 2
    sum_of_squares = np.sum(squared)
    return np.sqrt(sum_of_squares)

# Function to count unique elements of a series
def count_unique(series):
    """Return how many distinct values `series` contains."""
    distinct_values = np.unique(series)
    return distinct_values.size

In [5]:
# Function to read our base train and test set
def read_train_test():
    """Load the base train and test tables and add the merge key.

    Both CSV files are read from the notebook-level `data_dir` (instead of a
    second hard-coded copy of the path), so relocating the data only requires
    changing that one setting.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
        Each frame gains a 'row_id' column of the form "<stock_id>-<time_id>",
        which is the key used to join the book and trade features.
    """
    train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
    # Create a key to merge with book and trade data
    for frame in (train, test):
        frame['row_id'] = frame['stock_id'].astype(str) + '-' + frame['time_id'].astype(str)
    print(f'Our training set has {train.shape[0]} rows')
    return train, test

In [8]:
# Function to preprocess book data (for each stock id)
def book_preprocessor(file_path):
    """Build per-time_id order-book features for a single stock.

    Parameters
    ----------
    file_path : str
        Path of one stock's book parquet partition. The text after the first
        '=' is taken as the stock id (e.g. ".../book_train.parquet/stock_id=0").

    Returns
    -------
    pandas.DataFrame
        One row per time_id. Columns are "<feature>_<stat>" aggregates over the
        whole bucket, the same aggregates restricted to rows with
        seconds_in_bucket >= 400 / 300 / 200 (suffixed "_400", "_300", "_200"),
        and a 'row_id' column "<stock_id>-<time_id>" used for merging.
    """
    df = pd.read_parquet(file_path)

    # Calculate Wap
    df['wap1'] = calc_wap1(df)
    df['wap2'] = calc_wap2(df)

    # Calculate log returns inside each time_id. transform() always returns a
    # result aligned to df's own index; groupby(...).apply(...) can prepend the
    # group key to the index on newer pandas releases, after which the result
    # can no longer be assigned back as a column.
    df['log_return1'] = df.groupby(['time_id'])['wap1'].transform(log_return)
    df['log_return2'] = df.groupby(['time_id'])['wap2'].transform(log_return)

    # Calculate wap balance
    df['wap_balance'] = abs(df['wap1'] - df['wap2'])

    # Calculate spread
    df['price_spread'] = (df['ask_price1'] - df['bid_price1']) / ((df['ask_price1'] + df['bid_price1']) / 2)
    df['price_spread2'] = (df['ask_price2'] - df['bid_price2']) / ((df['ask_price2'] + df['bid_price2']) / 2)
    df['bid_spread'] = df['bid_price1'] - df['bid_price2']
    df['ask_spread'] = df['ask_price1'] - df['ask_price2']
    df["bid_ask_spread"] = abs(df['bid_spread'] - df['ask_spread'])
    df['total_volume'] = (df['ask_size1'] + df['ask_size2']) + (df['bid_size1'] + df['bid_size2'])
    df['volume_imbalance'] = abs((df['ask_size1'] + df['ask_size2']) - (df['bid_size1'] + df['bid_size2']))

    # Dict for aggregations
    create_feature_dict = {
        'wap1': [np.sum, np.mean, np.std],
        'wap2': [np.sum, np.mean, np.std],
        'log_return1': [np.sum, realized_volatility, np.mean, np.std],
        'log_return2': [np.sum, realized_volatility, np.mean, np.std],
        'wap_balance': [np.sum, np.mean, np.std],
        'price_spread':[np.sum, np.mean, np.std],
        'price_spread2':[np.sum, np.mean, np.std],
        'bid_spread':[np.sum, np.mean, np.std],
        'ask_spread':[np.sum, np.mean, np.std],
        'total_volume':[np.sum, np.mean, np.std],
        'volume_imbalance':[np.sum, np.mean, np.std],
        "bid_ask_spread":[np.sum, np.mean, np.std],
    }

    def get_stats_window(seconds_in_bucket, add_suffix = False):
        """Aggregate rows with seconds_in_bucket >= the threshold, per time_id."""
        # Group by the window
        df_feature = df[df['seconds_in_bucket'] >= seconds_in_bucket].groupby(['time_id']).agg(create_feature_dict).reset_index()

        # Flatten the (column, stat) MultiIndex; the key column becomes 'time_id_'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]

        # Add a suffix to differentiate windows
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
        return df_feature

    # Stats over the whole bucket, then left-join each late window onto them.
    # A time_id with no rows in a window simply gets NaN for that window.
    df_feature = get_stats_window(seconds_in_bucket = 0, add_suffix = False)
    window_key_cols = []
    for window in (400, 300, 200):
        window_key = f'time_id__{window}'
        df_window = get_stats_window(seconds_in_bucket = window, add_suffix = True)
        df_feature = df_feature.merge(df_window, how = 'left', left_on = 'time_id_', right_on = window_key)
        window_key_cols.append(window_key)

    # Drop unnecessary time_ids
    df_feature.drop(window_key_cols, axis = 1, inplace = True)

    # Create row_id so we can merge
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['time_id_'].apply(lambda x: f'{stock_id}-{x}')
    df_feature.drop(['time_id_'], axis = 1, inplace = True)

    return df_feature


In [9]:
# Function to preprocess trade data (for each stock id)
def trade_preprocessor(file_path):
    """Build per-time_id trade features for a single stock.

    Parameters
    ----------
    file_path : str
        Path of one stock's trade parquet partition. The text after the first
        '=' is taken as the stock id (e.g. ".../trade_train.parquet/stock_id=0").

    Returns
    -------
    pandas.DataFrame
        One row per time_id. Every feature column is prefixed with 'trade_':
        whole-bucket aggregates, hand-crafted price/size statistics, and the
        aggregates restricted to rows with seconds_in_bucket >= 400 / 300 / 200
        (suffixed "_400", "_300", "_200"). A 'row_id' column
        "<stock_id>-<time_id>" is added for merging.
    """
    df = pd.read_parquet(file_path)
    # transform() keeps the result aligned to df's index; groupby(...).apply(...)
    # can prepend the group key on newer pandas releases, which breaks the
    # column assignment.
    df['log_return'] = df.groupby('time_id')['price'].transform(log_return)

    # Dict for aggregations
    create_feature_dict = {
        'log_return':[realized_volatility],
        'seconds_in_bucket':[count_unique],
        'size':[np.sum, realized_volatility, np.mean, np.std, np.max, np.min],
        'order_count':[np.mean,np.sum,np.max],
    }

    def get_stats_window(seconds_in_bucket, add_suffix = False):
        """Aggregate rows with seconds_in_bucket >= the threshold, per time_id."""
        # Group by the window
        df_feature = df[df['seconds_in_bucket'] >= seconds_in_bucket].groupby(['time_id']).agg(create_feature_dict).reset_index()

        # Flatten the (column, stat) MultiIndex; the key column becomes 'time_id_'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]

        # Add a suffix to differentiate windows
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
        return df_feature

    def tendency(price, vol):
        """Sum of percentage price changes, each weighted by the trade size."""
        df_diff = np.diff(price)
        val = (df_diff/price[1:])*100
        power = np.sum(val*vol[1:])
        return(power)

    # Stats over the whole bucket
    df_feature = get_stats_window(seconds_in_bucket = 0, add_suffix = False)

    # Hand-crafted statistics per time_id. Iterating the groupby visits every
    # row once instead of re-filtering the whole frame for each time_id;
    # sort=False keeps the groups in order of first appearance.
    lis = []
    for n_time_id, df_id in df.groupby('time_id', sort = False):
        prices = df_id['price'].values
        sizes = df_id['size'].values
        mean_price = np.mean(prices)
        mean_size = np.mean(sizes)
        price_steps = np.diff(prices)

        lis.append({
            'time_id': n_time_id,
            'tendency': tendency(prices, sizes),
            # number of trades priced above / below the bucket mean
            'f_max': np.sum(prices > mean_price),
            'f_min': np.sum(prices < mean_price),
            # number of up-ticks / down-ticks between consecutive trades
            'df_max': np.sum(price_steps > 0),
            'df_min': np.sum(price_steps < 0),
            # median absolute deviation from the mean, mean square, IQR of price
            'abs_diff': np.median(np.abs(prices - mean_price)),
            'energy': np.mean(prices**2),
            'iqr_p': np.percentile(prices,75) - np.percentile(prices,25),
            # same for size (note: 'energy_v' is a sum, not a mean)
            'abs_diff_v': np.median(np.abs(sizes - mean_size)),
            'energy_v': np.sum(sizes**2),
            'iqr_p_v': np.percentile(sizes,75) - np.percentile(sizes,25),
        })

    df_lr = pd.DataFrame(lis)

    df_feature = df_feature.merge(df_lr, how = 'left', left_on = 'time_id_', right_on = 'time_id')

    # Left-join each late window; a time_id with no trades in a window gets NaN
    window_key_cols = []
    for window in (400, 300, 200):
        window_key = f'time_id__{window}'
        df_window = get_stats_window(seconds_in_bucket = window, add_suffix = True)
        df_feature = df_feature.merge(df_window, how = 'left', left_on = 'time_id_', right_on = window_key)
        window_key_cols.append(window_key)

    # Drop unnecessary time_ids
    df_feature.drop(window_key_cols + ['time_id'], axis = 1, inplace = True)
    df_feature = df_feature.add_prefix('trade_')
    stock_id = file_path.split('=')[1]
    df_feature['row_id'] = df_feature['trade_time_id_'].apply(lambda x:f'{stock_id}-{x}')
    df_feature.drop(['trade_time_id_'], axis = 1, inplace = True)

    return df_feature


In [10]:
# Function to get group stats for the stock_id and time_id
def get_time_stock(df):
    """Append per-stock and per-time_id summaries of the volatility features.

    For every realized-volatility column the mean, std, max and min are
    computed once across each stock_id (suffix "_stock") and once across each
    time_id (suffix "_time"), then left-joined back onto `df`.

    Returns a new DataFrame; the passed-in frame is not modified.
    """
    # Realized volatility columns to summarise
    vol_cols = ['log_return1_realized_volatility', 'log_return2_realized_volatility', 'log_return1_realized_volatility_400', 'log_return2_realized_volatility_400', 
                'log_return1_realized_volatility_300', 'log_return2_realized_volatility_300', 'log_return1_realized_volatility_200', 'log_return2_realized_volatility_200', 
                'trade_log_return_realized_volatility', 'trade_log_return_realized_volatility_400', 'trade_log_return_realized_volatility_300', 'trade_log_return_realized_volatility_200']

    def _summarise(frame, key, tag):
        # Aggregate per `key`, flatten the (column, stat) header and tag every
        # column; the key column itself ends up named "<key>__<tag>".
        summary = frame.groupby([key])[vol_cols].agg(['mean', 'std', 'max', 'min', ]).reset_index()
        summary.columns = ['_'.join(col) for col in summary.columns]
        return summary.add_suffix('_' + tag)

    # Group by the stock id
    per_stock = _summarise(df, 'stock_id', 'stock')
    # Group by the time id
    per_time = _summarise(df, 'time_id', 'time')

    # Merge with original dataframe, then discard the duplicated key columns
    merged = df.merge(per_stock, how = 'left', left_on = ['stock_id'], right_on = ['stock_id__stock'])
    merged = merged.merge(per_time, how = 'left', left_on = ['time_id'], right_on = ['time_id__time'])
    return merged.drop(['stock_id__stock', 'time_id__time'], axis = 1)

In [11]:
# Funtion to make preprocessing function in parallel (for each stock id)
def preprocessor(list_stock_ids, is_train = True):
    """Build book + trade features for many stocks in parallel.

    Parameters
    ----------
    list_stock_ids : iterable
        Stock ids to process; each one selects a "stock_id=<id>" partition
        under the notebook-level `data_dir`.
    is_train : bool
        True reads the *_train.parquet datasets, False the *_test.parquet ones.

    Returns
    -------
    pandas.DataFrame
        The per-stock feature frames stacked into one frame with a fresh index.
    """
    def for_joblib(stock_id):
        # Pick the train or test partitions for this stock
        if is_train:
            book_path = data_dir + f"book_train.parquet/stock_id={stock_id}"
            trade_path = data_dir + f"trade_train.parquet/stock_id={stock_id}"
        else:
            book_path = data_dir + f"book_test.parquet/stock_id={stock_id}"
            trade_path = data_dir + f"trade_test.parquet/stock_id={stock_id}"

        # Book features are the left side, so every book time_id is kept even
        # when the stock has no trades for it
        book_features = book_preprocessor(book_path)
        trade_features = trade_preprocessor(trade_path)
        return book_features.merge(trade_features, on = 'row_id', how = 'left')

    # One joblib task per stock, using all available cores
    tasks = (delayed(for_joblib)(stock_id) for stock_id in list_stock_ids)
    per_stock_frames = Parallel(n_jobs = -1, verbose = 1)(tasks)
    # Concatenate all the dataframes returned from Parallel
    return pd.concat(per_stock_frames, ignore_index = True)

In [12]:
# Function to calculate the root mean squared percentage error
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error of `y_pred` against `y_true`.

    Errors are taken relative to `y_true`, so the targets must be non-zero.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_squared = np.mean(np.square(relative_error))
    return np.sqrt(mean_squared)

# Function to early stop with root mean squared percentage error
def feval_rmspe(y_pred, lgb_train):
    """Custom LightGBM evaluation function reporting RMSPE.

    Returns the (name, value, is_higher_better) triple; the last element is
    False because a lower RMSPE is better.
    """
    labels = lgb_train.get_label()
    score = rmspe(labels, y_pred)
    return 'RMSPE', score, False


In [13]:
def train_and_evaluate(train, test, n_splits = 5):
    """Train LightGBM with K-fold cross-validation and predict the test set.

    Parameters
    ----------
    train : pandas.DataFrame
        Feature frame containing 'row_id', 'time_id', 'stock_id' and 'target'.
    test : pandas.DataFrame
        Feature frame with the same feature columns. A 'target' column left
        over from a previous run of the notebook is ignored.
    n_splits : int, default 5
        Number of CV folds; test predictions are averaged over this many models.

    Returns
    -------
    numpy.ndarray
        Fold-averaged predictions for `test`. The out-of-fold RMSPE on `train`
        is printed as a side effect.
    """
    # Hyperparameters (just basic)
    params = {
      'objective': 'rmse',  
      'boosting_type': 'gbdt',
      'num_leaves': 100,
      'n_jobs': -1,
      'learning_rate': 0.1,
      'feature_fraction': 0.8,
      'bagging_fraction': 0.8,
      'verbose': -1
    }

    # Split features and target
    x = train.drop(['row_id', 'target', 'time_id'], axis = 1)
    y = train['target']
    # errors='ignore' lets the cell be re-run after `test['target']` has been
    # written, without feeding that column to the model as a feature
    x_test = test.drop(['row_id', 'time_id', 'target'], axis = 1, errors = 'ignore')
    # Transform stock id to a numeric value
    x['stock_id'] = x['stock_id'].astype(int)
    x_test['stock_id'] = x_test['stock_id'].astype(int)

    # Create out of folds array
    oof_predictions = np.zeros(x.shape[0])
    # Create test array to store predictions
    test_predictions = np.zeros(x_test.shape[0])
    # Create a KFold object
    kfold = KFold(n_splits = n_splits, random_state = 66, shuffle = True)

    # Early stopping and periodic logging are passed as callbacks: the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments were removed
    # from lgb.train in LightGBM 4. Older releases name the logging callback
    # `print_evaluation`.
    log_evaluation = getattr(lgb, 'log_evaluation', None) or lgb.print_evaluation

    # Iterate through each fold
    for fold, (trn_ind, val_ind) in enumerate(kfold.split(x)):
        print(f'Training fold {fold + 1}')
        x_train, x_val = x.iloc[trn_ind], x.iloc[val_ind]
        y_train, y_val = y.iloc[trn_ind], y.iloc[val_ind]
        # Weighting each row by 1 / target^2 turns the squared error into a
        # squared percentage error, matching the RMSPE metric
        train_weights = 1 / np.square(y_train)
        val_weights = 1 / np.square(y_val)
        train_dataset = lgb.Dataset(x_train, y_train, weight = train_weights, categorical_feature = ['stock_id'])
        val_dataset = lgb.Dataset(x_val, y_val, weight = val_weights, categorical_feature = ['stock_id'])
        model = lgb.train(params = params, 
                          train_set = train_dataset, 
                          valid_sets = [train_dataset, val_dataset], 
                          num_boost_round = 10000, 
                          feval = feval_rmspe,
                          # fresh callback objects for every fold
                          callbacks = [lgb.early_stopping(50), log_evaluation(50)])
        # Add predictions to the out of folds array
        oof_predictions[val_ind] = model.predict(x_val)
        # Predict the test set; each fold contributes an equal share
        test_predictions += model.predict(x_test) / n_splits

    rmspe_score = rmspe(y, oof_predictions)
    print(f'Our out of folds RMSPE is {rmspe_score}')
    # Return test predictions
    return test_predictions

In [16]:
# Read train and test (both frames come back with a 'row_id' merge key)
train, test = read_train_test()

# Get unique stock ids 
train_stock_ids = train['stock_id'].unique()

# Preprocess them using Parallel and our single stock id functions
# (this is the slow step: one book + trade parquet partition is read per stock)
train_ = preprocessor(train_stock_ids, is_train = True)
# Left join keeps every labelled row even if no features were produced for it
train = train.merge(train_, on = ['row_id'], how = 'left')

# Get unique stock ids 
test_stock_ids = test['stock_id'].unique()

# Preprocess them using Parallel and our single stock id functions
test_ = preprocessor(test_stock_ids, is_train = False)
test = test.merge(test_, on = ['row_id'], how = 'left')

# Get group stats of time_id and stock_id
# (computed separately on each frame, so test stats only use test rows)
train = get_time_stock(train)
test = get_time_stock(test)


Our training set has 428932 rows


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:  6.6min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.1s finished


In [17]:
# tau: inverse square root of the number of distinct seconds with trades,
# for the whole bucket and for each late window
tau_source_columns = {
    'size_tau': 'trade_seconds_in_bucket_count_unique',
    'size_tau_400': 'trade_seconds_in_bucket_count_unique_400',
    'size_tau_300': 'trade_seconds_in_bucket_count_unique_300',
    'size_tau_200': 'trade_seconds_in_bucket_count_unique_200',
}

# tau2: same idea based on the whole-bucket order count, scaled by a fixed
# numerator per window
tau2_numerators = {
    'size_tau2': 1,
    'size_tau2_400': 0.25,
    'size_tau2_300': 0.5,
    'size_tau2_200': 0.75,
}

# Apply the identical feature definitions to both frames (columns are added in place)
for frame in (train, test):
    for tau_column, count_column in tau_source_columns.items():
        frame[tau_column] = np.sqrt(1/frame[count_column])

    for tau2_column, numerator in tau2_numerators.items():
        frame[tau2_column] = np.sqrt(numerator/frame['trade_order_count_sum'])

    # delta tau
    frame['size_tau2_d'] = frame['size_tau2_400'] - frame['size_tau2']

In [18]:
# Train with cross-validation and get the fold-averaged test predictions
test_predictions = train_and_evaluate(train, test)
# Save test predictions
# (this adds a 'target' column to `test` before writing row_id/target to disk)
test['target'] = test_predictions
test[['row_id', 'target']].to_csv('submission.csv',index = False)

Training fold 1
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000438401	training's RMSPE: 0.203048	valid_1's rmse: 0.000468097	valid_1's RMSPE: 0.215996
[100]	training's rmse: 0.000403588	training's RMSPE: 0.186925	valid_1's rmse: 0.000445559	valid_1's RMSPE: 0.205596
[150]	training's rmse: 0.000385046	training's RMSPE: 0.178337	valid_1's rmse: 0.000438277	valid_1's RMSPE: 0.202236
[200]	training's rmse: 0.000371042	training's RMSPE: 0.17185	valid_1's rmse: 0.000435078	valid_1's RMSPE: 0.20076
[250]	training's rmse: 0.0003597	training's RMSPE: 0.166598	valid_1's rmse: 0.000434896	valid_1's RMSPE: 0.200676
Early stopping, best iteration is:
[249]	training's rmse: 0.000359894	training's RMSPE: 0.166687	valid_1's rmse: 0.000434334	valid_1's RMSPE: 0.200417
Training fold 2
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000438707	training's RMSPE: 0.202932	valid_1's rmse: 0.000457016	valid_1's RMSPE: 0.211961
[100]

### Additional experiments

In [21]:
#revise the original train_and_evaluate a bit:
#instead of hardcoding hyperparameter set,making it a function parameter
def train_and_evaluate2(train, test, params, n_splits = 5):
    """Train LightGBM with K-fold cross-validation using caller-supplied params.

    Same procedure as `train_and_evaluate`, except that the hyperparameters
    are passed in instead of being hard-coded.

    Parameters
    ----------
    train : pandas.DataFrame
        Feature frame containing 'row_id', 'time_id', 'stock_id' and 'target'.
    test : pandas.DataFrame
        Feature frame with the same feature columns. A 'target' column left
        over from a previous run of the notebook is ignored.
    params : dict
        LightGBM training parameters, passed to `lgb.train` unchanged.
    n_splits : int, default 5
        Number of CV folds; test predictions are averaged over this many models.

    Returns
    -------
    numpy.ndarray
        Fold-averaged predictions for `test`. The out-of-fold RMSPE on `train`
        is printed as a side effect.
    """
    # Split features and target
    x = train.drop(['row_id', 'target', 'time_id'], axis = 1)
    y = train['target']
    # errors='ignore' lets the cell be re-run after `test['target']` has been
    # written, without feeding that column to the model as a feature
    x_test = test.drop(['row_id', 'time_id', 'target'], axis = 1, errors = 'ignore')
    # Transform stock id to a numeric value
    x['stock_id'] = x['stock_id'].astype(int)
    x_test['stock_id'] = x_test['stock_id'].astype(int)

    # Create out of folds array
    oof_predictions = np.zeros(x.shape[0])
    # Create test array to store predictions
    test_predictions = np.zeros(x_test.shape[0])
    # Create a KFold object
    kfold = KFold(n_splits = n_splits, random_state = 66, shuffle = True)

    # Early stopping and periodic logging are passed as callbacks: the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments were removed
    # from lgb.train in LightGBM 4. Older releases name the logging callback
    # `print_evaluation`.
    log_evaluation = getattr(lgb, 'log_evaluation', None) or lgb.print_evaluation

    # Iterate through each fold
    for fold, (trn_ind, val_ind) in enumerate(kfold.split(x)):
        print(f'Training fold {fold + 1}')
        x_train, x_val = x.iloc[trn_ind], x.iloc[val_ind]
        y_train, y_val = y.iloc[trn_ind], y.iloc[val_ind]
        # Weighting each row by 1 / target^2 turns the squared error into a
        # squared percentage error, matching the RMSPE metric
        train_weights = 1 / np.square(y_train)
        val_weights = 1 / np.square(y_val)
        train_dataset = lgb.Dataset(x_train, y_train, weight = train_weights, categorical_feature = ['stock_id'])
        val_dataset = lgb.Dataset(x_val, y_val, weight = val_weights, categorical_feature = ['stock_id'])
        model = lgb.train(params = params, 
                          train_set = train_dataset, 
                          valid_sets = [train_dataset, val_dataset], 
                          num_boost_round = 10000, 
                          feval = feval_rmspe,
                          # fresh callback objects for every fold
                          callbacks = [lgb.early_stopping(50), log_evaluation(50)])
        # Add predictions to the out of folds array
        oof_predictions[val_ind] = model.predict(x_val)
        # Predict the test set; each fold contributes an equal share
        test_predictions += model.predict(x_test) / n_splits

    rmspe_score = rmspe(y, oof_predictions)
    print(f'Our out of folds RMSPE is {rmspe_score}')
    # Return test predictions
    return test_predictions

#### note
The following hyperparameters are copied from this notebook: https://www.kaggle.com/felipefonte99/optiver-lgb-with-optimized-params/notebook

In [22]:
# Single seed reused for every LightGBM source of randomness below
seed = 42
# Hyperparameter set passed to train_and_evaluate2
params = {
    'learning_rate': 0.135,        
    # L1 / L2 regularisation strengths
    'lambda_l1': 2,
    'lambda_l2': 7,
    # NOTE(review): with max_depth = 3 a tree can presumably hold at most
    # 2**3 = 8 leaves, so num_leaves = 769 is unlikely to be the binding
    # limit -- confirm against the LightGBM parameter docs
    'num_leaves': 769,
    'min_sum_hessian_in_leaf': 20,
    # Column subsampling, per tree and per node
    'feature_fraction': 0.79,
    'feature_fraction_bynode': 0.8,
    # Row subsampling settings
    'bagging_fraction': 0.97,
    'bagging_freq': 42,
    'min_data_in_leaf': 690,
    'max_depth': 3,
    'seed': seed,
    'feature_fraction_seed': seed,
    'bagging_seed': seed,
    'drop_seed': seed,
    'data_random_seed': seed,
    'objective': 'rmse',
    'boosting': 'gbdt',
    'verbosity': -1,
    'n_jobs': -1,
}   

In [24]:
# Read train and test
# NOTE(review): this rebinds `train` and `test` to freshly read frames, so the
# size_tau* columns added to the previous frames are no longer present from
# here on -- confirm that training without them is intended.
train, test = read_train_test()

# Get unique stock ids 
train_stock_ids = train['stock_id'].unique()
# Preprocess them using Parallel and our single stock id functions
# (repeats the full per-stock feature extraction)
train_ = preprocessor(train_stock_ids, is_train = True)
train = train.merge(train_, on = ['row_id'], how = 'left')

# Get unique stock ids 
test_stock_ids = test['stock_id'].unique()
# Preprocess them using Parallel and our single stock id functions
test_ = preprocessor(test_stock_ids, is_train = False)
test = test.merge(test_, on = ['row_id'], how = 'left')

# Get group stats of time_id and stock_id
train = get_time_stock(train)
test = get_time_stock(test)

Our training set has 428932 rows


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:  6.3min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.1s finished


In [29]:
import pickle

# Cache the engineered training frame next to the raw data so the slow
# preprocessing does not have to be repeated. The location comes from the
# notebook-level `data_dir` rather than a second hard-coded copy of the path.
# Only load this file back from a trusted location: unpickling runs
# arbitrary code.
with open(os.path.join(data_dir, 'train.pickle'), 'wb') as f:
    pickle.dump(train, f)

In [25]:
# Train with cross-validation using the copied hyperparameter set
test_predictions = train_and_evaluate2(train, test,params)
# Save test predictions
# (this adds a 'target' column to `test` before writing row_id/target to disk)
test['target'] = test_predictions
test[['row_id', 'target']].to_csv('submission2.csv',index = False)

Training fold 1
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000491021	training's RMSPE: 0.22742	valid_1's rmse: 0.000501054	valid_1's RMSPE: 0.231204
[100]	training's rmse: 0.000478901	training's RMSPE: 0.221806	valid_1's rmse: 0.000489587	valid_1's RMSPE: 0.225912
[150]	training's rmse: 0.000471199	training's RMSPE: 0.218239	valid_1's rmse: 0.00048259	valid_1's RMSPE: 0.222684
[200]	training's rmse: 0.000464836	training's RMSPE: 0.215292	valid_1's rmse: 0.000477654	valid_1's RMSPE: 0.220406
[250]	training's rmse: 0.000460029	training's RMSPE: 0.213066	valid_1's rmse: 0.000474028	valid_1's RMSPE: 0.218733
[300]	training's rmse: 0.000455725	training's RMSPE: 0.211072	valid_1's rmse: 0.000470441	valid_1's RMSPE: 0.217078
[350]	training's rmse: 0.000451389	training's RMSPE: 0.209064	valid_1's rmse: 0.000466978	valid_1's RMSPE: 0.21548
[400]	training's rmse: 0.000447946	training's RMSPE: 0.207469	valid_1's rmse: 0.000464218	valid_1's RMSPE: 0.214206

[700]	training's rmse: 0.000431187	training's RMSPE: 0.199453	valid_1's rmse: 0.00044693	valid_1's RMSPE: 0.207283
[750]	training's rmse: 0.000428901	training's RMSPE: 0.198396	valid_1's rmse: 0.00044539	valid_1's RMSPE: 0.206569
[800]	training's rmse: 0.000426734	training's RMSPE: 0.197393	valid_1's rmse: 0.000444103	valid_1's RMSPE: 0.205972
[850]	training's rmse: 0.000424779	training's RMSPE: 0.196489	valid_1's rmse: 0.000442949	valid_1's RMSPE: 0.205437
[900]	training's rmse: 0.000422802	training's RMSPE: 0.195574	valid_1's rmse: 0.000441722	valid_1's RMSPE: 0.204868
[950]	training's rmse: 0.000421067	training's RMSPE: 0.194772	valid_1's rmse: 0.000440677	valid_1's RMSPE: 0.204383
[1000]	training's rmse: 0.000419357	training's RMSPE: 0.193981	valid_1's rmse: 0.000439571	valid_1's RMSPE: 0.20387
[1050]	training's rmse: 0.000417382	training's RMSPE: 0.193067	valid_1's rmse: 0.000438787	valid_1's RMSPE: 0.203506
[1100]	training's rmse: 0.000415632	training's RMSPE: 0.192258	valid_1's 

Early stopping, best iteration is:
[1512]	training's rmse: 0.000404289	training's RMSPE: 0.187125	valid_1's rmse: 0.000433244	valid_1's RMSPE: 0.200446
Training fold 4
Training until validation scores don't improve for 50 rounds
[50]	training's rmse: 0.000492036	training's RMSPE: 0.227375	valid_1's rmse: 0.000514474	valid_1's RMSPE: 0.239545
[100]	training's rmse: 0.000479776	training's RMSPE: 0.221709	valid_1's rmse: 0.000502794	valid_1's RMSPE: 0.234107
[150]	training's rmse: 0.000472482	training's RMSPE: 0.218339	valid_1's rmse: 0.00049716	valid_1's RMSPE: 0.231483
[200]	training's rmse: 0.00046601	training's RMSPE: 0.215348	valid_1's rmse: 0.000491648	valid_1's RMSPE: 0.228917
[250]	training's rmse: 0.000460586	training's RMSPE: 0.212842	valid_1's rmse: 0.000486291	valid_1's RMSPE: 0.226423
[300]	training's rmse: 0.000455733	training's RMSPE: 0.210599	valid_1's rmse: 0.000482708	valid_1's RMSPE: 0.224754
[350]	training's rmse: 0.000451492	training's RMSPE: 0.208639	valid_1's rmse: 

[2100]	training's rmse: 0.000391138	training's RMSPE: 0.181245	valid_1's rmse: 0.000429847	valid_1's RMSPE: 0.197963
[2150]	training's rmse: 0.00039032	training's RMSPE: 0.180866	valid_1's rmse: 0.000429751	valid_1's RMSPE: 0.197919
[2200]	training's rmse: 0.000389509	training's RMSPE: 0.18049	valid_1's rmse: 0.000429554	valid_1's RMSPE: 0.197829
[2250]	training's rmse: 0.000388583	training's RMSPE: 0.180061	valid_1's rmse: 0.000429012	valid_1's RMSPE: 0.197579
[2300]	training's rmse: 0.000387759	training's RMSPE: 0.179679	valid_1's rmse: 0.00042868	valid_1's RMSPE: 0.197426
[2350]	training's rmse: 0.000386955	training's RMSPE: 0.179306	valid_1's rmse: 0.000428504	valid_1's RMSPE: 0.197345
[2400]	training's rmse: 0.000386189	training's RMSPE: 0.178952	valid_1's rmse: 0.000428431	valid_1's RMSPE: 0.197311
[2450]	training's rmse: 0.00038541	training's RMSPE: 0.178591	valid_1's rmse: 0.00042812	valid_1's RMSPE: 0.197168
[2500]	training's rmse: 0.000384534	training's RMSPE: 0.178184	valid_