In [1]:
!pip install catboost



In [2]:
!pip install scikit-optimize



Now that we have fixed and generated three feature subsets
1. non-lagged + lagged textual features
2. lagged {target,item,shop} + non-lagged basic categories
3. lagged features within shop

and three first level classifiers types for each
* a.  CatBoost
* b. RidgeCV 
* c. Random Forest (sklearn) 

we search for hyperarameters that are used for predicting a month 
based on twelve month history, with one month gap between training and prediction periods.

This is a compromise of the prediction quality on the other hand, and not having the prediction 
quality and optimal hyperparameters vary too much over the training period when generating the first level predictions as input features of second stacking level.

The search for hyperparameters is problematic in whole because the chosen validation scheme is lacking. There may not be
too much that can be done, because the validation data necessarily has different distribution as the actual testing data.
This is because the temporal nature of the prediction problem. The distributions slowly drift during cause of time. Therefore, 
it is good to have the validation period temporally close to the test period. On the other hand, data analysis shows strong seasonal=(yearly) effects. 
Predicting October sales based on previous year simply is a very different problem to predicting December sales, as sales figures seem to peak strongly in December and have special characteristics.

We decide to search for such hyperparameters that maximise the quality of predictions (with
reasonable computational burden) in the hold-out validation data of Oct 2015. This is despite the fact that we have seen in examples that
such optimal model hyperparameters do not result in optimal prediction quality for Dec 2015.
We specifically do not search for such hyperparameters (via a coross-validation scheme) that would maximise the quality of predictions during
the training period, as the value of temporally distant predictions is questionable after because of the distribution shift throughtime.


The parameters are used for
a) creating submissions for ensembling using simple schemes
b) generating level 2 input features for a stacking algorithm



In [3]:
import sys
import os.path
import json
import numpy as np
import pandas as pd 
import sklearn
import scipy.sparse 
from itertools import product
import gc
#from tqdm import tqdm_notebook
import re
from catboost import CatBoostRegressor, Pool

from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer

from skopt import BayesSearchCV
from skopt.callbacks import DeadlineStopper, VerboseCallback, DeltaXStopper
from skopt.space import Real, Categorical, Integer

from time import time
import pprint


IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
  from google.colab import drive
  drive.mount('/content/gdrive') 
  if not os.path.isfile('SETTINGS.json'):
       # hard coded data directory in drive is used if SETTINGS.json not present 
       config={}
       config['DATA_DIR'] = '/content/gdrive/My Drive/kaggle-c1'
       with open('SETTINGS.json', 'w') as outfile:
         json.dump(config, outfile)

with open('SETTINGS.json') as config_file:
    config = json.load(config_file)

DATA_DIR = config['DATA_DIR']

print('Using DATA_DIR ', DATA_DIR)

DATA_FOLDER = DATA_DIR

Using DATA_DIR  e:\repos\kaggle-c1\competitive-data-science-predict-future-sales


In [4]:
test_spec = pd.read_csv(os.path.join(DATA_FOLDER, 'test.csv'))

index_cols=['item_id','shop_id','date_block_num']
date_block_val = 33
date_block_test = 35 # Dec 2015

In [5]:
test_spec

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320
2,2,5,5233
3,3,5,5232
4,4,5,5268
...,...,...,...
214195,214195,45,18454
214196,214196,45,16188
214197,214197,45,15757
214198,214198,45,19648


In [6]:
# a wrapper class to use pre-defined division to training and hold-out set
# as a cross-validation object

class HoldOut:
    """
    Hold-out cross-validator generator. In the hold-out, the
    data is split only once into a train set and a test set.
    Here the split is given as a input parameter in the class initialisation
    Unlike in other cross-validation schemes, the hold-out
    consists of only one iteration.

    Parameters
    ----------
    train_indices, test_indices : the class just passes on these when yielding splits


    """

    def __init__(self, train_indices, test_indices):
        self.train_indices = train_indices
        self.test_indices = test_indices

    def __iter__(self):
        yield self.train_indices, self.test_indices

Define helper functions

In [7]:
def downcast_dtypes(df):
    '''
        Changes column types in the dataframe: 
                
                `float64` type to `float32`
                `int64`   type to `int32`
    '''
    
    # Select columns to downcast
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols =   [c for c in df if df[c].dtype == "int64"]
    
    # Downcast
    df[float_cols] = df[float_cols].astype(np.float32)
    df[int_cols]   = df[int_cols].astype(np.int32)
    
    return df

In [8]:
def write_predictions_by_array(array, filename):
  df=pd.DataFrame(array)
  df.columns=['item_cnt_month']
  df.to_csv(os.path.join(DATA_FOLDER, filename), index_label='ID')

In [9]:
def clipped_rmse(gt, predicted,clip_min=0, clip_max=20):
  target=np.minimum(np.maximum(gt,clip_min), clip_max)
  return np.sqrt((target-predicted)**2).mean()

In [10]:
def report_perf(optimizer, X, y, title, callbacks=None):
    """
    A wrapper for measuring time and performances of different optmizers
    
    optimizer = a sklearn or a skopt optimizer
    X = the training set 
    y = our target
    title = a string label for the experiment
    """
    start = time()
    if callbacks:
        optimizer.fit(X, y, callback=callbacks)
    else:
        optimizer.fit(X, y)
    d=pd.DataFrame(optimizer.cv_results_)
    best_score = optimizer.best_score_
    best_score_std = d.iloc[optimizer.best_index_].std_test_score
    best_params = optimizer.best_params_
    print((title + " took %.2f seconds,  candidates checked: %d, best CV score: %.3f "
           +u"\u00B1"+" %.3f") % (time() - start, 
                                  len(optimizer.cv_results_['params']),
                                  best_score,
                                  best_score_std))    
    print('Best parameters:')
    pprint.pprint(best_params)
    print()
    return best_params
    

In [11]:
def find_catboost_parameters_bayes(feature_file_name, search_space=None, n_iter=100):
    all_data = pd.read_csv(os.path.join(DATA_FOLDER, feature_file_name))

    dates=all_data['date_block_num']

    dates_train = (dates>= date_block_val - 13) & (dates<= date_block_val - 2)
    dates_trainval = (dates>= date_block_test - 13) & (dates<= date_block_test - 2)

# extract training, validation and test sets (labels and features)

    y_train=all_data.loc[dates_train, 'target']
    y_trainval=all_data.loc[dates_trainval, 'target']
    y_val = all_data.loc[dates == date_block_val, 'target']

    to_drop_cols = ['target','date_block_num']

    X_train = all_data.loc[dates_train].drop(to_drop_cols, axis=1)
    X_trainval = all_data.loc[dates_trainval].drop(to_drop_cols, axis=1)
    X_val = all_data.loc[dates == date_block_val].drop(to_drop_cols, axis=1)
    X_test = all_data.loc[dates == date_block_test].drop(to_drop_cols, axis=1)

    # determine how to permute test set predictions for submission generation 

    shop_item2submissionid={}
    for idx, row in test_spec.iterrows():
        shop_item2submissionid[str(row['shop_id'])+'_'+str(row['item_id'])] = row['ID']

    test_data=all_data.loc[dates == date_block_test, ['shop_id','item_id']]    

    testidx2submissionidx=np.zeros(test_data.shape[0], dtype=np.int32)
    for idx in range(test_data.shape[0]):
        row =test_data.iloc[idx]
        testidx2submissionidx[idx] = shop_item2submissionid[str(row['shop_id'])+'_'+str(row['item_id'])]

    #invert the mapping
    submissionidx2testidx=np.zeros(test_data.shape[0], dtype=np.int32)
    for i in range(test_data.shape[0]):
        submissionidx2testidx[testidx2submissionidx[i]]=i

    del test_data
    gc.collect()    

    X_paramsearch =  pd.concat([X_train, X_val],ignore_index=True)
    y_paramsearch = pd.concat([y_train, y_val],ignore_index=True)
    
    train_indices = np.arange(X_train.shape[0])
    val_indices = np.arange(X_val.shape[0]) + X_train.shape[0]
    
    mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

    if search_space is None: 
        search_space = {'iterations': Integer(300, 1500),
                        'depth': Integer(8, 16),
                        'learning_rate': Real(0.01, 1.0, 'log-uniform'),
                        'random_strength': Real(1, 100, 'log-uniform'),
                        'l2_leaf_reg': Real(0.001, 2.0, 'log-uniform'),
                        }

    clf = CatBoostRegressor(task_type='GPU',has_time=True, verbose=False)


    # Setting up BayesSearchCV

    cv = HoldOut(train_indices=train_indices, test_indices=val_indices)

    opt = BayesSearchCV(clf,
                        search_space,
                        scoring=mse_scorer,
                        cv=cv,
                        n_iter=n_iter,
                        n_jobs=1,  # use just 1 job with CatBoost in order to avoid segmentation fault
                        return_train_score=False,
                        refit=False,
                        optimizer_kwargs={'base_estimator': 'GP'})

    best_params = report_perf(opt, X_paramsearch, y_paramsearch,'CatBoost', 
                               callbacks=[VerboseCallback(100)])
    
    return best_params

# Feature set 1: non-lagged and lagged basic categories

In [12]:
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv')
#print(best_params_basic)
# CatBoost took 5376.07 seconds,  candidates checked: 100, best CV score: -20.311 ± 0.000
#Best parameters:
#OrderedDict([('depth', 9),
#             ('iterations', 339),
#             ('l2_leaf_reg', 0.1043327165183886),
#             ('learning_rate', 0.03646613151300171),
#             ('random_strength', 70.08445579850765)])
#
# OrderedDict([('depth', 9), ('iterations', 339), ('l2_leaf_reg', 0.1043327165183886), ('learning_rate', 0.03646613151300171), ('random_strength', 70.08445579850765)])

#CatBoost took 6637.58 seconds,  candidates checked: 100, best CV score: -19.772 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 300),
#             ('l2_leaf_reg', 2.0),
#             ('learning_rate', 0.08922738789933286),
#             ('random_strength', 100.0)])
#
#OrderedDict([('depth', 8), ('iterations', 300), ('l2_leaf_reg', 2.0), ('learning_rate', 0.08922738789933286), ('random_strength', 100.0)])

#CatBoost took 4328.61 seconds,  candidates checked: 100, best CV score: -17.769 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 100.0)])
#
#OrderedDict([('depth', 8), ('iterations', 1500), ('l2_leaf_reg', 0.001), ('learning_rate', 0.1813605946638354), ('random_strength', 100.0)])
#CatBoost took 5568.42 seconds,  candidates checked: 100, best CV score: -17.705 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 300),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.21121409098653413),
#             ('random_strength', 100.0)])
#
#OrderedDict([('depth', 8), ('iterations', 300), ('l2_leaf_reg', 0.001), ('learning_rate', 0.21121409098653413), ('random_strength', 100.0)])

In [13]:
# from the best point found, start line searches wrt. one parameter at time
# start from ones that reach the limit (=random strength, l2_leaf_reg)
#search_space = {'iterations': Categorical([1500]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.1813605946638354]),
#                'random_strength': Real(80,3000,'log-uniform'),
#                'l2_leaf_reg': Categorical([0.001]),
#                }

#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 1424.11 seconds,  candidates checked: 20, best CV score: -18.071 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 80.0)])
#
#OrderedDict([('depth', 8), ('iterations', 1500), ('l2_leaf_reg', 0.001), ('learning_rate', 0.1813605946638354), ('random_strength', 80.0)])



In [14]:
#search_space = {'iterations': Categorical([1500]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.1813605946638354]),
#                'random_strength': Real(1,80,'log-uniform'),
#                'l2_leaf_reg': Categorical([0.001]),
#                }

#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 1431.23 seconds,  candidates checked: 20, best CV score: -18.482 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 79.99999999999999)])

#OrderedDict([('depth', 8), ('iterations', 1500), ('l2_leaf_reg', 0.001), ('learning_rate', 0.1813605946638354), ('random_strength', 79.99999999999999)])


In [15]:
#search_space = {'iterations': Categorical([1500]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.1813605946638354]),
#                'random_strength': Categorical([80]),
#                'l2_leaf_reg': Real(0.00001,0.1,'log-uniform'),
#                }#
#
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 1426.77 seconds,  candidates checked: 20, best CV score: -17.929 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 80)])
#
#OrderedDict([('depth', 8), ('iterations', 1500), ('l2_leaf_reg', 0.005744167167485476), ('learning_rate', 0.1813605946638354), ('random_strength', 80)])

In [16]:
#search_space = {'iterations': Integer(100,2000,'log-uniform'),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.1813605946638354]),
#                'random_strength': Categorical([80]),
#                'l2_leaf_reg': Categorical([ 0.005744167167485476]),
#                }
#
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 901.52 seconds,  candidates checked: 20, best CV score: -17.552 ± 0.000
#Best parameters:
#rderedDict([('depth', 8),
#             ('iterations', 107),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 80)])

# OrderedDict([('depth', 8), ('iterations', 107), ('l2_leaf_reg', 0.005744167167485476), ('learning_rate', 0.1813605946638354), ('random_strength', 80)])

In [17]:
#search_space = {'iterations': Categorical([107]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Real(0.1,0.3,'uniform'),
#                'random_strength': Categorical([80]),
#                'l2_leaf_reg': Categorical([ 0.005744167167485476]),
#                }
#
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)
#
#Iteration No: 21 started. Searching for the next optimal point.
#CatBoost took 146.82 seconds,  candidates checked: 20, best CV score: -17.566 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 107),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.23068135624313127),
#             ('random_strength', 80)])
#
#OrderedDict([('depth', 8), ('iterations', 107), ('l2_leaf_reg', 0.005744167167485476), ('learning_rate', 0.23068135624313127), ('random_strength', 80)])


Experiment with some additional parameters

In [18]:
#search_space = {'iterations': Categorical([107]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.181]),
#                'random_strength': Categorical([80]),
#                'l2_leaf_reg': Categorical([ 0.005744167167485476]),
#                'bagging_temperature': Real(0.01,100,'log-uniform')
#                }
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 141.68 seconds,  candidates checked: 20, best CV score: -17.738 ± 0.000
#Best parameters:
#OrderedDict([('bagging_temperature', 0.9784460719042715),
#             ('depth', 8),
#             ('iterations', 107),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.181),
#            ('random_strength', 80)])
# OrderedDict([('bagging_temperature', 0.9784460719042715), ('depth', 8), ('iterations', 107), ('l2_leaf_reg', 0.005744167167485476), ('learning_rate', 0.181), ('random_strength', 80)])

# no improvement from here


In [19]:
#search_space = {'iterations': Categorical([107]),   
#                'depth': Categorical([8]), 
#                'learning_rate': Categorical([0.181]),
#                'random_strength': Categorical([80]),
#                'l2_leaf_reg': Categorical([ 0.005744167167485476]),
#                'grow_policy': Categorical(['SymmetricTree','Depthwise','Lossguide'])
#                }
#best_params_basic = find_catboost_parameters_bayes('feature_set_basic.csv', search_space=search_space, n_iter=20)
#print(best_params_basic)

#CatBoost took 195.46 seconds,  candidates checked: 20, best CV score: -19.665 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('grow_policy', 'Lossguide'),
#             ('iterations', 107),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.181),
#             ('random_strength', 80)])

#OrderedDict([('depth', 8), ('grow_policy', 'Lossguide'), ('iterations', 107), ('l2_leaf_reg', 0.005744167167485476), ('learning_rate', 0.181), ('random_strength', 80)])

In [20]:
#best_params_text = find_catboost_parameters_bayes('feature_set_text.csv')
#print(best_params_text)

#Best parameters:
#OrderedDict([('depth', 16),
#             ('iterations', 300),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.17754758868410853),
#             ('random_strength', 1.0)])
#
#OrderedDict([('depth', 16), ('iterations', 300), ('l2_leaf_reg', 0.001), ('learning_rate', 0.17754758868410853), ('random_strength', 1.0)])


In [21]:
#best_params_within = find_catboost_parameters_bayes('feature_set_within.csv')
#print(best_params_within)
#CatBoost took 4357.06 seconds,  candidates checked: 100, best CV score: -21.848 ± 0.000
#Best parameters:
#OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 2.0),
#             ('learning_rate', 0.058681717789493895),
#             ('random_strength', 2.168205069150344)])
#
# OrderedDict([('depth', 8), ('iterations', 1500), ('l2_leaf_reg', 2.0), ('learning_rate', 0.058681717789493895), ('random_strength', 2.168205069150344)])

In [22]:
#to_drop_cols=[] # [col for col in X_train.columns.values if re.search('internet',col)]
#reg=CatBoostRegressor(task_type='GPU', iterations=100, eta=0.3,depth=10, metric_period=20)
#reg.fit(X_trainval.drop(to_drop_cols,axis=1).to_numpy(), y_trainval)
#pred_test = np.clip(reg.predict(X_test.drop(to_drop_cols,axis=1).to_numpy()), 0, 20)
#write_predictions_by_array(pred_test[submissionidx2testidx], 'submission-catboost-feature_set_basic.csv')

# LB  1.008784 and 1.027125 (d=8, iterations=100, eta=0.3)
# 1.046352 and 1.054449 (d=10, iterations=100, eta=0.3)

As the final parameter search step, try out the best parameters found for each of the three feature sets to all of the feature sets

In [23]:
# basic features

all_data = pd.read_csv(os.path.join(DATA_FOLDER, 'feature_set_basic.csv'))

dates=all_data['date_block_num']

dates_train = (dates>= date_block_val - 13) & (dates<= date_block_val - 2)
dates_trainval = (dates>= date_block_test - 13) & (dates<= date_block_test - 2)
# y_train = all_data.loc[(dates>= date_block_val - 13) & (dates<= date_block_val - 2), 'target']
# y_trainval = all_data.loc[(dates>= date_block_test - 13) & (dates<= date_block_test - 2), 'target']

y_train=all_data.loc[dates_train, 'target']
#y_trainval=all_data.loc[dates_trainval, 'target']
y_val = all_data.loc[dates == date_block_val, 'target']
#y_test = all_data.loc[dates == date_block_test, 'target']

to_drop_cols = ['target','date_block_num']

X_train = all_data.loc[dates_train].drop(to_drop_cols, axis=1)
#X_trainval = all_data.loc[dates_trainval].drop(to_drop_cols, axis=1)
X_val = all_data.loc[dates == date_block_val].drop(to_drop_cols, axis=1)
#X_test = all_data.loc[dates == date_block_test].drop(to_drop_cols, axis=1)

gc.collect()    



0

In [None]:
#Best parameters for basic features by Bayesian search:
#rderedDict([('depth', 8),
#             ('iterations', 107),
#             ('l2_leaf_reg', 0.005744167167485476),
#             ('learning_rate', 0.1813605946638354),
#             ('random_strength', 80)])

#Best parameters for text features found in Bayesian search:
#OrderedDict([('depth', 16),
#             ('iterations', 300),
#             ('l2_leaf_reg', 0.001),
#             ('learning_rate', 0.17754758868410853),
#             ('random_strength', 1.0)])

#Best parameters for within-shop features found in Bayesian search:
# OrderedDict([('depth', 8),
#             ('iterations', 1500),
#             ('l2_leaf_reg', 2.0),
#             ('learning_rate', 0.058681717789493895),
#             ('random_strength', 2.168205069150344)])



reg=CatBoostRegressor(task_type='GPU', depth=8, iterations=107, l2_leaf_reg=0.005744167167485476,
                     learning_rate=0.1813605946638354, random_strength=80, verbose=10)
reg.fit(X_train.to_numpy(), y_train)
pred_val = np.clip(reg.predict(X_val.to_numpy()), 0, 20)
print('Clipped RMSE {} for parameters meant for basic features'.format(clipped_rmse(y_val, pred_val)))
# Clipped RMSE 0.3787635987985689 for parameters meant for basic features

In [None]:
reg=CatBoostRegressor(task_type='GPU', depth=16, iterations=300, l2_leaf_reg=0.001,
                     learning_rate=0.17754758868410853, random_strength=1, verbose=10)
reg.fit(X_train.to_numpy(), y_train)
pred_val = np.clip(reg.predict(X_val.to_numpy()), 0, 20)
print('Clipped RMSE {} for parameters meant for text features'.format(clipped_rmse(y_val, pred_val)))

In [None]:
reg=CatBoostRegressor(task_type='GPU', depth=8, iterations=1500, l2_leaf_reg=2,
                     learning_rate=0.058681717789493895, random_strength=2.168205069150344, verbose=10)
reg.fit(X_train.to_numpy(), y_train)
pred_val = np.clip(reg.predict(X_val.to_numpy()), 0, 20)
print('Clipped RMSE {} for parameters meant for within-shop features'.format(clipped_rmse(y_val, pred_val)))

0:	learn: 3.4648913	total: 53.7ms	remaining: 5.7s
10:	learn: 2.6844193	total: 609ms	remaining: 5.32s
20:	learn: 2.5467547	total: 1.19s	remaining: 4.88s
30:	learn: 2.4088414	total: 1.78s	remaining: 4.38s
40:	learn: 2.3470181	total: 2.37s	remaining: 3.81s
50:	learn: 2.2864916	total: 2.96s	remaining: 3.25s
60:	learn: 2.2538157	total: 3.55s	remaining: 2.68s
70:	learn: 2.2053329	total: 4.16s	remaining: 2.11s
80:	learn: 2.1775419	total: 4.75s	remaining: 1.52s
90:	learn: 2.1609599	total: 5.35s	remaining: 940ms
100:	learn: 2.1378800	total: 5.93s	remaining: 352ms
106:	learn: 2.0901688	total: 6.28s	remaining: 0us
Clipped RMSE 0.3787635987985689 for parameters meant for basic features
