In [1]:
import pandas as pd
import sklearn as sklearn
import numpy as np
import kaggle
import time
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lg
import optuna
import optuna.integration.lightgbm as lgb
import xgboost as xgb

from sklearn.pipeline import Pipeline
from pandas.core.frame import DataFrame
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from datetime import date
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import RepeatedKFold
from sklearn.svm import LinearSVR
from sklearn.svm import SVR

from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

## Helper functions

In [3]:
def get_float_cols(df:DataFrame):
    return df.select_dtypes(include=float).columns.tolist()

def get_int_cols(df:DataFrame):
    return df.select_dtypes(include=int).columns.tolist()

def get_number_cols(df:DataFrame):
    return df.select_dtypes(np.number).columns.tolist()

def get_obj_cols(df:DataFrame):
    return list(df.select_dtypes(include=object).columns)

In [4]:
def plot_hist_float(df:DataFrame):
    df_numeric = df[get_float_cols(df)]
    df_numeric.hist(bins=100, figsize=(30,20))
    plt.show()

In [5]:
def plot_hist_int(df:DataFrame):
    df_numeric = df[get_int_cols(df)]
    df_numeric.hist(bins=100, figsize=(30,20))
    plt.show()

In [6]:
def plot_hist_categorical(df:DataFrame):
    
    fig = plt.figure(figsize=(26,150))

    for index, col in enumerate(get_obj_cols(df)):
        plt.subplot(25,2,index+1)
        sns.countplot(x=col, data=df)
        plt.ylabel('COUNT', size = 25)
        plt.xlabel(col, fontsize = 25)
        plt.xticks(size = 20, rotation = 45 )
        plt.yticks(size = 20)
        
    fig.tight_layout(pad=1.0)

In [7]:
def split_test_train(df:DataFrame):
    test, train = df[df['ind'].eq('test')], df[df['ind'].eq('train')]
    test = test.drop(['ind'], axis=1)
    train = train.drop(['ind'], axis=1)
    return test, train
    
def combine_test_train(test:DataFrame, train:DataFrame):
    combine = pd.concat([test.assign(ind='test'), train.assign(ind='train')])
    target = train['SalePrice']
    test_ids = test['Id']
    return combine, target, test_ids

In [8]:
def print_empty_values(df:DataFrame):
    col_names_with_na = list(df.isna().sum()[lambda x: x > 0].index)
    col_names_with_empty = list(df.isnull().sum()[lambda x: x > 0].index)
    result = set(col_names_with_na) | set(col_names_with_empty)    
    print('Columns with NA or empty: {0}'.format(result))

In [9]:
def get_empty_cols(df:DataFrame):
    return list(df.isnull().sum()[lambda x: x > 0].index)

## Data splitting

In [10]:
df_train = pd.read_csv('house-prices-advanced-regression-techniques/train.csv')
df_test = pd.read_csv('house-prices-advanced-regression-techniques/test.csv')
df_combine, TARGET, TEST_IDS = combine_test_train(df_test, df_train)

In [11]:
df_combine.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,ind,SalePrice
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal,test,
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal,test,
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal,test,
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20.0,TA,TA,PConc,TA,TA,No,GLQ,602.0,Unf,0.0,324.0,926.0,GasA,Ex,Y,SBrkr,926,678,0,1604,0.0,0.0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998.0,Fin,2.0,470.0,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal,test,
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,PConc,Gd,TA,No,ALQ,263.0,Unf,0.0,1017.0,1280.0,GasA,Ex,Y,SBrkr,1280,0,0,1280,0.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992.0,RFn,2.0,506.0,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal,test,


## Building pipeline

In [12]:
def evaluate_model(predictions, test, test_labels):

    errors = abs(predictions - test_labels)
    mape = 100 * np.mean(errors / test_labels)
    accuracy = 100 - mape
    mean_error = np.mean(errors)
    
    res = { 'accuracy':accuracy, 'mean': mean_error } 
    return res

In [13]:
class Pipe:
    
    def __init__(self, funcs, **kwargs):
        self.funcs = funcs
        self.kwargs = kwargs
    
    def transform(self, df:DataFrame) -> DataFrame:
        for f in self.funcs:
            df = f(df, **self.kwargs)
            
        return df

In [14]:
class BaseExperiment:
    
    def __init__(self, pipe:Pipe, df:DataFrame, _TARGET_, _TEST_IDS_):
        self.pipe = pipe
        self.df = df
        self._TARGET_ = _TARGET_
        self._TEST_IDS_ = _TEST_IDS_
        self.regressor = self.create_regressor()
        
    def transform(self):        
        self.transformed = self.pipe.transform(self.df)
        return self.transformed 
    
    def create_regressor(self):
        raise NotImplementedError()
    
    def predict_external(self, to_predict):
        return self.regressor.predict(to_predict)
    
    def predict(self):
        test, train = split_test_train(self.transformed)
        
        # fit
        self.regressor.fit(train, self._TARGET_)
        
        # predict
        self.predicted = self.regressor.predict(test)
        
        return self.predicted
    
    def get_metric(self):
        test, train = split_test_train(self.transformed)      
        
        # predict train data
        pred_metric = self.regressor.predict(train)
        
        mse = mean_squared_error(self._TARGET_, pred_metric)
        rmse = np.sqrt(mse)
        
        return rmse

    def to_kaggle(self):
        named_tuple = time.localtime()
        time_string = time.strftime("%m/%d/%Y, %H:%M:%S", named_tuple)
        
        submission = pd.concat([self._TEST_IDS_, pd.Series(self.predicted, name='SalePrice')], axis=1)
        submission.to_csv('./submission.csv', index=False, header=True)
        
        !kaggle competitions submit -c house-prices-advanced-regression-techniques -f submission.csv -m "Test submission"
        
    def get_regressor(self):
        return self.regressor
        

In [15]:
class LinearExperiment(BaseExperiment):
    
    def create_regressor(self):
        return LinearRegression()

In [16]:
class DecisionTreeExperiment(BaseExperiment):
    
    def create_regressor(self):
        return DecisionTreeRegressor()
    
    def get_metric(self):
        test, train = split_test_train(self.transformed)   
        scores = cross_val_score(self.regressor, train, self._TARGET_, scoring='neg_mean_squared_error', cv=10)
        rmse = np.sqrt(-scores)
        return rmse.std(), rmse.mean()

In [17]:
class RandomForestExperiment(DecisionTreeExperiment):
    
    def create_regressor(self):
        return RandomForestRegressor()

In [18]:
class RandomForestWithGridSearchExperiment(RandomForestExperiment):
    
    best_params = { 
        'n_estimators': 400,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'max_features': 'sqrt',
        'max_depth': 90, 
        'bootstrap': False }
    
    def create_regressor(self):
        return RandomForestRegressor(**self.best_params)    
    
    def search_best_params(self):
        test, train = split_test_train(self.transformed)
        
        long_param_grid = {
            'bootstrap': [True, False],
            'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
            'max_features': ['auto', 'sqrt'],
            'min_samples_leaf': [1, 2, 4],
            'min_samples_split': [2, 5, 10],
            'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000] }
           
        rf_random = RandomizedSearchCV(estimator = self.regressor,
                                       param_distributions = long_param_grid,
                                       n_iter = 100, 
                                       cv = 3, 
                                       verbose=2,
                                       random_state=42,
                                       n_jobs = -1)
        
        # fit
        rf_random.fit(train, self._TARGET_)

        # get best params
        self.best_params = rf_random.best_params_
                
        return self.best_params


In [19]:
class LightGBMExperiment(BaseExperiment):
    
    def create_regressor(self):
        return lg.LGBMRegressor()

In [20]:
class LightGBMExperimentWithParams(BaseExperiment):

    best_params = {
        'objective': 'regression', 
        'metric': 'rmse', 
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'seed': 42, 
        'feature_pre_filter': False, 
        'lambda_l1': 7.089889561174952e-07,
        'lambda_l2': 0.02142265423635661,
        'num_leaves': 10, 
        'feature_fraction': 0.4, 
        'bagging_fraction': 0.9990730129106954,
        'bagging_freq': 4, 
        'min_child_samples': 5}
    
    def create_regressor(self):
        return lg.LGBMRegressor(**self.best_params)
    
    def search_best_params(self):
        
        rkf = RepeatedKFold(n_splits=5, n_repeats=5, random_state=42)

        params = {
            "objective": "regression",
            "metric": "rmse",
            "verbosity": -1,
            "boosting_type": "gbdt",                
            "seed": 42 }
        
        test, train = split_test_train(self.transformed)
        X = train
        y = self._TARGET_
        dtrain = lgb.Dataset(X, label=y)
        
        study_tuner = optuna.create_study(direction='minimize')        
        optuna.logging.set_verbosity(optuna.logging.WARNING) 
        
        tuner = lgb.LightGBMTunerCV(params, 
                            dtrain, 
                            study=study_tuner,
                            seed = 42,
                            folds=rkf,
                            num_boost_round=1000                       
                            )

        tuner.run()        
        self.best_params = tuner.best_params        
        return tuner

In [21]:
class XGBExperiment(BaseExperiment):
    
    def create_regressor(self):
        return xgb.XGBRegressor()

In [22]:
class XGBExperimentWithParams(BaseExperiment):
    
    best_params = {
        'max_depth': 7, 
        'learning_rate': 0.07565159949506778,
        'n_estimators': 714, 
        'min_child_weight': 6,
        'gamma': 0.9324748648192458, 
        'subsample': 0.14074600746618596, 
        'colsample_bytree': 0.6983721197854935, 
        'reg_alpha': 0.10885510377662143,
        'reg_lambda': 0.7756168972298106, 
        'random_state': 356 }
    
    def create_regressor(self):
        return xgb.XGBRegressor(**self.best_params)
    
    def transform(self):        
        self.transformed = self.pipe.transform(self.df)
        
        # static for objective function
        XGBExperimentWithParams.TRANSFORM = self.transformed
        XGBExperimentWithParams.TARGET = self._TARGET_
        
        return self.transformed 
    
    def objective(trial):
        
        param = {
            'max_depth': trial.suggest_int('max_depth', 1, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'gamma': trial.suggest_float('gamma', 0.01, 1.0),
            'subsample': trial.suggest_float('subsample', 0.01, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
            'random_state': trial.suggest_int('random_state', 1, 1000) }
        
        # split
        test, train = split_test_train(XGBExperimentWithParams.TRANSFORM)
        
        # fit
        model = xgb.XGBRegressor(**param)
        model.fit(train, XGBExperimentWithParams.TARGET)
        
        # get score
        scores = cross_val_score(model, train, XGBExperimentWithParams.TARGET, scoring='neg_mean_squared_error', cv=3)
        rmse = np.sqrt(-scores)
        
        return np.mean(rmse)
    
    def search_best_params(self):
        
        study = optuna.create_study(direction='minimize', study_name='regression')
        study.optimize(XGBExperimentWithParams.objective, n_trials=100)
        optuna.logging.set_verbosity(optuna.logging.WARNING)
        self.best_param = study.best_param
        return study.best_params

In [23]:
class LinearSVRExperiment(BaseExperiment):
    
    def create_regressor(self):
        return LinearSVR(epsilon=1.5)

In [24]:
class SVRExperiment(BaseExperiment):
    
    def create_regressor(self):
        return SVR(kernel='poly', degree=2, C=100, epsilon=0.1)

In [25]:
class CatBoostExperiment(BaseExperiment):
    
    def create_regressor(self):
        return CatBoostRegressor()

In [26]:
class CatBoostWithParamsExperiment(BaseExperiment):
    
    best_params =  {'iterations': 250, 
                    'learning_rate': 0.1736687160463188,
                    'depth': 11}
    
    
    def create_regressor(self):
        return CatBoostRegressor(**self.best_params)

    def transform(self):        
        self.transformed = self.pipe.transform(self.df)

        # static for objective function
        CatBoostWithParamsExperiment.TRANSFORM = self.transformed
        CatBoostWithParamsExperiment.TARGET = self._TARGET_

        return self.transformed 
    
    def objective(trial):
        
        param = {
            'iterations': trial.suggest_int('iterations', 1, 1000),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
            'depth': trial.suggest_int('depth', 2, 16) }

        # split
        test, train = split_test_train(CatBoostWithParamsExperiment.TRANSFORM)
        
        # fit
        model = CatBoostRegressor(**param)
        model.fit(train, CatBoostWithParamsExperiment.TARGET)
        
        # get score
        scores = cross_val_score(model, train, CatBoostWithParamsExperiment.TARGET, scoring='neg_mean_squared_error', cv=3)
        rmse = np.sqrt(-scores)        
        return np.mean(rmse)
    
    def search_best_params(self):
        
        study = optuna.create_study(direction='minimize', study_name='regression')
        study.optimize(CatBoostWithParamsExperiment.objective, n_trials=50)
        optuna.logging.set_verbosity(optuna.logging.WARNING)
        self.best_param = study.best_param
        return study.best_params
    
    def get_metric(self):
        test, train = split_test_train(self.transformed)   
        scores = cross_val_score(self.regressor, train, self._TARGET_, scoring='neg_mean_squared_error', cv=10)
        rmse = np.sqrt(-scores)
        return rmse.std(), rmse.mean()

In [27]:
def impute_categorical_with_na_avail(df:DataFrame, **kwargs) -> DataFrame:
        
    cols_to_impute = [
        'Alley',
        'BsmtQual',
        'BsmtCond',
        'BsmtExposure',
        'BsmtFinType1',
        'BsmtFinType2',
        'FireplaceQu',
        'GarageType',
        'GarageFinish',
        'GarageQual',
        'GarageCond',
        'PoolQC',
        'Fence',
        'MiscFeature']
    
    for col in cols_to_impute:
        df[col] = df[col].fillna('NA')
        
    return df

In [28]:
def impute_categorical_with_na_not_avail(df:DataFrame, **kwargs) -> DataFrame:
        
    cols_to_impute = [ 
        'MSZoning',
        'Utilities',
        'Exterior1st',
        'Exterior2nd',
        'MasVnrType',
        'Electrical',
        'KitchenQual',
        'Functional',
        'SaleType']
    
    for col in cols_to_impute:
        df[col] = df[col].mode()[0]
        
    return df

In [29]:
def delete_not_needed(df:DataFrame, **kwargs) -> DataFrame:
    to_delete = [ 'Id', 'SalePrice' ]
    for d in to_delete:
        if d in df.columns:
            df = df.drop(columns=[d])
            
    return df

In [30]:
def add_features(df:DataFrame, **kwargs) -> DataFrame:
    
    # square per room 
    df["SqFtPerRoom"] = df["GrLivArea"] / (df["TotRmsAbvGrd"] + df["FullBath"] + df["HalfBath"] + df["KitchenAbvGr"])
    
    # quality
    df['Total_Home_Quality'] = df['OverallQual'] + df['OverallCond']
    
    # bathrooms
    df['Total_Bathrooms'] = (df['FullBath'] + (0.5 * df['HalfBath']) + df['BsmtFullBath'] + (0.5 * df['BsmtHalfBath']))
    
    # sum of squre of 1 and 2 floor
    df["HighQualSF"] = df["1stFlrSF"] + df["2ndFlrSF"]
    return df

In [31]:
def remove_skew(df:DataFrame, **kwargs) -> DataFrame:
    
    skew_df = pd.DataFrame(get_number_cols(df), columns=['Feature'])
    skew_df['Skew'] = skew_df['Feature'].apply(lambda feature: scipy.stats.skew(df[feature]))
    skew_df['Absolute Skew'] = skew_df['Skew'].apply(abs)
    skew_df['Skewed'] = skew_df['Absolute Skew'].apply(lambda x: True if x >= 0.5 else False)

    for column in skew_df.query("Skewed == True")['Feature'].values:
        df[column] = np.log1p(df[column])
    
    return df

In [32]:
def fix_subclass(df:DataFrame, **kwargs) -> DataFrame:
    df['MSSubClass'] = df['MSSubClass'].astype(str)
    return df

In [33]:
def fix_month_sold(df:DataFrame, **kwargs) -> DataFrame:
    df['MoSold'] = (-np.cos(df['MoSold']))
    return df

In [34]:
def scale(df:DataFrame, **kwargs) -> DataFrame:
    
    scaling_type = kwargs['scaling_type']
    
    if (scaling_type == 'MinMaxScaler'):
        scaler = MinMaxScaler()
    
    if (scaling_type == 'MaxAbsScaler'):
        scaler = MaxAbsScaler()
    
    if (scaling_type == 'StandardScaler'):
        scaler = StandardScaler()
    
    if (scaling_type == 'RobustScaler'):
        scaler = RobustScaler()        
    
    # save ind column
    test_train_mask = df['ind']
    df = df.drop(['ind'], axis=1)
    
    scaler.fit(df)
    df = pd.DataFrame(scaler.transform(df), index=df.index, columns=df.columns) 
    
    # restore column
    df['ind'] = test_train_mask
    
    return df

In [35]:
def one_hot_encoding(df:DataFrame, **kwargs) -> DataFrame:
    
    test_train_mask = df['ind']
    df = df.drop(['ind'], axis=1)
    df = pd.get_dummies(df)
    df['ind'] = test_train_mask
    return df

In [36]:
def impute_numeric_cols(df:DataFrame, **kwargs) -> DataFrame:
    
    impute_method = kwargs['impute_method']
    
    col_names = [
        'LotFrontage',
        'MasVnrArea',
        'BsmtFinSF1',
        'BsmtFinSF2',
        'BsmtUnfSF',
        'TotalBsmtSF',
        'BsmtFullBath',
        'BsmtHalfBath',
        'GarageYrBlt',
        'GarageCars',
        'GarageArea' ]
    
    # all possible columns
    initial_cols = list(df.columns)
    
    for col_name in col_names:

        non_empty_numeric =  (set(get_number_cols(df)) - set(get_empty_cols(df))) | {col_name}        
        cols_names_to_drop = list((set(df.columns) - non_empty_numeric))
        
        if (kwargs['verbose']==True): print('Deleting: {0}. Imputing: {1}'.format(cols_names_to_drop, col_name))
        
        # save temp
        temp = df[cols_names_to_drop]

        # clear dataset 
        df = df.drop(columns=cols_names_to_drop)
        
        known = df.loc[ df[col_name].notnull() ]        
        unknown = df.loc[ df[col_name].isnull() ]
        
        # nothing to predict
        if (len(unknown) == 0): 
            if (kwargs['verbose']==True): print('Nothing to predict - continue')
            continue
        
        column_index = list(df.columns).index(col_name)

        all_indices = [i for i in range(unknown.shape[1])]
        diff = list(set(all_indices) - {column_index})

        y = known.values[:, column_index]
        X = known.values[:, diff]

        # select regressor
        if impute_method == 'randomforest':
            regressor = RandomForestRegressor(n_estimators=100, n_jobs=-1)
        if impute_method == 'knn':
            regressor = KNeighborsRegressor()
            
        regressor.fit(X, y)
        predicted = regressor.predict(unknown.values[:, diff])
        
        if (kwargs['verbose']==True): print('{0} was predicted. Len: {1}'.format(col_name, len(predicted)))
        
        # fill missings
        df.loc[ (df[col_name].isnull()), col_name ] = predicted
        
        # restore dataset
        df[cols_names_to_drop] = temp
    
    # reorder columns back
    df = df.reindex(columns = initial_cols)
    
    return df

## Plotting

In [37]:
#plot_hist_float(df_combine)

In [38]:
#plot_hist_int(df_combine)

In [39]:
#plot_hist_categorical(df_combine)

## Defining pipes

In [40]:
random_forest_imputer_pipe = Pipe([
        fix_subclass,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        one_hot_encoding
        ],
        
        verbose = False,
        impute_method = 'randomforest'
    )

knn_pipe = Pipe([
        fix_subclass,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        one_hot_encoding
        ],
        
        verbose = False,
        impute_method = 'knn'
    )

no_skew_pipe = Pipe([
        fix_subclass,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        one_hot_encoding,
        remove_skew
        ],
        
        verbose = False,
        impute_method = 'knn'
    )

no_skew_scaling = Pipe([
        fix_subclass,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        one_hot_encoding,
        remove_skew,
        scale
        ],
        
        verbose = False,
        impute_method = 'knn',
        scaling_type='RobustScaler'
    )

no_skew_scaling_cosine = Pipe([
        fix_subclass,
        fix_month_sold,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        one_hot_encoding,
        remove_skew,
        scale
        ],
        
        verbose = False,
        impute_method = 'knn',
        scaling_type='RobustScaler'
    )

no_skew_scaling_cosine_more_features = Pipe([
        fix_month_sold,
        impute_categorical_with_na_avail,
        impute_categorical_with_na_not_avail,
        delete_not_needed,
        impute_numeric_cols,
        add_features,
        one_hot_encoding,
        remove_skew,
        scale
        ],
        
        verbose = False,
        impute_method = 'randomforest',
        scaling_type='RobustScaler'
    )


## Liniear regression

In [41]:
experiment = LinearExperiment(random_forest_imputer_pipe, df_combine, TARGET, TEST_IDS)
experiment.transform()
predicted = experiment.predict()
metric = experiment.get_metric()
print('RMSE: {0}'.format(metric))

RMSE: 21607.78549144802


In [42]:
#experiment = LinearExperiment(knn_pipe, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

In [43]:
#experiment = LinearExperiment(no_skew_pipe, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

In [44]:
#experiment = LinearExperiment(no_skew_scaling, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

## Decision Tree

In [45]:
#experiment = DecisionTreeExperiment(no_skew_pipe, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

In [46]:
#experiment = DecisionTreeExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

## Random Forest

In [47]:
#experiment = RandomForestExperiment(no_skew_scaling, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

In [48]:
#experiment = RandomForestExperiment(no_skew_scaling_cosine, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

In [49]:
#experiment = RandomForestExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

## Random Forest with best params

In [50]:
#experiment = RandomForestWithGridSearchExperiment(no_skew_scaling, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

In [51]:
#experiment = RandomForestWithGridSearchExperiment(no_skew_scaling_cosine, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

In [52]:
#experiment = RandomForestWithGridSearchExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0:.4f} STD: {1:.2f}'.format(metric[1], metric[0]))

## Search params for RandomForest

In [53]:
#experiment = RandomForestWithGridSearchExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#p = experiment.search_best_params()
#print(p)

## LightGBM

In [54]:
#experiment = LightGBMExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))
#experiment.to_kaggle()

In [55]:
#experiment = LightGBMExperimentWithParams(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

## XGB with no params

In [56]:
#experiment = XGBExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))
#experiment.to_kaggle()

In [57]:
#experiment = XGBExperimentWithParams(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

## SVR

In [58]:
#experiment = LinearSVRExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

In [59]:
#experiment = SVRExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

## CatBoost

In [60]:
#experiment = CatBoostExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
#experiment.transform()
#predicted = experiment.predict()
#metric = experiment.get_metric()
#print('RMSE: {0}'.format(metric))

In [61]:
experiment = CatBoostWithParamsExperiment(random_forest_imputer_pipe, df_combine, TARGET, TEST_IDS)
experiment.transform()
predicted = experiment.predict()
metric = experiment.get_metric()
print('RMSE: {0}'.format(metric))

0:	learn: 70947.9891660	total: 385ms	remaining: 1m 35s
1:	learn: 64377.7691012	total: 603ms	remaining: 1m 14s
2:	learn: 59110.1532800	total: 895ms	remaining: 1m 13s
3:	learn: 54337.3173029	total: 1.14s	remaining: 1m 9s
4:	learn: 49568.5649177	total: 1.4s	remaining: 1m 8s
5:	learn: 45792.8801283	total: 1.7s	remaining: 1m 9s
6:	learn: 42469.2392417	total: 1.99s	remaining: 1m 8s
7:	learn: 39461.2486516	total: 2.3s	remaining: 1m 9s
8:	learn: 36588.3858691	total: 2.63s	remaining: 1m 10s
9:	learn: 34343.6010162	total: 2.92s	remaining: 1m 10s
10:	learn: 32215.9162915	total: 3.21s	remaining: 1m 9s
11:	learn: 30532.4061838	total: 3.53s	remaining: 1m 10s
12:	learn: 28770.0223097	total: 3.79s	remaining: 1m 9s
13:	learn: 27509.5242795	total: 4.2s	remaining: 1m 10s
14:	learn: 26322.7912866	total: 4.53s	remaining: 1m 10s
15:	learn: 25306.1129127	total: 4.86s	remaining: 1m 11s
16:	learn: 24379.2224330	total: 5.16s	remaining: 1m 10s
17:	learn: 23389.3775323	total: 5.43s	remaining: 1m 10s
18:	learn: 22

152:	learn: 2952.7350530	total: 41.7s	remaining: 26.4s
153:	learn: 2919.5435156	total: 41.9s	remaining: 26.1s
154:	learn: 2867.7111323	total: 42.2s	remaining: 25.8s
155:	learn: 2829.2399109	total: 42.4s	remaining: 25.6s
156:	learn: 2803.1720864	total: 42.7s	remaining: 25.3s
157:	learn: 2760.3610736	total: 42.9s	remaining: 25s
158:	learn: 2719.5614765	total: 43.2s	remaining: 24.7s
159:	learn: 2676.2891898	total: 43.4s	remaining: 24.4s
160:	learn: 2643.7494422	total: 43.7s	remaining: 24.1s
161:	learn: 2612.9258249	total: 44.1s	remaining: 23.9s
162:	learn: 2574.8609515	total: 44.3s	remaining: 23.7s
163:	learn: 2547.4704457	total: 44.6s	remaining: 23.4s
164:	learn: 2512.3313044	total: 44.8s	remaining: 23.1s
165:	learn: 2490.5323902	total: 45s	remaining: 22.8s
166:	learn: 2488.7135338	total: 45.3s	remaining: 22.5s
167:	learn: 2449.7720461	total: 45.5s	remaining: 22.2s
168:	learn: 2424.2910134	total: 45.7s	remaining: 21.9s
169:	learn: 2396.9906373	total: 45.9s	remaining: 21.6s
170:	learn: 23

53:	learn: 11693.7059966	total: 14.3s	remaining: 51.9s
54:	learn: 11539.5431707	total: 14.5s	remaining: 51.6s
55:	learn: 11444.4248748	total: 14.8s	remaining: 51.2s
56:	learn: 11315.9853749	total: 15s	remaining: 50.8s
57:	learn: 11152.2647071	total: 15.2s	remaining: 50.4s
58:	learn: 11072.0565259	total: 15.4s	remaining: 50s
59:	learn: 10969.0850764	total: 15.7s	remaining: 49.8s
60:	learn: 10848.9081589	total: 16s	remaining: 49.5s
61:	learn: 10653.2004563	total: 16.2s	remaining: 49.1s
62:	learn: 10511.2005656	total: 16.4s	remaining: 48.8s
63:	learn: 10374.9200892	total: 16.7s	remaining: 48.6s
64:	learn: 10253.4144445	total: 16.9s	remaining: 48.2s
65:	learn: 10124.3325794	total: 17.2s	remaining: 47.8s
66:	learn: 10022.6467579	total: 17.4s	remaining: 47.5s
67:	learn: 9854.5559690	total: 17.7s	remaining: 47.3s
68:	learn: 9752.8644215	total: 17.9s	remaining: 47s
69:	learn: 9629.2680067	total: 18.1s	remaining: 46.6s
70:	learn: 9475.1577202	total: 18.4s	remaining: 46.3s
71:	learn: 9410.041707

204:	learn: 1638.5972741	total: 52.6s	remaining: 11.6s
205:	learn: 1625.2632229	total: 52.9s	remaining: 11.3s
206:	learn: 1587.8980591	total: 53.1s	remaining: 11s
207:	learn: 1566.3535184	total: 53.3s	remaining: 10.8s
208:	learn: 1541.9989177	total: 53.6s	remaining: 10.5s
209:	learn: 1520.2027303	total: 53.8s	remaining: 10.2s
210:	learn: 1514.7720952	total: 54s	remaining: 9.99s
211:	learn: 1514.0378686	total: 54.3s	remaining: 9.73s
212:	learn: 1495.7524863	total: 54.5s	remaining: 9.47s
213:	learn: 1472.3961280	total: 54.8s	remaining: 9.22s
214:	learn: 1464.6247793	total: 55s	remaining: 8.96s
215:	learn: 1447.3543607	total: 55.3s	remaining: 8.7s
216:	learn: 1433.3086887	total: 55.5s	remaining: 8.44s
217:	learn: 1417.8106713	total: 55.7s	remaining: 8.18s
218:	learn: 1417.1904301	total: 55.9s	remaining: 7.92s
219:	learn: 1397.6685130	total: 56.2s	remaining: 7.66s
220:	learn: 1389.4686483	total: 56.4s	remaining: 7.4s
221:	learn: 1374.4584725	total: 56.7s	remaining: 7.16s
222:	learn: 1357.9

105:	learn: 5056.8658349	total: 27.8s	remaining: 37.7s
106:	learn: 5043.3819676	total: 28s	remaining: 37.4s
107:	learn: 5009.4457717	total: 28.2s	remaining: 37.1s
108:	learn: 4899.9431667	total: 28.5s	remaining: 36.8s
109:	learn: 4789.4111335	total: 28.7s	remaining: 36.5s
110:	learn: 4729.0887241	total: 28.9s	remaining: 36.2s
111:	learn: 4610.2041131	total: 29.2s	remaining: 35.9s
112:	learn: 4515.0684685	total: 29.4s	remaining: 35.6s
113:	learn: 4426.9581001	total: 29.7s	remaining: 35.4s
114:	learn: 4355.0806725	total: 29.9s	remaining: 35.1s
115:	learn: 4300.2844298	total: 30.1s	remaining: 34.8s
116:	learn: 4236.7899855	total: 30.4s	remaining: 34.5s
117:	learn: 4183.4332983	total: 30.7s	remaining: 34.3s
118:	learn: 4106.1815486	total: 30.9s	remaining: 34.1s
119:	learn: 4045.1236853	total: 31.2s	remaining: 33.8s
120:	learn: 4027.1729140	total: 31.4s	remaining: 33.5s
121:	learn: 3951.5235956	total: 31.6s	remaining: 33.2s
122:	learn: 3896.4629855	total: 31.9s	remaining: 32.9s
123:	learn: 

6:	learn: 40510.6558628	total: 1.72s	remaining: 59.9s
7:	learn: 37958.5065772	total: 1.99s	remaining: 1m
8:	learn: 35386.8644771	total: 2.24s	remaining: 1m
9:	learn: 33014.6540447	total: 2.46s	remaining: 59.1s
10:	learn: 31081.3654685	total: 2.67s	remaining: 58s
11:	learn: 29437.6252188	total: 2.87s	remaining: 56.9s
12:	learn: 28226.2226388	total: 2.97s	remaining: 54.1s
13:	learn: 26816.5186143	total: 3.19s	remaining: 53.9s
14:	learn: 25559.4855974	total: 3.4s	remaining: 53.3s
15:	learn: 24507.0905114	total: 3.52s	remaining: 51.4s
16:	learn: 23635.4197390	total: 3.73s	remaining: 51.2s
17:	learn: 22811.9644109	total: 3.93s	remaining: 50.7s
18:	learn: 21999.8336185	total: 4.16s	remaining: 50.5s
19:	learn: 21230.8688978	total: 4.38s	remaining: 50.4s
20:	learn: 20527.5342359	total: 4.6s	remaining: 50.2s
21:	learn: 19909.1114572	total: 4.79s	remaining: 49.7s
22:	learn: 19425.3532563	total: 5s	remaining: 49.3s
23:	learn: 18777.3144683	total: 5.19s	remaining: 48.9s
24:	learn: 18400.2249137	to

158:	learn: 2237.2589466	total: 39.9s	remaining: 22.8s
159:	learn: 2211.5270374	total: 40.2s	remaining: 22.6s
160:	learn: 2196.9474404	total: 40.4s	remaining: 22.3s
161:	learn: 2162.4301807	total: 40.7s	remaining: 22.1s
162:	learn: 2131.2989133	total: 41s	remaining: 21.9s
163:	learn: 2109.2954620	total: 41.2s	remaining: 21.6s
164:	learn: 2079.9815912	total: 41.5s	remaining: 21.4s
165:	learn: 2070.3946277	total: 41.7s	remaining: 21.1s
166:	learn: 2061.1781930	total: 41.9s	remaining: 20.8s
167:	learn: 2028.7680539	total: 42.2s	remaining: 20.6s
168:	learn: 2003.0139039	total: 42.5s	remaining: 20.4s
169:	learn: 1973.3228412	total: 42.7s	remaining: 20.1s
170:	learn: 1956.6797375	total: 43s	remaining: 19.9s
171:	learn: 1926.2134092	total: 43.3s	remaining: 19.6s
172:	learn: 1909.2380080	total: 43.6s	remaining: 19.4s
173:	learn: 1895.1773654	total: 43.8s	remaining: 19.1s
174:	learn: 1892.3596690	total: 44.1s	remaining: 18.9s
175:	learn: 1866.9076363	total: 44.3s	remaining: 18.6s
176:	learn: 18

59:	learn: 9465.4604958	total: 14.9s	remaining: 47.2s
60:	learn: 9357.5477470	total: 15.2s	remaining: 47s
61:	learn: 9221.9270621	total: 15.4s	remaining: 46.7s
62:	learn: 9081.1007908	total: 15.7s	remaining: 46.5s
63:	learn: 8966.7785471	total: 15.9s	remaining: 46.3s
64:	learn: 8820.5194186	total: 16.2s	remaining: 46s
65:	learn: 8730.4884084	total: 16.4s	remaining: 45.7s
66:	learn: 8653.5851775	total: 16.6s	remaining: 45.4s
67:	learn: 8515.9794898	total: 16.9s	remaining: 45.1s
68:	learn: 8393.4427272	total: 17.1s	remaining: 44.8s
69:	learn: 8225.1345109	total: 17.3s	remaining: 44.5s
70:	learn: 8159.3145407	total: 17.6s	remaining: 44.3s
71:	learn: 8061.3122030	total: 17.8s	remaining: 44s
72:	learn: 7973.3044365	total: 18s	remaining: 43.7s
73:	learn: 7872.4314057	total: 18.3s	remaining: 43.4s
74:	learn: 7799.8671076	total: 18.5s	remaining: 43.2s
75:	learn: 7739.2686122	total: 18.7s	remaining: 42.9s
76:	learn: 7670.4567158	total: 19s	remaining: 42.6s
77:	learn: 7563.8601140	total: 19.2s	r

210:	learn: 1221.6644646	total: 51.8s	remaining: 9.58s
211:	learn: 1199.0890925	total: 52.1s	remaining: 9.33s
212:	learn: 1176.8050236	total: 52.3s	remaining: 9.08s
213:	learn: 1153.8715291	total: 52.5s	remaining: 8.83s
214:	learn: 1145.6515557	total: 52.7s	remaining: 8.58s
215:	learn: 1142.9488853	total: 52.9s	remaining: 8.33s
216:	learn: 1136.2662518	total: 53.1s	remaining: 8.08s
217:	learn: 1120.2149930	total: 53.4s	remaining: 7.83s
218:	learn: 1104.0553857	total: 53.6s	remaining: 7.58s
219:	learn: 1097.0817195	total: 53.8s	remaining: 7.34s
220:	learn: 1096.0280630	total: 54s	remaining: 7.09s
221:	learn: 1087.4895450	total: 54.2s	remaining: 6.84s
222:	learn: 1073.4419552	total: 54.5s	remaining: 6.59s
223:	learn: 1059.3345728	total: 54.7s	remaining: 6.35s
224:	learn: 1040.8964661	total: 54.9s	remaining: 6.1s
225:	learn: 1020.4942695	total: 55.2s	remaining: 5.86s
226:	learn: 1009.6300127	total: 55.4s	remaining: 5.61s
227:	learn: 1000.0409848	total: 55.6s	remaining: 5.37s
228:	learn: 9

112:	learn: 4706.2743739	total: 26.9s	remaining: 32.6s
113:	learn: 4702.6498313	total: 27.1s	remaining: 32.4s
114:	learn: 4649.7593019	total: 27.4s	remaining: 32.2s
115:	learn: 4607.8284471	total: 27.7s	remaining: 32s
116:	learn: 4572.6454873	total: 28s	remaining: 31.8s
117:	learn: 4456.1028273	total: 28.3s	remaining: 31.7s
118:	learn: 4395.6569871	total: 28.6s	remaining: 31.4s
119:	learn: 4392.5297968	total: 28.9s	remaining: 31.3s
120:	learn: 4350.4300900	total: 29.1s	remaining: 31s
121:	learn: 4293.7197497	total: 29.4s	remaining: 30.8s
122:	learn: 4209.6250432	total: 29.6s	remaining: 30.6s
123:	learn: 4139.6108818	total: 29.9s	remaining: 30.4s
124:	learn: 4090.1025106	total: 30.1s	remaining: 30.1s
125:	learn: 4086.7120309	total: 30.4s	remaining: 29.9s
126:	learn: 4018.2014158	total: 30.6s	remaining: 29.7s
127:	learn: 3948.7403626	total: 30.8s	remaining: 29.4s
128:	learn: 3889.6940800	total: 31.1s	remaining: 29.1s
129:	learn: 3822.6531556	total: 31.3s	remaining: 28.9s
130:	learn: 3763

13:	learn: 27670.9250899	total: 3.25s	remaining: 54.7s
14:	learn: 26398.6910952	total: 3.49s	remaining: 54.6s
15:	learn: 25456.7110298	total: 3.72s	remaining: 54.4s
16:	learn: 24470.0683677	total: 3.94s	remaining: 54s
17:	learn: 23534.5033169	total: 4.18s	remaining: 53.9s
18:	learn: 22722.7707867	total: 4.42s	remaining: 53.7s
19:	learn: 21896.1173317	total: 4.64s	remaining: 53.4s
20:	learn: 21228.1341389	total: 4.86s	remaining: 53s
21:	learn: 20626.0000475	total: 5.08s	remaining: 52.7s
22:	learn: 20200.3297352	total: 5.11s	remaining: 50.5s
23:	learn: 19616.0083031	total: 5.36s	remaining: 50.5s
24:	learn: 19141.1993667	total: 5.61s	remaining: 50.5s
25:	learn: 18670.7294202	total: 5.84s	remaining: 50.4s
26:	learn: 18294.1622466	total: 6.06s	remaining: 50.1s
27:	learn: 17761.2714596	total: 6.29s	remaining: 49.9s
28:	learn: 17283.5005826	total: 6.51s	remaining: 49.6s
29:	learn: 16842.1834702	total: 6.73s	remaining: 49.3s
30:	learn: 16448.6894464	total: 6.94s	remaining: 49s
31:	learn: 16099

164:	learn: 2183.3936843	total: 41.4s	remaining: 21.3s
165:	learn: 2160.4145198	total: 41.6s	remaining: 21.1s
166:	learn: 2132.8808106	total: 41.9s	remaining: 20.8s
167:	learn: 2107.6833592	total: 42.1s	remaining: 20.6s
168:	learn: 2077.6502595	total: 42.4s	remaining: 20.3s
169:	learn: 2060.6925607	total: 42.6s	remaining: 20.1s
170:	learn: 2026.7389388	total: 42.9s	remaining: 19.8s
171:	learn: 2023.7288573	total: 43.2s	remaining: 19.6s
172:	learn: 1997.5981491	total: 43.5s	remaining: 19.3s
173:	learn: 1994.2502681	total: 43.7s	remaining: 19.1s
174:	learn: 1960.8508260	total: 43.9s	remaining: 18.8s
175:	learn: 1929.7064281	total: 44.2s	remaining: 18.6s
176:	learn: 1913.2103297	total: 44.4s	remaining: 18.3s
177:	learn: 1896.4124637	total: 44.6s	remaining: 18s
178:	learn: 1864.9188797	total: 44.8s	remaining: 17.8s
179:	learn: 1841.9236614	total: 45s	remaining: 17.5s
180:	learn: 1840.1477134	total: 45.3s	remaining: 17.3s
181:	learn: 1798.5103727	total: 45.5s	remaining: 17s
182:	learn: 1769

65:	learn: 8996.6846497	total: 14.6s	remaining: 40.6s
66:	learn: 8918.6510703	total: 14.8s	remaining: 40.3s
67:	learn: 8794.4831204	total: 15s	remaining: 40.1s
68:	learn: 8599.5917423	total: 15.2s	remaining: 39.8s
69:	learn: 8434.6218428	total: 15.4s	remaining: 39.6s
70:	learn: 8249.2212681	total: 15.6s	remaining: 39.3s
71:	learn: 8153.5528530	total: 15.8s	remaining: 39.1s
72:	learn: 8019.6560041	total: 16s	remaining: 38.9s
73:	learn: 7849.8517674	total: 16.3s	remaining: 38.7s
74:	learn: 7713.9795207	total: 16.5s	remaining: 38.4s
75:	learn: 7592.0374780	total: 16.7s	remaining: 38.1s
76:	learn: 7387.7267534	total: 16.9s	remaining: 37.9s
77:	learn: 7280.0577936	total: 17.1s	remaining: 37.6s
78:	learn: 7172.4770146	total: 17.3s	remaining: 37.4s
79:	learn: 7068.9823488	total: 17.5s	remaining: 37.2s
80:	learn: 6921.7644882	total: 17.7s	remaining: 37s
81:	learn: 6865.8156820	total: 17.9s	remaining: 36.8s
82:	learn: 6716.4521131	total: 18.2s	remaining: 36.6s
83:	learn: 6603.5323981	total: 18.

217:	learn: 1143.2493683	total: 49.9s	remaining: 7.32s
218:	learn: 1132.6494181	total: 50.1s	remaining: 7.09s
219:	learn: 1121.1092563	total: 50.3s	remaining: 6.86s
220:	learn: 1107.5620264	total: 50.5s	remaining: 6.63s
221:	learn: 1093.8284104	total: 50.8s	remaining: 6.4s
222:	learn: 1074.5151100	total: 51s	remaining: 6.17s
223:	learn: 1061.1290044	total: 51.2s	remaining: 5.95s
224:	learn: 1039.0062077	total: 51.4s	remaining: 5.72s
225:	learn: 1037.9195210	total: 51.7s	remaining: 5.49s
226:	learn: 1023.4718231	total: 51.9s	remaining: 5.26s
227:	learn: 1014.5817595	total: 52.1s	remaining: 5.03s
228:	learn: 1006.8780917	total: 52.3s	remaining: 4.8s
229:	learn: 1000.1992885	total: 52.6s	remaining: 4.57s
230:	learn: 983.1631022	total: 52.8s	remaining: 4.34s
231:	learn: 966.3610153	total: 53s	remaining: 4.12s
232:	learn: 954.0872160	total: 53.3s	remaining: 3.88s
233:	learn: 946.5884379	total: 53.5s	remaining: 3.66s
234:	learn: 941.1617263	total: 53.7s	remaining: 3.43s
235:	learn: 933.34864

119:	learn: 4384.6463125	total: 27.3s	remaining: 29.5s
120:	learn: 4352.8623848	total: 27.5s	remaining: 29.3s
121:	learn: 4271.2250973	total: 27.7s	remaining: 29.1s
122:	learn: 4200.4965809	total: 28s	remaining: 28.9s
123:	learn: 4155.2798833	total: 28.3s	remaining: 28.7s
124:	learn: 4106.7345755	total: 28.5s	remaining: 28.5s
125:	learn: 4082.4866972	total: 28.8s	remaining: 28.3s
126:	learn: 4048.1220021	total: 29s	remaining: 28.1s
127:	learn: 3979.0962894	total: 29.2s	remaining: 27.9s
128:	learn: 3938.7157124	total: 29.5s	remaining: 27.6s
129:	learn: 3867.8071139	total: 29.7s	remaining: 27.4s
130:	learn: 3821.8021979	total: 29.9s	remaining: 27.2s
131:	learn: 3759.4746508	total: 30.5s	remaining: 27.3s
132:	learn: 3718.0222602	total: 30.8s	remaining: 27.1s
133:	learn: 3678.0910874	total: 31s	remaining: 26.8s
134:	learn: 3601.2433664	total: 31.3s	remaining: 26.6s
135:	learn: 3555.4931013	total: 31.5s	remaining: 26.4s
136:	learn: 3500.6163098	total: 31.7s	remaining: 26.2s
137:	learn: 3448

21:	learn: 20867.4260511	total: 5.1s	remaining: 52.9s
22:	learn: 20261.0341880	total: 5.34s	remaining: 52.7s
23:	learn: 19608.2968155	total: 5.59s	remaining: 52.6s
24:	learn: 19069.3619274	total: 5.83s	remaining: 52.5s
25:	learn: 18463.8446929	total: 6.08s	remaining: 52.4s
26:	learn: 18116.9905754	total: 6.34s	remaining: 52.4s
27:	learn: 17672.3969599	total: 6.62s	remaining: 52.5s
28:	learn: 17241.7602287	total: 6.88s	remaining: 52.5s
29:	learn: 16852.3524380	total: 7.15s	remaining: 52.5s
30:	learn: 16515.6728015	total: 7.4s	remaining: 52.3s
31:	learn: 16193.7098855	total: 7.63s	remaining: 52s
32:	learn: 15909.1630422	total: 7.85s	remaining: 51.6s
33:	learn: 15598.0804206	total: 8.07s	remaining: 51.3s
34:	learn: 15301.3758685	total: 8.29s	remaining: 51s
35:	learn: 15022.7504598	total: 8.53s	remaining: 50.7s
36:	learn: 14662.7939612	total: 8.75s	remaining: 50.4s
37:	learn: 14447.7990405	total: 8.97s	remaining: 50s
38:	learn: 14289.2160751	total: 9.2s	remaining: 49.8s
39:	learn: 14129.32

172:	learn: 2213.5654386	total: 41.4s	remaining: 18.4s
173:	learn: 2176.8068048	total: 41.7s	remaining: 18.2s
174:	learn: 2145.3567153	total: 41.9s	remaining: 18s
175:	learn: 2116.8266878	total: 42.1s	remaining: 17.7s
176:	learn: 2088.9283755	total: 42.4s	remaining: 17.5s
177:	learn: 2070.9700613	total: 42.6s	remaining: 17.2s
178:	learn: 2045.4355783	total: 42.9s	remaining: 17s
179:	learn: 2019.7283368	total: 43.1s	remaining: 16.8s
180:	learn: 2011.9442225	total: 43.4s	remaining: 16.5s
181:	learn: 2011.0319725	total: 43.7s	remaining: 16.3s
182:	learn: 1993.0303882	total: 43.9s	remaining: 16.1s
183:	learn: 1953.7295714	total: 44.2s	remaining: 15.8s
184:	learn: 1923.3512930	total: 44.4s	remaining: 15.6s
185:	learn: 1891.1528992	total: 44.7s	remaining: 15.4s
186:	learn: 1865.9396992	total: 44.9s	remaining: 15.1s
187:	learn: 1820.8080361	total: 45.1s	remaining: 14.9s
188:	learn: 1803.7727123	total: 45.4s	remaining: 14.6s
189:	learn: 1777.4891870	total: 45.6s	remaining: 14.4s
190:	learn: 17

73:	learn: 8050.3121558	total: 18.1s	remaining: 43.1s
74:	learn: 8039.2621872	total: 18.3s	remaining: 42.8s
75:	learn: 7904.9042864	total: 18.6s	remaining: 42.6s
76:	learn: 7856.7270852	total: 18.8s	remaining: 42.3s
77:	learn: 7811.5537544	total: 19.1s	remaining: 42s
78:	learn: 7649.9423647	total: 19.3s	remaining: 41.8s
79:	learn: 7485.1671997	total: 19.5s	remaining: 41.5s
80:	learn: 7473.0872108	total: 19.8s	remaining: 41.3s
81:	learn: 7285.4221781	total: 20s	remaining: 41s
82:	learn: 7111.1408166	total: 20.3s	remaining: 40.9s
83:	learn: 6992.9577887	total: 20.5s	remaining: 40.6s
84:	learn: 6889.1382375	total: 20.8s	remaining: 40.4s
85:	learn: 6789.4425589	total: 21s	remaining: 40.1s
86:	learn: 6680.4494924	total: 21.3s	remaining: 39.9s
87:	learn: 6568.4155615	total: 21.5s	remaining: 39.6s
88:	learn: 6460.9928159	total: 21.7s	remaining: 39.3s
89:	learn: 6334.9656919	total: 21.9s	remaining: 39s
90:	learn: 6189.0976360	total: 22.2s	remaining: 38.7s
91:	learn: 6058.6482961	total: 22.4s	r

224:	learn: 1162.0395236	total: 53.1s	remaining: 5.9s
225:	learn: 1135.3127918	total: 53.4s	remaining: 5.67s
226:	learn: 1122.6852259	total: 53.6s	remaining: 5.43s
227:	learn: 1104.0874698	total: 53.9s	remaining: 5.2s
228:	learn: 1102.1933825	total: 54.1s	remaining: 4.96s
229:	learn: 1084.3589455	total: 54.3s	remaining: 4.72s
230:	learn: 1072.4637043	total: 54.6s	remaining: 4.49s
231:	learn: 1055.2728624	total: 54.8s	remaining: 4.25s
232:	learn: 1045.8444980	total: 55.1s	remaining: 4.02s
233:	learn: 1034.2578199	total: 55.3s	remaining: 3.78s
234:	learn: 1019.4796341	total: 55.5s	remaining: 3.54s
235:	learn: 1001.6264201	total: 55.8s	remaining: 3.31s
236:	learn: 994.9381049	total: 56s	remaining: 3.07s
237:	learn: 988.6740537	total: 56.2s	remaining: 2.83s
238:	learn: 973.1619343	total: 56.5s	remaining: 2.6s
239:	learn: 958.6090988	total: 56.7s	remaining: 2.36s
240:	learn: 943.1858872	total: 56.9s	remaining: 2.13s
241:	learn: 931.8615543	total: 57.2s	remaining: 1.89s
242:	learn: 924.98067

In [62]:
experiment = CatBoostWithParamsExperiment(no_skew_scaling_cosine_more_features, df_combine, TARGET, TEST_IDS)
experiment.transform()
predicted = experiment.predict()
metric = experiment.get_metric()
print('RMSE: {0}'.format(metric))


0:	learn: 69902.9384919	total: 234ms	remaining: 58.3s
1:	learn: 63727.0991770	total: 461ms	remaining: 57.1s
2:	learn: 57526.8704302	total: 685ms	remaining: 56.4s
3:	learn: 52338.7369026	total: 936ms	remaining: 57.6s
4:	learn: 47621.5068698	total: 1.58s	remaining: 1m 17s
5:	learn: 43705.5500139	total: 1.83s	remaining: 1m 14s
6:	learn: 40290.8046722	total: 1.9s	remaining: 1m 6s
7:	learn: 37668.6126037	total: 2.19s	remaining: 1m 6s
8:	learn: 34998.0196281	total: 2.45s	remaining: 1m 5s
9:	learn: 32709.9198761	total: 2.78s	remaining: 1m 6s
10:	learn: 31023.2864483	total: 3.16s	remaining: 1m 8s
11:	learn: 29317.5953682	total: 3.6s	remaining: 1m 11s
12:	learn: 28002.5871763	total: 3.85s	remaining: 1m 10s
13:	learn: 26652.9255324	total: 4.14s	remaining: 1m 9s
14:	learn: 25523.9712975	total: 4.41s	remaining: 1m 9s
15:	learn: 24552.4332298	total: 4.68s	remaining: 1m 8s
16:	learn: 23443.9110445	total: 4.95s	remaining: 1m 7s
17:	learn: 22573.7805871	total: 5.23s	remaining: 1m 7s
18:	learn: 21851.1

152:	learn: 2896.7438879	total: 40.8s	remaining: 25.9s
153:	learn: 2863.7492696	total: 41.1s	remaining: 25.6s
154:	learn: 2808.9154326	total: 41.4s	remaining: 25.4s
155:	learn: 2765.4940575	total: 41.7s	remaining: 25.1s
156:	learn: 2741.2864391	total: 41.9s	remaining: 24.8s
157:	learn: 2710.3187908	total: 42.2s	remaining: 24.6s
158:	learn: 2659.3935753	total: 42.5s	remaining: 24.3s
159:	learn: 2628.9962014	total: 42.7s	remaining: 24s
160:	learn: 2603.1819597	total: 43s	remaining: 23.7s
161:	learn: 2589.5963668	total: 43.2s	remaining: 23.5s
162:	learn: 2550.5355427	total: 43.5s	remaining: 23.2s
163:	learn: 2534.0119785	total: 43.8s	remaining: 22.9s
164:	learn: 2500.6856718	total: 44s	remaining: 22.7s
165:	learn: 2463.4065423	total: 44.3s	remaining: 22.4s
166:	learn: 2416.3049163	total: 44.5s	remaining: 22.1s
167:	learn: 2385.8331207	total: 44.7s	remaining: 21.8s
168:	learn: 2356.8706853	total: 45s	remaining: 21.6s
169:	learn: 2329.0212477	total: 45.3s	remaining: 21.3s
170:	learn: 2310.2

53:	learn: 10235.7618055	total: 16.5s	remaining: 59.8s
54:	learn: 10094.9347914	total: 16.9s	remaining: 1m
55:	learn: 9934.8158235	total: 17.9s	remaining: 1m 2s
56:	learn: 9850.3456315	total: 18.2s	remaining: 1m 1s
57:	learn: 9689.3890215	total: 18.5s	remaining: 1m 1s
58:	learn: 9494.0277390	total: 18.9s	remaining: 1m 1s
59:	learn: 9309.7492372	total: 19.3s	remaining: 1m 1s
60:	learn: 9207.0626963	total: 19.6s	remaining: 1m
61:	learn: 9036.4994089	total: 20s	remaining: 1m
62:	learn: 8850.9072538	total: 20.3s	remaining: 1m
63:	learn: 8704.7714337	total: 20.6s	remaining: 59.8s
64:	learn: 8630.1765875	total: 20.8s	remaining: 59.3s
65:	learn: 8492.2416137	total: 21.1s	remaining: 59s
66:	learn: 8329.8373338	total: 21.5s	remaining: 58.6s
67:	learn: 8194.6308940	total: 21.7s	remaining: 58.1s
68:	learn: 8077.6752154	total: 22s	remaining: 57.6s
69:	learn: 7963.4878878	total: 22.3s	remaining: 57.2s
70:	learn: 7837.6830471	total: 22.5s	remaining: 56.8s
71:	learn: 7771.0235320	total: 22.8s	remaini

205:	learn: 1330.2400392	total: 51.1s	remaining: 10.9s
206:	learn: 1307.8245780	total: 51.2s	remaining: 10.6s
207:	learn: 1294.3930566	total: 51.4s	remaining: 10.4s
208:	learn: 1284.9967678	total: 51.6s	remaining: 10.1s
209:	learn: 1270.1907204	total: 51.9s	remaining: 9.88s
210:	learn: 1253.2934256	total: 52s	remaining: 9.62s
211:	learn: 1229.2293995	total: 52.3s	remaining: 9.37s
212:	learn: 1208.7755533	total: 52.5s	remaining: 9.11s
213:	learn: 1189.3669920	total: 52.7s	remaining: 8.86s
214:	learn: 1174.7636286	total: 52.8s	remaining: 8.6s
215:	learn: 1161.7915598	total: 53s	remaining: 8.34s
216:	learn: 1149.5575468	total: 53.2s	remaining: 8.09s
217:	learn: 1130.1135496	total: 53.4s	remaining: 7.83s
218:	learn: 1119.5217506	total: 53.5s	remaining: 7.58s
219:	learn: 1101.8394022	total: 53.7s	remaining: 7.33s
220:	learn: 1090.0442893	total: 53.9s	remaining: 7.07s
221:	learn: 1077.7922076	total: 54.1s	remaining: 6.82s
222:	learn: 1066.8386645	total: 54.2s	remaining: 6.57s
223:	learn: 105

107:	learn: 4997.9811284	total: 21.4s	remaining: 28.1s
108:	learn: 4924.9036444	total: 21.6s	remaining: 28s
109:	learn: 4824.1082461	total: 21.8s	remaining: 27.8s
110:	learn: 4731.2130845	total: 22s	remaining: 27.6s
111:	learn: 4658.5841857	total: 22.2s	remaining: 27.4s
112:	learn: 4556.8005196	total: 22.5s	remaining: 27.2s
113:	learn: 4550.8065671	total: 22.7s	remaining: 27.1s
114:	learn: 4474.1198034	total: 22.9s	remaining: 26.9s
115:	learn: 4407.1883575	total: 23.1s	remaining: 26.6s
116:	learn: 4342.8993466	total: 23.3s	remaining: 26.4s
117:	learn: 4295.0617801	total: 23.5s	remaining: 26.3s
118:	learn: 4239.9666354	total: 23.7s	remaining: 26.1s
119:	learn: 4233.0055489	total: 23.9s	remaining: 25.9s
120:	learn: 4226.6093631	total: 24.1s	remaining: 25.7s
121:	learn: 4184.7438410	total: 24.3s	remaining: 25.5s
122:	learn: 4120.0871465	total: 24.5s	remaining: 25.2s
123:	learn: 4033.3835350	total: 24.6s	remaining: 25s
124:	learn: 3970.3721906	total: 24.8s	remaining: 24.8s
125:	learn: 3965

8:	learn: 35729.2548757	total: 1.53s	remaining: 40.9s
9:	learn: 33477.9932203	total: 1.72s	remaining: 41.3s
10:	learn: 31366.9696540	total: 1.93s	remaining: 41.9s
11:	learn: 29481.8854779	total: 2.12s	remaining: 42s
12:	learn: 28010.4887288	total: 2.31s	remaining: 42.2s
13:	learn: 26732.8582928	total: 2.5s	remaining: 42.2s
14:	learn: 25240.5788172	total: 2.69s	remaining: 42.2s
15:	learn: 24114.7837462	total: 2.87s	remaining: 42s
16:	learn: 23156.3115190	total: 3.07s	remaining: 42.1s
17:	learn: 22219.2187058	total: 3.25s	remaining: 41.9s
18:	learn: 21405.5251768	total: 3.45s	remaining: 42s
19:	learn: 20705.0819240	total: 3.65s	remaining: 41.9s
20:	learn: 19993.9854460	total: 3.84s	remaining: 41.9s
21:	learn: 19456.3990648	total: 4.03s	remaining: 41.8s
22:	learn: 18921.4842627	total: 4.22s	remaining: 41.6s
23:	learn: 18407.1069296	total: 4.42s	remaining: 41.6s
24:	learn: 18032.8277985	total: 4.61s	remaining: 41.5s
25:	learn: 17540.8651517	total: 4.79s	remaining: 41.3s
26:	learn: 17125.88

160:	learn: 2254.8237830	total: 29.5s	remaining: 16.3s
161:	learn: 2211.9575682	total: 29.7s	remaining: 16.1s
162:	learn: 2180.8827666	total: 29.9s	remaining: 15.9s
163:	learn: 2146.1460157	total: 30s	remaining: 15.8s
164:	learn: 2142.7272733	total: 30.2s	remaining: 15.6s
165:	learn: 2096.4262405	total: 30.4s	remaining: 15.4s
166:	learn: 2066.4263625	total: 30.5s	remaining: 15.2s
167:	learn: 2030.3242892	total: 30.7s	remaining: 15s
168:	learn: 2027.2719943	total: 30.9s	remaining: 14.8s
169:	learn: 1990.2228862	total: 31.1s	remaining: 14.6s
170:	learn: 1970.5033537	total: 31.2s	remaining: 14.4s
171:	learn: 1948.8304058	total: 31.4s	remaining: 14.2s
172:	learn: 1917.2472495	total: 31.6s	remaining: 14.1s
173:	learn: 1888.1652655	total: 31.8s	remaining: 13.9s
174:	learn: 1868.7428633	total: 31.9s	remaining: 13.7s
175:	learn: 1845.1058622	total: 32.1s	remaining: 13.5s
176:	learn: 1812.6492806	total: 32.3s	remaining: 13.3s
177:	learn: 1787.0147052	total: 32.5s	remaining: 13.1s
178:	learn: 17

62:	learn: 9117.0227110	total: 11.2s	remaining: 33.4s
63:	learn: 9011.7777612	total: 11.4s	remaining: 33.2s
64:	learn: 8842.4305714	total: 11.6s	remaining: 33s
65:	learn: 8723.6448089	total: 11.8s	remaining: 32.8s
66:	learn: 8552.5889609	total: 11.9s	remaining: 32.6s
67:	learn: 8509.7073751	total: 12.1s	remaining: 32.4s
68:	learn: 8337.2786589	total: 12.3s	remaining: 32.2s
69:	learn: 8278.8722386	total: 12.4s	remaining: 32s
70:	learn: 8156.3724347	total: 12.6s	remaining: 31.8s
71:	learn: 7995.3896927	total: 12.8s	remaining: 31.6s
72:	learn: 7848.7039391	total: 12.9s	remaining: 31.4s
73:	learn: 7734.0751462	total: 13.1s	remaining: 31.2s
74:	learn: 7635.8926556	total: 13.3s	remaining: 31s
75:	learn: 7578.8102184	total: 13.5s	remaining: 30.8s
76:	learn: 7495.3208589	total: 13.6s	remaining: 30.6s
77:	learn: 7395.7760533	total: 13.8s	remaining: 30.4s
78:	learn: 7278.3044266	total: 14s	remaining: 30.2s
79:	learn: 7192.4826746	total: 14.1s	remaining: 30s
80:	learn: 7179.4870116	total: 14.3s	r

214:	learn: 1122.0765290	total: 38.2s	remaining: 6.21s
215:	learn: 1108.6903168	total: 38.4s	remaining: 6.04s
216:	learn: 1081.3903987	total: 38.5s	remaining: 5.86s
217:	learn: 1069.8358018	total: 38.7s	remaining: 5.68s
218:	learn: 1048.9319768	total: 38.9s	remaining: 5.5s
219:	learn: 1046.9798677	total: 39.1s	remaining: 5.33s
220:	learn: 1024.6292782	total: 39.3s	remaining: 5.15s
221:	learn: 1023.5442766	total: 39.4s	remaining: 4.97s
222:	learn: 1001.7182626	total: 39.6s	remaining: 4.8s
223:	learn: 1000.6276103	total: 39.9s	remaining: 4.63s
224:	learn: 981.7225938	total: 40.1s	remaining: 4.45s
225:	learn: 970.1411790	total: 40.2s	remaining: 4.27s
226:	learn: 969.1669173	total: 40.4s	remaining: 4.09s
227:	learn: 954.8164004	total: 40.6s	remaining: 3.92s
228:	learn: 944.4176503	total: 40.8s	remaining: 3.74s
229:	learn: 929.0112842	total: 41s	remaining: 3.56s
230:	learn: 923.2395491	total: 41.2s	remaining: 3.38s
231:	learn: 904.2348271	total: 41.3s	remaining: 3.21s
232:	learn: 891.323103

116:	learn: 3976.7366925	total: 22.8s	remaining: 25.9s
117:	learn: 3910.0069110	total: 23s	remaining: 25.7s
118:	learn: 3851.5834357	total: 23.2s	remaining: 25.6s
119:	learn: 3792.5985551	total: 23.4s	remaining: 25.4s
120:	learn: 3734.7683041	total: 23.6s	remaining: 25.2s
121:	learn: 3675.2558184	total: 23.8s	remaining: 25s
122:	learn: 3633.2299671	total: 24s	remaining: 24.8s
123:	learn: 3607.4888094	total: 24.2s	remaining: 24.6s
124:	learn: 3550.2053301	total: 24.4s	remaining: 24.4s
125:	learn: 3498.7191704	total: 24.6s	remaining: 24.2s
126:	learn: 3430.5822281	total: 24.8s	remaining: 24s
127:	learn: 3364.0367658	total: 25s	remaining: 23.8s
128:	learn: 3289.4158175	total: 25.2s	remaining: 23.6s
129:	learn: 3232.7110698	total: 25.4s	remaining: 23.4s
130:	learn: 3190.5227051	total: 25.6s	remaining: 23.2s
131:	learn: 3149.3202388	total: 25.8s	remaining: 23s
132:	learn: 3089.2663621	total: 26s	remaining: 22.8s
133:	learn: 3056.0394769	total: 26.2s	remaining: 22.7s
134:	learn: 3022.6125737

17:	learn: 22974.1313573	total: 3.3s	remaining: 42.6s
18:	learn: 22181.1605934	total: 3.49s	remaining: 42.5s
19:	learn: 21357.6252660	total: 3.69s	remaining: 42.5s
20:	learn: 20615.8907284	total: 3.88s	remaining: 42.3s
21:	learn: 19917.8560119	total: 4.06s	remaining: 42.1s
22:	learn: 19385.5136227	total: 4.25s	remaining: 41.9s
23:	learn: 18868.2468207	total: 4.43s	remaining: 41.8s
24:	learn: 18317.8695016	total: 4.63s	remaining: 41.7s
25:	learn: 17842.8813862	total: 4.85s	remaining: 41.8s
26:	learn: 17453.2589571	total: 5.05s	remaining: 41.7s
27:	learn: 16960.4767887	total: 5.24s	remaining: 41.5s
28:	learn: 16585.5343360	total: 5.44s	remaining: 41.4s
29:	learn: 16082.6828250	total: 5.65s	remaining: 41.4s
30:	learn: 15629.3441829	total: 5.87s	remaining: 41.5s
31:	learn: 15345.7050252	total: 6.07s	remaining: 41.4s
32:	learn: 15066.5606788	total: 6.29s	remaining: 41.3s
33:	learn: 14753.7609543	total: 6.49s	remaining: 41.2s
34:	learn: 14524.0487746	total: 6.59s	remaining: 40.5s
35:	learn: 

168:	learn: 1919.4526208	total: 32.8s	remaining: 15.7s
169:	learn: 1895.1258936	total: 33s	remaining: 15.5s
170:	learn: 1861.6654649	total: 33.2s	remaining: 15.3s
171:	learn: 1859.5204724	total: 33.3s	remaining: 15.1s
172:	learn: 1831.8485927	total: 33.5s	remaining: 14.9s
173:	learn: 1795.9378338	total: 33.7s	remaining: 14.7s
174:	learn: 1765.9148357	total: 33.8s	remaining: 14.5s
175:	learn: 1739.3257481	total: 34s	remaining: 14.3s
176:	learn: 1715.1692415	total: 34.2s	remaining: 14.1s
177:	learn: 1683.0455379	total: 34.4s	remaining: 13.9s
178:	learn: 1661.5835685	total: 34.5s	remaining: 13.7s
179:	learn: 1636.1421958	total: 34.7s	remaining: 13.5s
180:	learn: 1610.0384989	total: 34.9s	remaining: 13.3s
181:	learn: 1594.4801330	total: 35.1s	remaining: 13.1s
182:	learn: 1567.0344701	total: 35.2s	remaining: 12.9s
183:	learn: 1539.7965251	total: 35.4s	remaining: 12.7s
184:	learn: 1530.4866290	total: 35.6s	remaining: 12.5s
185:	learn: 1509.8776585	total: 35.7s	remaining: 12.3s
186:	learn: 14

69:	learn: 8932.1797920	total: 12.9s	remaining: 33.1s
70:	learn: 8905.7379372	total: 13.1s	remaining: 33s
71:	learn: 8777.0485032	total: 13.3s	remaining: 32.8s
72:	learn: 8590.3297099	total: 13.4s	remaining: 32.6s
73:	learn: 8543.3119643	total: 13.6s	remaining: 32.4s
74:	learn: 8516.3430986	total: 13.8s	remaining: 32.2s
75:	learn: 8347.7886658	total: 14s	remaining: 32s
76:	learn: 8198.3334396	total: 14.1s	remaining: 31.8s
77:	learn: 8020.9962630	total: 14.3s	remaining: 31.5s
78:	learn: 7921.1040345	total: 14.5s	remaining: 31.3s
79:	learn: 7798.4959822	total: 14.7s	remaining: 31.1s
80:	learn: 7640.4906711	total: 14.8s	remaining: 30.9s
81:	learn: 7527.3818753	total: 15s	remaining: 30.7s
82:	learn: 7447.3026696	total: 15.2s	remaining: 30.5s
83:	learn: 7376.0949795	total: 15.3s	remaining: 30.3s
84:	learn: 7359.8566752	total: 15.5s	remaining: 30.1s
85:	learn: 7273.7646485	total: 15.7s	remaining: 29.9s
86:	learn: 7182.1553412	total: 15.9s	remaining: 29.7s
87:	learn: 7149.9715481	total: 16s	r

220:	learn: 1103.5310659	total: 40.1s	remaining: 5.26s
221:	learn: 1087.1800949	total: 40.3s	remaining: 5.08s
222:	learn: 1070.0124692	total: 40.5s	remaining: 4.9s
223:	learn: 1067.0169161	total: 40.7s	remaining: 4.72s
224:	learn: 1057.1830533	total: 40.9s	remaining: 4.54s
225:	learn: 1034.2407391	total: 41s	remaining: 4.36s
226:	learn: 1021.8580058	total: 41.2s	remaining: 4.18s
227:	learn: 1014.9021644	total: 41.5s	remaining: 4s
228:	learn: 1005.5826779	total: 41.7s	remaining: 3.82s
229:	learn: 991.9478689	total: 41.9s	remaining: 3.64s
230:	learn: 979.8779233	total: 42.1s	remaining: 3.46s
231:	learn: 969.1407221	total: 42.3s	remaining: 3.28s
232:	learn: 944.1081717	total: 42.4s	remaining: 3.1s
233:	learn: 930.0861452	total: 42.6s	remaining: 2.91s
234:	learn: 923.2106548	total: 42.8s	remaining: 2.73s
235:	learn: 906.8207333	total: 43s	remaining: 2.55s
236:	learn: 899.8253867	total: 43.2s	remaining: 2.37s
237:	learn: 881.9515962	total: 43.5s	remaining: 2.19s
238:	learn: 873.4445891	tota

122:	learn: 3781.0552129	total: 22.3s	remaining: 23s
123:	learn: 3720.9441428	total: 22.5s	remaining: 22.9s
124:	learn: 3668.1632797	total: 22.7s	remaining: 22.7s
125:	learn: 3593.3735545	total: 22.9s	remaining: 22.6s
126:	learn: 3513.2072020	total: 23.1s	remaining: 22.4s
127:	learn: 3427.7780434	total: 23.3s	remaining: 22.2s
128:	learn: 3364.4084761	total: 23.5s	remaining: 22s
129:	learn: 3317.2197679	total: 23.7s	remaining: 21.9s
130:	learn: 3274.9050922	total: 23.9s	remaining: 21.7s
131:	learn: 3230.8245029	total: 24.1s	remaining: 21.5s
132:	learn: 3168.0675856	total: 24.3s	remaining: 21.3s
133:	learn: 3096.1914859	total: 24.4s	remaining: 21.2s
134:	learn: 3055.4906949	total: 24.6s	remaining: 21s
135:	learn: 3011.6187490	total: 24.8s	remaining: 20.8s
136:	learn: 2955.7613257	total: 25s	remaining: 20.6s
137:	learn: 2919.9967297	total: 25.2s	remaining: 20.5s
138:	learn: 2868.3403325	total: 25.4s	remaining: 20.3s
139:	learn: 2863.2842958	total: 25.6s	remaining: 20.1s
140:	learn: 2854.4

24:	learn: 18296.2751248	total: 4.42s	remaining: 39.8s
25:	learn: 17865.7072992	total: 4.59s	remaining: 39.5s
26:	learn: 17394.4256503	total: 4.76s	remaining: 39.3s
27:	learn: 16918.4956173	total: 4.93s	remaining: 39.1s
28:	learn: 16516.4497021	total: 5.11s	remaining: 38.9s
29:	learn: 16198.9015113	total: 5.27s	remaining: 38.7s
30:	learn: 15807.7253272	total: 5.47s	remaining: 38.6s
31:	learn: 15460.2472936	total: 5.64s	remaining: 38.4s
32:	learn: 15227.3623715	total: 5.81s	remaining: 38.2s
33:	learn: 14958.8401163	total: 5.98s	remaining: 38s
34:	learn: 14634.4279632	total: 6.15s	remaining: 37.8s
35:	learn: 14412.3619308	total: 6.33s	remaining: 37.6s
36:	learn: 14215.5569939	total: 6.51s	remaining: 37.5s
37:	learn: 13961.0163908	total: 6.68s	remaining: 37.3s
38:	learn: 13762.7410438	total: 6.85s	remaining: 37.1s
39:	learn: 13652.7270618	total: 7.02s	remaining: 36.8s
40:	learn: 13558.9708922	total: 7.19s	remaining: 36.7s
41:	learn: 13370.8123704	total: 7.37s	remaining: 36.5s
42:	learn: 1

175:	learn: 2317.7615629	total: 32s	remaining: 13.5s
176:	learn: 2296.6846938	total: 32.2s	remaining: 13.3s
177:	learn: 2294.5656211	total: 32.4s	remaining: 13.1s
178:	learn: 2260.6325279	total: 32.6s	remaining: 12.9s
179:	learn: 2219.2368844	total: 32.8s	remaining: 12.7s
180:	learn: 2199.6255528	total: 33s	remaining: 12.6s
181:	learn: 2176.0963257	total: 33.1s	remaining: 12.4s
182:	learn: 2166.3822862	total: 33.3s	remaining: 12.2s
183:	learn: 2141.2794536	total: 33.5s	remaining: 12s
184:	learn: 2107.2469339	total: 33.7s	remaining: 11.8s
185:	learn: 2093.7618704	total: 33.8s	remaining: 11.6s
186:	learn: 2071.4037990	total: 34s	remaining: 11.5s
187:	learn: 2037.2469807	total: 34.2s	remaining: 11.3s
188:	learn: 2012.4299425	total: 34.4s	remaining: 11.1s
189:	learn: 1990.1655918	total: 34.5s	remaining: 10.9s
190:	learn: 1963.4554447	total: 34.7s	remaining: 10.7s
191:	learn: 1940.3529542	total: 34.9s	remaining: 10.5s
192:	learn: 1918.6487230	total: 35.1s	remaining: 10.4s
193:	learn: 1889.2

76:	learn: 7311.6931900	total: 13.6s	remaining: 30.5s
77:	learn: 7253.7302985	total: 13.8s	remaining: 30.3s
78:	learn: 7172.1221044	total: 13.9s	remaining: 30.2s
79:	learn: 7016.1549081	total: 14.1s	remaining: 30s
80:	learn: 6885.8177286	total: 14.3s	remaining: 29.9s
81:	learn: 6720.4013292	total: 14.5s	remaining: 29.7s
82:	learn: 6682.4847607	total: 14.7s	remaining: 29.6s
83:	learn: 6612.4451074	total: 14.9s	remaining: 29.4s
84:	learn: 6489.3724689	total: 15.1s	remaining: 29.3s
85:	learn: 6357.5471418	total: 15.3s	remaining: 29.1s
86:	learn: 6245.5459975	total: 15.4s	remaining: 28.9s
87:	learn: 6124.1993153	total: 15.6s	remaining: 28.8s
88:	learn: 6111.4254973	total: 15.8s	remaining: 28.7s
89:	learn: 5988.3406699	total: 16s	remaining: 28.5s
90:	learn: 5937.6999609	total: 16.2s	remaining: 28.3s
91:	learn: 5876.3697826	total: 16.4s	remaining: 28.1s
92:	learn: 5742.5139904	total: 16.6s	remaining: 28s
93:	learn: 5729.9328635	total: 16.8s	remaining: 27.8s
94:	learn: 5665.7336763	total: 16.

227:	learn: 1047.5306182	total: 40.6s	remaining: 3.92s
228:	learn: 1032.6143480	total: 40.8s	remaining: 3.74s
229:	learn: 1024.6402439	total: 40.9s	remaining: 3.56s
230:	learn: 1016.6267631	total: 41.1s	remaining: 3.38s
231:	learn: 995.5256138	total: 41.3s	remaining: 3.2s
232:	learn: 994.8804826	total: 41.4s	remaining: 3.02s
233:	learn: 994.4259446	total: 41.6s	remaining: 2.84s
234:	learn: 971.9717005	total: 41.8s	remaining: 2.67s
235:	learn: 953.2298815	total: 41.9s	remaining: 2.49s
236:	learn: 946.1165331	total: 42.1s	remaining: 2.31s
237:	learn: 941.8320984	total: 42.3s	remaining: 2.13s
238:	learn: 924.4817605	total: 42.6s	remaining: 1.96s
239:	learn: 912.4820207	total: 42.7s	remaining: 1.78s
240:	learn: 902.2062204	total: 42.9s	remaining: 1.6s
241:	learn: 890.4150746	total: 43.1s	remaining: 1.43s
242:	learn: 875.3123076	total: 43.3s	remaining: 1.25s
243:	learn: 867.8282702	total: 43.5s	remaining: 1.07s
244:	learn: 860.9531921	total: 43.7s	remaining: 891ms
245:	learn: 845.6624101	to