In [6]:
%%javascript
$('<div id="toc"></div>').css({position: 'fixed', top: '120px', left: 0}).appendTo(document.body);
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js');

<IPython.core.display.Javascript object>

time: 2.78 ms


# Libraries

In [7]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

import gc 

from jupyterthemes import jtplot
jtplot.style()

import xgboost as xg
from xgboost import XGBModel
from xgboost import plot_importance
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, ShuffleSplit
from sklearn.metrics import mean_absolute_error
from sklearn.feature_selection import RFE

from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.metrics import r2_score

from sklearn.preprocessing import LabelEncoder
from sklearn import cross_validation

from catboost import CatBoostRegressor
from tqdm import tqdm

%matplotlib inline
%load_ext autotime
%load_ext line_profiler
%matplotlib inline 

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
time: 15.9 ms


# Processing

In [8]:
def plot_data(test, pred, sample, title, width=40, height=10, linewidth=0.5, color1='white', color2='orange'):
    """ Plotting method. """
    fig = plt.figure(figsize=(width, height))
    plt.plot(pred[:sample], color=color1, zorder=4, linewidth=linewidth, label='%s Prediction'%(title))
    plt.plot(test[:sample], color=color2, zorder=3, linewidth=linewidth, label='%s True Data'%(title))
    plt.title = title
    plt.legend()

# Frequency count
def get_frequency(data):
    # Gets the frequency of a column's values in 'data'. Pass on a series.
    vals = pd.merge(data.to_frame(), data.value_counts().reset_index(), 
                    how='left', left_on=data.to_frame().columns[0], right_on='index').iloc[:, -1:].values
    return vals
  
def time_data(data):
    data['transactiondate'] = pd.to_datetime(data['transactiondate'])
    data['day_of_week']     = data['transactiondate'].dt.dayofweek
    data['month_of_year']   = data['transactiondate'].dt.month
    data['quarter']         = data['transactiondate'].dt.quarter
    data['is_weekend']      = (data['day_of_week'] < 5).astype(int)
    data.drop('transactiondate', axis=1, inplace=True)
    
    print('Added time data')
    print('........')
    
    return data


def column_excluder(data, missing_perc_thresh=0.98):
    # Quick clean from https://www.kaggle.com/seesee/concise-catboost-starter-ensemble-plb-0-06435
    
    exclude_missing = []
    exclude_unique = []
    num_rows = data.shape[0]
    for c in data.columns:
        num_missing = data[c].isnull().sum()
        if num_missing == 0:
            continue
        missing_frac = num_missing / float(num_rows)
        if missing_frac > missing_perc_thresh:
            exclude_missing.append(c)

        num_uniques = len(data[c].unique())
        if data[c].isnull().sum() != 0:
            num_uniques -= 1
        if num_uniques == 1:
            exclude_unique.append(c)
            
    to_exclude = list(set(exclude_missing + exclude_unique))
    
    print('Excluded columns:')
    print(to_exclude)
    print('........')
    
    return to_exclude

def categorical_features(data):
    # Quick categories from https://www.kaggle.com/seesee/concise-catboost-starter-ensemble-plb-0-06435
        
    cat_feature_inds = []
    cat_unique_thresh = 1000
    for i, c in enumerate(data.columns):
        num_uniques = len(data[c].unique())
        if num_uniques < cat_unique_thresh \
            and not 'sqft'   in c \
            and not 'cnt'    in c \
            and not 'nbr'    in c \
            and not 'number' in c:
            cat_feature_inds.append(i)

    print("Categorical features:")
    print([data.columns[ind] for ind in cat_feature_inds])
    print('........')
    
    return cat_feature_inds


def complex_features(data):
    # Gets counts, label encoding and frequency estimates.
    
    # Frequency of occurances | length of codes | check if * is present
    data['propertyzoningdesc_frq'] = get_frequency(data['propertyzoningdesc'])
    data['propertyzoningdesc_len'] = data['propertyzoningdesc'].apply(lambda x: len(x) if pd.notnull(x) else x)
    #transactions_shuffled['propertyzoningdesc_str'] = transactions_shuffled['propertyzoningdesc'].apply(lambda x: (1 if '*' in str(x) else 0) if pd.notnull(x) else x)

    # Label encoding | length of code
    #transactions_shuffled['propertycountylandusecode_enc'] = transactions_shuffled[['propertycountylandusecode']].astype(str).apply(LabelEncoder().fit_transform)
    #transactions_shuffled['propertycountylandusecode_len'] = transactions_shuffled['propertycountylandusecode'].apply(lambda x: x if pd.isnull(x) else len(x))

    # Zip code area extraction
    data['regionidzip_ab']  = data['regionidzip'].apply(lambda x: x if pd.isnull(x) else str(x)[:2]).astype(float)
    data['regionidzip_abc'] = data['regionidzip'].apply(lambda x: x if pd.isnull(x) else str(x)[:3]).astype(float)

    # Region neighbourhood area extraction
    data['regionidneighborhood_ab'] = data['regionidneighborhood'].apply(lambda x: str(x)[:2] if pd.notnull(x) else x).astype(float)

    # Rawcensustractandblock transformed
    data['code_fips_cnt']  = get_frequency(data['rawcensustractandblock'].apply(lambda x: str(x)[:4]))
    data['code_tract_cnt'] = get_frequency(data['rawcensustractandblock'].apply(lambda x: str(x)[4:11]))
    data['code_block_cnt'] = get_frequency(data['rawcensustractandblock'].apply(lambda x: str(x)[11:]))
    data.drop('rawcensustractandblock', axis=1, inplace=True)
    
    # Encode string values
    data[['propertycountylandusecode', 'propertyzoningdesc']] = data[['propertycountylandusecode', 'propertyzoningdesc']].astype(str).apply(LabelEncoder().fit_transform)
    
    print('Generating complex features')
    print('........')
    
    return data

time: 165 ms


In [9]:
models = {}

time: 515 µs


# Data Load

In [10]:
seed = 11
np.random.seed(seed)
drop_tax = False

train2016 = pd.read_csv("../Data/train_2016_v2.csv", parse_dates=["transactiondate"], low_memory=False)
train2017 = pd.read_csv('../Data/train_2017.csv', parse_dates=['transactiondate'], low_memory=False)

if drop_tax:
    # Avoids external bias
    print('Removing tax features from 2017')
    train2017.iloc[:, train2017.columns.str.startswith('tax')] = np.nan

properties2016 = pd.read_csv('../Data/properties_2016.csv', low_memory = False)
properties2017 = pd.read_csv('../Data/properties_2017.csv', low_memory = False)

sample = pd.read_csv('../Data/sample_submission.csv')

transactions2016 = pd.merge(train2016, properties2016, how='left', on=['parcelid']).sample(frac=1)
transactions2017 = pd.merge(train2017, properties2017, how='left', on=['parcelid']).sample(frac=1)
transactions = pd.concat([transactions2016, transactions2017], axis = 0)

#transactions[['propertycountylandusecode', 'propertyzoningdesc']] = transactions[['propertycountylandusecode', 'propertyzoningdesc']].astype(str).apply(LabelEncoder().fit_transform)
transactions['taxdelinquencyflag'].replace('Y', 1, inplace=True)
    
# Clean columns
to_drop = column_excluder(transactions)
transactions.drop(to_drop, axis=1, inplace=True)

# Time data
transactions = time_data(transactions)
transactions = complex_features(transactions)

x_all = transactions.drop(['parcelid', 'propertyzoningdesc', 'propertycountylandusecode', 'fireplacecnt'], axis=1)
y_all = transactions['logerror']
#x_all.drop(['hashottuborspa' 'taxdelinquencyflag' 'fireplaceflag'], axis=1)
#x_all['hashottuborspa'].astype(float, inplace=True)

#x_all.fillna(-1, inplace=True)#.astype(str)#.apply(LabelEncoder().fit_transform)

x_all.fillna(x_all.median(),inplace = True)

ratio = 0.0
x_train, x_valid, y_train, y_valid = train_test_split(x_all, y_all, test_size=ratio)

x_train_label = x_train['logerror'].copy()
x_train_data = x_train.drop(['logerror'], axis=1).copy()

# Drop outliers 
x_train = x_train[(x_train['logerror'] > -0.4) & (x_train['logerror'] < 0.419)]
y_train = x_train['logerror']
x_train.drop('logerror', axis=1, inplace=True)
x_valid.drop('logerror', axis=1, inplace=True)

cat_index = categorical_features(x_train)
best_columns = x_train.columns

y_mean = np.mean(y_train)

del x_all, y_all, transactions, transactions2016, transactions2017, properties2017, properties2016, train2016, train2017
gc.collect()

Excluded columns:
['hashottuborspa', 'finishedsquarefeet6', 'basementsqft', 'taxdelinquencyflag', 'buildingclasstypeid', 'pooltypeid7', 'fireplaceflag', 'typeconstructiontypeid', 'pooltypeid10', 'taxdelinquencyyear', 'pooltypeid2', 'poolcnt', 'architecturalstyletypeid', 'poolsizesum', 'decktypeid', 'finishedsquarefeet13', 'yardbuildingsqft26', 'storytypeid']
........
Added time data
........
Generating complex features
........
Categorical features:
['airconditioningtypeid', 'buildingqualitytypeid', 'fips', 'heatingorsystemtypeid', 'propertylandusetypeid', 'regionidcity', 'regionidcounty', 'regionidneighborhood', 'regionidzip', 'yearbuilt', 'assessmentyear', 'day_of_week', 'month_of_year', 'quarter', 'is_weekend', 'propertyzoningdesc_frq', 'propertyzoningdesc_len', 'regionidzip_ab', 'regionidzip_abc', 'regionidneighborhood_ab']
........
time: 1min 2s


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


# OLS

In [11]:
# OLS
model_lr = LinearRegression()
model_lr.fit(x_train_data, x_train_label)
#y_pred_lr_valid = model_lr.predict(x_valid)
#y_pred_lr_train = model_lr.predict(x_train_data)
models['LinearRegression'] = model_lr

# Make predictions on both test and validation with OLS and BR
#predicted_mae_lr_valid = mean_absolute_error(y_valid, y_pred_lr_valid)
#predicted_mae_lr_train = mean_absolute_error(x_train_label, y_pred_lr_train)

#print('OLS MAE LR Valid:', predicted_mae_lr_valid, 'Train:', predicted_mae_lr_train)

scores = cross_validation.cross_val_score(model_lr, x_train_data, x_train_label, cv=5, scoring='neg_mean_absolute_error', verbose=1)
print("%s MAE: %0.5f (+/- %0.5f)" % (model_lr.__class__.__name__, scores.mean(), scores.std() * 2))

#del y_pred_lr_valid
#del y_pred_lr_train

LinearRegression MAE: -0.06863 (+/- 0.00137)
time: 643 ms


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.5s finished


In [12]:
model_lr.fit(x_train_data, x_train_label)
print(mean_absolute_error(x_train_label, model_lr.predict(x_train_data)))

0.0685395047302
time: 141 ms


In [13]:
model_lr.fit(x_train, y_train)
print(mean_absolute_error(x_train_label, model_lr.predict(x_train_data)))

0.0679043276933
time: 90.8 ms


# Bayesian Ridge

In [14]:
# BayesianRidge Regression
model_br = BayesianRidge(compute_score=True)
#model_br.fit(x_train, y_train)
#y_pred_br_valid = model_br.predict(x_valid)
#y_pred_br_train = model_br.predict(x_train_data)
models['BayesianRidge'] = model_br

#predicted_mae_br_valid = mean_absolute_error(y_valid,       y_pred_br_valid)
#predicted_mae_br_train = mean_absolute_error(x_train_label, y_pred_br_train)

#print('BR MAE BayesianRidge Valid: %s \nTrain: %s' % (predicted_mae_br_valid, predicted_mae_br_train))

scores = cross_validation.cross_val_score(model_br, x_train_data, x_train_label, cv=5, scoring='neg_mean_absolute_error', verbose=1)
print("%s MAE: %0.5f (+/- %0.5f)" % (model_br.__class__.__name__, scores.mean(), scores.std() * 2))


#del y_pred_br_valid
#del y_pred_br_train

BayesianRidge MAE: -0.06848 (+/- 0.00140)
time: 731 ms


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.7s finished


In [15]:
model_br.fit(x_train_data, x_train_label)
print(mean_absolute_error(x_train_label, model_lr.predict(x_train_data)))

0.0679043276933
time: 199 ms


In [16]:
model_br.fit(x_train, y_train)
print(mean_absolute_error(x_train_label, model_lr.predict(x_train_data)))

0.0679043276933
time: 158 ms


# Random Forest

In [17]:
from sklearn.ensemble import RandomForestRegressor

model_rf = RandomForestRegressor(n_jobs=1, random_state=2016, verbose=1, n_estimators=500, max_features=12)
#model_rf.fit(x_train, y_train)
#y_pred_rf_valid = model_rf.predict(x_valid)
#y_pred_rf_train = model_rf.predict(x_train_data)
models['RandomForest'] = model_rf

#predicted_mae_rf_valid = mean_absolute_error(y_valid,       y_pred_rf_valid)
#predicted_mae_rf_train = mean_absolute_error(x_train_label, y_pred_rf_train)

#print('BR MAE RandomForest Valid: %s \nTrain: %s' % (predicted_mae_rf_valid, predicted_mae_rf_train))

#scores = cross_validation.cross_val_score(model_rf, x_train, y_train, cv=5, scoring='neg_mean_absolute_error', verbose=1)
#print("%s MAE: %0.5f (+/- %0.5f)" % (model_rf.__class__.__name__, scores.mean(), scores.std() * 2))

#del y_pred_rf_train
#del y_pred_rf_valid

time: 39.5 ms


# Extra Trees

In [18]:
from sklearn.ensemble import ExtraTreesRegressor

model_et = ExtraTreesRegressor(
        n_jobs=1, random_state=2016, verbose=1,
        n_estimators=500, max_features=12)

#model_et.fit(x_train, y_train)
#y_pred_et_valid = model_et.predict(x_valid)
#y_pred_et_train = model_et.predict(x_train_data)
models['ExtraTrees'] = model_et

#predicted_mae_et_valid = mean_absolute_error(y_valid,       y_pred_et_valid)
#predicted_mae_et_train = mean_absolute_error(x_train_label, y_pred_et_train)

#print('BR MAE ExtraTrees Valid: %s \nTrain: %s' % (predicted_mae_et_valid, predicted_mae_et_train))

#scores = cross_validation.cross_val_score(model_et, x_train, y_train, cv=5, scoring='neg_mean_absolute_error', verbose=1)
#print("%s MAE: %0.5f (+/- %0.5f)" % (model_et.__class__.__name__, scores.mean(), scores.std() * 2))

#del y_pred_et_valid
#del y_pred_et_train

time: 5.07 ms


In [19]:
gc.collect()

503

time: 38.5 ms


# AdaBoost

In [21]:
from sklearn.ensemble import AdaBoostRegressor

model_ab = AdaBoostRegressor()
#model_ab.fit(x_train, y_train)
#y_pred_ab_valid = model_ab.predict(x_valid)
#y_pred_ab_train = model_ab.predict(x_train_data)
models['AdaBoost'] = model_ab

#predicted_mae_ab_valid = mean_absolute_error(y_valid,       y_pred_ab_valid)
#predicted_mae_ab_train = mean_absolute_error(x_train_label, y_pred_ab_train)

#print('BR MAE AdaBoost Valid: %s \nTrain: %s' % (predicted_mae_ab_valid, predicted_mae_ab_train))

#scores = cross_validation.cross_val_score(model_ab, x_train, y_train, cv=5, scoring='neg_mean_absolute_error', verbose=1)
#print("%s MAE: %0.5f (+/- %0.5f)" % (model_ab.__class__.__name__, scores.mean(), scores.std() * 2))

#del y_pred_ab_valid
#del y_pred_ab_train

time: 5.05 ms


# CatBoost

In [None]:
def cat_booster(x_train, y_train, x_valid, y_valid, cat_index, loss='MAE'):
    # Cat booster train and predict
    num_ensembles = 5
    y_pred_valid = 0.0
    y_pred_train = 0.0
    
    print('Initialising CAT Boost Regression')
    for i in tqdm(range(num_ensembles)):
        print('Building ensemble', i)
        # Use CV, tune hyperparameters
        catb = CatBoostRegressor(
                iterations=630, learning_rate=0.03,
                depth=6, l2_leaf_reg=3,
                loss_function=loss,
                eval_metric='MAE',
                random_seed=i)

        catb.fit(x_train, y_train, cat_features=cat_index)

        y_pred_valid += catb.predict(x_valid)
        y_pred_train += catb.predict(x_train)

    y_pred_valid /= num_ensembles
    y_pred_train /= num_ensembles

    print('Train MAE:', mean_absolute_error(y_train, y_pred_train))
    print('Valid MAE:', mean_absolute_error(y_valid, y_pred_valid))
    
    return catb, y_pred_valid

In [None]:
gc.collect()

In [None]:
model_cb, preds = cat_booster(x_train, y_train, x_train_data, x_train_label, cat_index)

print('BR MAE CatBoost Valid: %s' % (mean_absolute_error(y_valid, preds)))


In [None]:
model_cb = CatBoostRegressor(
            iterations=630, learning_rate=0.03,
            depth=6, l2_leaf_reg=3,
            loss_function='MAE',
            eval_metric='MAE')

models['CatBoost'] = model_cb

#scores = cross_validation.cross_val_score(model_cb, x_train, y_train, cv=5, scoring='neg_mean_absolute_error', verbose=1)
#print("%s MAE: %0.5f (+/- %0.5f)" % (model_cb.__class__.__name__, scores.mean(), scores.std() * 2))

#del preds

# GB

In [22]:
from sklearn.ensemble import GradientBoostingRegressor

model_gb = GradientBoostingRegressor(
             random_state=2016, verbose=1,
             n_estimators=500, max_features=12, max_depth=8,
             learning_rate=0.05, subsample=0.8)

#model_gb.fit(x_train, y_train)
#y_pred_gb_valid = model_gb.predict(x_valid)
#y_pred_gb_train = model_gb.predict(x_train_data)
models['GradientBoosting'] = model_gb

#predicted_mae_gb_valid = mean_absolute_error(y_valid,       y_pred_gb_valid)
#predicted_mae_gb_train = mean_absolute_error(x_train_label, y_pred_gb_train)

#print('BR MAE GradientBoosting Valid: %s \nTrain: %s' % (predicted_mae_gb_valid, predicted_mae_gb_train))

#scores = cross_validation.cross_val_score(model_gb, x_train, y_train, cv=5, scoring='neg_mean_absolute_error', verbose=1)
#print("%s MAE: %0.5f (+/- %0.5f)" % (model_gb.__class__.__name__, scores.mean(), scores.std() * 2))

#del y_pred_gb_valid
#del y_pred_gb_train

time: 5.65 ms


# XGB

In [None]:
params_xgb = {
    'max_depth':        5,  # shuld be 0.5 to 1% of the examples
    'subsample':        1,  # Ratio of observations to be used as samples for each tree
    'min_child_weight': 10, # Deals with imbalanced data and prevents overfitting as the value >
    'objective':        'reg:linear',
    'n_estimators':     1000, # Sequential trees to be modelled.
    'eta':              0.1,  # Shrinkage. Typically between 0.1 - 0.2 - learning rate for gradient boost (D:0.3)
    'eval_metric':      'mae',
    'base_score':       y_mean,
}

d_train = xg.DMatrix(x_train, label=y_train, missing=-1)
#d_valid = xg.DMatrix(x_valid, label=y_valid, missing=-1)
xgb_gs = xg.train(params_xgb, d_train, num_boost_round=250, verbose_eval=50)
#models['XGB'] = xgb_gs

#del d_train
#del d_valid

# LightGBM

In [23]:
def light_gbm_folds(x_train, x_valid, y_train, y_valid, params, num_ensembles):
    # Light gbm n ensambles average predictions

    y_pred_valid = 0.0
    y_pred_train = 0.0
    
    d_train = lgb.Dataset(x_train, label=y_train)
    
    print('Initialising Light GBM')
    for i in tqdm(range(num_ensembles)):
        # Use CV, tune hyperparameters
        params['seed'] = i
        model_lgb = lgb.train(params, d_train, 430)
        
        lg_pred_valid = model_lgb.predict(x_valid)
        lg_pred_train = model_lgb.predict(x_train)

    lg_pred_valid /= num_ensembles
    lg_pred_train /= num_ensembles
    
    print('Train MAE:', mean_absolute_error(y_train, lg_pred_train))
    print('Valid MAE:', mean_absolute_error(y_valid, lg_pred_valid))
    
    return model_lgb

time: 8.93 ms


In [24]:
import random
import lightgbm as lgb

params_lg={
    'max_bin'          : 10,
    'learning_rate'    : 0.0021, # shrinkage_rate
    'boosting_type'    : 'gbdt',
    'objective'        : 'regression',
    'metric'           : 'mae',      
    'sub_feature'      : 0.345 ,   
    'bagging_fraction' : 0.85, 
    'bagging_freq'     : 40,
    'num_leaves'       : 512,   # num_leaf
    'min_data'         : 500,   # min_data_in_leaf
    'min_hessian'      : 0.05,  # min_sum_hessian_in_leaf
    'verbose'          : 1
}
d_train = lgb.Dataset(x_train, label=y_train)
model_lgb = lgb.train(params_lg, d_train, 430)

#model_lgb = light_gbm_folds(x_train, x_train_data, y_train, x_train_label, params_lg, num_ensembles=5)
models['LightGBM'] = model_lgb

time: 6.08 s


# DNN

In [25]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from keras.layers import Dropout, BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.layers.noise import GaussianDropout
from keras.optimizers import Adam
from sklearn.preprocessing import Imputer

def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(size, input_dim=size, kernel_initializer='normal', activation='relu'))
    model.add(Dense(size*2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(size, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mae', optimizer=Adam(lr=4e-3, decay=1e-4))
    return model

# define wider model
def wider_model():
    # create model
    model = Sequential()
    model.add(Dense(size*2, input_dim=size, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mae', optimizer=Adam(lr=4e-3, decay=1e-4))
    return model


# define base model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(size, input_dim=size, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mae', optimizer=Adam(lr=4e-3, decay=1e-4))
    return model

def prebuilt_nn():
    nn = Sequential()
    nn.add(Dense(units = 400 , kernel_initializer = 'normal', input_dim = size))
    nn.add(PReLU())
    nn.add(Dropout(.4))
    nn.add(Dense(units = 160 , kernel_initializer = 'normal'))
    nn.add(PReLU())
    nn.add(BatchNormalization())
    nn.add(Dropout(.6))
    nn.add(Dense(units = 64 , kernel_initializer = 'normal'))
    nn.add(PReLU())
    nn.add(BatchNormalization())
    nn.add(Dropout(.5))
    nn.add(Dense(units = 26, kernel_initializer = 'normal'))
    nn.add(PReLU())
    nn.add(BatchNormalization())
    nn.add(Dropout(.6))
    nn.add(Dense(1, kernel_initializer='normal'))
    nn.compile(loss='mae', optimizer=Adam(lr=4e-3, decay=1e-4))

    return nn

Using TensorFlow backend.


time: 14.8 s


In [26]:
## Preprocessing
print("Preprocessing neural network data...")
imputer= Imputer()
imputer.fit(x_train.iloc[:, :])
x_train_nn = imputer.transform(x_train.iloc[:, :])

#imputer.fit(x_valid.iloc[:, :])
#x_valid_nn = imputer.transform(x_valid.iloc[:, :])

sc = StandardScaler()
x_train_nn = sc.fit_transform(x_train_nn)
#x_valid_nn = sc.transform(x_valid_nn)

Preprocessing neural network data...
time: 149 ms


In [27]:
# fix random seed for reproducibility
seed = 7
size = x_train_nn.shape[1]
# Prebuit KAGGLE Kernel
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=prebuilt_nn, epochs=5, batch_size=50, verbose=0)))
pipeline = Pipeline(estimators)
pipeline.fit(x_train_nn, y_train)
models['DNN'] = pipeline

#print(mean_absolute_error(y_valid, pipeline.predict(x_valid_nn)))

time: 59 s


# LSTM

In [28]:
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
 
#x_train = x_train.values
#x_valid = x_valid.values

# reshape input to be 3D [samples, timesteps, features]
x_train_lstm = x_train_data.values.reshape((x_train_data.shape[0], 1, x_train_data.shape[1]))
#x_valid_lstm = x_valid.values.reshape((x_valid.shape[0], 1, x_valid.shape[1]))
 
# design network
lstm = Sequential()
lstm.add(LSTM(50, input_shape=(x_train_lstm.shape[1], x_train_lstm.shape[2])))
lstm.add(PReLU())
lstm.add(Dropout(.2))
lstm.add(Dense(units = 100 , kernel_initializer = 'normal'))
lstm.add(PReLU())
lstm.add(Dropout(.2))
lstm.add(Dense(units = 50 , kernel_initializer = 'normal'))
lstm.add(PReLU())
lstm.add(Dense(1))
lstm.compile(loss='mae', optimizer='adam')
# fit network
#validation_data=(x_valid_lstm, y_valid)
lstm.fit(x_train_lstm, x_train_label, epochs=15, batch_size=50, verbose=1, shuffle=False)
 
# make a prediction
#yhat = lstm.predict(x_valid_lstm)
models['LSTM'] = lstm
#mae = mean_absolute_error(y_valid, yhat)
#print('Test MAE: %.3f' % mae)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


NameError: name 'yhat' is not defined

time: 3min 30s


# Stacking

In [29]:
# https://github.com/dnc1994/Kaggle-Playground/blob/master/home-depot/ensemble.py
import time
from sklearn.metrics import mean_absolute_error, make_scorer
from xgboost import XGBRegressor
from sklearn.cross_validation import KFold
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, \
        ExtraTreesRegressor, AdaBoostClassifier
from sklearn import grid_search

def mean_absolute_error_(ground_truth, predictions):
    return mean_absolute_error(ground_truth, predictions)

MAE = make_scorer(mean_absolute_error_, greater_is_better=False)

params_xgb = {
    'max_depth':        5,  # shuld be 0.5 to 1% of the examples
    'subsample':        1,  # Ratio of observations to be used as samples for each tree
    'min_child_weight': 10, # Deals with imbalanced data and prevents overfitting as the value >
    'objective':        'reg:linear',
    'n_estimators':     1000, # Sequential trees to be modelled.
    'eta':              0.1,  # Shrinkage. Typically between 0.1 - 0.2 - learning rate for gradient boost (D:0.3)
    'eval_metric':      'mae'
}

class Ensemble(object):
    
    def __init__(self, n_folds, stacker, base_models, include_features, cvgrid):
        self.n_folds = n_folds
        self.stacker = stacker
        self.base_models = base_models
        self.features = include_features
        self.param_grid = cvgrid
    

    def fit(self, X, y, include_features=False):
        X = np.array(X)
        y = np.array(y)
        
        folds = list(KFold(len(y), n_folds=self.n_folds, shuffle=True, random_state=2016))
        S_train = np.zeros((X.shape[0], len(self.base_models)))
        
        start_time = time.time()
        
        for i, c in enumerate(self.base_models):
            print('Fitting For Base Model {} ---'.format(c))       
            clf = self.base_models[c]
            
            for j, (train_idx, test_idx) in enumerate(folds):
                print('--- Fitting For Fold %d / %d ---', j + 1, self.n_folds)
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                
                if c not in ['XGB', 'LightGBM', 'LSTM']:
                    
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_holdout)[:]
                    
                    S_train[test_idx, i] = y_pred
                    
                elif c in ['LSTM']:
                    x_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))

                    clf.fit(x_train_lstm, y_train, epochs=15, batch_size=50, verbose=1, shuffle=False)
                    y_pred = clf.predict(X_holdout.reshape((X_holdout.shape[0], 1, X_holdout.shape[1])))[:]
                    
                    S_train[test_idx, i] = [i[0] for i in y_pred]

                else:
                    d_train = xg.DMatrix(X_train, label=y_train, missing=-1)
                    d_valid = xg.DMatrix(X_holdout, missing=-1)
                    
                    clf = xg.train(params_xgb, d_train)
                    y_pred = clf.predict(d_valid)[:]
                    
                    S_train[test_idx, i] = y_pred
                    
                print('Elapsed: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

            print('Elapsed: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

        print('--- Base Models Trained: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

        grid = grid_search.GridSearchCV(estimator=self.stacker, param_grid=self.param_grid, n_jobs=1, cv=5, verbose=20, scoring=MAE)
        
        if self.features:
            S_train = np.append(X, S_train, 1)
            
        grid.fit(S_train, y)
        
        try:
            print('Best Params:')
            print(grid.best_params_)
            print('Best CV Score:')
            print(-grid.best_score_)
            print('Best estimator:')
            print(grid.best_estimator_)
        except:
            pass
        
        self.stacker = grid.best_estimator_
        
        print('--- Stacker Trained: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

        
        
    def predict(self, X):
        X = np.array(X)
        folds = list(KFold(len(X), n_folds=self.n_folds, shuffle=True, random_state=2016))
        if self.features:
            S_test = np.append(X, np.zeros((X.shape[0], len(self.base_models))), 1)  
            print('Using features of shape', S_test.shape)
        else:
            S_test = np.zeros((X.shape[0], len(self.base_models)))
            print('Using features of shape', S_test.shape)

        for ind, c in enumerate(self.base_models):
            clf = self.base_models[c]
            
            # Uses all features.
            if self.features:
                i = X.shape[1] + ind
            else:
                i = ind
                
            S_test_i = np.zeros((X.shape[0], len(folds)))
            print('--- Predicting For  #{}'.format(c))
            
            # Makes predictions for each model
            for j, (train_idx, test_idx) in enumerate(folds):    
                if c not in ['XGB', 'LSTM']:
                    S_test_i[:, j] = clf.predict(X)[:]
                    
                elif c in ['LSTM']:
                    S_test_i[:, j] = [i for i in clf.predict(X.reshape((X.shape[0], 1, X.shape[1])))[:]]
                    
                else:
                    S_test_i[:, j] = clf.predict(X)[:]
                
            S_test[:, i] = S_test_i.mean(1)

        clf = self.stacker
        y_pred = clf.predict(S_test)[:]
        
        return y_pred

    
    def fit_predict(self, X, y, T):
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        start_time = time.time()
        folds = list(KFold(len(y), n_folds=self.n_folds, shuffle=True, random_state=2016))

        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test  = np.zeros((T.shape[0], len(self.base_models)))

        for i, c in enumerate(self.base_models):
            print('########## \nFitting For Base Model {} \n##########'.format(c))
            clf = self.base_models[c]
            S_test_i = np.zeros((T.shape[0], len(folds)))

            for j, (train_idx, test_idx) in enumerate(folds):
                print('--- Fitting For Fold #{0} / {1} ---'.format(j+1, self.n_folds))
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                
                if c not in ['XGB', 'LightGBM', 'LSTM']:
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_holdout)[:]
                    
                    S_train[test_idx, i] = y_pred
                    S_test_i[:, j] = clf.predict(T)[:]
                    
                elif c in ['LSTM']:
                    x_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
                    
                    clf.fit(x_train_lstm, y_train, epochs=15, batch_size=50, verbose=1, shuffle=False)
                    y_pred = clf.predict(X_holdout.reshape((X_holdout.shape[0], 1, X_holdout.shape[1])))[:]
                    
                    S_train[test_idx, i] = [i[0] for i in y_pred]
                    S_test_i[:, j] = [i for i in clf.predict(T.reshape((T.shape[0], 1, T.shape[1])))[:]]
                    
                else:
                    d_train = xg.DMatrix(X_train, label=y_train, missing=-1)
                    d_valid = xg.DMatrix(X_holdout, missing=-1)
                    
                    clf = xg.train(params_xgb, d_train)
                    y_pred = clf.predict(d_valid)[:]
                    
                    S_train[test_idx, i] = y_pred
                    data_pred = xg.DMatrix(T, missing=-1)
                    S_test_i[:, j] = clf.predict(data_pred)[:]

                print('Elapsed: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

            S_test[:, i] = S_test_i.mean(1)
            print('Elapsed: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

        print('--- Base Models Trained: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

        param_grid = {'n_estimators':  [100],
                      'learning_rate': [0.05],
                      'subsample':     [0.75]}

        grid = grid_search.GridSearchCV(estimator=self.stacker, param_grid=param_grid, n_jobs=1, cv=5, verbose=20, scoring=MAE)
        grid.fit(S_train, y)

        try:
            print('Param grid:')
            print(param_grid)
            print('Best Params:')
            print(grid.best_params_)
            print('Best CV Score:')
            print(-grid.best_score_)
            print('Best estimator:')
            print(grid.best_estimator_)
            print(message)
        except:
            pass

        print('--- Stacker Trained: %s minutes ---' % round(((time.time() - start_time) / 60), 2))
        y_pred = grid.predict(S_test)[:]

        return y_pred

time: 518 ms




In [30]:
#del models['CatBoost']
#del models['XGB']

time: 550 µs


In [31]:
param_grid = {'n_estimators':  [50, 100, 150],
              'learning_rate': [0.03, 0.05, 0.07],
              'subsample':     [0.5, 0.75, 1]
             }

ensemble = Ensemble(n_folds=5,
                    stacker=GradientBoostingRegressor(random_state=2016, verbose=1),
                    base_models=models, include_features=False, cvgrid=param_grid)
                    
#model_ensemble = ensemble.fit_predict(x_train[:100], y_train[:100], x_valid)
# MAE 0.0653212760898 - lr 0.03, nest = 50, subsample: 0.5
ensemble.fit(x_train, y_train)
#final_prediction = ensemble.predict(x_valid)
#print('MAE', mean_absolute_error(y_valid, final_prediction))

#del final_prediction


Fitting For Base Model LinearRegression ---
--- Fitting For Fold %d / %d --- 1 5
Elapsed: 0.0 minutes ---
--- Fitting For Fold %d / %d --- 2 5
Elapsed: 0.0 minutes ---
--- Fitting For Fold %d / %d --- 3 5
Elapsed: 0.01 minutes ---
--- Fitting For Fold %d / %d --- 4 5
Elapsed: 0.01 minutes ---
--- Fitting For Fold %d / %d --- 5 5
Elapsed: 0.01 minutes ---
Elapsed: 0.01 minutes ---
Fitting For Base Model BayesianRidge ---
--- Fitting For Fold %d / %d --- 1 5
Elapsed: 0.01 minutes ---
--- Fitting For Fold %d / %d --- 2 5
Elapsed: 0.01 minutes ---
--- Fitting For Fold %d / %d --- 3 5
Elapsed: 0.02 minutes ---
--- Fitting For Fold %d / %d --- 4 5
Elapsed: 0.02 minutes ---
--- Fitting For Fold %d / %d --- 5 5
Elapsed: 0.02 minutes ---
Elapsed: 0.02 minutes ---
Fitting For Base Model RandomForest ---
--- Fitting For Fold %d / %d --- 1 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.0s finished


Elapsed: 5.17 minutes ---
--- Fitting For Fold %d / %d --- 2 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.0min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    4.5s finished


Elapsed: 10.23 minutes ---
--- Fitting For Fold %d / %d --- 3 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.0min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    4.5s finished


Elapsed: 15.34 minutes ---
--- Fitting For Fold %d / %d --- 4 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.0min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    4.6s finished


Elapsed: 20.42 minutes ---
--- Fitting For Fold %d / %d --- 5 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.0min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    4.4s finished


Elapsed: 25.53 minutes ---
Elapsed: 25.53 minutes ---
Fitting For Base Model ExtraTrees ---
--- Fitting For Fold %d / %d --- 1 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.7s finished


Elapsed: 27.78 minutes ---
--- Fitting For Fold %d / %d --- 2 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    4.7s finished


Elapsed: 29.93 minutes ---
--- Fitting For Fold %d / %d --- 3 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.0s finished


Elapsed: 32.11 minutes ---
--- Fitting For Fold %d / %d --- 4 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.3s finished


Elapsed: 34.29 minutes ---
--- Fitting For Fold %d / %d --- 5 5


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    5.8s finished


Elapsed: 36.53 minutes ---
Elapsed: 36.53 minutes ---
Fitting For Base Model AdaBoost ---
--- Fitting For Fold %d / %d --- 1 5
Elapsed: 36.61 minutes ---
--- Fitting For Fold %d / %d --- 2 5
Elapsed: 36.7 minutes ---
--- Fitting For Fold %d / %d --- 3 5
Elapsed: 36.79 minutes ---
--- Fitting For Fold %d / %d --- 4 5
Elapsed: 36.92 minutes ---
--- Fitting For Fold %d / %d --- 5 5
Elapsed: 37.02 minutes ---
Elapsed: 37.02 minutes ---
Fitting For Base Model GradientBoosting ---
--- Fitting For Fold %d / %d --- 1 5
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            2.35m
         2           0.0069           0.0000            2.36m
         3           0.0068           0.0000            2.45m
         4           0.0068           0.0000            2.34m
         5           0.0068           0.0000            2.33m
         6           0.0068           0.0000            2.33m
         7           0.0068           0.0000    

Elapsed: 47.89 minutes ---
Elapsed: 47.89 minutes ---
Fitting For Base Model LightGBM ---
--- Fitting For Fold %d / %d --- 1 5
Elapsed: 47.92 minutes ---
--- Fitting For Fold %d / %d --- 2 5
Elapsed: 47.95 minutes ---
--- Fitting For Fold %d / %d --- 3 5
Elapsed: 47.98 minutes ---
--- Fitting For Fold %d / %d --- 4 5
Elapsed: 48.02 minutes ---
--- Fitting For Fold %d / %d --- 5 5
Elapsed: 48.05 minutes ---
Elapsed: 48.05 minutes ---
Fitting For Base Model DNN ---
--- Fitting For Fold %d / %d --- 1 5
Elapsed: 48.92 minutes ---
--- Fitting For Fold %d / %d --- 2 5
Elapsed: 49.9 minutes ---
--- Fitting For Fold %d / %d --- 3 5
Elapsed: 50.82 minutes ---
--- Fitting For Fold %d / %d --- 4 5
Elapsed: 51.82 minutes ---
--- Fitting For Fold %d / %d --- 5 5
Elapsed: 52.8 minutes ---
Elapsed: 52.8 minutes ---
Fitting For Base Model LSTM ---
--- Fitting For Fold %d / %d --- 1 5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11

        30           0.0069           0.0000            1.13s
        40           0.0070           0.0000            0.56s
        50           0.0069           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.5, score=-0.051094 -   2.8s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.5 ..............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            2.55s
         2           0.0069           0.0000            2.52s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.8s remaining:    0.0s


         3           0.0068           0.0000            2.46s
         4           0.0068           0.0000            2.41s
         5           0.0068           0.0000            2.34s
         6           0.0069           0.0000            2.26s
         7           0.0068           0.0000            2.21s
         8           0.0068           0.0000            2.16s
         9           0.0068           0.0000            2.10s
        10           0.0069           0.0000            2.05s
        20           0.0068           0.0000            1.55s
        30           0.0068           0.0000            1.03s
        40           0.0068           0.0000            0.52s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.5, score=-0.052455 -   2.6s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.5 ..............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0070           0

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.4s remaining:    0.0s


         3           0.0068           0.0000            2.50s
         4           0.0068           0.0000            2.51s
         5           0.0068           0.0000            2.45s
         6           0.0069           0.0000            2.36s
         7           0.0069           0.0000            2.30s
         8           0.0069           0.0000            2.25s
         9           0.0068           0.0000            2.18s
        10           0.0069           0.0000            2.11s
        20           0.0068           0.0000            1.66s
        30           0.0068           0.0000            1.09s
        40           0.0068           0.0000            0.55s
        50           0.0068           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.5, score=-0.052355 -   2.8s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.5 ..............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    8.3s remaining:    0.0s


         3           0.0068           0.0000            2.45s
         4           0.0069           0.0000            2.47s
         5           0.0069           0.0000            2.44s
         6           0.0068           0.0000            2.38s
         7           0.0068           0.0000            2.30s
         8           0.0068           0.0000            2.28s
         9           0.0068           0.0000            2.23s
        10           0.0068           0.0000            2.19s
        20           0.0067           0.0000            1.62s
        30           0.0066           0.0000            1.07s
        40           0.0066           0.0000            0.53s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.5, score=-0.053310 -   2.7s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.5 ..............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0

[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   11.0s remaining:    0.0s


         3           0.0069           0.0000            2.42s
         4           0.0069           0.0000            2.36s
         5           0.0069           0.0000            2.31s
         6           0.0069           0.0000            2.28s
         7           0.0068           0.0000            2.23s
         8           0.0068           0.0000            2.23s
         9           0.0068           0.0000            2.17s
        10           0.0069           0.0000            2.11s
        20           0.0068           0.0000            1.57s
        30           0.0067           0.0000            1.04s
        40           0.0066           0.0000            0.52s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.5, score=-0.053342 -   2.7s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0071           0

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   13.6s remaining:    0.0s


         3           0.0070           0.0000            2.97s
         4           0.0070           0.0000            2.92s
         5           0.0070           0.0000            2.83s
         6           0.0069           0.0000            2.79s
         7           0.0070           0.0000            2.73s
         8           0.0070           0.0000            2.64s
         9           0.0070           0.0000            2.58s
        10           0.0070           0.0000            2.54s
        20           0.0069           0.0000            1.89s
        30           0.0069           0.0000            1.25s
        40           0.0069           0.0000            0.62s
        50           0.0069           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.75, score=-0.051102 -   3.2s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   16.8s remaining:    0.0s


         3           0.0069           0.0000            3.07s
         4           0.0068           0.0000            2.98s
         5           0.0069           0.0000            2.89s
         6           0.0069           0.0000            2.80s
         7           0.0069           0.0000            2.74s
         8           0.0068           0.0000            2.66s
         9           0.0069           0.0000            2.59s
        10           0.0069           0.0000            2.52s
        20           0.0068           0.0000            1.85s
        30           0.0068           0.0000            1.22s
        40           0.0067           0.0000            0.61s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.75, score=-0.052467 -   3.1s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0070           

[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   19.9s remaining:    0.0s


         3           0.0070           0.0000            2.83s
         4           0.0068           0.0000            2.78s
         5           0.0069           0.0000            2.69s
         6           0.0069           0.0000            2.65s
         7           0.0069           0.0000            2.60s
         8           0.0069           0.0000            2.54s
         9           0.0069           0.0000            2.53s
        10           0.0068           0.0000            2.45s
        20           0.0068           0.0000            1.84s
        30           0.0068           0.0000            1.24s
        40           0.0068           0.0000            0.62s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.75, score=-0.052358 -   3.1s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           

[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   23.0s remaining:    0.0s


         3           0.0068           0.0000            2.83s
         4           0.0069           0.0000            2.76s
         5           0.0069           0.0000            2.70s
         6           0.0068           0.0000            2.65s
         7           0.0068           0.0000            2.58s
         8           0.0068           0.0000            2.51s
         9           0.0068           0.0000            2.45s
        10           0.0068           0.0000            2.40s
        20           0.0067           0.0000            1.78s
        30           0.0067           0.0000            1.20s
        40           0.0067           0.0000            0.61s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.75, score=-0.053309 -   3.1s
[CV] learning_rate=0.03, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           

[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   26.1s remaining:    0.0s


         3           0.0069           0.0000            2.76s
         4           0.0069           0.0000            2.76s
         5           0.0069           0.0000            2.68s
         6           0.0069           0.0000            2.63s
         7           0.0067           0.0000            2.62s
         8           0.0068           0.0000            2.55s
         9           0.0068           0.0000            2.50s
        10           0.0069           0.0000            2.43s
        20           0.0067           0.0000            1.82s
        30           0.0067           0.0000            1.23s
        40           0.0066           0.0000            0.63s
        50           0.0067           0.0000            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=0.75, score=-0.053335 -   3.2s
[CV] learning_rate=0.03, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0071            3.61s


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   29.4s remaining:    0.0s


         2           0.0071            3.68s
         3           0.0070            3.67s
         4           0.0070            3.51s
         5           0.0070            3.40s
         6           0.0070            3.31s
         7           0.0070            3.19s
         8           0.0070            3.08s
         9           0.0070            3.01s
        10           0.0070            2.93s
        20           0.0069            2.11s
        30           0.0069            1.42s
        40           0.0069            0.70s
        50           0.0068            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=1, score=-0.051110 -   3.5s
[CV] learning_rate=0.03, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0069            3.31s


[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:   32.9s remaining:    0.0s


         2           0.0069            3.27s
         3           0.0069            3.27s
         4           0.0069            3.21s
         5           0.0069            3.14s
         6           0.0069            3.11s
         7           0.0069            3.04s
         8           0.0069            2.97s
         9           0.0069            2.92s
        10           0.0068            2.85s
        20           0.0068            2.26s
        30           0.0068            1.56s
        40           0.0067            0.89s
        50           0.0067            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=1, score=-0.052471 -   4.4s
[CV] learning_rate=0.03, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0069            5.00s


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:   37.3s remaining:    0.0s


         2           0.0069            4.95s
         3           0.0069            4.62s
         4           0.0069            4.39s
         5           0.0069            4.16s
         6           0.0069            3.96s
         7           0.0069            3.78s
         8           0.0069            3.61s
         9           0.0069            3.59s
        10           0.0069            3.45s
        20           0.0068            2.72s
        30           0.0068            1.75s
        40           0.0067            0.90s
        50           0.0067            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=1, score=-0.052371 -   4.6s
[CV] learning_rate=0.03, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0069            5.24s


[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:   42.0s remaining:    0.0s


         2           0.0069            5.17s
         3           0.0069            5.14s
         4           0.0069            5.15s
         5           0.0069            4.76s
         6           0.0068            4.45s
         7           0.0068            4.19s
         8           0.0068            4.00s
         9           0.0068            3.83s
        10           0.0068            3.67s
        20           0.0068            2.51s
        30           0.0067            1.64s
        40           0.0067            0.79s
        50           0.0067            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=1, score=-0.053321 -   4.0s
[CV] learning_rate=0.03, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0069            3.58s


[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:   45.9s remaining:    0.0s


         2           0.0069            3.48s
         3           0.0069            3.40s
         4           0.0069            3.32s
         5           0.0069            3.26s
         6           0.0069            3.21s
         7           0.0068            3.21s
         8           0.0068            3.25s
         9           0.0068            3.21s
        10           0.0068            3.14s
        20           0.0068            2.23s
        30           0.0067            1.76s
        40           0.0067            0.98s
        50           0.0067            0.00s
[CV]  learning_rate=0.03, n_estimators=50, subsample=1, score=-0.053341 -   5.3s
[CV] learning_rate=0.03, n_estimators=100, subsample=0.5 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0070           0.0000            8.26s


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   51.2s remaining:    0.0s


         2           0.0070           0.0000            7.54s
         3           0.0070           0.0000            7.12s
         4           0.0069           0.0000            6.82s
         5           0.0071           0.0000            6.60s
         6           0.0070           0.0000            6.43s
         7           0.0069           0.0000            6.23s
         8           0.0069           0.0000            6.09s
         9           0.0069           0.0000            6.02s
        10           0.0071           0.0000            6.02s
        20           0.0069           0.0000            5.55s
        30           0.0069           0.0000            4.78s
        40           0.0070           0.0000            3.94s
        50           0.0069           0.0000            3.15s
        60           0.0068           0.0000            2.42s
        70           0.0068           0.0000            1.76s
        80           0.0067          -0.0000            1.15s
        

[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:   56.9s remaining:    0.0s



         3           0.0068           0.0000            5.43s
         4           0.0068           0.0000            5.31s
         5           0.0068           0.0000            5.23s
         6           0.0069           0.0000            5.11s
         7           0.0068           0.0000            5.11s
         8           0.0068           0.0000            5.07s
         9           0.0068           0.0000            4.93s
        10           0.0069           0.0000            4.83s
        20           0.0068           0.0000            4.20s
        30           0.0068           0.0000            3.70s
        40           0.0068           0.0000            3.14s
        50           0.0067           0.0000            2.61s
        60           0.0067           0.0000            2.08s
        70           0.0067           0.0000            1.54s
        80           0.0066           0.0000            1.02s
        90           0.0068           0.0000            0.51s
       

[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:  1.0min remaining:    0.0s


         3           0.0068           0.0000            5.10s
         4           0.0068           0.0000            4.96s
         5           0.0068           0.0000            4.89s
         6           0.0069           0.0000            4.81s
         7           0.0069           0.0000            4.83s
         8           0.0069           0.0000            4.82s
         9           0.0068           0.0000            4.79s
        10           0.0069           0.0000            4.72s
        20           0.0068           0.0000            4.11s
        30           0.0068           0.0000            3.64s
        40           0.0068           0.0000            3.13s
        50           0.0068           0.0000            2.55s
        60           0.0066           0.0000            2.02s
        70           0.0067           0.0000            1.50s
        80           0.0067           0.0000            0.99s
        90           0.0069           0.0000            0.49s
       1

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:  1.1min remaining:    0.0s


         3           0.0068           0.0000            4.98s
         4           0.0069           0.0000            5.09s
         5           0.0069           0.0000            5.10s
         6           0.0068           0.0000            5.02s
         7           0.0068           0.0000            5.02s
         8           0.0068           0.0000            4.94s
         9           0.0068           0.0000            4.78s
        10           0.0068           0.0000            4.71s
        20           0.0067           0.0000            4.08s
        30           0.0066           0.0000            3.57s
        40           0.0066           0.0000            3.03s
        50           0.0067           0.0000            2.47s
        60           0.0067          -0.0000            1.95s
        70           0.0067           0.0000            1.47s
        80           0.0067           0.0000            0.97s
        90           0.0066           0.0000            0.49s
       1

[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:  1.2min remaining:    0.0s


         4           0.0069           0.0000            4.13s
         5           0.0069           0.0000            4.09s
         6           0.0069           0.0000            4.04s
         7           0.0068           0.0000            3.99s
         8           0.0068           0.0000            3.94s
         9           0.0068           0.0000            3.91s
        10           0.0069           0.0000            3.89s
        20           0.0068           0.0000            3.73s
        30           0.0067           0.0000            3.25s
        40           0.0066           0.0000            2.74s
        50           0.0067           0.0000            2.25s
        60           0.0067           0.0000            1.82s
        70           0.0067           0.0000            1.38s
        80           0.0066          -0.0000            0.93s
        90           0.0066          -0.0000            0.47s
       100           0.0067          -0.0000            0.00s
[CV]  le

         2           0.0071            6.79s
         3           0.0070            6.69s
         4           0.0070            6.58s
         5           0.0070            6.54s
         6           0.0070            6.43s
         7           0.0070            6.36s
         8           0.0070            6.28s
         9           0.0070            6.17s
        10           0.0070            6.12s
        20           0.0069            5.28s
        30           0.0069            4.59s
        40           0.0069            3.91s
        50           0.0068            3.29s
        60           0.0068            2.61s
        70           0.0068            1.95s
        80           0.0068            1.30s
        90           0.0068            0.65s
       100           0.0068            0.00s
[CV]  learning_rate=0.03, n_estimators=100, subsample=1, score=-0.051191 -   6.5s
[CV] learning_rate=0.03, n_estimators=100, subsample=1 ...............
      Iter       Train Loss   Remaini

         4           0.0068           0.0000            6.48s
         5           0.0068           0.0000            6.64s
         6           0.0069           0.0000            6.67s
         7           0.0069           0.0000            6.67s
         8           0.0069           0.0000            6.74s
         9           0.0068           0.0000            6.73s
        10           0.0069           0.0000            6.70s
        20           0.0068           0.0000            5.96s
        30           0.0068           0.0000            5.45s
        40           0.0068           0.0000            4.94s
        50           0.0068           0.0000            4.46s
        60           0.0066           0.0000            4.03s
        70           0.0067           0.0000            3.56s
        80           0.0067           0.0000            3.12s
        90           0.0069           0.0000            2.66s
       100           0.0066           0.0000            2.21s
[CV]  le

         3           0.0068           0.0000            8.13s
         4           0.0069           0.0000            8.00s
         5           0.0069           0.0000            8.05s
         6           0.0068           0.0000            8.12s
         7           0.0068           0.0000            8.08s
         8           0.0068           0.0000            7.93s
         9           0.0068           0.0000            7.86s
        10           0.0068           0.0000            7.78s
        20           0.0067           0.0000            7.22s
        30           0.0067           0.0000            6.81s
        40           0.0067           0.0000            6.26s
        50           0.0067           0.0000            5.68s
        60           0.0067           0.0000            5.13s
        70           0.0067           0.0000            4.59s
        80           0.0067           0.0000            3.98s
        90           0.0066          -0.0000            3.41s
       1

         8           0.0069           0.0000            2.04s
         9           0.0069           0.0000            2.01s
        10           0.0070           0.0000            1.96s
        20           0.0068           0.0000            1.50s
        30           0.0068           0.0000            1.02s
        40           0.0069           0.0000            0.51s
        50           0.0069           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.5, score=-0.051184 -   2.6s
[CV] learning_rate=0.05, n_estimators=50, subsample=0.5 ..............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            2.33s
         2           0.0069           0.0000            2.29s
         3           0.0068           0.0000            2.24s
         4           0.0068           0.0000            2.23s
         5           0.0068           0.0000            2.22s
         6           0.0069           0

         3           0.0068           0.0000            2.55s
         4           0.0068           0.0000            2.49s
         5           0.0069           0.0000            2.44s
         6           0.0068           0.0000            2.38s
         7           0.0067           0.0000            2.38s
         8           0.0068           0.0000            2.34s
         9           0.0068           0.0000            2.30s
        10           0.0068           0.0000            2.26s
        20           0.0067           0.0000            1.78s
        30           0.0067           0.0000            1.22s
        40           0.0066           0.0000            0.63s
        50           0.0066           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.75, score=-0.053359 -   3.3s
[CV] learning_rate=0.05, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           

        20           0.0067           0.0000            4.18s
        30           0.0068           0.0000            3.59s
        40           0.0068           0.0000            3.08s
        50           0.0067           0.0000            2.58s
        60           0.0066          -0.0000            2.06s
        70           0.0066          -0.0000            1.56s
        80           0.0066           0.0000            1.02s
        90           0.0068          -0.0000            0.50s
       100           0.0066          -0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=100, subsample=0.5, score=-0.052569 -   5.1s
[CV] learning_rate=0.05, n_estimators=100, subsample=0.5 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0070           0.0000            5.58s
         2           0.0070           0.0000            5.66s
         3           0.0068           0.0000            5.51s
         4           0.0068           

        20           0.0067           0.0000            4.77s
        30           0.0067           0.0000            4.12s
        40           0.0068          -0.0000            3.63s
        50           0.0067           0.0000            2.98s
        60           0.0067           0.0000            2.53s
        70           0.0066           0.0000            1.92s
        80           0.0066          -0.0000            1.27s
        90           0.0067          -0.0000            0.64s
       100           0.0066           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=100, subsample=0.75, score=-0.052435 -   6.5s
[CV] learning_rate=0.05, n_estimators=100, subsample=0.75 ............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            6.20s
         2           0.0069           0.0000            6.16s
         3           0.0068           0.0000            6.20s
         4           0.0068          

        20           0.0067            6.65s
        30           0.0067            5.68s
        40           0.0067            4.92s
        50           0.0066            4.14s
        60           0.0066            3.34s
        70           0.0066            2.51s
        80           0.0066            1.65s
        90           0.0066            0.82s
       100           0.0066            0.00s
[CV]  learning_rate=0.05, n_estimators=100, subsample=1, score=-0.053463 -   8.2s
[CV] learning_rate=0.05, n_estimators=150, subsample=0.5 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0070           0.0000            7.64s
         2           0.0070           0.0000            7.68s
         3           0.0070           0.0000            7.73s
         4           0.0069           0.0000            7.89s
         5           0.0070           0.0000            7.74s
         6           0.0069           0.0000            7.72s
       

        20           0.0068           0.0000            8.22s
        30           0.0068           0.0000            7.57s
        40           0.0068           0.0000            6.93s
        50           0.0068          -0.0000            6.25s
        60           0.0068          -0.0000            5.52s
        70           0.0067          -0.0000            4.94s
        80           0.0067          -0.0000            4.30s
        90           0.0069          -0.0000            3.66s
       100           0.0068          -0.0000            3.04s
[CV]  learning_rate=0.05, n_estimators=150, subsample=0.75, score=-0.051241 -   9.0s
[CV] learning_rate=0.05, n_estimators=150, subsample=0.75 ............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            9.62s
         2           0.0070           0.0000            9.30s
         3           0.0069           0.0000            9.15s
         4           0.0068          

       100           0.0066            3.41s
[CV]  learning_rate=0.05, n_estimators=150, subsample=1, score=-0.052571 -  10.1s
[CV] learning_rate=0.05, n_estimators=150, subsample=1 ...............
      Iter       Train Loss   Remaining Time 
         1           0.0069           11.26s
         2           0.0069           11.08s
         3           0.0069           11.11s
         4           0.0069           10.85s
         5           0.0069           10.54s
         6           0.0069           10.35s
         7           0.0068           10.33s
         8           0.0068           10.23s
         9           0.0068           10.14s
        10           0.0068           10.14s
        20           0.0068            9.28s
        30           0.0067            8.12s
        40           0.0067            7.22s
        50           0.0067            6.46s
        60           0.0067            5.81s
        70           0.0067            5.15s
        80           0.0067         

         8           0.0067           0.0000            2.04s
         9           0.0067           0.0000            2.00s
        10           0.0068           0.0000            1.93s
        20           0.0067           0.0000            1.36s
        30           0.0067           0.0000            0.93s
        40           0.0066          -0.0000            0.47s
        50           0.0066           0.0000            0.00s
[CV]  learning_rate=0.07, n_estimators=50, subsample=0.5, score=-0.053469 -   2.4s
[CV] learning_rate=0.07, n_estimators=50, subsample=0.75 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0071           0.0000            2.88s
         2           0.0071           0.0000            2.84s
         3           0.0070           0.0000            2.77s
         4           0.0069           0.0000            2.76s
         5           0.0070           0.0000            2.72s
         6           0.0069           0

        30           0.0067            1.42s
        40           0.0067            0.72s
        50           0.0067            0.00s
[CV]  learning_rate=0.07, n_estimators=50, subsample=1, score=-0.052411 -   3.5s
[CV] learning_rate=0.07, n_estimators=50, subsample=1 ................
      Iter       Train Loss   Remaining Time 
         1           0.0069            3.57s
         2           0.0069            3.50s
         3           0.0068            3.33s
         4           0.0068            3.24s
         5           0.0068            3.08s
         6           0.0068            3.01s
         7           0.0068            2.94s
         8           0.0068            2.85s
         9           0.0067            2.80s
        10           0.0067            2.70s
        20           0.0067            1.91s
        30           0.0067            1.26s
        40           0.0066            0.61s
        50           0.0066            0.00s
[CV]  learning_rate=0.07, n_estimator

        20           0.0067           0.0000            3.75s
        30           0.0067           0.0000            3.23s
        40           0.0066          -0.0000            2.74s
        50           0.0066           0.0000            2.31s
        60           0.0067           0.0000            1.86s
        70           0.0066          -0.0000            1.38s
        80           0.0066          -0.0000            0.92s
        90           0.0066          -0.0000            0.46s
       100           0.0067          -0.0000            0.00s
[CV]  learning_rate=0.07, n_estimators=100, subsample=0.5, score=-0.053524 -   4.6s
[CV] learning_rate=0.07, n_estimators=100, subsample=0.75 ............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0071           0.0000            6.18s
         2           0.0071           0.0000            6.08s
         3           0.0070           0.0000            5.94s
         4           0.0069           

        20           0.0068            5.81s
        30           0.0068            5.13s
        40           0.0068            4.29s
        50           0.0068            3.46s
        60           0.0068            2.71s
        70           0.0068            1.99s
        80           0.0068            1.31s
        90           0.0067            0.65s
       100           0.0067            0.00s
[CV]  learning_rate=0.07, n_estimators=100, subsample=1, score=-0.051253 -   6.5s
[CV] learning_rate=0.07, n_estimators=100, subsample=1 ...............
      Iter       Train Loss   Remaining Time 
         1           0.0069            6.65s
         2           0.0069            6.91s
         3           0.0069            6.55s
         4           0.0069            6.57s
         5           0.0068            6.62s
         6           0.0068            6.51s
         7           0.0068            6.44s
         8           0.0068            6.36s
         9           0.0068         

        20           0.0067           0.0000            5.74s
        30           0.0067           0.0000            5.39s
        40           0.0068          -0.0000            4.98s
        50           0.0067          -0.0000            4.55s
        60           0.0066          -0.0000            4.10s
        70           0.0067          -0.0000            3.69s
        80           0.0066          -0.0000            3.19s
        90           0.0068          -0.0000            2.74s
       100           0.0065          -0.0000            2.27s
[CV]  learning_rate=0.07, n_estimators=150, subsample=0.5, score=-0.052509 -   6.9s
[CV] learning_rate=0.07, n_estimators=150, subsample=0.5 .............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            7.14s
         2           0.0067           0.0000            7.13s
         3           0.0068           0.0000            7.13s
         4           0.0069           

        20           0.0067           0.0000            8.12s
        30           0.0066           0.0000            7.61s
        40           0.0066           0.0000            7.00s
        50           0.0066          -0.0000            6.32s
        60           0.0066           0.0000            5.49s
        70           0.0067          -0.0000            4.76s
        80           0.0066          -0.0000            4.09s
        90           0.0066          -0.0000            3.48s
       100           0.0066          -0.0000            2.89s
[CV]  learning_rate=0.07, n_estimators=150, subsample=0.75, score=-0.053482 -   8.7s
[CV] learning_rate=0.07, n_estimators=150, subsample=0.75 ............
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.0069           0.0000            7.75s
         2           0.0068           0.0000            7.81s
         3           0.0068           0.0000            8.09s
         4           0.0068          

[Parallel(n_jobs=1)]: Done 135 out of 135 | elapsed: 13.4min finished


         2           0.0069           0.0000            3.51s
         3           0.0070           0.0000            3.41s
         4           0.0070           0.0000            3.34s
         5           0.0069           0.0000            3.29s
         6           0.0068           0.0000            3.25s
         7           0.0070           0.0000            3.09s
         8           0.0069           0.0000            3.00s
         9           0.0068           0.0000            2.88s
        10           0.0068           0.0000            2.79s
        20           0.0068           0.0000            2.10s
        30           0.0068           0.0000            1.40s
        40           0.0068           0.0000            0.69s
        50           0.0066           0.0000            0.00s
Best Params:
{'learning_rate': 0.03, 'n_estimators': 50, 'subsample': 0.5}
Best CV Score:
0.05251106418516075
Best estimator:
GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=

In [None]:
def k_fold_cross_validation(X, K, randomise = False):
    """Generates K (training, validation) pairs from the items in X."""
    
    if randomise: from random import shuffle; X=list(X); shuffle(X)
    for k in range(K):
        training   = [x for i, x in enumerate(X) if i % K != k]
        validation = [x for i, x in enumerate(X) if i % K == k]
        
        yield training, validation

In [None]:
index = 0
for training, validation in k_fold_cross_validation(x_train_data.values, K=5):
    pred_k = ensemble.predict(validation)
    print('MAE',mean_absolute_error(x_train_label[index:index+len(validation)], pred_k))
    index += len(validation)

In [None]:
########## LAYER 1 ##########
# Submodel  1 : OLS                      # Ordinary least squares estimator Sklearn implementation
# Submodel  2 : BR                       # Bayesian ridge regression - Sklearn implementation
# Submodel  3 : DNN                      # Dense Neural Network - Keras - Dense layers 
# Submodel  4 : LightGBM                 # Light Gradient Boosting - https://github.com/Microsoft/LightGBM
# Submodel  5 : XGBoost                  # Extreme Gradient Boosting - http://xgboost.readthedocs.io/en/latest/model.html
# Submodel  6 : CatBoost                 # Categorical Boosting https://github.com/catboost/catboost
# Submodel  7 : LSTM                     # Long Short Term Memory Neural Network - Keras implementation
# Submodel  8 : RandomForestRegressor    # Sklearn implementation
# Submodel  9 : ExtraTreesRegressor      # Sklearn implementation
# Submodel 10 : SVR                      # Support vector machines for regression - Sklearn implementation
# Submodel 11 : AdaBoost                 # Adaptive Boosting Sklearn Implementation

########## LAYER 2 ##########
# https://www.kaggle.com/dragost/boosted-trees-lb-0-0643707/edit

# Save data

In [None]:
submission_sample[m] = submission_sample['ParcelId'].to_frame().merge(x_predict[['parcelid', m]], how='left', left_on='ParcelId', right_on='parcelid')[m]

In [50]:
print('Building properties data')
properties2017 = pd.read_csv('../Data/properties_2017.csv', low_memory = False)
sample_prediction = pd.merge(sample['ParcelId'].to_frame(), properties2017, how='left', left_on=['ParcelId'], right_on=['parcelid'])
#transactions[['propertycountylandusecode', 'propertyzoningdesc']] = transactions[['propertycountylandusecode', 'propertyzoningdesc']].astype(str).apply(LabelEncoder().fit_transform)
sample_prediction['taxdelinquencyflag'].replace('Y', 1, inplace=True)
sample_prediction.drop(to_drop, axis=1, inplace=True)
sample_prediction = complex_features(sample_prediction)
sample_prediction.drop(['parcelid', 'propertyzoningdesc', 'propertycountylandusecode', 'fireplacecnt'], axis=1, inplace=True)
sample_prediction.fillna(sample_prediction.median(), inplace = True)

del properties2017
gc.collect()

Building properties data
Generating complex features
........


27

time: 1min 21s


In [76]:
# https://www.kaggle.com/c/zillow-prize-1/discussion/33899, Oct,Nov,Dec

WEIGHT_XGB = 0.4
WEIGHT_CAT = 0.6

test_dates = {
    '201610': pd.Timestamp('2016-09-30'),
    '201611': pd.Timestamp('2016-10-31'),
    '201612': pd.Timestamp('2016-11-30'),
    '201710': pd.Timestamp('2017-09-30'),
    '201711': pd.Timestamp('2017-10-31'),
    '201712': pd.Timestamp('2017-11-30')
}

for m in test_dates.keys():
    
    print('Processing', m)
    sample_prediction['transactiondate'] = test_dates[m]
    sample_prediction = time_data(sample_prediction)
    
    print('Ensemble Prediction', m)
    sample_prediction['ensemble'] = ensemble.predict(sample_prediction[best_columns])
    
    print('XGB - CatBoost Train', m)
    predictions_xgb = xgb_gs.predict(xg.DMatrix(sample_prediction[list(best_columns) + ['ensemble']]))
    predictions_cat = get_cat_boost_all(sample_train, sample_label, sample_prediction[list(best_columns) + ['ensemble']])

    sample[m] = (WEIGHT_XGB * predictions_xgb) + (WEIGHT_CAT * predictions_cat)
    
    del predictions_xgb, predictions_cat
    gc.collect()
    
#del x_predict

Processing 201610
Added time data
........
Ensemble Prediction 201610
Using features of shape (2985217, 9)
--- Predicting For  #LinearRegression
--- Predicting For  #BayesianRidge
--- Predicting For  #RandomForest


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  6.8min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.9min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.8min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.8min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  5.8min finished


--- Predicting For  #ExtraTrees


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.9min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.7min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.7min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.7min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.7min finished


--- Predicting For  #AdaBoost
--- Predicting For  #GradientBoosting
--- Predicting For  #LightGBM
--- Predicting For  #DNN
--- Predicting For  #LSTM
XGB - CatBoost Train 201610


AttributeError: 'DataFrame' object has no attribute 'feature_names'

time: 1h 24min 50s


In [None]:
sample_prediction['cat'] = get_cat_boost_all(sample_train, sample_label, sample_prediction[list(best_columns) + ['ensemble']])


  0%|          | 0/5 [00:00<?, ?it/s]

Initialising CAT Boost Regression
Building ensemble 0


 20%|██        | 1/5 [05:04<20:17, 304.31s/it]

Building ensemble 1


In [67]:
train2017 = pd.read_csv('../Data/train_2017.csv', parse_dates=['transactiondate'], low_memory=False)
sample_train = pd.merge(train2017, sample_prediction, how='left', left_on='parcelid' ,right_on='ParcelId')
sample_train['ensemble'] = ensemble.predict(sample_train[best_columns])
sample_label = sample_train['logerror']
sample_train = time_data(sample_train)[list(best_columns) + ['ensemble']]

Using features of shape (77613, 9)
--- Predicting For  #LinearRegression
--- Predicting For  #BayesianRidge
--- Predicting For  #RandomForest


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   12.0s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.7s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.9s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.6s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.8s finished


--- Predicting For  #ExtraTrees


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.6s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.6s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.4s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.4s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.3s finished


--- Predicting For  #AdaBoost
--- Predicting For  #GradientBoosting
--- Predicting For  #LightGBM
--- Predicting For  #DNN
--- Predicting For  #LSTM
Added time data
........
time: 2min 31s


In [68]:
params_xgb = {
    'max_depth':        5,  # shuld be 0.5 to 1% of the examples
    'subsample':        1,  # Ratio of observations to be used as samples for each tree
    'min_child_weight': 10, # Deals with imbalanced data and prevents overfitting as the value >
    'objective':        'reg:linear',
    'n_estimators':     1000, # Sequential trees to be modelled.
    'eta':              0.1,  # Shrinkage. Typically between 0.1 - 0.2 - learning rate for gradient boost (D:0.3)
    'eval_metric':      'mae'
}

d_train = xg.DMatrix(sample_train, label=sample_label)
xgb_gs = xg.train(params_xgb, d_train, num_boost_round=250, verbose_eval=50)

time: 44.7 s


In [73]:
def get_cat_boost_all(x_train, y_train, x_valid):
    num_ensembles = 5
    y_pred_valid = 0.0

    print('Initialising CAT Boost Regression')
    for i in tqdm(range(num_ensembles)):
        print('Building ensemble', i)
        # Use CV, tune hyperparameters
        catb = CatBoostRegressor(
                iterations=630, learning_rate=0.03,
                depth=6, l2_leaf_reg=3,
                loss_function='MAE',
                eval_metric='MAE',
                random_seed=i)

        catb.fit(x_train, y_train, cat_features=cat_index)

        y_pred_valid += catb.predict(x_valid)
        
    y_pred_valid /= num_ensembles
    
    return y_pred_valid

time: 6.13 ms


In [None]:
sample_prediction.to_csv('submission5.csv',index=False)
sample_prediction.head()

# RFE