In [1]:
import xgboost as xgb
import pandas as pd
from sklearn import preprocessing, grid_search, metrics
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.metrics import mean_absolute_error
import time
import numpy as np
import math
import lightgbm as lgb
from scipy import sparse
from scipy.stats import skew, boxcox
from sklearn.model_selection import train_test_split
import gc

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU,LeakyReLU,ELU,ParametricSoftplus,ThresholdedReLU,SReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.optimizers import SGD,Nadam
from keras.regularizers import WeightRegularizer, ActivityRegularizer,l2, activity_l2

Using Theano backend.


In [2]:
def logregobj(labels, preds):
    """Custom boosting objective: first and second derivatives of a Fair-style loss.

    With residual r = preds - labels and constant c = 2 this returns
    grad = c * r / (|r| + c) and hess = c**2 / (|r| + c)**2, element-wise.

    Returns
    -------
    (grad, hess) : tuple of arrays with the same shape as the inputs.
    """
    fair_c = 2
    residual = preds - labels
    # Shared denominator |r| + c of both derivatives.
    damping = np.abs(residual) + fair_c
    grad = fair_c * residual / damping
    hess = fair_c ** 2 / damping ** 2
    return grad, hess

def log_mae(labels,preds,lift=200):
    """Mean absolute error measured after undoing the log transform.

    Both arguments are exponentiated and shifted back by ``lift`` before the
    MAE is computed, i.e. the inverse of ``log(value + lift)``.
    """
    actual = np.exp(labels) - lift
    predicted = np.exp(preds) - lift
    return mean_absolute_error(actual, predicted)

# Scorer wrapper around log_mae for grid search; greater_is_better=False
# marks the metric as a loss (lower is better).
log_mae_scorer = metrics.make_scorer(log_mae, greater_is_better = False)

def lgbm_eval_mae(yhat, dtrain, lift=200):
    """Custom LightGBM evaluation metric: MAE on the original (un-logged) scale.

    Two calling conventions are accepted:

    * native API ``feval(preds, dataset)`` - the second argument exposes
      ``get_label()`` and the labels are read from it;
    * scikit-learn wrapper ``eval_metric(y_true, y_pred)`` - both arguments
      are plain arrays.

    Parameters
    ----------
    yhat : array of log-scale values (predictions, or labels under the
        scikit-learn convention).
    dtrain : dataset object with ``get_label()`` or a plain array.
    lift : shift that was added before taking the log of the target.

    Returns
    -------
    ('mae', value, False) - the trailing False flags "lower is better".
    """
    if hasattr(dtrain, 'get_label'):
        y = dtrain.get_label()
    else:
        # Two plain arrays were passed. MAE is symmetric in its arguments, so
        # it does not matter which of the two holds the ground truth.
        y = dtrain
    return 'mae', mean_absolute_error(np.exp(y)-lift, np.exp(yhat)-lift), False

def xg_eval_mae(yhat, dtrain, lift=200):
    """Custom XGBoost evaluation metric: MAE on the original (un-logged) scale.

    Labels are read from ``dtrain.get_label()``; labels and predictions are
    exponentiated and shifted back by ``lift`` before the error is computed.

    Returns
    -------
    ('mae', value)
    """
    true_loss = np.exp(dtrain.get_label()) - lift
    pred_loss = np.exp(yhat) - lift
    return 'mae', mean_absolute_error(true_loss, pred_loss)


def search_model(train_x, train_y, est, param_grid, n_jobs, cv, refit=False):
    """Run an exhaustive grid search scored with ``log_mae_scorer``.

    Parameters
    ----------
    train_x, train_y : training features and (log-scale) target.
    est : estimator to tune.
    param_grid : parameter grid handed to GridSearchCV.
    n_jobs : number of parallel jobs.
    cv : cross-validation specification handed to GridSearchCV.
    refit : whether to refit the best configuration on the full data.

    Returns
    -------
    The fitted GridSearchCV object. Best score, best parameters and all grid
    scores are printed as a side effect.
    """
    search_options = dict(estimator=est,
                          param_grid=param_grid,
                          scoring=log_mae_scorer,
                          verbose=10,
                          n_jobs=n_jobs,
                          iid=True,
                          refit=refit,
                          cv=cv)
    model = grid_search.GridSearchCV(**search_options)

    model.fit(train_x, train_y)

    print("Best score: %0.3f" % model.best_score_)
    print("Best parameters set:", model.best_params_)
    print("Scores:", model.grid_scores_)
    return model

# custom metric function for Keras
def mae_log(y_true, y_pred):
    """Keras metric: MAE after mapping log-scale tensors back with exp(.) - 200."""
    predicted_loss = K.exp(y_pred) - 200
    actual_loss = K.exp(y_true) - 200
    return K.mean(K.abs(predicted_loss - actual_loss))

# Keras doesn't support sparse matrices as input.
# The following functions are useful to split a large sparse matrix into
# smaller batches so they can be loaded into mem.
def batch_generator(X, y, batch_size, shuffle):
    """Endlessly yield dense (X_batch, y_batch) mini-batches from a sparse matrix.

    Parameters
    ----------
    X : sparse matrix supporting row indexing and ``.toarray()``.
    y : array of targets aligned with the rows of ``X``.
    batch_size : number of rows per batch (the last batch of a pass may be
        smaller).
    shuffle : if true, the row order is shuffled before the first pass and
        again after every full pass.

    Yields
    ------
    (X_batch, y_batch) : dense feature block and the matching targets.
    """
    n_samples = X.shape[0]
    # float() forces true division: under Python 2 integer division would
    # floor the ratio and the trailing partial batch would never be yielded.
    number_of_batches = max(int(np.ceil(n_samples / float(batch_size))), 1)
    counter = 0
    sample_index = np.arange(n_samples)
    if shuffle:
        np.random.shuffle(sample_index)
    while True:
        batch_index = sample_index[batch_size*counter:batch_size*(counter+1)]
        X_batch = X[batch_index,:].toarray()
        y_batch = y[batch_index]
        counter += 1
        yield X_batch, y_batch
        if counter == number_of_batches:
            # A full pass is complete: optionally reshuffle and start over.
            if shuffle:
                np.random.shuffle(sample_index)
            counter = 0
            
def batch_generatorp(X, batch_size, shuffle):
    """Endlessly yield dense feature batches from a sparse matrix, in row order.

    Intended for prediction, so rows are always produced in their original
    order; ``shuffle`` is accepted for signature compatibility but ignored.

    Parameters
    ----------
    X : sparse matrix supporting row indexing and ``.toarray()``.
    batch_size : number of rows per batch (the last batch of a pass may be
        smaller).
    shuffle : ignored.

    Yields
    ------
    X_batch : dense block of consecutive rows of ``X``.
    """
    n_samples = X.shape[0]
    # Number of batches in one pass. The previous expression
    # n / ceil(n / batch_size) evaluated to roughly the batch size instead, so
    # the counter never wrapped and empty batches were yielded after one pass.
    number_of_batches = max(int(np.ceil(n_samples / float(batch_size))), 1)
    counter = 0
    sample_index = np.arange(n_samples)
    while True:
        batch_index = sample_index[batch_size * counter:batch_size * (counter + 1)]
        X_batch = X[batch_index, :].toarray()
        counter += 1
        yield X_batch
        if counter == number_of_batches:
            counter = 0

In [3]:
# Blending function for XGBoost
def xgb_blend(estimators, train_x, train_y, test_x, fold, early_stopping_rounds=0):
    """Produce out-of-fold and test-set predictions for a list of XGBoost estimators.

    For every estimator the training rows are split with an (unshuffled)
    KFold. The estimator is refit on each training part with early stopping
    against the held-out part, the held-out predictions fill that estimator's
    column of the out-of-fold matrix, and the test predictions of all folds
    are averaged.

    Parameters
    ----------
    estimators : list of XGBoost scikit-learn style regressors.
    train_x, train_y : training features (row-indexable) and log-scale target.
    test_x : test features.
    fold : number of KFold splits.
    early_stopping_rounds : forwarded to ``est.fit``.

    Returns
    -------
    (train_blend_x, test_blend_x, scores, best_rounds)
        train_blend_x : (n_train, n_estimators) out-of-fold predictions.
        test_blend_x  : (n_test, n_estimators) fold-averaged test predictions.
        scores        : (fold, n_estimators) validation ``log_mae`` per fold.
        best_rounds   : (fold, n_estimators) ``best_iteration`` per fold.
    """
    print ("Blend %d estimators for %d folds" % (len(estimators), fold))
    skf = list(KFold(len(train_y), fold))

    train_blend_x = np.zeros((train_x.shape[0], len(estimators)))
    test_blend_x = np.zeros((test_x.shape[0], len(estimators)))
    scores = np.zeros ((len(skf),len(estimators)))
    best_rounds = np.zeros ((len(skf),len(estimators)))

    for j, est in enumerate(estimators):
        print ("Model %d: %s" %(j+1, est))
        test_blend_x_j = np.zeros((test_x.shape[0], len(skf)))
        for i, (train, val) in enumerate(skf):
            print ("Model %d fold %d" %(j+1,i+1))
            fold_start = time.time()
            train_x_fold = train_x[train]
            train_y_fold = train_y[train]
            val_x_fold = train_x[val]
            val_y_fold = train_y[val]

            # Large upper bound; early stopping decides the real tree count.
            est.set_params( n_estimators=10000)
            est.fit(train_x_fold,train_y_fold,
                    eval_set=[(val_x_fold, val_y_fold)],
                    eval_metric=xg_eval_mae,
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=False
                   )
            best_round=est.best_iteration
            best_rounds[i,j]=best_round
            print ("best round %d" % (best_round))
            # best_iteration is a 0-based index while ntree_limit is a tree
            # count (0 meaning "all trees"), so passing best_iteration
            # directly drops the best tree, or uses every tree when it is 0.
            # Prefer the count recorded by XGBoost, else derive it.
            tree_limit = getattr(est, 'best_ntree_limit', best_round + 1)
            val_y_predict_fold = est.predict(val_x_fold,ntree_limit=tree_limit)
            score = log_mae(val_y_fold, val_y_predict_fold,200)
            print ("Score: ", score)
            scores[i,j]=score
            train_blend_x[val, j] = val_y_predict_fold
            test_blend_x_j[:,i] = est.predict(test_x,ntree_limit=tree_limit)
            print ("Model %d fold %d fitting finished in %0.3fs" % (j+1,i+1, time.time() - fold_start))

        # Average the per-fold test predictions for this estimator.
        test_blend_x[:,j] = test_blend_x_j.mean(1)
        print ("Score for model %d is %f" % (j+1,np.mean(scores[:,j])))
    print ("Score for blended models is %f" % (np.mean(scores)))
    return (train_blend_x, test_blend_x, scores,best_rounds )

In [4]:
# Blending function for LightGBM
def lgbm_blend(estimators, train_x, train_y, test_x, fold, early_stopping_rounds=0):
    """Produce out-of-fold and test-set predictions for a list of LightGBM estimators.

    Each estimator is refit on every KFold training part with early stopping
    against the held-out part. Held-out predictions fill the estimator's
    column of the out-of-fold matrix; test predictions are averaged over folds.

    Returns
    -------
    (train_blend_x, test_blend_x, scores, best_rounds)
        Out-of-fold predictions (n_train, n_estimators), fold-averaged test
        predictions (n_test, n_estimators), validation ``log_mae`` per
        fold/estimator and ``best_iteration`` per fold/estimator.
    """
    n_models = len(estimators)
    print ("Blend %d estimators for %d folds" % (n_models, fold))
    splits = list(KFold(len(train_y), fold))
    n_splits = len(splits)

    oof_preds = np.zeros((train_x.shape[0], n_models))
    test_preds = np.zeros((test_x.shape[0], n_models))
    scores = np.zeros((n_splits, n_models))
    best_rounds = np.zeros((n_splits, n_models))

    for j, est in enumerate(estimators):
        print ("Model %d: %s" %(j+1, est))
        fold_test_preds = np.zeros((test_x.shape[0], n_splits))
        for i, (train, val) in enumerate(splits):
            print ("Model %d fold %d" %(j+1,i+1))
            fold_start = time.time()
            fit_x, fit_y = train_x[train], train_y[train]
            held_x, held_y = train_x[val], train_y[val]

            # Large upper bound; early stopping decides the real round count.
            est.set_params(n_estimators=100000)
            est.fit(fit_x,
                    fit_y,
                    eval_set=[(held_x, held_y)],
                    eval_metric=lgbm_eval_mae,
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=False)

            best_round = est.best_iteration
            best_rounds[i, j] = best_round
            print ("best round %d" % (best_round))

            held_pred = est.predict(held_x, num_iteration=best_round)
            score = log_mae(held_y, held_pred, 200)
            print ("Score: ", score)
            scores[i, j] = score

            oof_preds[val, j] = held_pred
            fold_test_preds[:, i] = est.predict(test_x, num_iteration=best_round)
            print ("Model %d fold %d fitting finished in %0.3fs" % (j+1,i+1, time.time() - fold_start))

        # Average the per-fold test predictions for this estimator.
        test_preds[:, j] = fold_test_preds.mean(1)
        print ("Score for model %d is %f" % (j+1,np.mean(scores[:,j])))
    print ("Score for blended models is %f" % (np.mean(scores)))
    return (oof_preds, test_preds, scores, best_rounds)

# Load data

In [5]:
# Read the training set and remember how many rows it has, so the merged
# frame can be split back into train and test later.
start = time.time()
train_data = pd.read_csv('../input/train.csv')
train_size = len(train_data)
print ("Loading train data finished in %0.3fs" % (time.time() - start))

# Read the test set.
start = time.time()
test_data = pd.read_csv('../input/test.csv')
print ("Loading test data finished in %0.3fs" % (time.time() - start))

Loading train data finished in 2.644s
Loading test data finished in 1.575s


#### Merge train and test
Merging train and test saves us from duplicating the preprocessing logic and ensures that exactly the same transformations are applied to both sets.

In [6]:
# Stack train on top of test; the first train_size rows are the training set.
full_data = pd.concat([train_data, test_data])
# Release the separate frames, only the merged one is used from here on.
del train_data
del test_data
print ("Full Data set created.")

Full Data set created.


In [7]:
# Split the columns by dtype: object columns are categorical, int64/float64
# columns are numeric.
data_types = full_data.dtypes
cat_cols = data_types[data_types == 'object'].index.tolist()
num_cols = (data_types[data_types == 'int64'].index.tolist()
            + data_types[data_types == 'float64'].index.tolist())

# The row identifier and the target are numeric but are not features.
id_col = 'id'
target_col = 'loss'
num_cols.remove(id_col)
num_cols.remove(target_col)

## Numeric features

Two preprocessings on numeric features are applied:

1. Apply box-cox transformations for skewed numeric features.

2. Standardize numeric features to zero mean and unit variance (`StandardScaler`).

In [8]:
# Skewness of every numeric feature (NaNs ignored), computed on train+test.
skewed_cols = full_data[num_cols].apply(lambda x: skew(x.dropna()))

SSL = preprocessing.StandardScaler()
# Keep only the names of features whose skewness exceeds 0.25.
skewed_cols = skewed_cols[skewed_cols > 0.25].index.values
for skewed_col in skewed_cols:
    # Shift by 1 before the Box-Cox transform; the fitted lambda is not reused.
    full_data[skewed_col], lam = boxcox(full_data[skewed_col] + 1)
for num_col in num_cols:
    # Standardize each numeric column independently (the scaler is refit per column).
    # NOTE: this cell rewrites full_data in place, so re-running it transforms the data again.
    full_data[num_col] = SSL.fit_transform(full_data[num_col].values.reshape(-1,1))

# Model LE Coding
### Categorical features
1. Label Encoding (Factorizing)

In [9]:
# Replace every categorical column by integer codes. The encoder is refit per
# column on train+test together, so both sets share the same mapping.
LBL = preprocessing.LabelEncoder()
start=time.time()
for cat_col in cat_cols:
    full_data[cat_col] = LBL.fit_transform(full_data[cat_col])
print ('Label enconding finished in %f seconds' % (time.time()-start))

Label enconding finished in 34.448849 seconds


In [10]:
# Shift added to the target before taking the log; the metric helpers undo it with exp(.) - lift.
lift = 200

# Split the merged frame back by position: the first train_size rows are train.
train_x = full_data[:train_size].drop(['loss','id'], axis=1).values
test_x = full_data[train_size:].drop(['loss','id'], axis=1).values
# Models are trained on log(loss + lift).
train_y = np.log(full_data[:train_size].loss.values + lift)
# Ids of the training rows.
ID = full_data.id[:train_size].values

#### LE + LightGBM

In [None]:
# Four LightGBM configurations trained on the label-encoded features.
# The "# score" comment after each configuration is the author's recorded
# score for it (presumably from a parameter search - TODO confirm).
estimators = [lgb.LGBMRegressor(learning_rate=0.005,                             
                     max_bin=9255,
                     num_leaves=81,
                     min_child_samples=191,
                     colsample_bytree=0.300000,
                     subsample=1.000000,
                     subsample_freq=1,
                     silent=False),
#               score -1139.406737              
              lgb.LGBMRegressor(learning_rate=0.005,                             
                     max_bin=9220,
                     num_leaves=95,
                     min_child_samples=220,
                     colsample_bytree=0.261269,
                     subsample=1.000000,
                     subsample_freq=1,
                     silent=False),
#               score -1139.631716
              lgb.LGBMRegressor(learning_rate=0.005,                             
                     max_bin=9263,
                     num_leaves=104,
                     min_child_samples=190,
                     colsample_bytree=0.300000,
                     subsample=1.000000,
                     subsample_freq=1,
                     silent=False),
#               score -1139.849854 
              lgb.LGBMRegressor(learning_rate=0.005,                              
                     max_bin=9248,
                     num_leaves=149,
                     min_child_samples=220,
                     colsample_bytree=0.300000,
                     subsample=1.000000,
                     subsample_freq=1,
                     silent=False)
#               score -1139.883523              
              ]

# 10 folds, early stopping after 1000 rounds (positional fold / early_stopping_rounds arguments).
(train_blend_x_gbm_le,
 test_blend_x_gbm_le,
 blend_scores_gbm_le,
 best_rounds_gbm_le) = lgbm_blend(estimators, 
                                  train_x,train_y, 
                                  test_x,
                                  10,
                                  1000)

# Per-model averages over the folds: validation score and best round.
print (np.mean(blend_scores_gbm_le,axis=0))
print (np.mean(best_rounds_gbm_le,axis=0))

#### LE + XGBoost

In [None]:
# Four XGBoost configurations trained on the label-encoded features, all using
# the custom logregobj objective. The "# score" comment after each
# configuration is the author's recorded score for it
# (presumably from a parameter search - TODO confirm).
estimators = [xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=7,
                              min_child_weight=88,
                              colsample_bytree=0.615498,
                              subsample=0.810715,
                              gamma=1.562494,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1143.321167
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=6,
                              min_child_weight=115,
                              colsample_bytree=0.855791,
                              subsample=0.916137,
                              gamma=1.357693,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1144.113800
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=8,
                              min_child_weight=102,
                              colsample_bytree=0.600000,
                              subsample=1.000000,
                              gamma=0.950000,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1144.603485
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=8,
                              min_child_weight=113,
                              colsample_bytree=0.600000,
                              subsample=1.000000,
                              gamma=0.950000 ,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             )
#               score -1145.356110
              
              ]

# 4 folds, early stopping after 500 rounds (positional fold / early_stopping_rounds arguments).
(train_blend_x_xgb_le,
 test_blend_x_xgb_le,
 blend_scores_xgb_le,
 best_rounds_xgb_le) = xgb_blend(estimators,
                                 train_x,train_y,
                                 test_x,
                                 4,
                                 500)

# Per-model averages over the folds: validation score and best round.
print (np.mean(blend_scores_xgb_le,axis=0))
print (np.mean(best_rounds_xgb_le,axis=0))

### Categorical features
1. Label Encoding (Factorizing)
2. One-hot-encoded categorical features

In [13]:
# One-hot encode the (already label-encoded) categorical columns of train+test
# into a single sparse matrix.
OHE = preprocessing.OneHotEncoder(sparse=True)
start=time.time()
full_data_sparse=OHE.fit_transform(full_data[cat_cols])
print ('One-hot-encoding finished in %f seconds' % (time.time()-start))

print (full_data_sparse.shape)


One-hot-encoding finished in 9.933163 seconds
(313864, 1176)


In [14]:
# Shift added to the target before taking the log; the metric helpers undo it with exp(.) - lift.
lift = 200

# Append the numeric columns to the one-hot block; CSR format keeps row slicing available.
# NOTE: full_data_sparse is overwritten, so re-running this cell appends the numeric columns again.
full_data_sparse = sparse.hstack((full_data_sparse
                                  ,full_data[num_cols])
                                 , format='csr'
                                 )
print (full_data_sparse.shape)
# Split back by position: the first train_size rows are the training set.
# train_x / test_x now replace the label-encoded versions defined earlier.
train_x = full_data_sparse[:train_size]
test_x = full_data_sparse[train_size:]
train_y = np.log(full_data[:train_size].loss.values + lift)
ID = full_data.id[:train_size].values


(313864, 1190)


#### OHE + LightGBM

In [None]:
# Four LightGBM configurations trained on the one-hot-encoded sparse features.
# The "# score" comment after each configuration is the author's recorded
# score for it (presumably from a parameter search - TODO confirm).
# n_estimators given here is overwritten inside lgbm_blend via set_params.
estimators = [lgb.LGBMRegressor(learning_rate=0.005,                             
                     n_estimators=100000,
                     max_bin=526,
                     num_leaves=68,
                     min_child_samples=127,
                     colsample_bytree=0.218683,
                     subsample=0.961961,
                     subsample_freq=1,
                     silent=False),
#               score -1139.877375            
              lgb.LGBMRegressor(learning_rate=0.005,                             
                     n_estimators=100000,
                     max_bin=457,
                     num_leaves=54,
                     min_child_samples=125,
                     colsample_bytree=0.383468,
                     subsample=0.949582,
                     subsample_freq=1,
                     silent=False),
#               score -1140.332236
              lgb.LGBMRegressor(learning_rate=0.005,                             
                     n_estimators=100000,
                     max_bin=514,
                     num_leaves=40,
                     min_child_samples=126,
                     colsample_bytree=0.325435,
                     subsample=0.923560,
                     subsample_freq=1,
                     silent=False),
#               score -1140.546101
              lgb.LGBMRegressor(learning_rate=0.005,                              
                     n_estimators=100000,
                     max_bin=514,
                     num_leaves=40,
                     min_child_samples=127,
                     colsample_bytree=0.464765,
                     subsample=0.968715,
                     subsample_freq=1,
                     silent=False)
#               score -1140.593041             
              ]

# 10 folds, early stopping after 500 rounds (positional fold / early_stopping_rounds arguments).
(train_blend_x_gbm_ohe,
 test_blend_x_gbm_ohe,
 blend_scores_gbm_ohe,
 best_rounds_gbm_ohe) = lgbm_blend(estimators, 
                                   train_x, train_y, 
                                   test_x,
                                   10,
                                   500)

# Per-model averages over the folds: validation score and best round.
print (np.mean(blend_scores_gbm_ohe,axis=0))
print (np.mean(best_rounds_gbm_ohe,axis=0))

In [None]:
# Four XGBoost configurations trained on the one-hot-encoded sparse features,
# all using the custom logregobj objective. The "# score" comment after each
# configuration is the author's recorded score for it
# (presumably from a parameter search - TODO confirm).
estimators = [xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=11,
                              min_child_weight=200,
                              colsample_bytree=0.05,
                              subsample=1.0,
                              gamma=0.5,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1140.645843
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=15,
                              min_child_weight=177,
                              colsample_bytree=0.209870,
                              subsample=0.916137,
                              gamma=0.916137,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1140.646576
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=13,
                              min_child_weight=127,
                              colsample_bytree=0.087842,
                              subsample=0.969313,
                              gamma=0.573770,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             ),
#               score -1141.266846
              xgb.XGBRegressor(objective=logregobj,
                              learning_rate=0.01, 
                              n_estimators=10000,
                              max_depth=16,
                              min_child_weight=130,
                              colsample_bytree=0.060157,
                              subsample=0.971690,
                              gamma=0.676203,
                              nthread=-1,
                              silent=True,
                              seed=1234
                             )
#               score -1142.136109
              ]

# 4 folds, early stopping after 1000 rounds (positional fold / early_stopping_rounds arguments).
(train_blend_x_xgb_ohe,
 test_blend_x_xgb_ohe,
 blend_scores_xgb_ohe,
 best_rounds_xgb_ohe) = xgb_blend(estimators, 
                                      train_x, 
                                      train_y, 
                                      test_x,
                                      4,
                                      1000)

# Per-model averages over the folds: validation score and best round.
print (np.mean(blend_scores_xgb_ohe,axis=0))
print (np.mean(best_rounds_xgb_ohe,axis=0))

#### OHE + Keras

In [19]:
# early_stop = EarlyStopping(monitor='val_mae_log', patience=5, verbose=0, mode='auto')
# checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_mae_log', verbose=1, 
#                                save_best_only=True, mode='min')

def nn_model(params):
    """Build and compile a three-hidden-layer MLP regressor.

    Every hidden block is Dense -> PReLU -> BatchNormalization -> Dropout and
    the output is a single linear unit. The model is compiled with MAE loss,
    the ``mae_log`` metric and the 'adadelta' optimizer.

    Parameters
    ----------
    params : dict with keys 'input_dim', 'input_size', 'input_drop_out',
        'hidden_size0', 'hidden_drop_out0', 'hidden_size1',
        'hidden_drop_out1'. Any other keys are not read here.

    Returns
    -------
    The compiled Sequential model.
    """
    net = Sequential()

    # First block: it also declares the input dimensionality.
    net.add(Dense(params['input_size'], input_dim = params['input_dim']))
    net.add(PReLU())
    net.add(BatchNormalization())
    net.add(Dropout(params['input_drop_out']))

    # The two remaining hidden blocks share the same layout.
    for size_key, drop_key in (('hidden_size0', 'hidden_drop_out0'),
                               ('hidden_size1', 'hidden_drop_out1')):
        net.add(Dense(params[size_key]))
        net.add(PReLU())
        net.add(BatchNormalization())
        net.add(Dropout(params[drop_key]))

    net.add(Dense(1))
    net.compile(loss = 'mae', metrics=[mae_log], optimizer = 'adadelta')
    return net


def nn_blend_data(parameters, train_x, train_y, test_x, fold, early_stopping_rounds=0, batch_size=128):
    """Produce out-of-fold and test-set predictions for a list of MLP configurations.

    For every parameter dict a fresh network is built per KFold split, trained
    for 60 epochs on dense mini-batches generated from the sparse training
    part, and the weights with the best validation ``mae_log`` (checkpointed
    to "weights.hdf5") are used for prediction.

    Parameters
    ----------
    parameters : list of dicts accepted by ``nn_model``.
    train_x, test_x : sparse feature matrices (row-indexable).
    train_y : log-scale target array.
    fold : number of KFold splits.
    early_stopping_rounds : accepted for signature parity with the other
        blend functions; not used here.
    batch_size : mini-batch size for training and prediction.

    Returns
    -------
    (train_blend_x, test_blend_x, scores, best_rounds)
        best_rounds is never filled in this function and is returned as zeros.
    """
    print ("Blend %d estimators for %d folds" % (len(parameters), fold))
    skf = list(KFold(len(train_y), fold))

    train_blend_x = np.zeros((train_x.shape[0], len(parameters)))
    test_blend_x = np.zeros((test_x.shape[0], len(parameters)))
    scores = np.zeros ((len(skf),len(parameters)))
    best_rounds = np.zeros ((len(skf),len(parameters)))

    for j, nn_params in enumerate(parameters):
        print ("Model %d: %s" %(j+1, nn_params))
        test_blend_x_j = np.zeros((test_x.shape[0], len(skf)))
        for i, (train, val) in enumerate(skf):
            print ("Model %d fold %d" %(j+1,i+1))
            fold_start = time.time()
            train_x_fold = train_x[train]
            train_y_fold = train_y[train]
            val_x_fold = train_x[val]
            val_y_fold = train_y[val]

            model = nn_model(nn_params)
            print (model)
            model.fit_generator(generator=batch_generator(train_x_fold, train_y_fold, batch_size, True),
                                nb_epoch=60,
                                samples_per_epoch=train_x_fold.shape[0],
                                validation_data=(val_x_fold.todense(), val_y_fold),
                                verbose = 0,
                                callbacks=[ModelCheckpoint(filepath="weights.hdf5",
                                                           monitor='val_mae_log',
                                                           verbose=0, save_best_only=True, mode='min')
                                           ]
                                )

            # Restore the best checkpoint once; it serves both the validation
            # and the test predictions because predicting does not change weights.
            model.load_weights("weights.hdf5")
            model.compile(loss = 'mae', metrics=[mae_log], optimizer = 'adadelta')

            val_y_predict_fold = model.predict_generator(
                generator=batch_generatorp(val_x_fold, batch_size, True),
                val_samples=val_x_fold.shape[0]
            )

            score = log_mae(val_y_fold, val_y_predict_fold,200)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print ("Score:  %s %s" % (score, mean_absolute_error(val_y_fold, val_y_predict_fold)))
            scores[i,j]=score
            train_blend_x[val, j] = val_y_predict_fold.reshape(val_y_predict_fold.shape[0])

            test_blend_x_j[:,i] = model.predict_generator(generator=batch_generatorp(test_x, batch_size, True),
                                        val_samples=test_x.shape[0]
                                     ).reshape(test_x.shape[0])
            print ("Model %d fold %d fitting finished in %0.3fs" % (j+1,i+1, time.time() - fold_start))

        # Average the per-fold test predictions for this configuration.
        test_blend_x[:,j] = test_blend_x_j.mean(1)
        print ("Score for model %d is %f" % (j+1,np.mean(scores[:,j])))
    print ("Score for blended models is %f" % (np.mean(scores)))
    return (train_blend_x, test_blend_x, scores,best_rounds )

In [None]:
# Number of identically configured networks to train.
bagging_num = 10
nn_parameters = []

# Layer sizes and dropout rates read by nn_model; 'learning_rate' and
# 'optimizer' are recorded here but nn_model does not read them.
nn_parameter =  { 
     'input_size' :400 ,
     'input_dim' : train_x.shape[1],
     'input_drop_out' : 0.4 ,
     'hidden_size0' : 200 ,
     'hidden_drop_out0' :0.2,
     'hidden_size1' : 50 ,
     'hidden_drop_out1' :0.2,    
     'learning_rate': 0.1,
     'optimizer': 'adadelta'}

# The same configuration is repeated bagging_num times.
for i in range(bagging_num):
    nn_parameters.append(nn_parameter)

# 4 folds; the last positional argument is early_stopping_rounds.
(train_blend_x_ohe_mlp,
 test_blend_x_ohe_mlp,
 blend_scores_ohe_mlp,
 best_round_ohe_mlp) = nn_blend_data(nn_parameters,
                                     train_x,
                                     train_y,
                                     test_x,
                                     4,
                                     5)

# Parenthesized single-argument print works on Python 2 and 3, matching the
# other cells of this notebook.
print (np.mean(blend_scores_ohe_mlp,axis=0))
# Score of the bagged (row-wise averaged) out-of-fold MLP predictions.
print (log_mae(np.mean(train_blend_x_ohe_mlp,axis=1).reshape(train_size,1),train_y))

#### Blending
1. Ridge Regression
  * Ridge is focused on finding out weight of each feature which is exactly what we are interested in.
2. XGB linear

Specifically, we will simply average predictions from MLP models before using them for blending.

In [21]:
# Level-2 training matrix: out-of-fold predictions of every tree model, one
# column per model, plus a single column with the row-wise mean of the MLP predictions.
train_blend = np.hstack((
        train_blend_x_gbm_le,
        train_blend_x_xgb_le,
        train_blend_x_gbm_ohe,
        train_blend_x_xgb_ohe,
        np.mean(train_blend_x_ohe_mlp,axis=1).reshape(train_size,1)
        ))

# Level-2 test matrix, with columns in the same order as train_blend.
test_blend = np.hstack((
        test_blend_x_gbm_le,
        test_blend_x_xgb_le,
        test_blend_x_gbm_ohe,
        test_blend_x_xgb_ohe,
        np.mean(test_blend_x_ohe_mlp,axis=1).reshape(test_x.shape[0],1)
        ))

In [None]:
# ridge
from sklearn.linear_model import ElasticNet,Ridge,LinearRegression
print  ("Blending.")

# Regularization strengths to try for the Ridge blender.
param_grid = {
    'alpha':[0,0.00001,0.00003,0.0001,0.0003,0.001,0.003,0.01,0.03,0.1,0.3,1,3,10,15,20,
             25,30,35,40,45,50,55,60,70]
}
model = search_model(train_blend, 
                     train_y, 
                     Ridge(), 
                     param_grid, 
                     n_jobs=1, 
                     cv=4, 
                     refit=True
                    )   
# The only tuned parameter is alpha, so label it as such.
print ("\nbest alpha: %s" % (model.best_params_,))

print ("\nBest score:  %s" % model.best_score_)
print ("\n")

# refit=True leaves the search object fitted with the best alpha on all of
# train_blend. Predict the test set and map the log-scale output back to the
# loss scale, so that pred_y_ridge exists for the final weighted average.
pred_y_ridge = np.exp(model.predict(test_blend)) - lift

In [None]:
start = time.time() 
# XGBoost gblinear
params = {
    'eta': 0.1,
    'booster': 'gblinear',
    'lambda': 0,
    'alpha': 0, # you can try different values for alpha
    'lambda_bias' : 0,
    'silent': 0,
    'verbose_eval': True,
    'seed': 1234
}


# 4-fold CV of the linear booster on the level-2 features; stops once the
# custom MAE has not improved for 500 rounds.
cv_all = xgb.cv(params,xgb.DMatrix(train_blend, label=train_y,missing=np.nan),
                num_boost_round=400000, nfold=4, feval=xg_eval_mae,seed=1234,
                callbacks=[xgb.callback.early_stop(500)])

# This timing covers the cross-validation run, not data loading.
print ("\nCross-validation finished in %0.3fs" % (time.time() - start))    
# Show the round(s) with the lowest mean validation MAE.
print (cv_all[cv_all['test-mae-mean'] == cv_all['test-mae-mean'].min()])

In [None]:
xgtrain_blend = xgb.DMatrix(train_blend,label=train_y,missing=np.nan)

# Best boost round based on the previous step: position of the lowest mean
# validation MAE in the CV history (assumed to hold one row per boosting
# round - TODO confirm), plus one to turn the 0-based position into a count.
best_boost_round = int(cv_all['test-mae-mean'].values.argmin()) + 1

xgb_model=xgb.train(params, xgtrain_blend,
                    num_boost_round=best_boost_round,
                    feval=xg_eval_mae)

# Map the log-scale predictions back to the loss scale.
pred_y_gblinear = np.exp(xgb_model.predict(xgb.DMatrix(test_blend))) - lift

results = pd.DataFrame()
results['id'] = full_data[train_size:].id
results['loss'] = pred_y_gblinear
print ("Submission created.")

## Final submission 
  weights: [0.5,0.5]

In [None]:
# Final prediction: equal-weight average of the Ridge and gblinear blender outputs.
# Both inputs must already exist in the kernel as loss-scale test predictions
# (TODO confirm pred_y_ridge is produced by the Ridge blending step before running this cell).
pred_y = pred_y_ridge*0.5 + pred_y_gblinear*0.5

results = pd.DataFrame()
# Ids of the test rows, i.e. everything after the first train_size rows of the merged frame.
results['id'] = full_data[train_size:].id
results['loss'] = pred_y
# The file write is disabled, so the message below only reports that the frame was built.
# results.to_csv("../output/sub_final.csv", index=False)
print ("Submission created.")