# Volatility Prediction in Financial Markets  - Model Notebook


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sci




import datetime
from matplotlib.ticker import FuncFormatter, MaxNLocator
from scipy import stats
import gc
%matplotlib inline


## Load the data

In [None]:
# Read the three semicolon-separated input files: training inputs, training
# targets and test inputs, in that order.
orig_train_X, train_y, orig_test_X = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        'data/training_input.csv',
        'data/training_output.csv',
        'data/testing_input.csv',
    )
)

In [None]:
# Force a full garbage-collection pass after loading the raw CSV files.
gc.collect()

### Count missing values and impute them via interpolation

In [None]:
# Partition the input columns by name prefix; 'date' and 'product_id' are
# listed separately as the non-series columns.
volatility_cols = list(filter(lambda name: name.startswith('volatility'), orig_train_X.columns))
return_cols = list(filter(lambda name: name.startswith('return'), orig_train_X.columns))
other_cols = ['date', 'product_id']

In [None]:
# *_X keeps everything except the return columns (i.e. the volatility series);
# *_R keeps everything except the volatility columns (i.e. the return series).
train_X = orig_train_X.drop(columns=return_cols)
train_R = orig_train_X.drop(columns=volatility_cols)
test_X = orig_test_X.drop(columns=return_cols)
test_R = orig_test_X.drop(columns=volatility_cols)

Count NaNs and zeros (a zero means no price change):

In [None]:
# Per-row counts over the volatility columns, taken from the raw (un-imputed)
# inputs: how many values are missing and how many are exactly zero.
train_num_NANs = pd.DataFrame({
    'ID': orig_train_X['ID'],
    'product_id': orig_train_X['product_id'],
    'num_NANs': orig_train_X[volatility_cols].isna().sum(axis=1).astype(dtype='float64', copy=False),
})
train_true_zeros = pd.DataFrame({
    'ID': orig_train_X['ID'],
    'product_id': orig_train_X['product_id'],
    'true_zeros': orig_train_X[volatility_cols].eq(0).sum(axis=1).astype(dtype='float64', copy=False),
})
test_num_NANs = pd.DataFrame({
    'ID': orig_test_X['ID'],
    'product_id': orig_test_X['product_id'],
    'num_NANs': orig_test_X[volatility_cols].isna().sum(axis=1).astype(dtype='float64', copy=False),
})
test_true_zeros = pd.DataFrame({
    'ID': orig_test_X['ID'],
    'product_id': orig_test_X['product_id'],
    'true_zeros': orig_test_X[volatility_cols].eq(0).sum(axis=1).astype(dtype='float64', copy=False),
})

We linearly interpolate the NaNs corresponding to volatilities and set NaN returns to 0:

In [None]:
# Fill gaps in each row's volatility series by interpolating along the row
# (axis=1). The default interpolation direction is forward, so gaps at the
# very start of a row are presumably left as NaN here -- TODO confirm.
# NOTE(review): iloc[:, 3:] assumes the first three columns are the
# identifier columns and everything after them is the volatility series.
train_X.iloc[:,3:] = train_X.iloc[:,3:].interpolate(axis=1)
test_X.iloc[:,3:]  = test_X.iloc[:,3:].interpolate(axis=1)

In [None]:
# Replace, in place, every value that is still missing in the volatility
# frames with 0.
for _frame in (train_X, test_X):
    _frame.fillna(0, inplace=True)

In [None]:
# Replace, in place, every missing value in the return frames with 0.
for _frame in (train_R, test_R):
    _frame.fillna(0, inplace=True)

# Feature engineering

### Clustering

In [None]:
# Build the training feature table: one row per training observation, keyed by
# the identifier columns left in train_X once the volatility series is removed.

# NOTE(review): iloc[:, 3:] assumes the first three columns of train_X are
# identifiers and the rest is the volatility series -- confirm column order.
train_vol = train_X.iloc[:, 3:]
train_features = train_X.drop(columns=volatility_cols)

### Per-row summary statistics of the volatility series
train_features['min_vol'] = np.min(train_vol, axis=1)
train_features['max_vol'] = np.max(train_vol, axis=1)
train_features['std_vol'] = np.std(train_vol, axis=1)
train_features['median_vol'] = np.median(train_vol, axis=1)
train_features['mean_vol'] = np.mean(train_vol, axis=1)
train_features['skew_vol'] = stats.skew(train_vol, axis=1)
train_features['kurtosis_vol'] = stats.kurtosis(train_vol, axis=1)

### Mean volatility over six consecutive bins of `bin_length` columns each.
# iloc slices exclude their end position, so the slice must end at
# start + bin_length (not start + bin_length - 1) to cover the whole bin.
bin_length = 9
for bin_number in range(1, 7):
    bin_start = 3 + (bin_number - 1) * bin_length
    train_features['bin{}_vol'.format(bin_number)] = np.mean(
        train_X.iloc[:, bin_start:bin_start + bin_length], axis=1)

### Differencing to account for ACF feature (first differences, computed once)
train_vol_diff = np.diff(train_vol, axis=1)
train_features['lagged_diff_mean'] = np.mean(train_vol_diff, axis=1)
train_features['lagged_diff_std'] = np.std(train_vol_diff, axis=1)
train_features['lagged_diff_max'] = np.max(train_vol_diff, axis=1)
train_features['lagged_diff_min'] = np.min(train_vol_diff, axis=1)

# The three volatility columns that get their own group-level statistics.
last_cols = ['volatility 13:45:00', 'volatility 13:50:00', 'volatility 13:55:00']

### Date specific features
# The keys are taken from the groupby result itself so that every mean stays
# attached to its own date (groupby output is sorted by key, whereas
# Series.unique() returns keys in order of appearance).
date_mean = (train_X.groupby('date')[volatility_cols].mean().mean(axis=1)
             .rename('date_mean').reset_index())
train_features = train_features.merge(date_mean, on='date', how="left").set_index(train_features.index)

# Column selection uses a list: tuple-style selection on a groupby is rejected
# by recent pandas releases.
date_groups = train_X.groupby('date')[last_cols]
for stat_name, stat_frame in [
    ('mean', date_groups.mean()),
    ('std', date_groups.std()),
    ('max', date_groups.max()),
    ('min', date_groups.min()),
    ('skew', date_groups.skew()),
    ('kurt', date_groups.apply(pd.DataFrame.kurt)),
]:
    # 'volatility 13:45:00' -> '13:45_<stat>_vol_date'
    stat_frame = stat_frame.rename(columns={
        col: '{}_{}_vol_date'.format(col[len('volatility '):-len(':00')], stat_name)
        for col in last_cols})
    train_features = train_features.merge(stat_frame, on='date', how="left").set_index(train_features.index)

### Product specific features
product_mean = (train_X.groupby('product_id')[volatility_cols].mean().mean(axis=1)
                .rename('product_mean').reset_index())
train_features = train_features.merge(product_mean, on='product_id', how="left").set_index(train_features.index)

product_groups = train_X.groupby('product_id')[last_cols]
for stat_name, stat_frame in [
    ('mean', product_groups.mean()),
    ('std', product_groups.std()),
    ('max', product_groups.max()),
    ('min', product_groups.min()),
]:
    # 'volatility 13:45:00' -> '13:45_<stat>_vol_stock'
    stat_frame = stat_frame.rename(columns={
        col: '{}_{}_vol_stock'.format(col[len('volatility '):-len(':00')], stat_name)
        for col in last_cols})
    train_features = train_features.merge(stat_frame, on='product_id', how="left").set_index(train_features.index)

### Return features
# NOTE(review): 'ret_sign_std' is named like a standard deviation but holds
# the variance (np.var); kept as is so the feature values do not change.
train_features['ret_sign_std'] = np.var(train_R.iloc[:, 3:], axis=1)
train_features['ret_sign_accum'] = np.sum(train_R.iloc[:, 3:], axis=1)

# Merge the per-row NaN and zero counts onto the feature table
train_features = train_features.merge(train_num_NANs.drop(columns='product_id'), on='ID', how="left").set_index(train_features.index)
train_features = train_features.merge(train_true_zeros.drop(columns='product_id'), on='ID', how="left").set_index(train_features.index)

# Merge target onto data frame
train_features = train_features.merge(train_y, on='ID', how="left").set_index(train_features.index)

# Any statistic that came out as NaN is replaced by 0.
train_features.fillna(0, inplace=True)

##### Test features

In [None]:
# Build the test feature table with the same columns as the training feature
# table (minus the target). All group-level statistics are computed from the
# test data itself.

# NOTE(review): iloc[:, 3:] assumes the first three columns of test_X are
# identifiers and the rest is the volatility series -- confirm column order.
test_vol = test_X.iloc[:, 3:]
test_features = test_X.drop(columns=volatility_cols)

### Per-row summary statistics of the volatility series
test_features['min_vol'] = np.min(test_vol, axis=1)
test_features['max_vol'] = np.max(test_vol, axis=1)
test_features['std_vol'] = np.std(test_vol, axis=1)
test_features['median_vol'] = np.median(test_vol, axis=1)
test_features['mean_vol'] = np.mean(test_vol, axis=1)
test_features['skew_vol'] = stats.skew(test_vol, axis=1)
test_features['kurtosis_vol'] = stats.kurtosis(test_vol, axis=1)

### Mean volatility over six consecutive bins of `bin_length` columns each.
# iloc slices exclude their end position, so the slice must end at
# start + bin_length (not start + bin_length - 1) to cover the whole bin.
bin_length = 9
for bin_number in range(1, 7):
    bin_start = 3 + (bin_number - 1) * bin_length
    test_features['bin{}_vol'.format(bin_number)] = np.mean(
        test_X.iloc[:, bin_start:bin_start + bin_length], axis=1)

### Differencing to account for ACF feature (first differences, computed once)
test_vol_diff = np.diff(test_vol, axis=1)
test_features['lagged_diff_mean'] = np.mean(test_vol_diff, axis=1)
test_features['lagged_diff_std'] = np.std(test_vol_diff, axis=1)
test_features['lagged_diff_max'] = np.max(test_vol_diff, axis=1)
test_features['lagged_diff_min'] = np.min(test_vol_diff, axis=1)

# The three volatility columns that get their own group-level statistics.
last_cols = ['volatility 13:45:00', 'volatility 13:50:00', 'volatility 13:55:00']

### Date specific features
# The keys are taken from the groupby result itself so that every mean stays
# attached to its own date (groupby output is sorted by key, whereas
# Series.unique() returns keys in order of appearance).
date_mean = (test_X.groupby('date')[volatility_cols].mean().mean(axis=1)
             .rename('date_mean').reset_index())
test_features = test_features.merge(date_mean, on='date', how="left").set_index(test_features.index)

# Column selection uses a list: tuple-style selection on a groupby is rejected
# by recent pandas releases.
date_groups = test_X.groupby('date')[last_cols]
for stat_name, stat_frame in [
    ('mean', date_groups.mean()),
    ('std', date_groups.std()),
    ('max', date_groups.max()),
    ('min', date_groups.min()),
    ('skew', date_groups.skew()),
    ('kurt', date_groups.apply(pd.DataFrame.kurt)),
]:
    # 'volatility 13:45:00' -> '13:45_<stat>_vol_date'
    stat_frame = stat_frame.rename(columns={
        col: '{}_{}_vol_date'.format(col[len('volatility '):-len(':00')], stat_name)
        for col in last_cols})
    test_features = test_features.merge(stat_frame, on='date', how="left").set_index(test_features.index)

### Product specific features
product_mean = (test_X.groupby('product_id')[volatility_cols].mean().mean(axis=1)
                .rename('product_mean').reset_index())
test_features = test_features.merge(product_mean, on='product_id', how="left").set_index(test_features.index)

product_groups = test_X.groupby('product_id')[last_cols]
for stat_name, stat_frame in [
    ('mean', product_groups.mean()),
    ('std', product_groups.std()),
    ('max', product_groups.max()),
    ('min', product_groups.min()),
]:
    # 'volatility 13:45:00' -> '13:45_<stat>_vol_stock'
    stat_frame = stat_frame.rename(columns={
        col: '{}_{}_vol_stock'.format(col[len('volatility '):-len(':00')], stat_name)
        for col in last_cols})
    test_features = test_features.merge(stat_frame, on='product_id', how="left").set_index(test_features.index)

### Return features
# NOTE(review): 'ret_sign_std' is named like a standard deviation but holds
# the variance (np.var); kept as is so the feature values do not change.
test_features['ret_sign_std'] = np.var(test_R.iloc[:, 3:], axis=1)
test_features['ret_sign_accum'] = np.sum(test_R.iloc[:, 3:], axis=1)

# Merge the per-row NaN and zero counts onto the feature table
test_features = test_features.merge(test_num_NANs.drop(columns='product_id'), on='ID', how="left").set_index(test_features.index)
test_features = test_features.merge(test_true_zeros.drop(columns='product_id'), on='ID', how="left").set_index(test_features.index)

# Any statistic that came out as NaN is replaced by 0.
test_features.fillna(0, inplace=True)

## Results

Define the error function (mean absolute percentage error, MAPE)

In [None]:
def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error of y_pred against y_true.

    The result is expressed in percent. Entries where y_true is zero are
    divided by zero and are not treated specially.
    """
    relative_error = (y_true - y_pred) / y_true
    return 100 * np.mean(np.abs(relative_error))

In [None]:
def MAPE_per_obs(y_true, y_pred):
    """Return the absolute percentage error of every observation.

    Element-wise counterpart of the mean absolute percentage error: the
    result has one value (in percent) per entry of the inputs.
    """
    relative_error = (y_true - y_pred) / y_true
    return 100 * np.abs(relative_error)

### Provide benchmark models

In [None]:
# Benchmark 1: use each row's mean observed volatility as the prediction.
train_pred_mean = train_X_.loc[:, ['mean_vol', 'TARGET']]
val_pred_mean = val_X_.loc[:, ['mean_vol', 'TARGET']]

mean_train_error = round(MAPE(train_features['TARGET'], train_features['mean_vol']), 4)
mean_val_error = round(MAPE(val_pred_mean['TARGET'], val_pred_mean['mean_vol']), 4)
print('Train error=', mean_train_error, '%')
print('Validation error =', mean_val_error, '%')

# Write the benchmark predictions for the test set as an (ID, TARGET) file.
test_pred_mean = test_features[['ID', 'mean_vol']].rename(columns={'mean_vol': 'TARGET'})
test_pred_mean.to_csv('results/mean_pred.csv', sep=';', index=False)

In [None]:
# Benchmark 2: use each row's median observed volatility as the prediction.
train_pred_median = train_X_.loc[:, ['median_vol', 'TARGET']]
val_pred_median = val_X_.loc[:, ['median_vol', 'TARGET']]

median_train_error = round(MAPE(train_features['TARGET'], train_features['median_vol']), 4)
median_val_error = round(MAPE(val_pred_median['TARGET'], val_pred_median['median_vol']), 4)
print('Train error=', median_train_error, '%')
print('Validation error =', median_val_error, '%')

# Write the benchmark predictions for the test set as an (ID, TARGET) file.
test_pred_median = test_features[['ID', 'median_vol']].rename(columns={'median_vol': 'TARGET'})
test_pred_median.to_csv('results/median_pred.csv', sep=';', index=False)


### Normalise data

In [None]:
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import PowerTransformer


# Scaler applied to the regression columns. Alternatives that were tried:
#   RobustScaler(quantile_range=(40, 60)), PowerTransformer(method='box-cox'),
#   MinMaxScaler()
scaler = StandardScaler()


# Work on copies so the unscaled feature tables stay available.
train_X_norm = train_X_.copy()
val_X_norm = val_X_.copy()
train_features_norm = train_features.copy()
test_features_norm = test_features.copy()


# Scale train and validation data with statistics from the training split only.
# fit() returns the estimator itself, so fitting `scaler` twice would leave
# both transformers pointing at one object holding only the latest statistics;
# an independent clone keeps the validation transformer valid afterwards.
transformer_validation = clone(scaler).fit(train_X_[regression_cols])
train_X_norm[regression_cols] = transformer_validation.transform(train_X_norm[regression_cols])
val_X_norm[regression_cols] = transformer_validation.transform(val_X_norm[regression_cols])

# Scale train and test data with statistics from the full training table.
transformer = scaler.fit(train_features[regression_cols])
train_features_norm[regression_cols] = transformer.transform(train_features_norm[regression_cols])
test_features_norm[regression_cols] = transformer.transform(test_features_norm[regression_cols])

## Training global regression models

In [None]:
from sklearn.model_selection import ParameterGrid
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV



In [None]:
from sklearn.metrics import make_scorer
def score_function(y_true, y_pred):
    """Return the mean absolute percentage error (in percent) of y_pred."""
    relative_error = (y_true - y_pred) / y_true
    return 100 * np.mean(np.abs(relative_error))
# Wrap score_function as a scorer object usable by GridSearchCV and
# cross_val_score. greater_is_better=False marks it as an error measure
# (lower is better), so the scores reported by the search are sign-flipped.
mape = make_scorer(score_function, greater_is_better=False)

In [None]:
# Hyper-parameter grid for the global SGD regressor. Only alpha, l1_ratio and
# epsilon take more than one value; every other entry is pinned to one value.
grid = dict(
    alpha=10.0 ** -np.arange(4, 7),
    l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    loss=['huber'],
    max_iter=[10 ** 7],
    penalty=['elasticnet'],
    early_stopping=[False],
    tol=[1e-8],
    fit_intercept=[True],
    epsilon=[0.001, 0.005, 0.01, 0.015],
    shuffle=[False],
    learning_rate=['optimal'],
    eta0=[0.5],
    power_t=[0.25],
    n_iter_no_change=[5],
)
# Expand the grid into an iterable over its parameter combinations.
# NOTE(review): paramGrid is not referenced again in the visible cells.
paramGrid = ParameterGrid(grid)


In [None]:
# 5-fold cross-validated grid search for one global SGD regressor, scored with
# the MAPE scorer. Note that `grid` is rebound from the parameter dict to the
# search object here.
model = SGDRegressor()
grid = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring=mape,
    n_jobs=-1,
    cv=5,
)
grid_result = grid.fit(train_features_norm[regression_cols], train_features_norm['TARGET'])

best_model = grid_result.best_estimator_
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Report the mean and spread of the validation score of every combination.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Fit the selected estimator on the full normalised training table, keep it in
# fitted_models, and report its training error with negative predictions
# clipped to 0.
fitted_model = best_model.fit(train_features_norm[regression_cols], train_features_norm['TARGET'])
fitted_models.append(fitted_model)
sgd_train_prediction = np.maximum(fitted_model.predict(train_features_norm[regression_cols]), 0)
sgd_train_error = round(MAPE(train_features_norm['TARGET'], sgd_train_prediction), 4)
print('Train error SGD regressor =', sgd_train_error, '%')

## Training local regression model

In [None]:
# Hyper-parameter grid for the per-product SGD regressors. Only alpha,
# l1_ratio and epsilon take more than one value; the rest are pinned.
grid = dict(
    alpha=[1e-5, 1e-4, 5e-4, 1e-3],
    l1_ratio=[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    loss=['huber'],
    max_iter=[10 ** 7],
    penalty=['elasticnet'],
    early_stopping=[False],
    tol=[1e-8],
    fit_intercept=[True],
    epsilon=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 3e-2],
    shuffle=[False],
    learning_rate=['optimal'],
    eta0=[0.5],
    power_t=[0.25],
    n_iter_no_change=[5],
)

In [None]:
# Search object for the per-product models: 5-fold cross-validation scored
# with the MAPE scorer. `grid` is rebound from the parameter dict to the
# GridSearchCV instance, so this cell cannot be re-run without first
# re-running the cell that defines the dict.
model = SGDRegressor()
grid = GridSearchCV(estimator=model, param_grid=grid,scoring=mape, n_jobs = -1, cv =5)

In [None]:
# Fit one grid-searched SGD regressor per product and collect its predictions
# for that product's training and test rows.
bestModels = []
bestScores = []
# NOTE(review): the product ids come from train_X_norm while the rows come
# from train_features_norm -- confirm both contain the same products.
product_ids = train_X_norm['product_id'].unique()

# Per-product prediction frames are gathered in lists and concatenated once at
# the end: DataFrame.append was removed in pandas 2.0 and copied the whole
# accumulated frame on every iteration.
train_pred_parts = []
test_pred_parts = []

for product_id in product_ids:
    df_slice_train = train_features_norm[train_features_norm['product_id'] == product_id]
    df_slice_test = test_features_norm[test_features_norm['product_id'] == product_id]
    grid_result = grid.fit(df_slice_train[regression_cols], df_slice_train['TARGET'])
    best_estimator = grid_result.best_estimator_

    print("Product_id: ",product_id)
    print("MAPE Score: ",grid_result.best_score_)
    bestModels.append(best_estimator)
    bestScores.append(grid_result.best_score_)

    # Negative predictions are clipped to 0.
    train_pred_parts.append(pd.DataFrame({
        'ID': df_slice_train['ID'],
        'prediction': np.maximum(best_estimator.predict(df_slice_train[regression_cols]), 0.0),
    }))
    # A product without test rows has nothing to predict; skipping it avoids
    # calling predict() on an empty slice.
    if not df_slice_test.empty:
        test_pred_parts.append(pd.DataFrame({
            'ID': df_slice_test['ID'],
            'TARGET': np.maximum(best_estimator.predict(df_slice_test[regression_cols]), 0.0),
        }))

# pd.concat rejects an empty list, so fall back to an empty frame that has the
# expected columns.
train_pred = pd.concat(train_pred_parts) if train_pred_parts else pd.DataFrame(columns=['ID','prediction'])
test_pred = pd.concat(test_pred_parts) if test_pred_parts else pd.DataFrame(columns=['ID','TARGET'])

    

In [None]:
# Attach the per-product training predictions to the feature table by ID,
# then report the overall MAPE and plot the error of every observation.
train_pred = train_pred.astype({'ID': np.int64})
temp = train_features_norm.merge(train_pred, on='ID', how="left").set_index(train_features_norm.index)
local_train_error = round(MAPE(temp['TARGET'], temp['prediction']), 4)
print('Train error SGD regressor =', local_train_error, '%')
local_errors_per_obs = MAPE_per_obs(temp['TARGET'], temp['prediction'])
plt.plot(local_errors_per_obs)

In [None]:
# Index the per-product test predictions by ID, put them in the row order of
# test_X, and write them out with the ID index as the first column.
test_pred = test_pred.set_index('ID')
test_pred_in_test_order = test_pred.loc[test_X['ID']]
test_pred_in_test_order.to_csv('results/local_SGD_regressor_new_last.csv', sep=';', index=True)

## Training feed-forward neural net

In [None]:
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from keras.models import Sequential
from sklearn.metrics import accuracy_score
from keras.layers import Dense, Dropout, Activation
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from sklearn.utils import parallel_backend
# NOTE(review): seed is not passed to any estimator or random generator in the
# visible cells -- confirm it is actually applied somewhere.
seed = 43

In [None]:
from keras import regularizers
from keras.utils import plot_model
import pydot
from keras.optimizers import SGD


In [None]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version: refuse to continue on anything older than 1.0.
version_message = 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), version_message
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU: report the default device if there is one, otherwise warn.
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    warnings.warn('No GPU found. Please ensure you have installed TensorFlow correctly')

In [None]:
##Define model
def custom_loss(y_true, y_pred):
    """Mean absolute percentage error loss, in percent, for Keras models.

    y_true and y_pred are backend tensors, so only backend operations are
    used: NumPy functions such as np.abs cannot be applied to symbolic
    tensors on recent backends. The denominator is clipped at the backend
    epsilon so that a zero target gives a large finite loss instead of
    inf/NaN; Keras' built-in MAPE loss clips the same way.
    """
    absolute_error = K.abs(y_true - y_pred)
    denominator = K.clip(K.abs(y_true), K.epsilon(), None)
    return K.mean(absolute_error / denominator) * 100

def baseline_model(l1=30,l2=10, l3=5):
    """Build and compile the feed-forward regression network.

    l1, l2 and l3 are the unit counts of the three hidden layers. The input
    width is len(regression_cols); every layer, including the single-unit
    output, uses a ReLU activation and the 'normal' kernel initializer. The
    model is compiled with custom_loss as both loss and metric and the
    Adagrad optimizer.
    """
    shared_layer_args = dict(activation='relu', kernel_initializer='normal')
    model = Sequential([
        Dense(l1, name='Layer_1', input_dim=len(regression_cols), **shared_layer_args),
        Dense(l2, name='Layer_2', **shared_layer_args),
        Dense(l3, name='Layer_3', **shared_layer_args),
        Dense(1, name='Output', **shared_layer_args),
    ])
    model.compile(loss=custom_loss, optimizer='Adagrad', metrics=[custom_loss])
    return model

In [None]:
# Grid-search the three hidden-layer widths of the feed-forward network with
# 5-fold cross-validation, scored with the MAPE scorer.
estimator = KerasRegressor(build_fn=baseline_model, epochs=20, batch_size=5000, verbose=True)
l1 = [50, 30, 20]
l2 = [20, 10, 5]
l3 = [10, 5, 3]

param_grid = {'l1': l1, 'l2': l2, 'l3': l3}
grid = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    scoring=mape,
    n_jobs=1,
    cv=5,
    return_train_score=True,
)
grid_result = grid.fit(train_features_norm[regression_cols], train_features_norm['TARGET'])

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Report the mean and spread of the validation score of every combination.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


In [None]:
# Predict with the best network found by the grid search, clipping negative
# outputs to 0; the training score is printed right after the train pass.
nn_train_raw = grid_result.best_estimator_.predict(train_features_norm[regression_cols])
nn_train_prediction = np.maximum(nn_train_raw, 0.0)
print('score train: ', score_function(train_features_norm['TARGET'], nn_train_prediction))

nn_test_raw = grid_result.best_estimator_.predict(test_features_norm[regression_cols])
nn_test_prediction = np.maximum(nn_test_raw, 0.0)

In [None]:
# Report the training MAPE of the feed-forward network, rounded to 4 decimals.
print('Train error neural netowork =', round(MAPE(train_features_norm['TARGET'], nn_train_prediction), 4), '%')

In [None]:
# Pair every test ID with its clipped network prediction and write the result
# as a semicolon-separated (ID, TARGET) file.
nn_predictions = pd.DataFrame(
    {
        'ID': test_features_norm['ID'],
        'TARGET': nn_test_prediction,
    }
)
nn_predictions.to_csv(
    'results/neural_predictions_best.csv',
    sep=';',
    index=False,
)

### Training LSTM network

In [None]:
# univariate lstm example
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Bidirectional


In [None]:
##Define model
def custom_loss(y_true, y_pred):
    """Mean absolute percentage error loss, in percent, for Keras models.

    y_true and y_pred are backend tensors, so only backend operations are
    used: NumPy functions such as np.abs cannot be applied to symbolic
    tensors on recent backends. The denominator is clipped at the backend
    epsilon so that a zero target gives a large finite loss instead of
    inf/NaN; Keras' built-in MAPE loss clips the same way.
    """
    absolute_error = K.abs(y_true - y_pred)
    denominator = K.clip(K.abs(y_true), K.epsilon(), None)
    return K.mean(absolute_error / denominator) * 100

def LSTM_model():
    """Build and compile the univariate LSTM regression network.

    The network takes sequences of 54 steps with one feature per step, runs
    them through a single 25-unit LSTM layer and maps the result to one output
    unit. It is compiled with custom_loss as both loss and metric and the
    Adagrad optimizer.
    """
    n_steps, n_features = 54, 1
    # NOTE(review): the tanh output activation bounds predictions to (-1, 1)
    # -- confirm this matches the range of the target.
    model = Sequential([
        LSTM(25, kernel_initializer='normal', input_shape=(n_steps, n_features)),
        Dense(1, name='Output', activation='tanh', kernel_initializer='normal'),
    ])
    model.compile(loss=custom_loss, optimizer='Adagrad', metrics=[custom_loss])
    return model

##### Getting the data reshaped to correct LSTM input structure

In [None]:
# Training volatility series as a 2-D array (rows x time steps); columns
# 0-2 of train_X are skipped, presumably the identifier columns.
X_lstm = np.array(train_X.iloc[:,3:])

In [None]:
# Test volatility series as a 2-D array (rows x time steps); columns 0-2 of
# test_X are skipped, presumably the identifier columns.
X_lstm_test = np.array(test_X.iloc[:,3:])

In [None]:
# Training targets as a 2-D array: every column of train_y after the first.
# NOTE(review): relies on train_y rows being in the same order as train_X.
y_lstm =np.array(train_y.iloc[:,1:])

In [None]:
# Append a trailing feature axis of length 1: (rows, steps) -> (rows, steps, 1).
X_lstm = np.expand_dims(X_lstm, axis=2)

In [None]:
# Append a trailing feature axis of length 1: (rows, steps) -> (rows, steps, 1).
X_lstm_test = np.expand_dims(X_lstm_test, axis=2)

##### Fitting the LSTM network

In [None]:
# scikit-learn style wrapper around LSTM_model: 25 epochs, batches of 5000
# rows, with training progress printed.
lstm_estimator = KerasRegressor(build_fn=LSTM_model, epochs=25, batch_size=5000,verbose=True)

In [None]:
# Train the LSTM on the full training set; no validation data is passed here.
lstm_estimator.fit(X_lstm,y_lstm)

In [None]:
# In-sample predictions of the fitted LSTM (not clipped at 0, unlike the
# predictions of the other models).
lstm_train_preds = lstm_estimator.predict(X_lstm)

In [None]:
# Report the training MAPE of the LSTM, rounded to 4 decimals (the printed
# label is the same one used for the feed-forward network).
print('Train error neural netowork =', round(MAPE(train_y['TARGET'], lstm_train_preds), 4), '%')

In [None]:
# Test-set predictions of the fitted LSTM (not clipped at 0).
lstm_test_preds = lstm_estimator.predict(X_lstm_test)

In [None]:
# Pair every test ID with its LSTM prediction and write the result as a
# semicolon-separated (ID, TARGET) file.
lstm_predictions = pd.DataFrame(
    {
        'ID': test_X['ID'],
        'TARGET': lstm_test_preds,
    }
)
lstm_predictions.to_csv(
    'results/lstm_regressor_25.csv',
    sep=';',
    index=False,
)