<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [2]:
import sys, os
import pandas as pd
import numpy as np
from keras.callbacks import EarlyStopping
sys.path.append('../python')
pd.set_option('display.max_columns', 100)

from dataprocessingnew2 import *
from runmodels import *
from LSTM_MS import *
# from util import Run_Algorithms

import glob

# dataprocessing = DataProcessing('2006-01-01', '2022-12-01', daily=True)
# df_clean, train_df, test_df = dataprocessing.clean_final(fillna = True)

In [1]:
# %store train_df
# %store test_df

%store -r train_df
%store -r test_df

len(train_df.Asset.unique())

85

In [4]:
coverage_df = pd.read_csv('../data/coverage_dataframe.csv')[['PermID', 'Name', 'TRBCEconomicSector']]
coverage_df['PermID'] = coverage_df.PermID.astype(int)

unique_assets_df = pd.DataFrame({
    'Assets': train_df.Asset.unique()
})

unique_assets_df = pd.merge(unique_assets_df, coverage_df, how = 'left', left_on = 'Assets', right_on= 'PermID')
unique_assets_df = unique_assets_df.iloc[:, 1:]
unique_assets_df.columns = ['Assets', 'Firm Name', 'Economic Sector']

sectors_df = pd.DataFrame(np.unique(unique_assets_df['Economic Sector'], return_counts=True)).T
sectors_df.columns = ['sectors','num_firms']

In [6]:
# cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly',
#         'buzz','ESG','ESGCombined','ESGControversies', 'EnvironmentalPillar',
#         'GovernancePillar','SocialPillar','Community','EnvironmentalInnovation',
#         'Management','ProductResponsibility','Shareholders','Workforce', 'V^YZ']

# cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly',
#         'buzz', 'EnvironmentalPillar', 'Community','Shareholders', 'V^YZ']

# cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly', 'V^YZ']

cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly', 'buzz', 'ESG', 'ESGCombined', 'ESGControversies', 'EnvironmentalPillar', 'GovernancePillar', 'SocialPillar', 'Community', 'EnvironmentalInnovation', 'Management', 'ProductResponsibility', 'Shareholders', 'Workforce', 'noise_beta_0.0_gamma_0.25', 'V^YZ']

In [7]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

In [8]:
unique_assets_df.head(2)

Unnamed: 0,Assets,Firm Name,Economic Sector
0,4295874865,DCC PLC,Consumer non-cyclicals
1,4295893899,Berkeley Group Holdings PLC,Consumer cyclicals


In [9]:
from sklearn.metrics import mean_squared_error

In [10]:
def vis_line_plot_results(y_pred, y_test, name, r):

    dictionaries = {
        'EN': 'Elastic Net',
        'RF': 'Random Forest',
        'LSTM': 'Long Short-Term Memory',
        'HAR': 'Heterogeneous AutoRegressive',
        'GARCH': 'Generalised AutoRegressive Conditional Heteroskedasticity'
    }

    algorithms = 'EN'
    features = 'm3'

    # Calculate absolute differences between actual and predicted values
    diff = np.abs(y_test - y_pred)

    fig, ax1 = plt.subplots(figsize=(10,5))

    # Plot actual and predicted values
    ax1.plot(y_test, alpha = 0.7, color = 'black')
    ax1.plot(y_pred, marker='.')
    ax1.legend(['True Volatility', 'Predicted Volatility'], fontsize=7.5, loc='upper left')
    ax1.grid(axis='y', alpha=0.5)
    ax1.set_ylabel('Volatility', fontsize=9)
    # print(np.min(y_test))
    ax1.set_ylim([np.min(y_test)-np.min(y_test)*.5, np.max(y_test)+np.max(y_test)*.05]) 

    # Create a second y-axis
    ax2 = ax1.twinx()

    # Plot differences on the secondary y-axis as a bar chart
    ax2.bar(y_test.index, diff, color='gray', alpha=0.8, width=1.5)
    ax2.legend(['Absolute Difference'], fontsize=7.5, loc='upper right')
    ax2.set_ylabel('Absolute Difference', fontsize=9)

    # Setting y-limits for the second axis to prevent overlap with line plots
    ax2.set_ylim([0, np.max(diff)*3]) 

    # Set main title
    plt.title(f'{dictionaries[algorithms]} Prediction on "{name}" [Data:{features}]', fontsize=12)

    plt.xticks(rotation=0)

    plt.savefig(f'../outputs/{algorithms}-{features}/{str(r+1).zfill(3)}-{algorithms}-{name}.png')
    plt.close()

In [11]:
cols

['vol_series_daily',
 'vol_series_weekly',
 'vol_series_monthly',
 'buzz',
 'ESG',
 'ESGCombined',
 'ESGControversies',
 'EnvironmentalPillar',
 'GovernancePillar',
 'SocialPillar',
 'Community',
 'EnvironmentalInnovation',
 'Management',
 'ProductResponsibility',
 'Shareholders',
 'Workforce',
 'noise_beta_0.0_gamma_0.25',
 'V^YZ']

In [12]:
train_dfs, test_dfs = [], []
target = 'V^YZ'
# X_train, y_train = np.asarray([]), np.asarray([])

# Define the hyperparameters
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
    'l1_ratio': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

master_df = pd.DataFrame()

for row, i in enumerate(unique_assets_df.Assets):
    
    asset_data = train_df[train_df.Asset == i]
    train_indexes = asset_data.index
    asset_data = asset_data.reset_index(drop=True)
    
    asset_test_data = test_df[test_df.Asset == i][cols]
    test_indexes = asset_test_data.index
    asset_test_data.reset_index(drop=True, inplace=True)
    
    ids = unique_assets_df.iloc[row, 0]
    name = unique_assets_df.iloc[row, 1]
    
    train_size = int(0.8 * len(asset_data))
    sub_train_df = asset_data.iloc[:train_size][cols]
    sub_test_df = asset_data.iloc[train_size:][cols]
    
    
    X_sub_train_df = sub_train_df.drop([target], axis=1)
    Y_sub_train_df = sub_train_df[target].reset_index(drop=True)
    
    X_sub_test_df = sub_test_df.drop([target], axis=1)
    Y_sub_test_df = sub_test_df[target].reset_index(drop=True)
    
    scaler = StandardScaler()
    X_sub_train_df = pd.DataFrame(scaler.fit_transform(X_sub_train_df))
    X_sub_test_df = pd.DataFrame(scaler.transform(X_sub_test_df))

    
    sub_train_df = pd.concat([X_sub_train_df, Y_sub_train_df], axis=1)
    sub_test_df = pd.concat([X_sub_test_df, Y_sub_test_df], axis=1)

    sub_train_df.columns = cols
    sub_test_df.columns = cols

    merge_df = pd.concat([sub_train_df, sub_test_df])
    
    X_train = merge_df[cols].drop([target], axis=1)
    y_train = merge_df[target]
    
    # Initialize ElasticNet
    model = ElasticNet(max_iter=10000) # Increased max_iter for convergence

    tscv = TimeSeriesSplit(n_splits=5)
    # Initialize Grid Search
    gsearch = GridSearchCV(estimator=model, cv=tscv,
                           param_grid=param_grid, 
                           verbose=1, 
                           scoring='neg_mean_squared_error')

    # display(X_train)
    # Fit the grid search
    gsearch.fit(X_train, y_train)
    
    
    X_test = asset_test_data.drop([target], axis=1)
    y_test = asset_test_data[target]
    
    X_test = scaler.transform(X_test)
    
    model = ElasticNet(max_iter=10000, alpha= gsearch.best_params_['alpha']
                       , l1_ratio = gsearch.best_params_['l1_ratio']) 
    model.fit(X_train, y_train)
    
    y_hat = model.predict(X_test)

    mse_test = mean_squared_error(y_hat, y_test)
    
    y_hat = pd.Series(y_hat, index=test_indexes)
    y_test = pd.Series(y_test.values, index=test_indexes)
    
    vis_line_plot_results(y_hat, y_test, name, row)
    
    # display(X_train)
    temp_df = pd.DataFrame({
        'L1 Ratio': gsearch.best_params_['l1_ratio'],
        'Alpha': gsearch.best_params_['alpha'],
        'Best Scores': gsearch.best_score_*-1 *10**3,
        'MSE': mse_test*10**3,
        'Asset IDs': ids
    },
    index = [row+1])
    # display(temp_df)
    
    master_df = pd.concat([master_df, temp_df])
    
    # dftrain = pd.concat(train_dfs, ignore_index=True)
    # dftest = pd.concat(test_dfs, ignore_index=True)


    # train_dfs = scaler.fit_transform(train_dfs)
    # test_dfs = scaler.transform(test_dfs)

Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 

In [13]:
master_df.to_csv('../results/EN-M4-GRIDSEARCH.csv', index=None)

In [15]:
cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly', 'buzz', 'ESG', 'ESGCombined', 'ESGControversies', 'EnvironmentalPillar', 'GovernancePillar', 'SocialPillar', 'Community', 'EnvironmentalInnovation', 'Management', 'ProductResponsibility', 'Shareholders', 'Workforce', 'noise_beta_0.0_gamma_0.25', 'noise_beta_0.0_gamma_0.5', 'noise_beta_0.0_gamma_1.0', 'noise_beta_0.5_gamma_0.25', 'noise_beta_0.5_gamma_0.5', 'V^YZ']

In [16]:
train_dfs, test_dfs = [], []
target = 'V^YZ'
# X_train, y_train = np.asarray([]), np.asarray([])

# Define the hyperparameters
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
    'l1_ratio': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

master_df = pd.DataFrame()

for row, i in enumerate(unique_assets_df.Assets):
    
    asset_data = train_df[train_df.Asset == i]
    train_indexes = asset_data.index
    asset_data = asset_data.reset_index(drop=True)
    
    asset_test_data = test_df[test_df.Asset == i][cols]
    test_indexes = asset_test_data.index
    asset_test_data.reset_index(drop=True, inplace=True)
    
    ids = unique_assets_df.iloc[row, 0]
    name = unique_assets_df.iloc[row, 1]
    
    train_size = int(0.8 * len(asset_data))
    sub_train_df = asset_data.iloc[:train_size][cols]
    sub_test_df = asset_data.iloc[train_size:][cols]
    
    
    X_sub_train_df = sub_train_df.drop([target], axis=1)
    Y_sub_train_df = sub_train_df[target].reset_index(drop=True)
    
    X_sub_test_df = sub_test_df.drop([target], axis=1)
    Y_sub_test_df = sub_test_df[target].reset_index(drop=True)
    
    scaler = StandardScaler()
    X_sub_train_df = pd.DataFrame(scaler.fit_transform(X_sub_train_df))
    X_sub_test_df = pd.DataFrame(scaler.transform(X_sub_test_df))

    
    sub_train_df = pd.concat([X_sub_train_df, Y_sub_train_df], axis=1)
    sub_test_df = pd.concat([X_sub_test_df, Y_sub_test_df], axis=1)

    sub_train_df.columns = cols
    sub_test_df.columns = cols

    merge_df = pd.concat([sub_train_df, sub_test_df])
    
    X_train = merge_df[cols].drop([target], axis=1)
    y_train = merge_df[target]
    
    # Initialize ElasticNet
    model = ElasticNet(max_iter=10000) # Increased max_iter for convergence

    tscv = TimeSeriesSplit(n_splits=5)
    # Initialize Grid Search
    gsearch = GridSearchCV(estimator=model, cv=tscv,
                           param_grid=param_grid, 
                           verbose=1, 
                           scoring='neg_mean_squared_error')

    # display(X_train)
    # Fit the grid search
    gsearch.fit(X_train, y_train)
    
    
    X_test = asset_test_data.drop([target], axis=1)
    y_test = asset_test_data[target]
    
    X_test = scaler.transform(X_test)
    
    model = ElasticNet(max_iter=10000, alpha= gsearch.best_params_['alpha']
                       , l1_ratio = gsearch.best_params_['l1_ratio']) 
    model.fit(X_train, y_train)
    
    y_hat = model.predict(X_test)

    mse_test = mean_squared_error(y_hat, y_test)
    
    y_hat = pd.Series(y_hat, index=test_indexes)
    y_test = pd.Series(y_test.values, index=test_indexes)
    
    vis_line_plot_results(y_hat, y_test, name, row)
    
    # display(X_train)
    temp_df = pd.DataFrame({
        'L1 Ratio': gsearch.best_params_['l1_ratio'],
        'Alpha': gsearch.best_params_['alpha'],
        'Best Scores': gsearch.best_score_*-1 *10**3,
        'MSE': mse_test*10**3,
        'Asset IDs': ids
    },
    index = [row+1])
    # display(temp_df)
    
    master_df = pd.concat([master_df, temp_df])

Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 

In [17]:
cols = ['vol_series_daily', 'vol_series_weekly', 'vol_series_monthly', 'buzz', 'ESG', 'ESGCombined', 'ESGControversies', 'EnvironmentalPillar', 'GovernancePillar', 'SocialPillar', 'Community', 'EnvironmentalInnovation', 'Management', 'ProductResponsibility', 'Shareholders', 'Workforce', 'noise_beta_0.0_gamma_0.25', 'noise_beta_0.0_gamma_0.5', 'noise_beta_0.0_gamma_1.0', 'noise_beta_0.5_gamma_0.25', 'noise_beta_0.5_gamma_0.5', 'noise_beta_0.5_gamma_1.0', 'noise_beta_0.75_gamma_0.25', 'noise_beta_0.75_gamma_0.5', 'noise_beta_0.75_gamma_1.0', 'noise_beta_0.9_gamma_0.25', 'V^YZ']

In [18]:
master_df.to_csv('../results/EN-M5-GRIDSEARCH.csv', index=None)

In [19]:
train_dfs, test_dfs = [], []
target = 'V^YZ'
# X_train, y_train = np.asarray([]), np.asarray([])

# Define the hyperparameters
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
    'l1_ratio': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

master_df = pd.DataFrame()

for row, i in enumerate(unique_assets_df.Assets):
    
    asset_data = train_df[train_df.Asset == i]
    train_indexes = asset_data.index
    asset_data = asset_data.reset_index(drop=True)
    
    asset_test_data = test_df[test_df.Asset == i][cols]
    test_indexes = asset_test_data.index
    asset_test_data.reset_index(drop=True, inplace=True)
    
    ids = unique_assets_df.iloc[row, 0]
    name = unique_assets_df.iloc[row, 1]
    
    train_size = int(0.8 * len(asset_data))
    sub_train_df = asset_data.iloc[:train_size][cols]
    sub_test_df = asset_data.iloc[train_size:][cols]
    
    
    X_sub_train_df = sub_train_df.drop([target], axis=1)
    Y_sub_train_df = sub_train_df[target].reset_index(drop=True)
    
    X_sub_test_df = sub_test_df.drop([target], axis=1)
    Y_sub_test_df = sub_test_df[target].reset_index(drop=True)
    
    scaler = StandardScaler()
    X_sub_train_df = pd.DataFrame(scaler.fit_transform(X_sub_train_df))
    X_sub_test_df = pd.DataFrame(scaler.transform(X_sub_test_df))

    
    sub_train_df = pd.concat([X_sub_train_df, Y_sub_train_df], axis=1)
    sub_test_df = pd.concat([X_sub_test_df, Y_sub_test_df], axis=1)

    sub_train_df.columns = cols
    sub_test_df.columns = cols

    merge_df = pd.concat([sub_train_df, sub_test_df])
    
    X_train = merge_df[cols].drop([target], axis=1)
    y_train = merge_df[target]
    
    # Initialize ElasticNet
    model = ElasticNet(max_iter=10000) # Increased max_iter for convergence

    tscv = TimeSeriesSplit(n_splits=5)
    # Initialize Grid Search
    gsearch = GridSearchCV(estimator=model, cv=tscv,
                           param_grid=param_grid, 
                           verbose=1, 
                           scoring='neg_mean_squared_error')

    # display(X_train)
    # Fit the grid search
    gsearch.fit(X_train, y_train)
    
    
    X_test = asset_test_data.drop([target], axis=1)
    y_test = asset_test_data[target]
    
    X_test = scaler.transform(X_test)
    
    model = ElasticNet(max_iter=10000, alpha= gsearch.best_params_['alpha']
                       , l1_ratio = gsearch.best_params_['l1_ratio']) 
    model.fit(X_train, y_train)
    
    y_hat = model.predict(X_test)

    mse_test = mean_squared_error(y_hat, y_test)
    
    y_hat = pd.Series(y_hat, index=test_indexes)
    y_test = pd.Series(y_test.values, index=test_indexes)
    
    vis_line_plot_results(y_hat, y_test, name, row)
    
    # display(X_train)
    temp_df = pd.DataFrame({
        'L1 Ratio': gsearch.best_params_['l1_ratio'],
        'Alpha': gsearch.best_params_['alpha'],
        'Best Scores': gsearch.best_score_*-1 *10**3,
        'MSE': mse_test*10**3,
        'Asset IDs': ids
    },
    index = [row+1])
    # display(temp_df)
    
    master_df = pd.concat([master_df, temp_df])

Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 5 folds for each of 55 candidates, totalling 275 fits
Fitting 

In [20]:
master_df.to_csv('../results/EN-M6-GRIDSEARCH.csv', index=None)

In [164]:
master_df.head(2)

Unnamed: 0,L1 Ratio,Alpha,Best Scores,MSE,Asset IDs
1,0.6,0.001,0.077503,0.084326,4295874865
2,0.7,0.01,0.102921,0.016746,4295893899


In [11]:
np.mean(master_df['Best Scores'])

0.08391125942861427

In [12]:
np.mean(master_df['MSE'])

0.08463398947666778

In [15]:
master_df.to_csv('../results/EN-M2-GRIDSEARCH.csv', index=None)

In [13]:
master_df.to_csv('../results/EN-M1-GRIDSEARCH.csv', index=None)

In [36]:
df_m1.describe()

Unnamed: 0,L1 Ratio,Alpha,Best Scores,MSE,Asset IDs
count,85.0,85.0,85.0,85.0,85.0
mean,0.217647,0.197984,0.083911,0.084634,4693753000.0
std,0.293248,0.389159,0.064645,0.280734,1107719000.0
min,0.0,0.0001,0.015364,0.003928,4295869000.0
25%,0.0,0.001,0.039857,0.016133,4295895000.0
50%,0.1,0.01,0.056647,0.029773,4295895000.0
75%,0.3,0.01,0.103739,0.044203,4295899000.0
max,1.0,1.0,0.30714,2.334567,8589934000.0


In [37]:
df_m2.describe()

Unnamed: 0,L1 Ratio,Alpha,Best Scores,MSE,Asset IDs
count,85.0,85.0,85.0,85.0,85.0
mean,0.270588,0.255954,0.08792,0.089043,4693753000.0
std,0.271602,0.429089,0.067993,0.283528,1107719000.0
min,0.0,0.0001,0.015274,0.004325,4295869000.0
25%,0.0,0.01,0.042374,0.016297,4295895000.0
50%,0.2,0.01,0.06101,0.029007,4295895000.0
75%,0.4,0.1,0.11248,0.044632,4295899000.0
max,1.0,1.0,0.318984,2.334567,8589934000.0


In [38]:
df_m3.describe()

Unnamed: 0,L1 Ratio,Alpha,Best Scores,MSE,Asset IDs
count,85.0,85.0,85.0,85.0,85.0
mean,0.345882,0.148049,0.089377,0.093803,4693753000.0
std,0.275401,0.347485,0.068611,0.293993,1107719000.0
min,0.0,0.0001,0.015301,0.003676,4295869000.0
25%,0.2,0.01,0.041463,0.016492,4295895000.0
50%,0.3,0.01,0.062155,0.028661,4295895000.0
75%,0.5,0.01,0.114836,0.047029,4295899000.0
max,1.0,1.0,0.312227,2.334567,8589934000.0


In [39]:
combine_df = pd.concat([df_m1, df_m2, df_m2 ])

In [41]:
combine_df.describe()

Unnamed: 0,L1 Ratio,Alpha,Best Scores,MSE,Asset IDs
count,255.0,255.0,255.0,255.0,255.0
mean,0.252941,0.236631,0.086584,0.087574,4693753000.0
std,0.279026,0.415466,0.066659,0.281493,1103349000.0
min,0.0,0.0001,0.015274,0.003928,4295869000.0
25%,0.0,0.001,0.040861,0.016215,4295895000.0
50%,0.2,0.01,0.059346,0.029177,4295895000.0
75%,0.4,0.1,0.11181,0.044632,4295899000.0
max,1.0,1.0,0.318984,2.334567,8589934000.0


In [14]:
df_m1 = pd.read_csv('../results/EN-M1-GRIDSEARCH.csv')
df_m2 = pd.read_csv('../results/EN-M2-GRIDSEARCH.csv')
df_m3 = pd.read_csv('../results/EN-M3-GRIDSEARCH.csv')

In [18]:
print('the result of m1 test data:', np.mean(df_m1['MSE']))
print('the result of m2 test data:', np.mean(df_m2['MSE']))
print('the result of m3 test data:', np.mean(df_m3['MSE']))

the result of m1 test data: 0.0846339894766677
the result of m2 test data: 0.08904331794611786
the result of m3 test data: 0.09380348811801345
