In [1]:
# Method #4 LSTM Model
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from keras.layers.regularization.gaussian_noise import GaussianNoise
from keras.optimizers import rmsprop_v2

from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.python import keras

# Method #5 TFT Model

from IPython.display import display, HTML
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random as rd
import warnings

# display(HTML("<style>.container { width:80% !important; }</style>"))
# Notebook-wide settings: suppress every warning and let pandas render up to
# 100 columns when a frame is displayed.
warnings.filterwarnings(action='ignore')
pd.options.display.max_columns = 100

In [2]:
# Look-back window of 7 time steps per sample, and a batch size of 15.
lags, batch_size = 7, 15

# Stop training once `val_loss` has failed to improve for 5 epochs in a row.
early_stop = EarlyStopping(patience=5, monitor='val_loss', verbose=1)

def create_rnn_model(hu=32, lags=lags, layer='SimpleRNN',
                           features=1, output_size = 1, algorithm='estimation'):
    """Build and compile a one-recurrent-layer Keras ``Sequential`` model.

    Parameters
    ----------
    hu : int
        Number of hidden units in the recurrent layer.
    lags : int
        Time steps per input sample; the default is bound to the module-level
        ``lags`` value at definition time.
    layer : str
        ``'SimpleRNN'`` selects a ``SimpleRNN`` layer; any other value selects
        an ``LSTM`` layer.
    features : int
        Number of features per time step.
    output_size : int
        Number of units in the dense output layer.
    algorithm : str
        ``'estimation'`` gives a linear output trained with MSE (MAE metric);
        any other value gives a sigmoid output trained with binary
        cross-entropy (accuracy metric).

    Returns
    -------
    The compiled model (Adam optimizer in both modes).
    """
    recurrent_cls = SimpleRNN if layer == 'SimpleRNN' else LSTM

    # (output activation, loss, metrics) for the two supported modes.
    if algorithm == 'estimation':
        out_activation, loss_name, metric_names = 'linear', 'mse', ['mae']
    else:
        out_activation, loss_name, metric_names = (
            'sigmoid', 'binary_crossentropy', ['accuracy'])

    model = Sequential()
    model.add(recurrent_cls(hu, activation='relu',
                            input_shape=(lags, features)))
    model.add(Dense(output_size, activation=out_activation))
    model.compile(optimizer='adam', loss=loss_name, metrics=metric_names)
    return model

In [3]:
# Load the FTSE volatility/ESG table and expose the merge-suffixed date column
# under the name `date_key`.
base_FTSE_df = (
    pd.read_csv('../Data/1.3-FTSE_Monthly_ESG_Volatility_Final.csv')
    .rename(columns={'Date_x': 'date_key'})
)

# NOTE(review): attribute assignment only updates a column if `Date` already
# exists in the CSV; otherwise it just sets a plain attribute — confirm.
base_FTSE_df.Date = pd.to_datetime(base_FTSE_df['date_key'])
base_FTSE_df.Asset = base_FTSE_df['Asset'].astype(int)
# Index by the (unparsed) date_key column; the column itself is kept.
base_FTSE_df.index = base_FTSE_df['date_key']

In [16]:
from sklearn.model_selection import train_test_split

In [22]:
def func_train_test_split(df, train_frac=0.80):
    """Split a multi-asset frame into train/test parts, asset by asset.

    For every distinct value in ``df['Asset']`` the asset's rows are taken in
    their existing order; the first ``int(rows * train_frac)`` go to the
    training set and the remainder to the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``Asset`` column. Rows are not re-sorted.
    train_frac : float, default 0.80
        Fraction of each asset's rows assigned to the training set.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame
        Concatenated per-asset slices. Two empty frames are returned when
        ``df`` contains no assets.
    """
    train_parts, test_parts = [], []

    for asset in df['Asset'].unique():
        asset_df = df[df['Asset'] == asset]
        train_len = int(asset_df.shape[0] * train_frac)
        train_parts.append(asset_df.iloc[:train_len])
        test_parts.append(asset_df.iloc[train_len:])

    if not train_parts:
        return pd.DataFrame(), pd.DataFrame()

    # The previous implementation prepended each new asset to the running
    # result, so later assets came first; reverse to keep that row order.
    train_df = pd.concat(train_parts[::-1])
    test_df = pd.concat(test_parts[::-1])
    return train_df, test_df

In [23]:
# Per-asset train/test split of the full FTSE frame.
train_df, test_df = func_train_test_split(base_FTSE_df)

In [25]:
# Sanity check: (rows, columns) of the training split.
train_df.shape

(15322, 28)

In [26]:
# Sanity check: (rows, columns) of the test split.
test_df.shape

(3937, 28)

In [28]:
import math

In [29]:
# 70 / 20 / 10 train / validation / test lengths over the whole frame.
n_rows = base_FTSE_df.shape[0]

train_len = math.ceil(n_rows * 0.7)
# Clamp so the validation slice never runs past the end on very small frames.
valid_len = min(math.ceil(n_rows * 0.20), n_rows - train_len)
# The test set takes whatever is left. Rounding the three shares separately
# (ceil, ceil, floor) does not always sum to n_rows: 11 rows give 8 + 3 + 1 = 12.
test_len = n_rows - train_len - valid_len

assert test_len >= 0
assert train_len + valid_len + test_len == n_rows

In [12]:
# Positional (row-order) split of the whole frame into consecutive
# train / validation / test slices. This rebinds `train_df` and `test_df`.
train_df, valid_df, test_df = (
    base_FTSE_df.iloc[start:stop]
    for start, stop in (
        (None, train_len),
        (train_len, train_len + valid_len),
        (train_len + valid_len, None),
    )
)

In [None]:
#### Standardise features (z-score) using training-set statistics only.
# Only numeric columns are scaled; non-numeric columns (e.g. a string date
# key) are passed through unchanged, because taking a mean/std over them, or
# subtracting from them, fails.
# NOTE(review): every numeric column is scaled, including identifier-like
# ones such as `Asset` if present — confirm which columns are real features.
numeric_cols = train_df.select_dtypes(include='number').columns

stocks_mean = train_df[numeric_cols].mean(axis=0)
stocks_std = train_df[numeric_cols].std(axis=0, ddof=0)  # ddof=0 matches np.std

train_std_df = train_df.copy()  # for training data
train_std_df[numeric_cols] = (train_df[numeric_cols] - stocks_mean) / stocks_std

test_std_df = test_df.copy()  # for test data
test_std_df[numeric_cols] = (test_df[numeric_cols] - stocks_mean) / stocks_std

# NOTE(review): a separate `valid_df` is not included here — confirm that
# train + test is the intended content of `ori_df_std`.
ori_df_std = pd.concat([train_std_df, test_std_df])

In [None]:
# NOTE(review): `stock_NVDA` is not defined in any visible cell — confirm
# where it comes from before running this cell.
split = int(len(stock_NVDA) * 0.8)

# Feature vector: (O_t, H_t, L_t, Close_t, AdjClose_t, r_t, VOL_t)^T
train_df = stock_NVDA.iloc[:split].drop(columns=['Date','Firm', 'Volume'])
test_df = stock_NVDA.iloc[split:].drop(columns=['Date','Firm', 'Volume'])

# Keep only rows where volatility is available.
train_df = train_df.loc[train_df['volatility'].notnull()]
test_df = test_df.loc[test_df['volatility'].notnull()]

# Validation: last 15% of the (filtered) training rows.
split = int(len(train_df) * 0.85)
train_viz, valid_viz = train_df.iloc[:split].copy(), train_df.iloc[split:].copy()

ori_df = pd.concat([train_df, test_df])
ori_df = ori_df.loc[ori_df['volatility'].notnull()]

In [4]:
# NOTE(review): this repeats the load cell earlier in the notebook; the
# NameError recorded below suggests it was run before the import cell —
# confirm, and consider deleting this duplicate.
base_FTSE_df = (
    pd.read_csv('../Data/1.3-FTSE_Monthly_ESG_Volatility_Final.csv')
    .rename(columns={'Date_x': 'date_key'})
)

# NOTE(review): attribute assignment only updates a column if `Date` already
# exists in the CSV; otherwise it just sets a plain attribute — confirm.
base_FTSE_df.Date = pd.to_datetime(base_FTSE_df['date_key'])
base_FTSE_df.Asset = base_FTSE_df['Asset'].astype(int)
# Index by the (unparsed) date_key column; the column itself is kept.
base_FTSE_df.index = base_FTSE_df['date_key']

NameError: name 'pd' is not defined

---

# Incorrect Rolling Dataframe

In [18]:
def func_m1_volatility(df, vol_col='V^YZ', weekly_window=5, monthly_window=22):
    """Build lagged volatility features per asset.

    For each distinct ``Asset`` the volatility series is paired with three
    predictors, each shifted by one row so that only past values are used:
    the previous value, the mean of the previous ``weekly_window`` values and
    the mean of the previous ``monthly_window`` values. Rows with any missing
    predictor (the warm-up period of each asset) are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``Asset`` column and the ``vol_col`` column.
    vol_col : str, default 'V^YZ'
        Name of the volatility column.
    weekly_window, monthly_window : int, default 5 and 22
        Rolling-window lengths, counted in rows of ``df``.
        NOTE(review): 5 and 22 are trading-day conventions; the input file
        name suggests monthly rows — confirm the intended windows.

    Returns
    -------
    pandas.DataFrame
        Columns ``asset``, ``vol``, ``vol_lag_daily``, ``vol_lag_weekly``,
        ``vol_lag_monthly``, indexed like the surviving input rows. An empty
        frame is returned when ``df`` contains no assets.
    """
    per_asset_frames = []

    for asset in df['Asset'].unique():
        vol_series = df.loc[df['Asset'] == asset, vol_col]

        per_asset_frames.append(
            pd.DataFrame({
                'asset': asset,
                'vol': vol_series,
                'vol_lag_daily': vol_series.shift(1),
                'vol_lag_weekly': vol_series.rolling(weekly_window).mean().shift(1),
                'vol_lag_monthly': vol_series.rolling(monthly_window).mean().shift(1),
            }).dropna()
        )

    if not per_asset_frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(per_asset_frames)

In [20]:
m1_df = func_m1_volatility(base_FTSE_df)