[![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/tushar-mahalya/Forecasting-using-Bidirectional-LSTM/blob/master/model_selector.ipynb)

In [24]:
# Importing important pakages
import os
import json
import numpy as np
import pandas as pd
from collections import OrderedDict
from termcolor import colored as color

# Importing TF.keras packages
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, LSTM, Dropout, Bidirectional

# To avoid irrelevent warning messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [25]:
# Yahoo Finance API
import yfinance

# Function to download stock data (univariate or multivariate)
from Data_Downloader import data_accumilator

In [26]:
# Importing custom defined models into local runtime
from Models import models
my_models = models()
my_models

[('Fixed', <function Models.simple_model(X, y, learn_rate)>),
 ('Dynamic', <function Models.dynamic_model(X, y, learn_rate)>),
 ('Bidirectional', <function Models.bidirectional_model(X, y, learn_rate)>),
 ('Stacked', <function Models.stacked_model(X, y, learn_rate)>)]

In [27]:
# Nifty50 stocks indormation
nifty50 = pd.read_csv('nifty50.csv')
nifty50.head()

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code
0,Adani Enterprises Ltd.,Metals & Mining,ADANIENT,EQ,INE423A01024
1,Adani Ports and Special Economic Zone Ltd.,Services,ADANIPORTS,EQ,INE742F01042
2,Apollo Hospitals Enterprise Ltd.,Healthcare,APOLLOHOSP,EQ,INE437A01024
3,Asian Paints Ltd.,Consumer Durables,ASIANPAINT,EQ,INE021A01026
4,Axis Bank Ltd.,Financial Services,AXISBANK,EQ,INE238A01034


In [31]:
# Get the current working directory (cwd)
cwd = os.getcwd()

# Downloading Nifty50 stocks data in Current working directory
preprocessed_data = data_accumilator(cwd, nifty50['Symbol'])

Directory named "Stocks Data" with required data is downloaded successfully at "/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM"


In [36]:
# Dictionary with stock ticker(key) and location of 
# normalized data of that stock (value)
dict(list(preprocessed_data.items())[:5])  # First 5 elements0

{'ADANIENT': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ADANIENT/Normalized_ADANIENT.csv',
 'ADANIPORTS': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ADANIPORTS/Normalized_ADANIPORTS.csv',
 'APOLLOHOSP': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/APOLLOHOSP/Normalized_APOLLOHOSP.csv',
 'ASIANPAINT': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ASIANPAINT/Normalized_ASIANPAINT.csv',
 'AXISBANK': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/AXISBANK/Normalized_AXISBANK.csv'}

In [33]:
# Importing data initializing function
from Data_Initializer import train_test_data

In [35]:
def test_models(data_dict, epochs: int, models: dict, window_sizes: list, multi_seq:bool = False):
    #test result data
    parameter_dict = OrderedDict()
    for ticker in data_dict.keys():
        
        win_size = {}
        print(color(f'\n================================ NSE : {ticker} ================================', 'red', attrs = ["bold"]))
        for window_size in window_sizes:
            
            model_name = {}
            print(color(f"\nWindow size: {window_size}", 'cyan'))
            print(color('----------------', 'yellow'))
            for model_item in models:
                
                data = pd.read_csv(data_dict[ticker])
                X_train,y_train,X_test,y_test = train_test_data(data, split_size = 0.2, window_size = window_size, multi_seq = multi_seq)
                model = model_item[1](X_train,y_train,0.001)
                
                # fitting model
                model.fit(X_train, y_train, epochs=epochs, batch_size=50, verbose=0)

                # printing training and testing errors
                training_error = model.evaluate(X_train, y_train, verbose=0)
                testing_error = model.evaluate(X_test, y_test, verbose=0)
                msg = color("  >",'green') + " Model: {0:<15} Param count: {1:} \tTraining error: {2:.4f}\tTesting error: {3:.4f}"
                print(msg.format(model_item[0],model.count_params(),training_error,testing_error))
                
                model_name[model_item[0]] = {
                                             'Param Count' : model.count_params(),
                                             'Training Error' : float("{0:.4f}".format(training_error)),
                                             'Testing Error' : float("{0:.4f}".format( testing_error))
                                            }
            
            win_size[f'Window_Size_{window_size}'] = model_name
        
        parameter_dict[ticker] = win_size
        
    # Exporting performance of different models in .json format
    os.mkdir('Performance Parameters')
    if multi_seq:
        with open("Performance Parameters/multivariate.json", "w") as f:
            json.dump(parameter_dict, f)
            f.close()
    else:
        with open("Performance Parameters/univariate.json", "w") as f:
            json.dump(parameter_dict, f)
            f.close()
            
    print(color('\nSuccesfully Evaluated different models and saved the performance metrics in \'Performance Parameters\' directory.', 'magenta'))
        
    return parameter_dict

In [38]:
res = test_models(dict(list(preprocessed_data.items())[:2]) , 100, my_models, window_sizes=[7,10], multi_seq=True)

[1m[31m
[36m
Window size: 7[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 186 	Training error: 0.0008	Testing error: 0.1474
[32m  >[0m Model: Dynamic         Param count: 316 	Training error: 0.0008	Testing error: 0.1581
[32m  >[0m Model: Bidirectional   Param count: 729 	Training error: 0.0008	Testing error: 0.1339
[32m  >[0m Model: Stacked         Param count: 886 	Training error: 0.0009	Testing error: 0.1679
[36m
Window size: 10[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 186 	Training error: 0.0007	Testing error: 0.1111
[32m  >[0m Model: Dynamic         Param count: 571 	Training error: 0.0008	Testing error: 0.1416
[32m  >[0m Model: Bidirectional   Param count: 1341 	Training error: 0.0006	Testing error: 0.0596
[32m  >[0m Model: Stacked         Param count: 886 	Training error: 0.0011	Testing error: 0.1572
[35mSuccesfully Evaluated different models and saved the performance metrics in 'Performance 

FileExistsError: [Errno 17] File exists: 'Performance Parameters'

In [33]:
res['ITC']['Window_Size_10']

{'Fixed': (186, 0.002, 0.0622),
 'Dynamic': (571, 0.0017, 0.0338),
 'Bidirectional': (1341, 0.0016, 0.0246),
 'Stacked': (886, 0.002, 0.0643)}

In [22]:
if os.path.exists(cwd+ "/Stocks Data"):
    print("Data is already present in the source location.")
else:
    print("Chal BSdk")

Data is already present in the source location.


In [20]:
os.getcwd()

'/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM'

In [None]:
pd.pivot_table(res, values = ['Training Error', 'Testing Error'], index=['Window Size'])

AttributeError: ignored

In [None]:
res['Ticker']

['CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'CIPLA',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'RELIANCE',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'WIPRO',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'TATACONSUM',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL',
 'JSWSTEEL']

In [None]:
df = pd.read_csv('Normalized_UPL.csv')

In [None]:
df.head(10)

Unnamed: 0,normal_close,normal_mfi,normal_returns
0,-0.40013,0.956538,0.259899
1,-0.491737,0.764949,-0.31153
2,-0.381392,0.831014,0.548611
3,-0.453741,0.648581,-0.226466
4,-0.499545,0.461016,-0.118129
5,-0.524528,0.469399,-0.029688
6,-0.66272,0.320024,-0.540911
7,-0.49512,0.447252,0.827687
8,-0.53702,0.534004,-0.103735
9,-0.590111,0.455292,-0.15645


In [None]:
m1,m2,m3,m4 = train_test_data(df, split_size = 0.3, window_size = 5, multi_seq = True)

In [None]:
m1.shape[1:]

(5, 3)

In [None]:
m1[1]

array([[-0.49173709,  0.76494917, -0.31152977],
       [-0.38139203,  0.83101408,  0.5486111 ],
       [-0.45374124,  0.64858073, -0.22646629],
       [-0.49954473,  0.46101643, -0.11812922],
       [-0.52452807,  0.46939903, -0.02968832]])

In [None]:
m1.shape

(334, 5, 3)