[![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/tushar-mahalya/Forecasting-using-Bidirectional-LSTM/blob/master/model_selector.ipynb)

In [1]:
# Importing important packages
import os

# To avoid irrelevant warning messages: TensorFlow's native logger picks up
# TF_CPP_MIN_LOG_LEVEL when the library is first loaded, so the variable is
# set here, before the Keras imports below pull TensorFlow in.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import json
import numpy as np
import pandas as pd
from collections import OrderedDict
from termcolor import colored as color

# Importing TF.keras packages
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, LSTM, Dropout, Bidirectional

In [2]:
# Yahoo Finance API
import yfinance

# Function to download stock data (univariate or multivariate)
from Data_Downloader import data_accumilator

In [3]:
# Importing custom defined models into local runtime
from Models import models
my_models = models()
my_models

[('Fixed', <function Models.simple_model(X, y, learn_rate)>),
 ('Dynamic', <function Models.dynamic_model(X, y, learn_rate)>),
 ('Bidirectional', <function Models.bidirectional_model(X, y, learn_rate)>),
 ('Stacked', <function Models.stacked_model(X, y, learn_rate)>)]

In [4]:
# Nifty50 stocks information
nifty50 = pd.read_csv('nifty50.csv')
nifty50.head()

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code
0,Adani Enterprises Ltd.,Metals & Mining,ADANIENT,EQ,INE423A01024
1,Adani Ports and Special Economic Zone Ltd.,Services,ADANIPORTS,EQ,INE742F01042
2,Apollo Hospitals Enterprise Ltd.,Healthcare,APOLLOHOSP,EQ,INE437A01024
3,Asian Paints Ltd.,Consumer Durables,ASIANPAINT,EQ,INE021A01026
4,Axis Bank Ltd.,Financial Services,AXISBANK,EQ,INE238A01034


In [5]:
# Get the current working directory (cwd)
cwd = os.getcwd()

# Downloading data for two Nifty50 stocks into the current working directory.
# random_state pins the sample so a re-run picks the same two tickers and the
# cells further down keep referring to stocks that were actually downloaded.
preprocessed_data = data_accumilator(cwd, nifty50['Symbol'].sample(2, random_state=42))

Directory named "Stocks Data" with required data is downloaded successfully at "/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM"


In [36]:
# Dictionary with stock ticker (key) and location of
# normalized data of that stock (value)
dict(list(preprocessed_data.items())[:5])  # First 5 elements

{'ADANIENT': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ADANIENT/Normalized_ADANIENT.csv',
 'ADANIPORTS': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ADANIPORTS/Normalized_ADANIPORTS.csv',
 'APOLLOHOSP': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/APOLLOHOSP/Normalized_APOLLOHOSP.csv',
 'ASIANPAINT': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/ASIANPAINT/Normalized_ASIANPAINT.csv',
 'AXISBANK': '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-using-Bidirectional-LSTM/Stocks Data/AXISBANK/Normalized_AXISBANK.csv'}

In [6]:
# Importing data initializing function
from Data_Initializer import train_test_data

In [35]:
def test_models(data_dict, epochs: int, models: dict, window_sizes: list, multi_seq:bool = False):
    #test result data
    parameter_dict = OrderedDict()
    for ticker in data_dict.keys():
        
        win_size = {}
        print(color(f'\n================================ NSE : {ticker} ================================', 'red', attrs = ["bold"]))
        for window_size in window_sizes:
            
            model_name = {}
            print(color(f"\nWindow size: {window_size}", 'cyan'))
            print(color('----------------', 'yellow'))
            for model_item in models:
                
                data = pd.read_csv(data_dict[ticker])
                X_train,y_train,X_test,y_test = train_test_data(data, split_size = 0.2, window_size = window_size, multi_seq = multi_seq)
                model = model_item[1](X_train,y_train,0.001)
                
                # fitting model
                model.fit(X_train, y_train, epochs=epochs, batch_size=50, verbose=0)

                # printing training and testing errors
                training_error = model.evaluate(X_train, y_train, verbose=0)
                testing_error = model.evaluate(X_test, y_test, verbose=0)
                msg = color("  >",'green') + " Model: {0:<15} Param count: {1:} \tTraining error: {2:.4f}\tTesting error: {3:.4f}"
                print(msg.format(model_item[0],model.count_params(),training_error,testing_error))
                
                model_name[model_item[0]] = {
                                             'Param Count' : model.count_params(),
                                             'Training Error' : float("{0:.4f}".format(training_error)),
                                             'Testing Error' : float("{0:.4f}".format( testing_error))
                                            }
            
            win_size[f'Window_Size_{window_size}'] = model_name
        
        parameter_dict[ticker] = win_size
        
    # Exporting performance of different models in .json format
    os.mkdir('Performance Parameters')
    if multi_seq:
        with open("Performance Parameters/multivariate.json", "w") as f:
            json.dump(parameter_dict, f)
            f.close()
    else:
        with open("Performance Parameters/univariate.json", "w") as f:
            json.dump(parameter_dict, f)
            f.close()
            
    print(color('\nSuccesfully Evaluated different models and saved the performance metrics in \'Performance Parameters\' directory.', 'magenta'))
        
    return parameter_dict

In [48]:
print(color('\n--------------------------------------------------------------- ', 'green') + color('Simple Sequence' , 'green', attrs = ['underline']))

[32m
--------------------------------------------------------------- [0m[4m[32mSimple Sequence[0m


In [38]:
res = test_models(dict(list(preprocessed_data.items())[:2]) , 100, my_models, window_sizes=[7,10], multi_seq=True)

[1m[31m
[36m
Window size: 7[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 186 	Training error: 0.0008	Testing error: 0.1474
[32m  >[0m Model: Dynamic         Param count: 316 	Training error: 0.0008	Testing error: 0.1581
[32m  >[0m Model: Bidirectional   Param count: 729 	Training error: 0.0008	Testing error: 0.1339
[32m  >[0m Model: Stacked         Param count: 886 	Training error: 0.0009	Testing error: 0.1679
[36m
Window size: 10[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 186 	Training error: 0.0007	Testing error: 0.1111
[32m  >[0m Model: Dynamic         Param count: 571 	Training error: 0.0008	Testing error: 0.1416
[32m  >[0m Model: Bidirectional   Param count: 1341 	Training error: 0.0006	Testing error: 0.0596
[32m  >[0m Model: Stacked         Param count: 886 	Training error: 0.0011	Testing error: 0.1572
[35mSuccesfully Evaluated different models and saved the performance metrics in 'Performance 

FileExistsError: [Errno 17] File exists: 'Performance Parameters'

In [13]:
res = test_models_dumdum(preprocessed_data, 2, my_models, window_sizes=[7])

[1m[31m
[32m
--------------------------------------------------------------- [0m[32mSimple Sequence[0m
[34m
Window size : 7[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 146 	Training error: 0.0239	Testing error: 0.1061
[32m  >[0m Model: Dynamic         Param count: 260 	Training error: 0.0910	Testing error: 0.2291
[32m  >[0m Model: Bidirectional   Param count: 617 	Training error: 0.0335	Testing error: 0.2477
[32m  >[0m Model: Stacked         Param count: 806 	Training error: 0.1273	Testing error: 0.2754
[32m
--------------------------------------------------------------- [0m[32mMulti Sequence[0m
[34m
Window size : 7[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 186 	Training error: 0.1544	Testing error: 0.1949
[32m  >[0m Model: Dynamic         Param count: 316 	Training error: 0.0639	Testing error: 0.1410
[32m  >[0m Model: Bidirectional   Param count: 729 	Training error: 0.1352	Testing error: 0.3

In [12]:
def test_models_dumdum(data_dict, epochs: int, models: dict, window_sizes: list):
    
    #test result data
    best_params = OrderedDict()
    parameter_dict = OrderedDict()
    sequences = ['Simple', 'Multi']
    for ticker in data_dict.keys():
        
        lowest_train_error = float()
        lowest_test_error = float()
        best_model = str()
        best_model_param_count = int()
        best_window_size = int()
        best_seq = str()
        
        seq_type = {}
        print(color(f'\n================================ NSE : {ticker} ================================', 'red', attrs = ["bold"]))
        for seq in sequences:
            print(color('\n--------------------------------------------------------------- ', 'green') + color(f'{seq} Sequence' , 'green'))
            
            win_size = {}
            for window_size in window_sizes:

                model_name = {}
                print(color(f"\nWindow size : {window_size}", 'blue'))
                print(color('----------------', 'yellow'))
                for model_item in models:

                    data = pd.read_csv(data_dict[ticker])
                    X_train,y_train,X_test,y_test = train_test_data(data, split_size = 0.2, window_size = window_size, seq = seq)
                    model = model_item[1](X_train,y_train,0.001)

                    # fitting model
                    model.fit(X_train, y_train, epochs=epochs, batch_size=50, verbose=0)

                    # printing training and testing errors
                    training_error = model.evaluate(X_train, y_train, verbose=0)
                    testing_error = model.evaluate(X_test, y_test, verbose=0)
                    msg = color("  >",'green') + " Model: {0:<15} Param count: {1:} \tTraining error: {2:.4f}\tTesting error: {3:.4f}"
                    print(msg.format(model_item[0],model.count_params(),training_error,testing_error))

                    model_name[model_item[0]] = {
                                                 'Param Count' : model.count_params(),
                                                 'Training Error' : float("{0:.4f}".format(training_error)),
                                                 'Testing Error' : float("{0:.4f}".format( testing_error))
                                                }
                    
                    if (lowest_test_error > testing_error):
                        lowest_train_error = training_error
                        lowest_test_error = testing_error
                        best_model = model_item[0]
                        best_model_param_count = model.count_params()
                        best_window_size = window_size
                        best_seq = seq
                        
                win_size[f'Window_Size_{window_size}'] = model_name
                
            seq_type[f'{seq} Sequence'] = win_size
        
        parameter_dict[ticker] = seq_type
        best_params[ticker] = {
                               'Best Model' : best_model,
                               'Window Size' : best_window_size,
                               'Sequence' : best_seq,
                               'Training Error' : lowest_train_error,
                               'Testing Error' : lowest_test_error,
                               'Learning Rate' : 0.001,
                               'Parameters Count' : best_model_param_count
                              }
        
    # Exporting performance of different models and best parameters dict in .json format
    cwd = os.getcwd()
    if os.path.isdir(os.path.join(cwd, 'Model Performance')):
        print("Required data/directory is already present in current working directory.")
    
    else:
        os.mkdir('Model Performance')
        with open("Model Performance/Performance_Params.json", "w") as f:
            json.dump(parameter_dict, f)
            f.close()
        with open("Model Performance/Best_Model_Params.json", "w") as f:
            json.dump(best_params, f)
            f.close()
   
            
    print(color('\nSuccesfully Evaluated different models and saved all logs and best model performance metrics in \'Model Performance\' directory.', 'magenta'))
        
    return parameter_dict

In [64]:
# NOTE(review): looks like a leftover scratch cell — nothing else visible in this
# notebook appears to use this directory, and os.mkdir raises FileExistsError
# when the cell is run a second time. Consider deleting the cell.
os.mkdir('chutiya')

In [33]:
res['ITC']['Window_Size_10']

{'Fixed': (186, 0.002, 0.0622),
 'Dynamic': (571, 0.0017, 0.0338),
 'Bidirectional': (1341, 0.0016, 0.0246),
 'Stacked': (886, 0.002, 0.0643)}