In [1]:
# Hardware configuration: list the devices TensorFlow can see from this kernel
# (the recorded output below shows one CPU and one Tesla T4 GPU).
# Environment: the `tf-gpu-310` Conda environment.
# NOTE(review): `tensorflow.python.*` looks like an internal namespace; the
# documented public call is `tf.config.list_physical_devices()` — confirm before
# relying on this import across TensorFlow upgrades.
import tensorflow as tf
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 749744972349004341
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14497349632
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 9967127212681109444
 physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5"
 xla_global_id: 416903419]

In [2]:
# Standard-library imports
import os
import json

# Project root folder. Defaults to the original SageMaker Studio Lab location;
# set the NIFTY50_PROJECT_ROOT environment variable to run the notebook from a
# different checkout without editing this cell. An unset or empty variable
# falls back to the default.
PROJECT_ROOT = (
    os.environ.get('NIFTY50_PROJECT_ROOT')
    or '/home/studio-lab-user/sagemaker-studiolab-notebooks/Forecasting-NIFTY50'
)

# Setting root folder as current working directory
os.chdir(PROJECT_ROOT)

# Importing modules from Source Code.
# Kept after os.chdir on purpose: presumably `src` is resolved from the project
# root once it is the working directory — TODO confirm.
from src.pipeline.testing import test_models
from src.components.models import all_models

In [3]:
# Creating the dictionary of the data_map.json file.
# The path is relative to the current working directory, which an earlier cell
# sets to the project root with os.chdir, so the root folder is not repeated here.
data_map_path = os.path.join('artifacts', 'data_map.json')

# JSON is read explicitly as UTF-8 instead of the platform's default encoding.
with open(data_map_path, 'r', encoding='utf-8') as file:
    data_map = json.load(file)

In [4]:
# Build a separate {"ticker": "normalized data path"} dictionary to feed into
# the testing pipeline, keeping only each ticker's 'Normalized Data' entry.
norm_data_dict = {}
for ticker, ticker_info in data_map.items():
    norm_data_dict[ticker] = ticker_info['Normalized Data']

## Testing Pipeline
In order to determine the optimal model, window size, and sequencing for a specific dataset, an exhaustive search algorithm will be employed. This algorithm systematically tests every combination of parameters to identify the model that yields the lowest testing error. The chosen model will be saved along with training logs for further analysis.

The following parameters will be explored during the exhaustive search:

1. **Models**:
    * Simple LSTM Model: A basic Long Short-Term Memory (LSTM) model (listed as `Fixed` in the results).
    * Dynamic LSTM Model: An LSTM model with a dynamic architecture (the number of LSTM cells in a layer is proportional to the window size).
    * Bidirectional LSTM Model: An LSTM model that processes input sequences in both forward and backward directions.
    * Stacked LSTM Model: An LSTM model with multiple layers for enhanced representation learning (two consecutive LSTM layers).
2. **Sequences**:
    * Simple Univariate Sequence: A sequence consisting of a single variable or feature (Closing price of the day).
    * Multivariate Sequences: Sequences containing multiple variables or features (Closing price, MFI & Log Returns).
3. **Window Sizes**:
    * 5 Days Values: A window of five consecutive days' worth of data.
    * 1 Week Values: A window encompassing one week's (7 days') worth of data.
    * 10 Days Values: A window spanning ten consecutive days' worth of data.
    * 2 Weeks Values: A window comprising two weeks' (14 days') worth of data.

By exhaustively testing all possible combinations of these parameters, the algorithm aims to identify the model and parameter configuration that demonstrates the lowest testing error. The selected model will be saved in the '/models' directory, while the training logs and the details of the best-performing model will be saved in the '/artifacts' directory for further analysis and evaluation. This approach ensures a thorough exploration of the parameter space to maximize the model's performance for each specific stock's data.

In [5]:
# Collect the LSTM model builders (different configurations) to compare in testing.
# As the displayed output shows, all_models() yields a dict mapping a display
# name ('Fixed', 'Dynamic', 'Bidirectional', 'Stacked') to a builder function
# from src.components.models.
my_models = all_models()
my_models

{'Fixed': <function src.components.models.simple_model(X, y, learn_rate, model_name=None)>,
 'Dynamic': <function src.components.models.dynamic_model(X, y, learn_rate, model_name=None)>,
 'Bidirectional': <function src.components.models.bidirectional_model(X, y, learn_rate, model_name=None)>,
 'Stacked': <function src.components.models.stacked_model(X, y, learn_rate, model_name=None)>}

In [6]:
# Hyperparameters for the exhaustive search
epochs = 200                       # forwarded to test_models as `epochs`
window_size_list = [5, 7, 10, 14]  # window sizes in days: 5 days, 1 week, 10 days, 2 weeks

In [None]:
# Launch the testing pipeline with the normalized-data paths, the candidate
# models, the window sizes and the epoch setting defined in the cells above.
test_models(
    data_dict=norm_data_dict,
    models=my_models,
    window_sizes=window_size_list,
    epochs=epochs,
)

[1m[31m
[32m
--------------------------------------------------------------- [0m[32mSimple Sequence[0m
[34m
Window size : 5[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 146 	Training error: 0.0032	Testing error: 0.0354
[32m  >[0m Model: Dynamic         Param count: 146 	Training error: 0.0040	Testing error: 0.0435
[32m  >[0m Model: Bidirectional   Param count: 341 	Training error: 0.0035	Testing error: 0.0518
[32m  >[0m Model: Stacked         Param count: 806 	Training error: 0.0041	Testing error: 0.0315
[34m
Window size : 7[0m
[33m----------------[0m
[32m  >[0m Model: Fixed           Param count: 146 	Training error: 0.0038	Testing error: 0.0535
[32m  >[0m Model: Dynamic         Param count: 260 	Training error: 0.0028	Testing error: 0.0336
[32m  >[0m Model: Bidirectional   Param count: 617 	Training error: 0.0029	Testing error: 0.0190
[32m  >[0m Model: Stacked         Param count: 806 	Training error: 0.0036	Testing error: 0.