In [1]:
%%capture
# %%capture prevents this cell from printing a ton of STDERR stuff to the screen

## First, check to see if lightning is installed, if not, install it.
##
## NOTE: If you **do** need to install something, just know that you may need to
##       restart your session for python to find the new module(s).
##
##       To restart your session:
##       - In Google Colab, click on the "Runtime" menu and select
##         "Restart Session" from the pulldown menu
##       - In a local jupyter notebook, click on the "Kernel" menu and select
##         "Restart Kernel" from the pulldown menu
import pip
try:
  __import__("lightning")
except ImportError:
  pip.main(['install', "lightning"])

In [9]:
import os

In [10]:
os.environ["CUDA_VISIBLE_DEVICES"]=""

In [11]:
import torch  # torch will allow us to create tensors.
import torch.nn as nn  # torch.nn allows us to create a neural network.
import torch.nn.functional as F  # nn.functional give us access to the activation and loss functions.
from torch.optim import (
    Adam,
)  # optim contains many optimizers. This time we're using Adam

import lightning as L  # lightning has tons of cool tools that make neural networks easier
from torch.utils.data import (
    TensorDataset,
    DataLoader,
)  # these are needed for the training data

In [12]:
class LSTMbyHand(L.LightningModule):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        L.seed_everything(seed=42)

        mean=torch.tensor(0.0)
        std=torch.tensor(1.0)
        # Scaling Long Term Memory
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)
        # Potential Long term memory to remember
        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)
        # New Long term memory

        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)
        # New short term memory and output
        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)
    def lstm_unit(self,input_value,long_term,short_term):
        long_term_percent=torch.sigmoid((input_value*self.wlr2)+(short_term*self.wlr1)+self.blr1)
        potential_remember_percent=torch.sigmoid((input_value*self.wpr2)+(short_term*self.wpr1)+self.bpr1)
        potential_memory=torch.tanh((input_value*self.wo2)+(short_term*self.wo1)+self.bp1)
        updated_long_memory=((long_term*long_term_percent)+(potential_remember_percent*potential_memory))
        output_percent=torch.sigmoid((input_value*self.wo2)+(short_term*self.wo1)+self.bo1)
        updated_short_memory=torch.tanh(updated_long_memory)*output_percent
        return (updated_long_memory,updated_short_memory)
    def forward(self, input):
        # unroll lstm for training data for each data point/ or each past day etc
        # Each unit will output the long term and short term  and at the end the final short term will be output
        long_term=0
        short_term=0
        day1=input[0]
        day2=input[1]
        day3=input[2]
        day4=input[3]

        long_term, short_term = self.lstm_unit(day1, long_term, short_term)
        long_term, short_term = self.lstm_unit(day2, long_term, short_term)
        long_term, short_term = self.lstm_unit(day3, long_term, short_term)
        long_term, short_term = self.lstm_unit(day4, long_term, short_term)
        return short_term
    def configure_optimizers(self):
        return Adam(self.parameters())
    def training_step(self, batch,batch_idx):
        input_i,label_i=batch
        output_i=self.forward(input_i[0])
        loss=(output_i-label_i)**2
        self.log("Train_loss",loss)
        if(label_i==0):
            self.log("out_0",output_i)
        else:
            self.log("out_1",output_i)
        return loss
        

In [13]:
model = LSTMbyHand()
print("before optimization, the parameters are...")
for name,param in model.named_parameters():
    print(name,param.data)

print("\n Now let's compare the observed vs predicted value")
print("Company A: Observed =0, Predicted =", model(torch.tensor([0.,0.5,0.25,1.])))
print("Company B: Observed =1, Predicted =", model(torch.tensor([1., 0.5, 0.25, 1.0])))

before optimization, the parameters are...
wlr1 tensor(0.3367)
wlr2 tensor(0.1288)
blr1 tensor(0.)
wpr1 tensor(0.2345)
wpr2 tensor(0.2303)
bpr1 tensor(0.)
wp1 tensor(-1.1229)
wp2 tensor(-0.1863)
bp1 tensor(0.)
wo1 tensor(2.2082)
wo2 tensor(-0.6380)
bo1 tensor(0.)

 Now let's compare the observed vs predicted value
Company A: Observed =0, Predicted = tensor(-0.1393, grad_fn=<MulBackward0>)
Company B: Observed =1, Predicted = tensor(-0.1537, grad_fn=<MulBackward0>)


In [14]:
## create the training data for the neural network.
inputs = torch.tensor([[0.0, 0.5, 0.25, 1.0], [1.0, 0.5, 0.25, 1.0]])
labels = torch.tensor([0.0, 1.0])

dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)

In [15]:
trainer = L.Trainer(
    max_epochs=2000
)  # with default learning rate, 0.001 (this tiny learning rate makes learning slow)
trainer.fit(model, train_dataloaders=dataloader)

/home/hadi/Documents/statquest/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/hadi/Documents/statquest/.venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [16]:
print("\nNow let's compare the observed and predicted values...")
print("Company A: Observed = 0, Predicted =", model(torch.tensor([0., 0.5, 0.25, 1.])).detach())
print("Company B: Observed = 1, Predicted =", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())


Now let's compare the observed and predicted values...
Company A: Observed = 0, Predicted = tensor(0.4153)
Company B: Observed = 1, Predicted = tensor(0.5853)


In [None]:
tensorboard --logdir=lightning_logs/