In [5]:
import torch
import torchinfo

In [2]:
# Toy dimensions for a single-layer LSTM demonstration.
batch_size, timesteps = 32, 10
input_features, output_features = 8, 4

# Random batch of sequences, laid out as (timesteps, batch_size, input_features),
# which is the layout torch.nn.LSTM expects when batch_first is left at its default.
inputs = torch.randn((timesteps, batch_size, input_features))
lstm = torch.nn.LSTM(input_features, output_features)

# The LSTM returns the hidden state at every timestep, plus the final
# (hidden state, cell state) pair.
whole_seq_output, (h_T, c_T) = lstm(inputs)

# Element-wise comparison of the last timestep of the full output against h_T,
# reduced to a single boolean tensor.
is_equal = torch.eq(whole_seq_output[-1], h_T).all()

print("Output shape of the whole_seq_output : ", whole_seq_output.shape)
print("Output shape of the h_T : ", h_T.shape)
print("Output shape of the c_T : ", c_T.shape)

print("\nVerifying the last element (in timesteps axis) of whole_seq_output is same as h_T.")
print("Is whole_seq_output[-1, :, :] == h_T ? ", is_equal.numpy())

Output shape of the whole_seq_output :  torch.Size([10, 32, 4])
Output shape of the h_T :  torch.Size([1, 32, 4])
Output shape of the c_T :  torch.Size([1, 32, 4])

Verifying the last element (in timesteps axis) of whole_seq_output is same as h_T.
Is whole_seq_output[-1, :, :] == h_T ?  True


1. `whole_seq_output` has shape `(timesteps, batch_size, output_features)`, i.e. $(T, n_{batch}, n_h)$.  
   This corresponds to $h^t \;\; \forall\, t = 1, 2, \dots, T$ (the hidden state at every timestep).
2. `h_T` has shape `(1, batch_size, output_features)`, i.e. $(1, n_{batch}, n_h)$.  
   This corresponds to $h^T$, where $T$ is the last `timestep` in the sequence. The leading `1` is `num_layers * num_directions` for this single-layer, unidirectional LSTM.
3. `c_T` has shape `(1, batch_size, output_features)`, i.e. $(1, n_{batch}, n_h)$.  
   This corresponds to $c^T$, where $T$ is the last `timestep` in the sequence.

In [6]:
# Shape of the dummy input used for the model summary: (timesteps, batch_size, input_features).
batch_size, timesteps = 32, 12
input_features = 16

# Feature widths after the first LSTM, the second LSTM and the first linear layer.
h1_features, h2_features, h3_features = 8, 4, 2

# Width of the final linear layer.
output_features = 1

class SimpleModel(torch.nn.Module):
  """Two stacked LSTMs followed by a two-layer dense head ending in a sigmoid.

  Every size argument is optional. An omitted (``None``) size falls back to
  the notebook-level constant of the matching role, so ``SimpleModel()``
  builds exactly the same network as before. Passing sizes explicitly makes
  the model independent of whichever cell last assigned those globals.

  Args:
    input_size: Features per timestep of the input (default: ``input_features``).
    h1_size: Hidden size of the first LSTM (default: ``h1_features``).
    h2_size: Hidden size of the second LSTM (default: ``h2_features``).
    h3_size: Width of the first linear layer (default: ``h3_features``).
    output_size: Width of the final linear layer (default: ``output_features``).
  """

  def __init__(self, input_size=None, h1_size=None, h2_size=None,
               h3_size=None, output_size=None):
    super().__init__()
    # Resolve the globals lazily, and only for sizes the caller did not supply.
    if input_size is None:
      input_size = input_features
    if h1_size is None:
      h1_size = h1_features
    if h2_size is None:
      h2_size = h2_features
    if h3_size is None:
      h3_size = h3_features
    if output_size is None:
      output_size = output_features

    self.lstm1 = torch.nn.LSTM(input_size=input_size, hidden_size=h1_size)
    self.lstm2 = torch.nn.LSTM(input_size=h1_size, hidden_size=h2_size)
    self.fc1 = torch.nn.Linear(h2_size, h3_size)
    self.relu = torch.nn.ReLU()
    self.fc2 = torch.nn.Linear(h3_size, output_size)
    self.sigmoid = torch.nn.Sigmoid()

  def forward(self, inputs):
    """Run the sequence through both LSTMs and classify from the last timestep.

    Args:
      inputs: Tensor of shape ``(timesteps, batch_size, input_size)``.

    Returns:
      Tensor of shape ``(batch_size, output_size)`` with values in ``[0, 1]``.
    """
    # Only the per-timestep outputs are needed; the final (h, c) states are discarded.
    h1, _ = self.lstm1(inputs)
    h2, _ = self.lstm2(h1)
    # h2[-1] is the hidden state at the last timestep. In place of h2[-1] we could
    # use the final hidden state returned by lstm2 (h_n[0]); both are identical.
    h3 = self.relu(self.fc1(h2[-1]))
    return self.sigmoid(self.fc2(h3))

# Build the model from the notebook-level dimension constants.
model = SimpleModel()

# Left as the cell's trailing expression so the notebook renders the summary
# table for a dummy input of shape (timesteps, batch_size, input_features).
torchinfo.summary(
    model,
    input_size=(timesteps, batch_size, input_features),
)

Layer (type:depth-idx)                   Output Shape              Param #
SimpleModel                              [32, 1]                   --
├─LSTM: 1-1                              [12, 32, 8]               832
├─LSTM: 1-2                              [12, 32, 4]               224
├─Linear: 1-3                            [32, 2]                   10
├─ReLU: 1-4                              [32, 2]                   --
├─Linear: 1-5                            [32, 1]                   3
├─Sigmoid: 1-6                           [32, 1]                   --
Total params: 1,069
Trainable params: 1,069
Non-trainable params: 0
Total mult-adds (M): 0.41
Input size (MB): 0.02
Forward/backward pass size (MB): 0.04
Params size (MB): 0.00
Estimated Total Size (MB): 0.07