In [230]:
import os
os.chdir("/Users/yenchenchou/Documents/GitHub/recsys-benchmarks")

In [231]:
import random
import polars as pl
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets

from src import EnvInit

In [232]:
env_init = EnvInit()
seed = env_init.fix_seed(12345)

In [250]:
class MyRNN(nn.Module):
    """Near from scratch, need to manually pass the output to
    subsequencial input if we want to train models.

    Args:
        nn (_type_): _description_
    """

    def __init__(self, input_size, hidden_size):
        super(MyRNN, self).__init__()
        self.w_ih = nn.Parameter(torch.randn(hidden_size, input_size))
        self.b_ih = nn.Parameter(torch.zeros(hidden_size))  # Set biases to zero
        self.w_hh = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.b_hh = nn.Parameter(torch.zeros(hidden_size))  # Set biases to zero

    def forward(self, x, h_0):
        w_ih = x @ self.w_ih.T + self.b_ih
        w_hh = h_0 @ self.w_hh.T + self.b_hh
        output = torch.tanh(w_ih + w_hh)
        return output, w_hh


class MyRNNV2(nn.Module):
    """Use nn.Linear to do the manually pass work

    Args:
        nn (_type_): _description_
    """

    def __init__(self, input_size, hidden_size):
        super(MyRNNV2, self).__init__()
        self.ih = nn.Linear(input_size, hidden_size, bias=False)
        self.hh = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, x, h_0):
        w_ih = self.ih(x)
        w_hh = self.hh(h_0)
        output = torch.tanh(w_ih + w_hh)
        return output, w_hh


class MyRNNV3(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(MyRNNV3, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size)

    def forward(self, x, h_0):
        output, w_hh = self.rnn(x, h_0)
        return output, w_hh


# Initialize models
input_size = 2
hidden_size = 4
model_v3 = MyRNNV3(input_size, hidden_size)
model_v2 = MyRNNV2(input_size, hidden_size)
model = MyRNN(input_size, hidden_size)

# Set identical weights and zero biases
model.w_ih.data = model_v2.ih.weight.data.clone()
model.w_hh.data = model_v2.hh.weight.data.clone()

# Test with random input and initial hidden state
# seq_length, batch, hidden_size
x = torch.ones(2, 3, 2)
h_0 = torch.zeros(1, 3, 4)
output_v3, _ = model_v3(x, h_0)
output_v2, _ = model_v2(x, h_0)
output, _ = model(x, h_0)

print("Output from MyRNNV3:", output_v3)
print("Output from MyRNNV2:", output_v2)
print("Output from MyRNN:", output)

Output from MyRNNV3: tensor([[[0.4049, 0.8368, 0.7817, 0.4895],
         [0.4049, 0.8368, 0.7817, 0.4895],
         [0.4049, 0.8368, 0.7817, 0.4895]],

        [[0.1583, 0.7447, 0.9042, 0.3796],
         [0.1583, 0.7447, 0.9042, 0.3796],
         [0.1583, 0.7447, 0.9042, 0.3796]]], grad_fn=<StackBackward0>)
Output from MyRNNV2: tensor([[[ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231]],

        [[ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231]]], grad_fn=<TanhBackward0>)
Output from MyRNN: tensor([[[ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231]],

        [[ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231],
         [ 0.3443,  0.0808, -0.6759, -0.1231]]], grad_fn=<TanhBackward0>)
