In [79]:
import torch

from torch import nn
from torch import optim
from torch.nn.utils.rnn import pad_sequence

In [229]:
class CustomRNNCell(nn.Module):
    """Custom RNN cell 
    Custom RNN cell which for a given input returns it's positive value summed
    to the information carried along.
    """

    def __init__(self, input_size: int = 1, hidden_size: int = 1):
        """Initialization method

        :param input_size: input size; number od values in a single input
        :type input_size: int
        :param hidden_size: number of features in the hidden layer of our RNN cell
        :type hidden_size: int
        """
        super(CustomRNNCell, self).__init__()

        self._rnn_cell = nn.RNNCell(input_size, hidden_size, nonlinearity='relu')

    def forward(self, x, hidden, print_results=False):

        # transformation to input x
        transformed = torch.where(x < 0, -1 * x, x)
        #transformed = torch.sqrt(x ** 2)  
        # pass through the cell
        if print_results:
            print("input")
            print(transformed, hidden)
        hidden = self._rnn_cell(transformed, hidden)
        if print_results:
            print("output")
            print(hidden)

        return hidden

In [226]:
# RNN network that sums the outputs of the CustomRNNCell
class CustomRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(CustomRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn_cell = CustomRNNCell(input_size, hidden_size)
        
    def forward(self, x, print_results=False):
        batch_size = x.size(0)
        hidden = torch.zeros(batch_size, self.hidden_size)

        # Iterate over time steps
        output_sum = torch.zeros(batch_size, self.hidden_size)
        for t in range(x.size(1)):
            masked_input = x[:, t, :]
            hidden = self.rnn_cell(masked_input, hidden, print_results)
        
        return hidden

In [138]:
# Function to generate random sequences and their L1 norms
def generate_data(batch_size, max_length):
    sequences = []
    targets = []
    for _ in range(batch_size):
        length = torch.randint(1, max_length + 1, (1,)).item() # get random length
        seq = torch.randn(length, 1)  # Random sequence of 'length'
        l1_norm = torch.sum(torch.abs(seq))  # Compute the L1 norm
        sequences.append(seq)
        targets.append(torch.tensor([l1_norm], dtype=torch.float32))
    return sequences, targets

In [170]:
# Parameters
input_size = 1  # Each element is a scalar
hidden_size = 1  # Output is a scalar
batch_size = 8
max_length = 10  # Maximum length of any sequence
epochs = 1000

# Create the RNN model
model = CustomRNN(input_size=input_size, hidden_size=hidden_size)

for p in model.rnn_cell.parameters():
    print(p)

Parameter containing:
tensor([[0.8994]], requires_grad=True)
Parameter containing:
tensor([[0.8042]], requires_grad=True)
Parameter containing:
tensor([0.6019], requires_grad=True)
Parameter containing:
tensor([-0.9476], requires_grad=True)


In [207]:
optimizer = optim.Adam(model.parameters(), lr=.1)
criterion = nn.MSELoss()  # Mean Squared Error loss

# Generate a batch of random sequences and their L1 norms
sequences, targets = generate_data(batch_size=batch_size, max_length=max_length)

# Pad sequences to have a consistent batch size
padded_sequences = pad_sequence(sequences, batch_first=True)
lengths = torch.tensor([len(seq) for seq in sequences])
targets = torch.cat(targets)

for epoch in range(epochs):
    # Pad sequences to have a consistent batch size
    padded_sequences = pad_sequence(sequences, batch_first=True)
    
    optimizer.zero_grad()
    
    # Forward pass
    outputs = model(padded_sequences).squeeze(1)
    
    # Compute loss
    loss = criterion(outputs, targets)
    loss.backward()
    
    # Update weights
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

Epoch 0, Loss: 3.5278418408779544e-07
Epoch 100, Loss: 0.020604923367500305
Epoch 200, Loss: 0.0012437815312296152
Epoch 300, Loss: 1.6268782928818837e-05
Epoch 400, Loss: 5.443083850309449e-08
Epoch 500, Loss: 3.061143005744782e-11
Epoch 600, Loss: 1.4366285938649526e-13
Epoch 700, Loss: 6.849243394668747e-13
Epoch 800, Loss: 2.940876708823481e-14
Epoch 900, Loss: 1.5476508963274682e-12
Epoch 1000, Loss: 1.4001300119304005e-12
Epoch 1100, Loss: 2.115212519027132e-12
Epoch 1200, Loss: 3.2154834350706096e-13
Epoch 1300, Loss: 5.445921491542549e-13
Epoch 1400, Loss: 1.0612899448148028e-12
Epoch 1500, Loss: 3.089195566019498e-14
Epoch 1600, Loss: 8.765384251763209e-14
Epoch 1700, Loss: 6.121943230130711e-12
Epoch 1800, Loss: 4.557344085442949e-13
Epoch 1900, Loss: 1.7121810913511837e-11
Epoch 2000, Loss: 5.3711757264096605e-12
Epoch 2100, Loss: 0.00012092038377886638
Epoch 2200, Loss: 0.001623914111405611
Epoch 2300, Loss: 0.00044792747939936817
Epoch 2400, Loss: 0.001068987650796771
Epoc

In [230]:
# Training setup
def train_model(model, epochs, batch_size, max_length, learning_rate=0.001):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()  # Mean Squared Error loss
    
    for epoch in range(epochs):
        # Generate a batch of random sequences and their L1 norms
        sequences, targets = generate_data(batch_size=batch_size, max_length=max_length)
        
        # Pad sequences to have a consistent batch size
        padded_sequences = pad_sequence(sequences, batch_first=True)
        targets = torch.cat(targets)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(padded_sequences).squeeze(1)
        
        # Compute loss
        loss = criterion(outputs, targets)
        loss.backward()
        
        # Update weights
        optimizer.step()
        
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')

# Parameters
input_size = 1  # Each element is a scalar
hidden_size = 1  # Output is a scalar
batch_size = 8
max_length = 10  # Maximum length of any sequence
epochs = 10000

# Create the RNN model
model = CustomRNN(input_size=input_size, hidden_size=hidden_size)

# Train the model
train_model(model, epochs, batch_size, max_length)

# Test the model on a new sequence
test_sequence = torch.tensor([3, -5, 7, -2], dtype=torch.float32).unsqueeze(1).unsqueeze(0)
test_length = torch.tensor([test_sequence.size(1)])

# Get the model's prediction
model.eval()
with torch.no_grad():
    predicted_sum = model(test_sequence).item()
    actual_sum = sum(torch.abs(test_sequence).squeeze()).item()

print(f"Predicted sum: {predicted_sum}, Actual sum: {actual_sum}")

Epoch 0, Loss: 20.353914260864258
Epoch 100, Loss: 15.471206665039062
Epoch 200, Loss: 9.690963745117188
Epoch 300, Loss: 1.2185109853744507
Epoch 400, Loss: 1.3471957445144653
Epoch 500, Loss: 0.7807211875915527
Epoch 600, Loss: 0.5984176993370056
Epoch 700, Loss: 1.2955970764160156
Epoch 800, Loss: 0.07581353187561035
Epoch 900, Loss: 0.22449226677417755
Epoch 1000, Loss: 0.08933629840612411
Epoch 1100, Loss: 0.021775715053081512
Epoch 1200, Loss: 0.022165510803461075
Epoch 1300, Loss: 0.05594366416335106
Epoch 1400, Loss: 0.008839776739478111
Epoch 1500, Loss: 0.0044252630323171616
Epoch 1600, Loss: 0.02524992637336254
Epoch 1700, Loss: 0.02195577137172222
Epoch 1800, Loss: 0.03100227378308773
Epoch 1900, Loss: 0.01647491380572319
Epoch 2000, Loss: 0.014308987185359001
Epoch 2100, Loss: 0.013593112118542194
Epoch 2200, Loss: 0.018217910081148148
Epoch 2300, Loss: 0.014588596299290657
Epoch 2400, Loss: 0.0030868016183376312
Epoch 2500, Loss: 0.00577277597039938
Epoch 2600, Loss: 0.00

In [231]:
model.rnn_cell._rnn_cell.bias_hh, model.rnn_cell._rnn_cell.bias_ih

(Parameter containing:
 tensor([0.8732], requires_grad=True),
 Parameter containing:
 tensor([-0.8732], requires_grad=True))

In [232]:
model.rnn_cell._rnn_cell.weight_hh, model.rnn_cell._rnn_cell.weight_ih

(Parameter containing:
 tensor([[1.0000]], requires_grad=True),
 Parameter containing:
 tensor([[1.0000]], requires_grad=True))

In [214]:
seq = torch.randn(1, 10000, 1)  # Random sequence of 'length'
l1_norm = torch.sum(torch.abs(seq))  # Compute the L1 norm
seq, l1_norm

(tensor([[[-0.1404],
          [ 0.1937],
          [-1.1295],
          ...,
          [-0.1578],
          [-1.0521],
          [-0.2849]]]),
 tensor(7966.4136))

In [215]:
seq_len = torch.tensor([seq.size(1)])
seq_len

tensor([10000])

In [216]:
# Get the model's prediction
with torch.no_grad():
    predicted_sum = model(seq, seq_len, True).item()
    actual_sum = sum(torch.abs(seq).squeeze()).item()

print(f"Predicted sum: {predicted_sum}, Actual sum: {actual_sum}")

input
tensor([[0.1404]]) tensor([[0.]])
output
tensor([[0.1404]])
input
tensor([[0.1937]]) tensor([[0.1404]])
output
tensor([[0.3341]])
input
tensor([[1.1295]]) tensor([[0.3341]])
output
tensor([[1.4636]])
input
tensor([[0.9940]]) tensor([[1.4636]])
output
tensor([[2.4576]])
input
tensor([[0.7114]]) tensor([[2.4576]])
output
tensor([[3.1690]])
input
tensor([[2.4062]]) tensor([[3.1690]])
output
tensor([[5.5752]])
input
tensor([[0.6185]]) tensor([[5.5752]])
output
tensor([[6.1936]])
input
tensor([[0.1373]]) tensor([[6.1936]])
output
tensor([[6.3309]])
input
tensor([[0.8417]]) tensor([[6.3309]])
output
tensor([[7.1726]])
input
tensor([[0.6642]]) tensor([[7.1726]])
output
tensor([[7.8367]])
input
tensor([[1.4348]]) tensor([[7.8367]])
output
tensor([[9.2716]])
input
tensor([[0.5676]]) tensor([[9.2716]])
output
tensor([[9.8392]])
input
tensor([[0.3916]]) tensor([[9.8392]])
output
tensor([[10.2308]])
input
tensor([[0.0662]]) tensor([[10.2308]])
output
tensor([[10.2969]])
input
tensor([[0.6545

In [217]:
print(f"Predicted sum: {predicted_sum}, Actual sum: {actual_sum}")

Predicted sum: 7963.86474609375, Actual sum: 7966.43115234375
