In [1]:
import numpy as np

def generate_sequences(n=128, variable_len=False, seed=13):
    basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
    np.random.seed(seed)
    bases = np.random.randint(4, size=n)
    if variable_len:
        lengths = np.random.randint(3, size=n) + 2
    else:
        lengths = [4] * n
    directions = np.random.randint(2, size=n)
    points = [basic_corners[[(b + i) % 4 for i in range(4)]][slice(None, None, d*2-1)][:l] + np.random.randn(l, 2) * 0.1 for b, d, l in zip(bases, directions, lengths)]
    return points, directions

In [2]:
import torch
from torch import optim
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset

In [3]:
points, directions = generate_sequences()

In [4]:
X = torch.from_numpy(np.array(points)).float()

In [5]:
y = torch.from_numpy(np.array(directions)).float()

In [6]:
train_data = TensorDataset(X, y)
data_loader = DataLoader(train_data, batch_size=3, shuffle=True)

In [7]:
# create a 2 dimensional hidden state from scratch
hidden_state = torch.zeros((2,))
hidden_state

tensor([0., 0.])

In [8]:
rnn_cell = nn.RNNCell(10, 2)

In [9]:
rnn_cell.state_dict()

OrderedDict([('weight_ih',
              tensor([[-0.5690,  0.6881, -0.3313,  0.6337, -0.5018, -0.2007, -0.0716,  0.0497,
                        0.1755, -0.4532],
                      [-0.2095,  0.3850,  0.3785,  0.6666,  0.2061, -0.4660, -0.6054,  0.1116,
                       -0.5396, -0.0775]])),
             ('weight_hh',
              tensor([[ 0.3329, -0.3152],
                      [ 0.0772,  0.1730]])),
             ('bias_ih', tensor([0.4247, 0.5085])),
             ('bias_hh', tensor([ 0.1556, -0.3878]))])

In [10]:
X_train, y_train = next(iter(data_loader))

In [11]:
X_train.shape

torch.Size([3, 4, 2])

In [12]:
rnn_layer = nn.RNN(input_size = 2, hidden_size = 2, batch_first=True)

In [13]:
rnn_layer.state_dict()

OrderedDict([('weight_ih_l0',
              tensor([[-0.5745, -0.1672],
                      [-0.4453,  0.5029]])),
             ('weight_hh_l0',
              tensor([[0.2316, 0.3645],
                      [0.2534, 0.1772]])),
             ('bias_ih_l0', tensor([-0.1964, -0.5988])),
             ('bias_hh_l0', tensor([-0.6760, -0.2726]))])

In [14]:
out, final_hidden = rnn_layer(X_train)

In [15]:
out.shape

torch.Size([3, 4, 2])

In [16]:
final_hidden.shape

torch.Size([1, 3, 2])

In [17]:
class RNNModel(nn.Module):
    def __init__(self, input_size=2, hidden_size=2, output_size=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.rnn = nn.GRU(self.input_size, self.hidden_size, batch_first = True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, X):
        out, context = self.rnn(X)
        return self.linear(out[:, -1])

In [18]:
model = RNNModel()

In [19]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [20]:
model(X_train)

tensor([[0.1566],
        [0.0027],
        [0.1780]], grad_fn=<AddmmBackward0>)

In [21]:
y_train

tensor([1., 1., 0.])

In [22]:
epochs = 100

In [23]:
history = []
for epoch in range(epochs):
    batch_loss = []
    for X, y in data_loader:
        model.train()
        y_hat = model(X)
        loss = loss_fn(y_hat, y.view(-1,1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
    history.append(np.array(batch_loss).mean())
    print(f"Loss: {history[-1]}")

Loss: 0.7021885777628699
Loss: 0.6963924058648043
Loss: 0.6853939433430516
Loss: 0.67826511000478
Loss: 0.6581848416217538
Loss: 0.6146305667799573
Loss: 0.5068138650683469
Loss: 0.36363881133323495
Loss: 0.22799104400152384
Loss: 0.14133570776429288
Loss: 0.09009279293376346
Loss: 0.06138141518242137
Loss: 0.044265887418458626
Loss: 0.034114215068172575
Loss: 0.02763068426920231
Loss: 0.02304534566437089
Loss: 0.019621746961114017
Loss: 0.017116401431172393
Loss: 0.015142826601689639
Loss: 0.013518066990167596
Loss: 0.012230572539793198
Loss: 0.011177343836184158
Loss: 0.010265471500366233
Loss: 0.00945228835307928
Loss: 0.008807919237242882
Loss: 0.00823000492528081
Loss: 0.007709241298915342
Loss: 0.007245355745950757
Loss: 0.006865604683150386
Loss: 0.006457501128965685
Loss: 0.006137296459962463
Loss: 0.005836532321260419
Loss: 0.005592009355855542
Loss: 0.005336104632290297
Loss: 0.005117060210512474
Loss: 0.004898107697277568
Loss: 0.004717039290902226
Loss: 0.00453199821979154


In [24]:
model(X_train).clip(0,1).round()

tensor([[1.],
        [1.],
        [0.]], grad_fn=<RoundBackward0>)

In [25]:
y_train

tensor([1., 1., 0.])