In [1]:
import numpy as np

def generate_sequences(n=128, variable_len=False, seed=13):
    basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
    np.random.seed(seed)
    bases = np.random.randint(4, size=n)
    if variable_len:
        lengths = np.random.randint(3, size=n) + 2
    else:
        lengths = [4] * n
    directions = np.random.randint(2, size=n)
    points = [basic_corners[[(b + i) % 4 for i in range(4)]][slice(None, None, d*2-1)][:l] + np.random.randn(l, 2) * 0.1 for b, d, l in zip(bases, directions, lengths)]
    return points, directions

In [2]:
import torch
from torch import optim
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset

In [3]:
points, directions = generate_sequences()

In [4]:
X = torch.from_numpy(np.array(points)).float()

In [5]:
y = torch.from_numpy(np.array(directions)).float()

In [6]:
train_data = TensorDataset(X, y)
data_loader = DataLoader(train_data, batch_size=3, shuffle=True)

In [7]:
# create a 2 dimensional hidden state from scratch
hidden_state = torch.zeros((2,))
hidden_state

tensor([0., 0.])

In [8]:
rnn_cell = nn.RNNCell(10, 2)

In [9]:
rnn_cell.state_dict()

OrderedDict([('weight_ih',
              tensor([[-0.0280, -0.3024, -0.4793,  0.3541,  0.5490, -0.3960, -0.5311,  0.5526,
                       -0.2114,  0.2015],
                      [-0.3974,  0.6797,  0.0847, -0.2387,  0.6293,  0.4212, -0.0862, -0.5283,
                       -0.3907, -0.5696]])),
             ('weight_hh',
              tensor([[-0.5745,  0.3723],
                      [-0.4696, -0.1608]])),
             ('bias_ih', tensor([ 0.5436, -0.0545])),
             ('bias_hh', tensor([0.2536, 0.0924]))])

In [10]:
X_train, y_train = next(iter(data_loader))

In [11]:
X_train.shape

torch.Size([3, 4, 2])

In [12]:
rnn_layer = nn.RNN(input_size = 2, hidden_size = 2, batch_first=True)

In [13]:
rnn_layer.state_dict()

OrderedDict([('weight_ih_l0',
              tensor([[0.1358, 0.4976],
                      [0.3554, 0.2284]])),
             ('weight_hh_l0',
              tensor([[ 0.4140,  0.5241],
                      [ 0.3864, -0.6528]])),
             ('bias_ih_l0', tensor([0.6427, 0.4776])),
             ('bias_hh_l0', tensor([-0.3140,  0.3424]))])

In [14]:
out, final_hidden = rnn_layer(X_train)

In [15]:
out.shape

torch.Size([3, 4, 2])

In [16]:
final_hidden.shape

torch.Size([1, 3, 2])

In [17]:
class RNNModel(nn.Module):
    def __init__(self, input_size=2, hidden_size=2, output_size=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.rnn = nn.RNN(self.input_size, self.hidden_size, batch_first = True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, X):
        out, context = self.rnn(X)
        return self.linear(out[:, -1])

In [18]:
model = RNNModel()

In [19]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [20]:
model(X_train)

tensor([[ 0.1880],
        [-0.1378],
        [-0.2010]], grad_fn=<AddmmBackward0>)

In [21]:
y_train

tensor([0., 0., 1.])

In [22]:
epochs = 100

In [23]:
history = []
for epoch in range(epochs):
    batch_loss = []
    for X, y in data_loader:
        model.train()
        y_hat = model(X)
        loss = loss_fn(y_hat, y.view(-1,1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
    history.append(np.array(batch_loss).mean())
    print(f"Loss: {history[-1]}")

Loss: 0.6955564271572025
Loss: 0.6820544032163398
Loss: 0.6500246254510658
Loss: 0.5930489405643108
Loss: 0.5322521010110545
Loss: 0.46968023513638696
Loss: 0.400568465680577
Loss: 0.3365733327214108
Loss: 0.2740503592893135
Loss: 0.31597113251946
Loss: 0.28187567766669186
Loss: 0.2584075388000455
Loss: 0.2856497224961776
Loss: 0.28665401099986115
Loss: 0.25576595372931904
Loss: 0.23462752770459236
Loss: 0.23414958203428013
Loss: 0.21858388606762125
Loss: 0.27275814040195806
Loss: 0.23697587978744575
Loss: 0.2176010811506489
Loss: 0.20752036969480647
Loss: 0.19084421313519395
Loss: 0.19668967870259008
Loss: 0.21937982403391668
Loss: 0.187467823357343
Loss: 0.2326197658427233
Loss: 0.20020401256353876
Loss: 0.1855255996234455
Loss: 0.2556212926283479
Loss: 0.21234133702616187
Loss: 0.20537768388275318
Loss: 0.20909627064044567
Loss: 0.2078653136918018
Loss: 0.2395170311535557
Loss: 0.2091280975064999
Loss: 0.219069680978739
Loss: 0.2772647450342341
Loss: 0.24178736168501336
Loss: 0.2194

In [31]:
model(X_train).clip(0,1).round()

tensor([[0.],
        [0.],
        [1.]], grad_fn=<RoundBackward0>)

In [30]:
y_train

tensor([0., 0., 1.])