In [2]:
import torch

In [3]:
## Generating some test data
## x_in: a single row of 38 values drawn from a standard normal (torch.randn).
## y:    a single row of 15 values drawn uniformly from [0, 1) (torch.rand);
##       later cells pass it to the loss function as the target.
## No seed is set, so these values differ on every fresh run.
x_in = torch.randn(1, 38)
y = torch.rand(1, 15)

---
### Layer Class

Here we create a basic layer class to simplify the construction of our more complex networks.

We use the superclass `torch.nn.Module` to make generating the list of parameters easier.

We use `torch.nn.Parameter()` to define our weights and biases as parameters.

We define a `Layer.Forward()` method to simplify our feed-forward.

In [4]:
class Layer(torch.nn.Module):
    """Fully connected layer computing ``activation(z_in @ weights + bias)``.

    Subclassing ``torch.nn.Module`` and wrapping the tensors in
    ``torch.nn.Parameter`` makes ``weights`` and ``bias`` show up in
    ``.parameters()``, so an optimizer can be built directly from the layer.

    Args:
        size_in: Number of input features (columns of ``z_in``).
        size_out: Number of output features.
        activation: Callable applied element-wise to the affine result,
            e.g. ``torch.sigmoid`` or ``torch.nn.Sigmoid()``.
    """

    def __init__(self, size_in, size_out, activation):
        super().__init__()
        # torch.nn.Parameter enables requires_grad on its own, so the raw
        # tensors do not need the flag. Values come from a standard normal.
        self.weights = torch.nn.Parameter(torch.randn(size_in, size_out))
        # Shape (1, size_out) so the bias broadcasts across the rows of z_in.
        self.bias = torch.nn.Parameter(torch.randn(1, size_out))
        self.activation = activation

    def forward(self, z_in):
        """Apply the layer to ``z_in``.

        Defining ``forward`` (lower-case) is what makes the module callable
        as ``layer(z_in)``; without it ``torch.nn.Module`` raises
        ``NotImplementedError`` on a direct call.

        Args:
            z_in: Tensor whose last dimension equals ``size_in``.

        Returns:
            Tensor with the last dimension replaced by ``size_out``.
        """
        return self.activation(z_in @ self.weights + self.bias)

    def Forward(self, z_in):
        """Notebook-style spelling of :meth:`forward`; same result."""
        return self.forward(z_in)

In [5]:
## Initialize our layer
## 38 inputs / 15 outputs match the shapes of x_in (1, 38) and y (1, 15).
## MSELoss compares the layer output with y; Adam is created with its default
## hyper-parameters over the layer's weights and bias.
## NOTE: `loss_func` and `opt` are rebound for the RNN in a later cell, so
## re-run this cell before re-running the layer test below it.
forget = Layer(38, 15, torch.nn.Sigmoid())
loss_func = torch.nn.MSELoss()
opt = torch.optim.Adam(forget.parameters())

In [6]:
## Run a quick test - notice how the bias values change
## One full training step: forward pass, loss, backprop, optimizer update.
## The bias is printed before and after so the effect of the single Adam step
## is visible (each entry moves by roughly 0.001 in the recorded output).
print(forget.bias)
out = forget.Forward(x_in)
loss = loss_func(out, y)
loss.backward()
opt.step()
## Clear the accumulated gradients so a later step starts from zero.
opt.zero_grad()
print(forget.bias)

Parameter containing:
tensor([[ 1.1202, -0.4046,  1.5454, -1.2349,  0.3550,  1.6598, -0.8704, -1.8320,
         -1.3155, -0.3738,  0.2268,  0.4195,  0.2005,  1.1268,  0.3586]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1212, -0.4036,  1.5444, -1.2339,  0.3559,  1.6588, -0.8694, -1.8310,
         -1.3165, -0.3728,  0.2278,  0.4185,  0.2015,  1.1259,  0.3596]],
       requires_grad=True)


---
### RNN Class

Here we create a class for our simple Recurrent Neural Network (RNN).

Once again, we use the superclass `torch.nn.Module` to make generating the list of parameters easier. It will recursively look through any other `torch.nn.Module`s in the class to collect their parameters too.

The `RNN.Forward()` method iterates through a sequence one entry (row) at a time, updating its memory after every entry, and returns the output generated from the final entry.

In [7]:
class RNN(torch.nn.Module):
    """Simple recurrent network built from two ``Layer`` blocks.

    ``mem_layer`` (tanh) maps the current input row concatenated with the
    previous memory to the next memory; ``out_layer`` (sigmoid) maps a memory
    to an output. Because both are ``torch.nn.Module`` attributes, their
    parameters are included in ``RNN.parameters()``.

    Args:
        size_in: Number of features in each sequence entry.
        size_out: Number of features in the output.
        size_mem: Width of the recurrent memory.
    """

    def __init__(self, size_in, size_out, size_mem):
        super().__init__()
        self.size_mem = size_mem
        self.mem_layer = Layer(size_in + size_mem, size_mem, torch.tanh)
        self.out_layer = Layer(size_mem, size_out, torch.sigmoid)

    def forward(self, x):
        """Run the sequence ``x`` through the network.

        Args:
            x: 2-D tensor of shape ``(sequence_length, size_in)``; each row is
                one sequence entry.

        Returns:
            Tensor of shape ``(1, size_out)``: the output computed from the
            memory after the final entry.

        Raises:
            ValueError: If ``x`` has no rows (there is no final entry to
                produce an output from).
        """
        if x.shape[0] == 0:
            raise ValueError("RNN.Forward() needs a sequence with at least one entry")

        # Start from an all-zero memory that lives on the same device and has
        # the same dtype as the layer weights, so the module still works after
        # .to(device) / .double().
        weights = self.mem_layer.weights
        mem = torch.zeros(
            1, self.size_mem, dtype=weights.dtype, device=weights.device
        )

        for i in range(x.shape[0]):
            # Slice (rather than index) to keep the row 2-D: (1, size_in).
            step = x[i:i + 1, :]
            mem = self.mem_layer.Forward(torch.cat([step, mem], dim=1))

        # Only the last output is returned, so the output layer is applied
        # once to the final memory instead of once per step.
        return self.out_layer.Forward(mem)

    def Forward(self, x):
        """Notebook-style spelling of :meth:`forward`; same result."""
        return self.forward(x)

In [8]:
## Initialize our RNN
## 38 input features, 15 output features, and a memory of width 5.
## `loss_func` and `opt` are rebound here, replacing the ones created for the
## single-layer test; Adam now optimizes the RNN's parameters (defaults used).
rnn = RNN(38, 15, 5)
loss_func = torch.nn.MSELoss()
opt = torch.optim.Adam(rnn.parameters())

In [9]:
## Run a quick test - notice how the bias values change
## One training step on the RNN. x_in has a single row, so this is a
## sequence of length 1. The memory layer's bias is printed before and after
## the Adam step to show that gradients reach the recurrent layer.
print(rnn.mem_layer.bias)
y_hat = rnn.Forward(x_in)
loss = loss_func(y_hat, y)
loss.backward()
opt.step()
## Clear the accumulated gradients so a later step starts from zero.
opt.zero_grad()
## Blank line between the two printed parameters.
print()
print(rnn.mem_layer.bias)

Parameter containing:
tensor([[-0.4771,  0.9209, -1.9465,  0.6632, -0.0426]], requires_grad=True)

Parameter containing:
tensor([[-0.4781,  0.9209, -1.9475,  0.6638, -0.0436]], requires_grad=True)


---
### LSTM Class

Write your own Long Short-Term Memory (LSTM) class using the `torch.nn.Module` superclass.

Structure your `LSTM.Forward()` method similarly to the RNN's, so that it iterates through a sequence one entry at a time.

<img src=https://i.stack.imgur.com/RHNrZ.jpg width=500>

In [10]:
# Mildly Atypical
class LSTM(torch.nn.Module):
    """Long Short-Term Memory network assembled from ``Layer`` blocks.

    The gates read ``z``, the current input row concatenated with the
    short-term memory. This variant is mildly atypical in that the long-term
    memory is passed through a learned tanh layer (``recall_long``) that maps
    it to the short-term width, and a separate ``out`` layer produces the
    per-step output from the short-term memory.

    Subclassing ``torch.nn.Module`` makes every gate's weights and biases
    available through ``LSTM.parameters()``.

    Args:
        size_in: Number of features in each sequence entry.
        size_mem_short: Width of the short-term memory.
        size_mem_long: Width of the long-term memory.
        size_out: Number of features in each output row.
        out_func: Activation applied by the output layer.
    """

    def __init__(self, size_in, size_mem_short, size_mem_long, size_out, out_func):
        super().__init__()
        # Kept on the instance because forward() needs them to build the
        # initial memories.
        self.size_mem_short = size_mem_short
        self.size_mem_long = size_mem_long

        size_gate_in = size_in + size_mem_short
        self.forget_gate = Layer(size_gate_in, size_mem_long, torch.sigmoid)
        self.remember_sig = Layer(size_gate_in, size_mem_long, torch.sigmoid)
        self.remember_tanh = Layer(size_gate_in, size_mem_long, torch.tanh)
        self.recall_short = Layer(size_gate_in, size_mem_short, torch.sigmoid)
        self.recall_long = Layer(size_mem_long, size_mem_short, torch.tanh)
        self.out = Layer(size_mem_short, size_out, out_func)

    def forward(self, x):
        """Run the sequence ``x`` through the network.

        Args:
            x: 2-D tensor of shape ``(sequence_length, size_in)``; each row is
                one sequence entry.

        Returns:
            Tensor with one output row per sequence entry, concatenated along
            dimension 0: shape ``(sequence_length, size_out)``.

        Raises:
            ValueError: If ``x`` has no rows.
        """
        if x.shape[0] == 0:
            raise ValueError("LSTM.Forward() needs a sequence with at least one entry")

        # Zero-initialised memories on the same device / dtype as the weights.
        weights = self.forget_gate.weights
        mem_short = torch.zeros(
            1, self.size_mem_short, dtype=weights.dtype, device=weights.device
        )
        mem_long = torch.zeros(
            1, self.size_mem_long, dtype=weights.dtype, device=weights.device
        )
        outputs = []

        for t in range(x.shape[0]):
            # Gate input: current row (kept 2-D by slicing) + short-term memory.
            z = torch.cat([x[t:t + 1, :], mem_short], dim=1)

            # Forget part of the long-term memory, then add what should be
            # remembered. Written out-of-place so autograd never sees an
            # in-place edit of a tensor it may need for the backward pass.
            mem_long = (
                self.forget_gate.Forward(z) * mem_long
                + self.remember_sig.Forward(z) * self.remember_tanh.Forward(z)
            )

            # New short-term memory: gated view of the long-term memory.
            mem_short = (
                self.recall_short.Forward(z) * self.recall_long.Forward(mem_long)
            )

            outputs.append(self.out.Forward(mem_short))

        return torch.cat(outputs, dim=0)

    def Forward(self, x):
        """Notebook-style spelling of :meth:`forward`; same result."""
        return self.forward(x)