# 2.

In [None]:
import numpy as np
from collections import OrderedDict

# Fixed weights of the hand-built recurrent network.
# A multiplies the one-hot input (2 x 4).
A = np.array([
    [1, -1, -0.5, 0.5],
    [1, 1, -0.5, -1],
])

# R multiplies the previous hidden state (2 x 2 identity).
R = np.eye(2, dtype=int)

# B maps the hidden state to one score per character (4 x 2).
B = np.array([
    [1, 1],
    [0.5, 1],
    [-1, 0],
    [0, -0.5],
])

# Initial hidden state: a 2 x 1 column of zeros.
H = np.zeros((2, 1), dtype=int)

# Input sequence, one character per time step.
letters = list("hello")

# Class index -> character, used to decode the arg-max of the output scores.
index_to_letter = dict(enumerate("helo"))

# One-hot column vectors (4 x 1), keyed by character in vocabulary order:
# column k of the identity matrix is the encoding of the k-th character.
embedding = OrderedDict(
    (char, np.eye(4, dtype=int)[:, [position]])
    for position, char in index_to_letter.items()
)


In [None]:
def compute_next_state(h_t_minus_1, x_t, A, R, B):
    """Advance the recurrent cell by one time step.

    Args:
        h_t_minus_1: hidden state from the previous step.
        x_t: input vector for the current step.
        A: matrix applied to the input ``x_t``.
        R: matrix applied to the previous hidden state.
        B: matrix applied to the new hidden state to produce the output.

    Returns:
        A ``(h_t, y_t)`` tuple where ``h_t = tanh(R @ h_t_minus_1 + A @ x_t)``
        and ``y_t = B @ h_t``.
    """
    recurrent_term = np.dot(R, h_t_minus_1)
    input_term = np.dot(A, x_t)

    # New hidden state: tanh squashes the summed pre-activation.
    h_t = np.tanh(recurrent_term + input_term)

    # Output scores are a linear read-out of the new hidden state.
    return h_t, np.dot(B, h_t)

## Exercise 2A

In [None]:

#Deducing the characters
# Run the hand-built RNN over the input letters and decode each output.
# The recurrence is carried in a local variable (h_t) instead of rebinding
# the global H, so re-running this cell always starts from the initial
# state and prints the same result.
h_t = H
for i, letter in enumerate(letters):
    h_t, y_t = compute_next_state(h_t, embedding[letter], A, R, B)

    # y_t is a column of scores; the row with the largest score is the
    # predicted class index.
    y_hot = index_to_letter[int(np.argmax(y_t, axis=0)[0])]
    print("y{}: ".format(i+1), y_hot)

y1:  h
y2:  e
y3:  l
y4:  l
y5:  o


## Exercise 2B

In [None]:
# Vocabulary: a character's position in this list is its class index.
chars = list("helo")

# Target sequence "olleh" expressed as indices into `chars`.
y_true = [chars.index(symbol) for symbol in "olleh"]

# Input sequence "hello" as a batch of one: one one-hot row per time step.
one_hot = [[[int(symbol == candidate) for candidate in chars]
            for symbol in "hello"]]


In [None]:
import torch
# Inputs as a float tensor built from the nested one_hot list.
X = torch.tensor(one_hot, dtype=torch.float32)
# Targets as integer class indices, the form CrossEntropyLoss expects.
Y = torch.tensor(y_true, dtype=torch.long)

In [None]:
# Model / data dimensions.
input_size = 4        # length of each one-hot input vector
hidden_size = 2       # dimension of the recurrent hidden state
hidden_layers = 1     # number of stacked recurrent layers
num_classes = 4       # number of output scores produced by the last layer
sequence_length = 5   # number of time steps in the sequence


In [None]:
import torch.nn as nn
class RNN(nn.Module):
    """Bias-free ``nn.RNN`` followed by a bias-free linear read-out.

    Args:
        num_classes: number of scores produced per time step.
        input_size: length of each input vector.
        hidden_size: dimension of the recurrent hidden state.
        hidden_layers: number of stacked recurrent layers.
        sequence_length: optional fixed number of time steps. When ``None``
            (the default) it is inferred from the input in ``forward``, so
            the class no longer depends on a notebook-level global.
    """

    def __init__(self, num_classes, input_size, hidden_size, hidden_layers,
                 sequence_length=None):

        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.hidden_layers = hidden_layers
        self.sequence_length = sequence_length

        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=hidden_layers, batch_first=True, bias=False)
        self.fc = nn.Linear(hidden_size, num_classes, bias=False)

    def forward(self, x):
        """Return per-step class scores of shape (batch * steps, num_classes).

        ``x`` is reshaped to (batch, steps, input_size) before being fed to
        the recurrent layer; the initial hidden state is all zeros.
        """
        batch_size = x.size(0)
        # -1 lets reshape infer the number of steps when none was fixed.
        steps = -1 if self.sequence_length is None else self.sequence_length

        # nn.RNN expects h_0 as (num_layers, batch, hidden_size) regardless of
        # batch_first; batch_first only affects the input/output tensors.
        hidden = torch.zeros(self.hidden_layers, batch_size, self.hidden_size,
                             dtype=x.dtype, device=x.device)

        out, _ = self.rnn(x.reshape(batch_size, steps, self.input_size), hidden)
        # Flatten batch and time so every step becomes one row for the
        # linear layer.
        out = out.reshape(-1, self.hidden_size)
        out = self.fc(out)
        return out


In [None]:
learning_rate = 0.1
num_epochs = 20  # not consulted by the loop below, which stops on convergence
max_epochs = 1000  # safety cap so a run that never converges cannot spin forever
myRnn = RNN(num_classes, input_size, hidden_size, hidden_layers)
Myloss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(myRnn.parameters(), lr=learning_rate)

# String the network must reproduce, decoded from the target indices.
target = ''.join(chars[i] for i in y_true)

# Train the model
for curr_epoch in range(1, max_epochs + 1):
    outputs = myRnn(X)

    # Decode the current prediction: arg-max class per time step.
    idx = outputs.argmax(dim=1).tolist()
    result = ''.join(chars[i] for i in idx)
    print("Epoch ",curr_epoch,"- ","Predicted string: ", result)

    # Stop BEFORE updating the weights, so the parameters left in myRnn are
    # exactly the ones that produced the target string.
    if result == target:
        break

    optimizer.zero_grad()
    loss = Myloss(outputs, Y)
    loss.backward()
    optimizer.step()
else:
    raise RuntimeError(
        "Training did not reproduce {!r} within {} epochs".format(target, max_epochs)
    )


Epoch  1 -  Predicted string:  leeel
Epoch  2 -  Predicted string:  lleel
Epoch  3 -  Predicted string:  oleel
Epoch  4 -  Predicted string:  oleee
Epoch  5 -  Predicted string:  oleee
Epoch  6 -  Predicted string:  ollee
Epoch  7 -  Predicted string:  ollee
Epoch  8 -  Predicted string:  ollee
Epoch  9 -  Predicted string:  ollee
Epoch  10 -  Predicted string:  ollee
Epoch  11 -  Predicted string:  ollee
Epoch  12 -  Predicted string:  ollee
Epoch  13 -  Predicted string:  ollee
Epoch  14 -  Predicted string:  ollee
Epoch  15 -  Predicted string:  ollee
Epoch  16 -  Predicted string:  ollee
Epoch  17 -  Predicted string:  ollee
Epoch  18 -  Predicted string:  olleh


In [None]:
print("FINAL RESULTANT MATRICES. :\n")

# Learned weights, labelled to match the hand-built network:
# input-to-hidden (A), hidden-to-hidden (R) and the linear read-out (B).
for label, weight in (
    ("A = ", myRnn.rnn.weight_ih_l0),
    ("R = ", myRnn.rnn.weight_hh_l0),
    ("B = ", myRnn.fc.weight),
):
    print(label, weight.detach().numpy(), "\n")

FINAL RESULTANT MATRICES. :

A =  [[-1.5460695  -1.156598    1.1313473  -0.4907468 ]
 [-1.5296581   1.4870545   1.7390842  -0.16456181]] 

R =  [[ 2.023601    0.6608698 ]
 [-1.3395927   0.42447317]] 

B =  [[ 1.1029     -0.09981646]
 [ 1.3917742   0.73874533]
 [-1.739446    1.7005233 ]
 [-1.7695637  -2.2950926 ]] 

