In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# --- Experiment configuration -------------------------------------------
seed = 0          # fixed seed so weight initialisation is repeatable across runs
n_hidden = 35     # size of the RNN hidden state
lr = 0.01         # learning rate handed to the optimizer
epochs = 10000    # number of passes over the single training string

# Seed PyTorch's global RNG before any layer is created; without this the
# initial weights (and therefore every loss value and the generated text)
# differ on each "Restart & Run All".
torch.manual_seed(seed)

# The one sequence the network is asked to memorise.
string = "hello pytorch. how long can a rnn cell remember"

# Alphabet for the one-hot encoding.  The last two symbols ("0" and "1") are
# not used as ordinary text here: the encoder marks the start of a sequence
# with the second-to-last symbol and the end with the last one.
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = list(chars)
n_letters = len(char_list)

In [3]:
def string_to_onehot(string, alphabet=None):
    """Encode a string as a matrix of one-hot rows framed by start/end markers.

    Row 0 is the start marker (a 1 in the second-to-last column), rows
    1..len(string) are the one-hot encodings of the characters in order, and
    the final row is the end marker (a 1 in the last column).

    Args:
        string: Text to encode. Every character must occur in ``alphabet``.
        alphabet: Sequence of symbols defining the column order. Defaults to
            the module-level ``char_list``.

    Returns:
        An integer ``numpy.ndarray`` of shape
        ``(len(string) + 2, len(alphabet))``.

    Raises:
        ValueError: If ``string`` contains a character missing from
            ``alphabet``.
    """
    if alphabet is None:
        alphabet = char_list

    # Map each symbol to its first position, mirroring ``list.index``.
    column_of = {}
    for col, symbol in enumerate(alphabet):
        column_of.setdefault(symbol, col)

    # Allocate the whole matrix once instead of re-stacking it per character.
    onehot = np.zeros(shape=(len(string) + 2, len(alphabet)), dtype=int)
    onehot[0, -2] = 1    # start marker
    onehot[-1, -1] = 1   # end marker

    for row, ch in enumerate(string, start=1):
        if ch not in column_of:
            raise ValueError(
                "character {!r} at position {} is not in the alphabet".format(ch, row - 1)
            )
        onehot[row, column_of[ch]] = 1

    return onehot

In [4]:
def onehot_to_word(onehot_1, alphabet=None):
    """Return the alphabet symbol at the position of the tensor's largest entry.

    The tensor is treated as flat: the index of its maximum element (over all
    elements) is used to look up the symbol.

    Args:
        onehot_1: A ``torch.Tensor`` of scores or one-hot values. It may
            require grad; it is detached before the index is read.
        alphabet: Sequence of symbols to index into. Defaults to the
            module-level ``char_list``.

    Returns:
        The element of ``alphabet`` at the arg-max position.
    """
    if alphabet is None:
        alphabet = char_list
    # argmax is taken on the tensor itself, so no NumPy conversion is needed
    # (that conversion raises for tensors that require grad).
    best = int(onehot_1.detach().argmax())
    return alphabet[best]

In [5]:
class RNN(nn.Module):
    """A single hand-written recurrent cell with a linear read-out.

    At each step the new hidden state is
    ``tanh(i2h(input) + h2h(hidden))`` and the output is ``i2o(hidden)``
    computed from that new hidden state.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        output_size: Number of features in each output vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Layers are created in this order on purpose: it fixes the order in
        # which random initial weights are drawn.
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        # Despite its name, this layer maps the hidden state to the output.
        self.i2o = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: Tensor whose last dimension is ``input_size``.
            hidden: Previous hidden state; last dimension ``hidden_size``.

        Returns:
            ``(output, hidden)`` where ``hidden`` is the updated state and
            ``output`` is the linear read-out of that updated state.
        """
        hidden = self.act_fn(self.i2h(input) + self.h2h(hidden))
        output = self.i2o(hidden)
        return output, hidden

    def init_hidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        The tensor is created with the same dtype and device as the module's
        weights, so it stays usable after ``.double()`` or ``.to(device)``.

        Args:
            batch_size: Size of the leading dimension (default 1).

        Returns:
            A zero tensor of shape ``(batch_size, hidden_size)``.
        """
        reference = self.h2h.weight
        return torch.zeros(
            batch_size,
            self.hidden_size,
            dtype=reference.dtype,
            device=reference.device,
        )
    
# Inputs and outputs are both vectors over the alphabet, so the model's input
# and output widths are the alphabet size.
rnn = RNN(input_size=n_letters, hidden_size=n_hidden, output_size=n_letters)

In [6]:
# Mean-squared error between the network output and the one-hot target row.
loss_func = nn.MSELoss()
# Adam over every parameter of the model, using the configured learning rate.
optimizer = optim.Adam(rnn.parameters(), lr=lr)

In [7]:
# Encode the training string (including its start/end marker rows) and convert
# the integer matrix to a float32 tensor so it can be fed to the linear layers.
encoded = string_to_onehot(string)
one_hot = torch.from_numpy(encoded).float()

In [8]:

# Train on the single encoded sequence.  At every step the network sees the
# ground-truth row ``t`` and is asked to predict row ``t + 1``; its own
# prediction is never fed back during training.
n_steps = one_hot.size(0) - 1   # the last row has no successor to predict

for epoch in range(epochs):
    rnn.zero_grad()
    total_loss = 0
    hidden = rnn.init_hidden()   # fresh zero state at the start of each pass

    for step in range(n_steps):
        input_ = one_hot[step:step + 1, :]   # slice keeps the leading dim: (1, n_letters)
        target = one_hot[step + 1]

        # Call the module itself rather than ``.forward`` so that any
        # registered hooks run.
        output, hidden = rnn(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    # One backward pass through the whole unrolled sequence, then one update.
    total_loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(total_loss)
        

tensor(3.0411, grad_fn=<AddBackward0>)
tensor(0.9991, grad_fn=<AddBackward0>)
tensor(0.6903, grad_fn=<AddBackward0>)
tensor(0.4559, grad_fn=<AddBackward0>)
tensor(0.3065, grad_fn=<AddBackward0>)
tensor(0.2354, grad_fn=<AddBackward0>)
tensor(0.1820, grad_fn=<AddBackward0>)
tensor(0.1468, grad_fn=<AddBackward0>)
tensor(0.1218, grad_fn=<AddBackward0>)
tensor(0.1099, grad_fn=<AddBackward0>)
tensor(0.0963, grad_fn=<AddBackward0>)
tensor(0.0848, grad_fn=<AddBackward0>)
tensor(0.0762, grad_fn=<AddBackward0>)
tensor(0.0687, grad_fn=<AddBackward0>)
tensor(0.0630, grad_fn=<AddBackward0>)
tensor(0.0599, grad_fn=<AddBackward0>)
tensor(0.0533, grad_fn=<AddBackward0>)
tensor(0.0490, grad_fn=<AddBackward0>)
tensor(0.0584, grad_fn=<AddBackward0>)
tensor(0.0458, grad_fn=<AddBackward0>)
tensor(0.0411, grad_fn=<AddBackward0>)
tensor(0.0374, grad_fn=<AddBackward0>)
tensor(0.0343, grad_fn=<AddBackward0>)
tensor(0.0326, grad_fn=<AddBackward0>)
tensor(0.0313, grad_fn=<AddBackward0>)
tensor(0.0272, grad_fn=<A

tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0140, grad_fn=<AddBackward0>)
tensor(0.0018, grad_fn=<AddBackward0>)
tensor(0.0007, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0008, grad_fn=<AddBackward0>)
tensor(0.0014, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<A

tensor(0.0007, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(5.1153e-05, grad_fn=<AddBackward0>)
tensor(3.3077e-05, grad_fn=<AddBackward0>)
tensor(3.0771e-05, grad_fn=<AddBackward0>)
tensor(8.3573e-05, grad_fn=<AddBackward0>)
tensor(0.0019, grad_fn=<AddBackward0>)
tensor(0.0011, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(7.7075e-05, grad_fn=<AddBackward0>)
tensor(3.7380e-05, grad_fn=<AddBackward0>)
tensor(2.2424e-05, grad_fn=<AddBackward0>)
tensor(3.7741e-05, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0019, grad_fn=<AddBackward0>)
tensor(0.0008, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(8.5903e-05, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0008, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0010, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddB

tensor(7.6627e-05, grad_fn=<AddBackward0>)
tensor(6.2391e-05, grad_fn=<AddBackward0>)
tensor(7.2479e-05, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0013, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(6.3680e-05, grad_fn=<AddBackward0>)
tensor(2.6332e-05, grad_fn=<AddBackward0>)
tensor(5.8221e-05, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0014, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(9.4243e-05, grad_fn=<AddBackward0

tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0009, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(6.2905e-05, grad_fn=<AddBackward0>)
tensor(7.4681e-05, grad_fn=<AddBackward0>)
tensor(5.8332e-05, grad_fn=<AddBackward0>)
tensor(0.0007, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0005, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(1.9828e-05, grad_fn=<AddBackward0>)
tensor(5.5406e-05, grad_fn=<AddBackward0>)
tensor(0.0001, grad_fn=<AddBackward0>)
tensor(0.0006, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0002, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tensor(0.0003, grad_fn=<AddBackward0>)
tenso

In [9]:
# Start-of-sequence input: a single row with a 1 in the second-to-last column,
# matching the first row produced by the string encoder.
start = torch.zeros(1, n_letters)
start[0, -2] = 1

In [10]:
# Free-running generation: begin from the start marker and a zero hidden
# state, then feed each raw network output back in as the next input.
# One character is emitted per step, for as many steps as the training
# string has characters.
with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start
    predicted = []
    for _ in range(len(string)):
        # Call the module itself rather than ``.forward`` so hooks run.
        output, hidden = rnn(input_, hidden)
        # Inside ``no_grad`` the output carries no graph, so it can be
        # decoded directly.
        predicted.append(onehot_to_word(output))
        input_ = output
    output_string = "".join(predicted)
    print(output_string)

hello pytorch. how longmcelroemehleoto ceoemben
