In [1]:
import os
from os.path import join as jn

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

# Pick the compute device once: CUDA when PyTorch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:

# ---------------------------- Hyperparameters ----------------------------
# Model
hidden_size = 512   # width of the LSTM hidden state
num_layers = 3      # number of stacked LSTM layers

# Training
seq_len = 100       # characters per training chunk fed to the LSTM
lr = 0.002          # Adam learning rate
epochs = 100        # upper bound on the number of passes over the corpus

# Sampling
op_seq_len = 200    # target number of characters in a generated sample

# Files
# NOTE(review): load_chk is meant to resume training from save_path, but no
# visible cell reads it - confirm whether that is intentional.
load_chk = False
save_path = jn("models", "CharRNN_shakespeare.pth")   # checkpoint file
data_path = jn("data", "yarik.txt")                   # training corpus
# -------------------------------------------------------------------------



In [6]:
# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the text is read instead of leaving it to the garbage
# collector (the kernel may keep running for a long time).
with open(data_path, 'r', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

# The vocabulary is every distinct character in the corpus; sorting makes the
# character order (and therefore the index assignment built from it) the same
# on every run.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)

print("Found {} chars total, {} are unique".format(data_size, vocab_size))



Found 392920 chars total, 131 are unique


In [7]:
# Vocabulary lookup tables: integer id -> character, and the inverse mapping.
ix_to_char = dict(enumerate(chars))
char_to_ix = {symbol: ix for ix, symbol in ix_to_char.items()}

In [8]:
# Encode the corpus: swap every character for its integer vocabulary id,
# producing a plain Python list of ints.
data = [char_to_ix[symbol] for symbol in data]
    



In [9]:
# Turn the encoded corpus into a 1-D tensor that lives on the selected device.
data = torch.tensor(data, device=device)

In [10]:
class RNN(nn.Module):
    """Token-level sequence model: embedding -> stacked LSTM -> linear decoder.

    Args:
        input_size: number of distinct input tokens; also used as the
            embedding width, so each token maps to an ``input_size``-long vector.
        output_size: number of scores produced for every time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=input_size)
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers)
        self.decoder = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, input_seq, hidden_state):
        """Run one chunk through the network.

        Args:
            input_seq: tensor of integer token ids (looked up in the embedding).
            hidden_state: ``(h, c)`` tuple from a previous call, or ``None`` to
                let the LSTM start from its default initial state.

        Returns:
            ``(scores, (h, c))`` where ``scores`` holds ``output_size`` raw
            decoder outputs per time step and ``h``/``c`` are the final LSTM
            states, detached from the autograd graph so that a later backward
            pass does not reach into this call.
        """
        lstm_out, (h_n, c_n) = self.rnn(self.embedding(input_seq), hidden_state)
        scores = self.decoder(lstm_out)
        return scores, (h_n.detach(), c_n.detach())

In [11]:
# Build the model and move its parameters to the same device as `data`.
# Without `.to(device)` the weights stay on the CPU, and the first forward
# pass on a CUDA machine fails with a device-mismatch error.
rnn = RNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)

# Honour the `load_chk` flag: resume from the checkpoint the training cell
# writes to `save_path`. `map_location` lets a checkpoint saved on a GPU be
# loaded on a CPU-only machine and vice versa.
if load_chk:
    rnn.load_state_dict(torch.load(save_path, map_location=device))

In [12]:
# Adam updates every model parameter; cross-entropy scores the decoder's raw
# outputs against the integer target ids.
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()

In [13]:
# Train the model on consecutive seq_len-character chunks of the corpus.
# The recurrent state is carried from chunk to chunk within an epoch;
# RNN.forward returns it detached, so each backward pass covers only the
# current chunk.

# torch.save does not create missing directories. Create the checkpoint folder
# up front so a long epoch is not lost to a failed save at its very end.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

rnn.train()

for epoch in range(1, epochs+1):
    # Begin at a random offset inside the first chunk so that chunk boundaries
    # differ from epoch to epoch.
    data_ptr = np.random.randint(seq_len)

    n = 0                # number of chunks processed this epoch
    running_loss = 0
    hidden_state = None  # fresh recurrent state at the start of every epoch

    # Only process chunks for which a full target (the input shifted by one
    # character) exists; a too-short corpus therefore skips the loop instead
    # of failing on mismatched input/target lengths.
    while data_ptr + seq_len + 1 <= data_size:
        print(data_ptr / data_size, end='\r')

        input_seq = data[data_ptr : data_ptr+seq_len]
        target_seq = data[data_ptr+1 : data_ptr+seq_len+1]

        # forward pass
        output, hidden_state = rnn(input_seq, hidden_state)

        # output has one row of scores per character and target_seq one id per
        # character, which is the layout CrossEntropyLoss expects. No squeeze
        # is needed (and squeezing would break for a one-symbol vocabulary).
        loss = loss_fn(output, target_seq)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        data_ptr += seq_len
        n += 1

    if n == 0:
        raise ValueError(
            "corpus is too short for seq_len={}: no training chunk fits".format(seq_len)
        )

    print(f"Epoch: {epoch} ; Loss: {running_loss/n:.2f}")
    torch.save(rnn.state_dict(), save_path)
    
    

Epoch: 1 ; Loss: 2.207
0.0107248294818283625

KeyboardInterrupt: 

In [14]:
# Generate op_seq_len characters from the model, seeded with one random
# character of the corpus. Every sampled character is fed back as the next input.
rnn.eval()

data_ptr = 0          # number of characters generated so far
hidden_state = None
rand_index = np.random.randint(data_size-1)
# Slicing returns a view that shares memory with `data`; clone it so that
# writing the sampled character into input_seq below does not overwrite the
# training corpus.
input_seq = data[rand_index:rand_index+1].clone()
print(data_ptr, op_seq_len)

# No gradients are needed while sampling, so skip building the autograd graph.
with torch.no_grad():
    # `<` rather than a post-increment `>` check: emit exactly op_seq_len
    # characters (and none at all when op_seq_len is 0).
    while data_ptr < op_seq_len:
        output, hidden_state = rnn(input_seq, hidden_state)

        # Sample the next character id from the distribution defined by the
        # scores of the (single) time step; Categorical applies the softmax.
        dist = Categorical(logits=output[-1])
        next_ix = dist.sample().item()

        print(ix_to_char[next_ix], end='')

        input_seq[0] = next_ix
        data_ptr += 1

0 200
ареньших, минтерь напоитяет таких, был жепереским мне стоялся какогда питалию, война, идидичца чувства), чтажий своей)ю-диб,, что (о, дальше противую. “Больше не заметь двирателений и сказался в мастер