
Based on NLP From Scratch: Generating Names with a Character-Level RNN
*************************************************************
by: `Sean Robertson <https://github.com/spro/practical-pytorch>`_


In [11]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string
import random
from random import shuffle
import torch
import torch.nn as nn
from functools import reduce
from collections import Counter
import re
import numpy as np
%matplotlib inline

In [12]:


class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size,num_layers=1, bias=True, batch_first=False,
                dropout=0, bidirectional=False):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, 
                            num_layers=num_layers, bias=bias, batch_first=batch_first,
                           dropout=dropout, bidirectional=bidirectional)
#         self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
#         self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)

        self.o2o = nn.Linear(self.num_directions * hidden_size, output_size)
#         self.dropout = nn.Dropout(0.1)
#         self.softmax = nn.Softmax(dim=2)

    def forward(self, my_input, hidden):
#         input_combined = torch.cat((category, input, hidden), 1)
#         hidden = self.i2h(input_combined)
#         output = self.i2o(input_combined)
#         output_combined = torch.cat((hidden, output), 1)
#         output = self.o2o(output_combined)
#         output = self.dropout(output)
        output, hidden = self.lstm(my_input, hidden)
        output = self.o2o(output)
#         output = self.softmax(output)
        return output, hidden

    def initHidden(self, batch=1, device=torch.device('cpu')):
        return (torch.zeros(self.num_layers,1,self.hidden_size, device=device), torch.zeros(self.num_layers * self.num_directions,batch,self.hidden_size,device=device))
#         return torch.zeros(1, self.hidden_size)

In [13]:

def string_to_chars(original):
    raw_lines = original.splitlines()

    def proc_line(l): return [l] if l == '<start>' or l == '<end>' else list(l)
    chars = reduce(lambda x, y : x + ['\n'] + y, list(map(proc_line, raw_lines)))
    assert original == ''.join(chars)
    return chars

def make_encoder(raw):
    chars = string_to_chars(raw)
    decoder = [c for c in Counter(chars)]
    decoder = sorted(decoder)
    return {c:i for i, c in enumerate(decoder)}, decoder

def split_songs(raw):
    return re.findall('(<start>.*?<end>)',raw,flags=re.DOTALL)

# train_songs = list(map(string_to_chars, split_songs(raw_train)))

# reconstructed = '\n'.join(map(''.join, train_songs))
# encoder, decoder = make_encoder(raw_train)
# n_chars = len(decoder)

def song_to_tensor_chunks(song,encoder, decoder):
    training_chunks = []
    target_chunks = []
    for i in range(0,len(song),100):
        start = i
        if start+100 > len(song) - 1:
            end = len(song)
            target_range = list(np.arange(start + 1, end,dtype=np.int)) + [int(end - 1)]
        else:
            end = start + 100
            target_range = np.arange(start+1, end+1, dtype=np.int)
        chunk = torch.zeros(end-start, 1, n_chars)
        for i, c in enumerate(song[start:end]):
            chunk[i,0,encoder[c]] = 1
        target_chunk = []
        for i in target_range:
            target_chunk.append(encoder[song[i]])
        target_chunks.append(torch.tensor(target_chunk))
        training_chunks.append(chunk)
    return training_chunks, target_chunks


In [14]:
raw_train = open('train.txt').read()
train_songs = list(map(string_to_chars, split_songs(raw_train)))

reconstructed = '\n'.join(map(''.join, train_songs))
encoder, decoder = make_encoder(raw_train)
n_chars = len(decoder)
val_songs = list(map(string_to_chars, split_songs(open('val.txt').read())))
test_songs = list(map(string_to_chars, split_songs(open('test.txt').read())))
assert reconstructed == raw_train


For convenience during training we'll make a ``randomTrainingExample``
function that fetches a random (category, line) pair and turns them into
the required (category, input, target) tensors.




Training the Network
--------------------

In contrast to classification, where only the last output is used, we
are making a prediction at every step, so we are calculating loss at
every step.

The magic of autograd allows you to simply sum these losses at each step
and call backward at the end.




In [15]:
criterion = nn.CrossEntropyLoss()

def train(song,optimizer=None, device=torch.device('cpu')):
    train_chunks, target_chunks = song_to_tensor_chunks(song, encoder,decoder)
    hidden = rnn.initHidden(device=device)
    rnn.zero_grad()

    loss = 0

    for input_line_tensor, target_line_tensor in zip(train_chunks,target_chunks):
        target_line_tensor.unsqueeze_(-1)
        for i in range(input_line_tensor.size(0)):
            output, hidden = rnn(input_line_tensor[i].view((1,1,n_chars)).to(device), hidden)
    #         print(output)
            l = criterion(output.view((1,n_chars)), target_line_tensor[i].to(device))
            loss += l / input_line_tensor.size(0)

    loss.backward()
    optimizer.step()

    return output, loss.item() / len(train_chunks)
def test(rnn, song, device=torch.device('cpu')):
    train_chunks, target_chunks = song_to_tensor_chunks(song, encoder,decoder)
    hidden = rnn.initHidden(device=device)
    loss = 0
    for input_line_tensor, target_line_tensor in zip(train_chunks,target_chunks):
        target_line_tensor.unsqueeze_(-1)
        output, hidden = rnn(input_line_tensor.to(device), hidden)
        seq_len,_ =target_line_tensor.size()
        l = criterion(output.view((seq_len,n_chars)), target_line_tensor.view((seq_len)).to(device))
        loss += l
    return output, loss.item() / len(train_chunks)

In [16]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [17]:
# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else: # Otherwise, train on the CPU
    device = torch.device("cpu")
    extras = False
    print("CUDA NOT supported")

CUDA is supported


In [26]:
rnn = RNN(n_chars, 128, n_chars).to(device)
rnn.load_state_dict(torch.load('rnn_9.pt'))
rnn = rnn.to(device)
train_loss = list(map(float,(open('train_loss_9.csv').read().splitlines())))
val_loss = list(map(float,(open('val_loss_9.csv').read().splitlines())))
print(len(train_loss))
start = len(train_loss)

optimizer = torch.optim.Adam(rnn.parameters())

10


In [None]:
# rnn = RNN(n_chars, 128, n_chars).to(device)
learning_rate = 0.05
# optimizer = torch.optim.Adam(rnn.parameters())
n_iters = 10000
print_every = 80
plot_every = 10
# train_loss = []
# val_loss = []
total_loss = 0 # Reset every plot_every iters
n_epochs = 20
start = time.time()

for epoch in range(int(start),int(start+n_epochs)):
    shuffle(train_songs)
    total_loss = 0
    for i, song in enumerate(train_songs):
        output, loss = train(song,optimizer=optimizer, device=device)
        total_loss += loss
        if i % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), i, i / len(train_songs) * 100, loss))


    train_loss.append(total_loss / len(train_songs))
    with torch.no_grad():
        total_loss = 0
        for song in test_songs:
            _, loss = test(rnn, song, device=device)
            total_loss += loss
        val_loss.append(total_loss / len(test_songs))
    torch.save(rnn.state_dict(),f'rnn_{epoch}.pt')
    with open(f'train_loss_{epoch}.csv', 'w+') as out:
        out.write('\n'.join(map(str,train_loss)))

    with open(f'val_loss_{epoch}.csv', 'w+') as out:
        out.write('\n'.join(map(str,val_loss)))
    print('Epoch %d %s val loss: %.4f train loss: %.4f\n' % (epoch,timeSince(start), val_loss[-1], train_loss[-1]))   
        

0m 0s (0 0%) 1.8987
0m 30s (80 9%) 1.4939
0m 59s (160 19%) 1.7172
1m 30s (240 29%) 1.5937
1m 59s (320 39%) 1.3314
2m 32s (400 49%) 1.4011
3m 3s (480 59%) 0.8306
3m 32s (560 69%) 1.3003
4m 8s (640 79%) 1.3434
6m 21s (160 19%) 1.1020
6m 53s (240 29%) 1.1515
7m 23s (320 39%) 1.6751
8m 0s (400 49%) 1.4564
8m 30s (480 59%) 1.5474
9m 1s (560 69%) 1.8963
9m 35s (640 79%) 1.3625
10m 5s (720 89%) 1.3778
10m 37s (800 99%) 1.2993
Epoch 1574735852 10m 40s val loss: 1.6241 train loss: 1.3659

10m 40s (0 0%) 1.3699
11m 14s (80 9%) 1.0825
11m 46s (160 19%) 1.2589
12m 15s (240 29%) 1.4794
12m 49s (320 39%) 1.8845
13m 20s (400 49%) 1.2653
13m 57s (480 59%) 0.7129
14m 31s (560 69%) 1.6904
14m 59s (640 79%) 1.1877
15m 28s (720 89%) 1.3356
15m 55s (800 99%) 1.1414
Epoch 1574735853 15m 57s val loss: 1.6071 train loss: 1.3338

15m 57s (0 0%) 1.8355
16m 30s (80 9%) 1.8466
17m 0s (160 19%) 1.7612
17m 36s (240 29%) 1.9991
18m 6s (320 39%) 1.1711
18m 36s (400 49%) 0.6791
19m 6s (480 59%) 2.0664
19m 37s (560 69%

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
offset = 0
print(train_loss)
print(val_loss)
with open('train_loss_{offset}.csv', 'w+') as out:
    out.write('\n'.join(map(str,train_loss)))

with open('val_loss_{offset}.csv', 'w+') as out:
    out.write('\n'.join(map(str,val_loss)))
plt.figure()
plt.plot(train_loss)
plt.plot(val_loss)

In [30]:

def sample():
    with torch.no_grad():
        input_tensor_chunk, _ = song_to_tensor_chunks(['<start>'], encoder, decoder)
        input_tensor = input_tensor_chunk[0].to(device)
        hidden = rnn.initHidden(device=device)
        prev_char_raw, hidden =rnn(input_tensor,hidden) 
        prev_char_probs = torch.softmax(prev_char_raw, 2, prev_char_raw.dtype).view((n_chars))
        output = '<start>'
        while True:
            selection = np.random.random()
            running_prob = 0
            for i, prob in enumerate(prev_char_probs):
                running_prob += prob
                if running_prob > selection:
                    selected_char = decoder[i]
                    output += selected_char
                    input_tensor_chunk, _ = song_to_tensor_chunks([selected_char], encoder, decoder)
                    input_tensor = input_tensor_chunk[0].to(device)
                    break
            if selected_char == '<end>':
                break
            else:
                prev_char_raw, hidden =rnn(input_tensor,hidden) 
                prev_char_probs = torch.softmax(prev_char_raw, 2, prev_char_raw.dtype).view((n_chars))
        return output

print(sample())
# # Sample from a category and starting letter
# def sample(category, start_letter='A'):
#     with torch.no_grad():  # no need to track history in sampling
#         category_tensor = categoryTensor(category)
#         input = inputTensor(start_letter)
#         hidden = rnn.initHidden()

#         output_name = start_letter

#         for i in range(max_length):
#             output, hidden = rnn(category_tensor, input[0], hidden)
#             topv, topi = output.topk(1)
#             topi = topi[0][0]
#             if topi == n_letters - 1:
#                 break
#             else:
#                 letter = all_letters[topi]
#                 output_name += letter
#             input = inputTensor(letter)

#         return output_name

# # Get multiple samples from one category and multiple starting letters
# def samples(category, start_letters='ABC'):
#     for start_letter in start_letters:
#         print(sample(category, start_letter))

# samples('Russian', 'RUS')

# samples('German', 'GER')

# samples('Spanish', 'SPA')

# samples('Chinese', 'CHI')

'<start>\nX:29\nT:Maudro Samem\nM:4/8\nL:1/4\nK:Gm\nGc | cB AB | AF Ac |\nA2 Bc | {d}d3 fe | cB cA | BA BA | F/G/A/B/ cA | \ne2 B | A2 cB :| B2 Bd | cB BA | BB BA | G2 GB |\nAB cB | Ac EA| BA Ac | B2 ef/e/ | dB AF | D2 F2 ||\n<end>'

In [31]:
print('<start>\nX:29\nT:Maudro Samem\nM:4/8\nL:1/4\nK:Gm\nGc | cB AB | AF Ac |\nA2 Bc | {d}d3 fe | cB cA | BA BA | F/G/A/B/ cA | \ne2 B | A2 cB :| B2 Bd | cB BA | BB BA | G2 GB |\nAB cB | Ac EA| BA Ac | B2 ef/e/ | dB AF | D2 F2 ||\n<end>')

<start>
X:29
T:Maudro Samem
M:4/8
L:1/4
K:Gm
Gc | cB AB | AF Ac |
A2 Bc | {d}d3 fe | cB cA | BA BA | F/G/A/B/ cA | 
e2 B | A2 cB :| B2 Bd | cB BA | BB BA | G2 GB |
AB cB | Ac EA| BA Ac | B2 ef/e/ | dB AF | D2 F2 ||
<end>
