In [56]:
import requests
import zipfile
import os

data_url = "https://download.pytorch.org/tutorial/data.zip"
dir_path = "./data"
zip_file_name = "data.zip"
zip_file_path = os.path.join(dir_path, zip_file_name)

if not os.path.exists(dir_path):
  os.mkdir(dir_path)

with open(zip_file_path, "wb") as f:
  f.write(requests.get(data_url).content)

with zipfile.ZipFile(zip_file_path) as z:
  z.extractall()

In [57]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os

def findFiles(path): return glob.glob(path)

print(findFiles('data/names/*.txt'))

import unicodedata
import string

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []

# Read a file and split into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

for category, lines in category_lines.items():
  print('%s %d' % (category, len(lines)))

print(category_lines['Italian'][:5])

['data/names/French.txt', 'data/names/Czech.txt', 'data/names/Portuguese.txt', 'data/names/Vietnamese.txt', 'data/names/Greek.txt', 'data/names/Spanish.txt', 'data/names/Italian.txt', 'data/names/Chinese.txt', 'data/names/German.txt', 'data/names/Irish.txt', 'data/names/Dutch.txt', 'data/names/Arabic.txt', 'data/names/Russian.txt', 'data/names/Scottish.txt', 'data/names/English.txt', 'data/names/Polish.txt', 'data/names/Japanese.txt', 'data/names/Korean.txt']
Slusarski
French 277
Czech 519
Portuguese 74
Vietnamese 73
Greek 203
Spanish 298
Italian 709
Chinese 268
German 724
Irish 232
Dutch 297
Arabic 2000
Russian 9408
Scottish 100
English 3668
Polish 139
Japanese 991
Korean 94
['Abandonato', 'Abatangelo', 'Abatantuono', 'Abate', 'Abategiovanni']


In [58]:
import torch

# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    return all_letters.find(letter)

print(all_letters)
print(letterToIndex("J"))


# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letterToIndex(letter)] = 1
    return tensor

print(letterToTensor("J"))

def lineToTensor(line):
  tensor = torch.zeros(len(line), 1, n_letters)
  for i, letter in enumerate(line):
    tensor[i][0][letterToIndex(letter)] = 1
  return tensor

print(lineToTensor("Jiang").shape)

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,;'
35
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])
torch.Size([5, 1, 57])


In [59]:
import torch.nn as nn


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_hidden = 128

lstm = nn.LSTM(input_size=n_letters, hidden_size=n_hidden).to(device)

In [67]:
from torch.nn.utils.rnn import pack_sequence

input1 = lineToTensor('Jiang').to(device)
input2 = lineToTensor('William').to(device)

print("input1.shape:", input1.shape)
# print(all_categories[output.topk(1)[1].item()])

h0 = torch.randn(1, 2, n_hidden).to(device)  # num_layers*num_directions, batch, hidden_size
c0 = torch.randn(1, 2, n_hidden).to(device)  # num_layers*num_directions, batch, hidden_size
print(lstm)
packed_input = pack_sequence([input1.squeeze(1), input2.squeeze(1)], enforce_sorted=False)
print("packed_input:", packed_input)
output, (h, c) = lstm(packed_input, (h0, c0))
print("output:", output.data)

input1.shape: torch.Size([5, 1, 57])
LSTM(57, 128)
packed_input: PackedSequence(data=tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.

In [54]:
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

import random

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def random_sample(batch_size):
    line_tensors = []
    label_tensors = []
    for i in range(batch_size):
        language = randomChoice(all_categories)  # sample a language
        line = randomChoice(category_lines[language])  # sample from the target language
        label_tensors.append(torch.tensor(all_categories.index(language), dtype=torch.long))
        line_tensors.append(lineToTensor(line).squeeze(1).to(device))
    return line_tensors, label_tensors
        

def train(model, n_hidden, criterion, n_iter, batch_size):
    
    losses = []
    # init hidden and cell
    h = torch.zeros(1, batch_size, n_hidden).to(device)
    c = torch.zeros(1, batch_size, n_hidden).to(device)
    
    for it in range(n_iter):
        model.zero_grad()
        # randomly sample training data and the labels
        line_tensors, label_tensors = random_sample(batch_size)
        # feed to lstm
        output, (h, c) = lstm(pack_sequence(line_tensors), (h, c))
        print(output)
        # calculate the loss
        loss = criterion(output, label_tensors)
        # backward
        loss.backward()
        # step
        for p in lstm.parameters():
            p.data.add_(p.grad.data, alpha=-learning_rate)
        # output and loss
        losses.append(loss.item())
    return losses
    

In [55]:
criterion = nn.NLLLoss()

n_iter = 1
batch_size = 1
lstm = nn.LSTM(input_size=n_letters, hidden_size=n_hidden).to(device)

train(lstm, n_hidden, criterion, n_iter, batch_size)

PackedSequence(data=tensor([[-3.2806e-02,  1.8034e-02, -7.9790e-03, -2.0515e-03,  4.9728e-03,
          1.7588e-02,  1.0416e-02,  2.3151e-02,  4.9440e-02, -3.8207e-02,
         -9.2639e-03,  5.5615e-03,  4.9893e-02,  1.9164e-02, -2.4661e-02,
         -8.1042e-04,  3.3157e-02, -2.3655e-02,  8.1463e-03, -1.5489e-02,
          1.5290e-02,  3.1640e-02, -8.9267e-03,  1.3939e-02, -1.0499e-02,
          1.1536e-02, -3.8022e-02, -2.3350e-03, -2.1018e-02,  1.5807e-03,
         -6.5252e-03, -2.3276e-02,  4.9530e-03, -1.5157e-02,  2.4052e-02,
         -3.4719e-02, -1.8018e-02,  2.0408e-02,  5.1119e-02,  1.1064e-02,
         -4.8472e-02,  3.2297e-02, -1.0744e-02, -6.8858e-03,  2.7669e-02,
         -2.3368e-03,  1.0422e-02, -2.1053e-02,  2.3932e-02,  1.4584e-02,
          2.1264e-02, -2.5557e-02, -2.2317e-02,  1.8432e-02,  1.8573e-02,
          4.6148e-02, -3.1822e-02,  1.4348e-03, -1.7959e-02, -7.8263e-03,
         -2.3215e-02, -2.3061e-02,  5.5196e-03,  1.6678e-02,  1.5534e-02,
          6.9284e-

AttributeError: 'PackedSequence' object has no attribute 'dim'