In [1]:
import torch
import torch.nn as nn
import numpy as np
import os
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm, trange

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open("./data/names.txt", "r") as f:
    data = f.readlines()

names = [each.lower().strip() for each in data]

names_padded = []

name_lens = [len(each) for each in names]

max_len = max(name_lens)

for name in names:
    names_padded.append(list(name) + ["<EOF>"] + ["*"] * (max_len - len(name)))

vocab = {}

idx=0

for name in names_padded:
    for char in name:
        if char not in vocab:
            vocab[char] = idx
            idx+=1

targets = [each[1:] +["*"] for each in names_padded]


tensors_from_names = torch.Tensor([[vocab[char] for char in name] for name in names_padded]).type(torch.uint8)
tensors_from_targets = torch.Tensor([[vocab[char] for char in name] for name in targets]).type(torch.uint8)

print(tensors_from_names)
print(tensors_from_targets)



tensor([[ 0,  1,  2,  ...,  8,  8,  8],
        [ 2,  3,  9,  ...,  8,  8,  8],
        [14,  5, 10,  ...,  8,  8,  8],
        ...,
        [ 4,  4,  9,  ...,  8,  8,  8],
        [ 4,  4,  0,  ...,  8,  8,  8],
        [ 4,  4,  0,  ...,  8,  8,  8]], dtype=torch.uint8)
tensor([[ 1,  2,  3,  ...,  8,  8,  8],
        [ 3,  9,  1,  ...,  8,  8,  8],
        [ 5, 10, 10,  ...,  8,  8,  8],
        ...,
        [ 4,  9,  1,  ...,  8,  8,  8],
        [ 4,  0,  1,  ...,  8,  8,  8],
        [ 4,  0,  4,  ...,  8,  8,  8]], dtype=torch.uint8)


In [3]:
class Generator(nn.Module):
    def __init__(self, hidden_size, num_layers, vocab):
        super(Generator, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.emb = nn.Embedding(len(vocab), self.hidden_size, padding_idx=vocab["*"])
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, num_layers=self.num_layers, bias=True, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, len(vocab))
        self.relu = nn.ReLU()

    def forward(self, X, h):
        X = self.emb(X)
        X = self.relu(X)

        X, h = self.gru(X, h)
        X = self.relu(X)

        X = self.fc(X)
        return X, h
    
    def init_hidden(self, bs):
        return torch.zeros(self.num_layers, bs, self.hidden_size)

In [4]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, src, tgt):
        self.src = src
        self.tgt = tgt

    def __len__(self):
        return len(self.src)
    def __getitem__(self, idx):
        return self.src[idx], self.tgt[idx]



In [5]:
device = torch.device("mps") if (torch.backends.mps.is_available() and torch.backends.mps.is_built() ) else "cpu"
print(device)
batch_size = 128
dataset =  CustomDataset(tensors_from_names, tensors_from_targets)
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

gen = Generator(256, 2, vocab)
gen = gen.to(device)

optim = torch.optim.Adam(gen.parameters(), lr=3e-4)
loss_fn = nn.CrossEntropyLoss()
losses = []

mps


In [6]:
for ep in range(10):
    h = gen.init_hidden(batch_size).to(device)
    for src, tgt in tqdm(loader):
        if src.shape[0] != batch_size:
            continue
        src = src.to(device).int()
        tgt = tgt.to(device).int()
        preds, h = gen(src, h)
        h = h.detach()
        

        loss = loss_fn(preds.permute(0,2,1), tgt)
        losses.append(loss.item())
        loss.backward()
        optim.step()
