In [3]:
import torch
import torch.nn as nn
import numpy as np
import os
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
print(torch.__version__)


1.13.0


In [4]:
# working implementation taken from my rnn_warmup notebook in vec2beat


all_chars = "abcdefghijklmnopqrstuvwxyz*$"
print(all_chars.index("*"))

with open("./data/names.txt") as f:
    data = f.readlines()

total_lens = [len(each.lower().strip()) for each in data]
data_tensors_list = [torch.Tensor([all_chars.index(char) for char in each.lower().strip()]).int() for each in data]

data_tensors_padded = torch.nn.utils.rnn.pad_sequence(data_tensors_list, batch_first=True, padding_value=26)

26


In [5]:
class CustomSet(torch.utils.data.Dataset):
    def __init__(self, src, lens):
        super(CustomSet, self).__init__()

        self.src = src
        self.lens = lens
        self.tgt = torch.zeros_like(self.src)
        self.tgt[:,:-1] = self.src[:, 1:]
        self.tgt[:, -1] = 26
        for ix, each in enumerate(self.lens):
            self.tgt[ix, each-1] = 27
    def __len__(self):
        return len(self.src)
    def __getitem__(self, idx):
        return self.src[idx], self.tgt[idx], self.lens[idx]

In [6]:
class Net(nn.Module):
    def __init__(self, in_size, h_size, out_size, n_layers,  device, dropout=0.5 ):
        super(Net, self).__init__()
        self.in_size = in_size
        self.h_size = h_size
        self.out_size = out_size
        self.n_layers = n_layers
        self.device = device
        self.dropout=dropout

        self.embed = nn.Embedding(in_size, h_size, padding_idx=26)
        self.gru = nn.GRU(h_size, h_size, num_layers=2, batch_first =True, dropout=dropout)
        self.fc = nn.Linear(h_size, out_features=out_size)

    def forward(self, X, lens, h=None):
        X = self.embed(X)
        if h is None:
            X = torch.nn.utils.rnn.pack_padded_sequence(X, lens, batch_first=True, enforce_sorted=False)
            X, h = self.gru(X)
            X, _ = torch.nn.utils.rnn.pad_packed_sequence(X, batch_first=True, padding_value=0, total_length=15)
        else:
            X, h = self.gru(X, h)
        # X = F.softmax(X)
        X = self.fc(X)
        return X, h
    
    def generate(self, start="a", len_gen=5):
        out = start
        h = torch.zeros(self.n_layers, self.h_size)
        print(h.shape)
        X = torch.Tensor([all_chars.index(start)]).long().to(self.device)
        print(X.shape)
        for p in range(len_gen):
            preds, h = self.forward(X, None, h)
            # print("PReds",preds.shape)
            dist = F.softmax(preds, dim=1)
            # print("DIst", dist.shape)
            chosen = torch.multinomial(dist, 1)[0]
            # print(chosen.shape)
            X = chosen
            out += all_chars[chosen.item()]
        return out.split("$")[0] if "$" in out else out




        


In [7]:
dset = CustomSet(data_tensors_padded, total_lens)
loader = torch.utils.data.DataLoader(dset, batch_size=128, shuffle=True)
device = torch.device("cpu")


In [8]:
model = Net(len(all_chars), 256, len(all_chars), 2, device).to(device)
optim = torch.optim.Adam(model.parameters(), lr=0.0003)

for ep in (pbar:=tqdm(range(50))):
    for ix, (src, tgt, lens) in enumerate((loader)):
        optim.zero_grad()
        src = src.to(device).long()
        tgt = tgt.to(device).long()

        preds, _ = model(src, lens)

        loss = F.cross_entropy(preds.permute(0,2,1),tgt, ignore_index=26)
        loss.backward()
        for param in model.parameters():
            param.grad.data.clamp(-1,1)

        optim.step()
        pbar.set_description(f"Ep: {ep} {ix}/{len(loader)}Loss: {loss.item()}")


Ep: 49 142/143Loss: 1.5205427408218384: 100%|██████████| 50/50 [05:14<00:00,  6.28s/it]


In [9]:
model.eval()
model.generate("z", 9)

torch.Size([2, 256])
torch.Size([1])


'zacharia'