In [10]:
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
with open('names.txt', 'r') as f:
    names = f.read()

In [3]:
names = names.split()

In [4]:
names_original = names.copy()

In [5]:
bigram_to_idx = {}
idx_to_bigram = {}
char_to_idx = {}
idx_to_char = {}

In [6]:
import string

letters = ['.'] + list(string.ascii_lowercase)
bigrams = [a + b for a in letters for b in letters]
bigram_to_idx = {bigram: index for index, bigram in enumerate(bigrams)}
char_to_idx = {char: index for index, char in enumerate(letters)}
idx_to_bigram = {index: bigram for index, bigram in enumerate(bigrams)}
idx_to_char = {index: char for index, char in enumerate(letters)}

BIGRAM_SIZE = len(bigram_to_idx)
CHAR_SIZE = len(char_to_idx)

In [7]:
names = ['.' + name + '.' for name in names]

In [8]:
def get_sample(print_name=False):
    sample = random.choice(names)
    if print_name:
        print(sample)
    X, y = [], []
    for ch, ch_nxt, ch_ans in zip(sample, sample[1:], sample[2:]):
        bigram = ch + ch_nxt
        bigram = bigram_to_idx[bigram]
        bigram = torch.tensor(bigram)
        sample_x = F.one_hot(bigram, BIGRAM_SIZE).float()
        X.append(sample_x)

        ans = char_to_idx[ch_ans]
        ans = torch.tensor(ans)
        sample_y = F.one_hot(ans, CHAR_SIZE).float()
        y.append(sample_y)
    return X, y

In [11]:
get_sample(1)

.aletta.


([tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0.

In [13]:
class TrigramModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(BIGRAM_SIZE, CHAR_SIZE)

    def forward(self, x):
        x = self.fc(x)
        return F.softmax(x, dim=0)

In [26]:
def train_loop(iters=10001):
    for iter_num in range(iters):
        X, y = get_sample()
        avg_loss = 0
        for sample_x, sample_true in zip(X, y):
            probs = model(sample_x)
            loss = criterion(probs, sample_true)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            avg_loss += loss

        avg_loss /= len(X)

        if iter_num % 200 == 0:
            print(f'{iter_num}: {avg_loss:4f}')
    

In [27]:
model = TrigramModel()
optimizer = optim.Adam(model.parameters(), lr=1, weight_decay=1e-2)  # weight_decay is the L2 regularization term
criterion = nn.CrossEntropyLoss()

In [28]:
train_loop()

0: 3.295234
200: 3.334779
400: 3.328444
600: 3.324771
800: 3.291585
1000: 3.261901
1200: 3.335888
1400: 3.351156
1600: 3.269454
1800: 3.338378
2000: 3.336837
2200: 3.337798
2400: 3.317878
2600: 3.328912
2800: 3.334175
3000: 3.335775
3200: 3.188842
3400: 3.074164
3600: 3.317640
3800: 3.342550
4000: 3.137852
4200: 3.347672
4400: 3.318719
4600: 3.352592
4800: 3.299788
5000: 3.245617
5200: 3.328977
5400: 3.188055
5600: 3.357097
5800: 3.346865
6000: 3.353758
6200: 3.343013
6400: 3.341779
6600: 3.344923
6800: 3.335197
7000: 3.278991
7200: 3.117842
7400: 3.331012
7600: 3.336889
7800: 3.346097
8000: 3.336114
8200: 3.268456
8400: 3.338760
8600: 3.332273
8800: 3.116630
9000: 3.324900
9200: 3.355831
9400: 3.288146
9600: 3.341613
9800: 3.285289
10000: 3.265465


In [19]:
def generate_name():
    ch1 = '.'
    ch2 = random.choice(list(string.ascii_lowercase))
    generated_name = [ch1, ch2]
    
    while True:
        bigram = ''.join(generated_name[-2:])
        bigram = bigram_to_idx[bigram]
        bigram = torch.tensor(bigram)
        sample_x = F.one_hot(bigram, BIGRAM_SIZE).float()
        
        probs = model(sample_x)
        pred = torch.multinomial(probs, num_samples=1)
        pred_chr = idx_to_char[pred[0].item()]
    
        if pred_chr == '.':
            return ''.join(generated_name)
        else:
            generated_name.append(pred_chr)

In [24]:
generate_name()

'.tzzhhiygzvlmoiklxenfasrtgvndlyinffn'