# Introduction
This notebook builds on the **./01-brigram** notebook.

In that notebook, a probability distribution based on bigram counts was used to generate name predictions.

In this notebook, a neural network approach will be used instead.

This notebook is based on this [video](https://youtu.be/PaCmpygFfXo?si=jlreZYqCOdqHNMwA&t=5177).

In [1]:
# Load data: read the names file and split it into one entry per line.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open('./data/names.txt', 'r') as f:
    words = f.read().splitlines()

len(words)

32033

In [2]:


# Vocabulary: every distinct character that occurs in the words, sorted.
chars = sorted(set(''.join(words)))

# Character -> index lookup. Indices start at 1 so that 0 stays free for '.'.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0

# Index -> character lookup (inverse of stoi).
itos = {idx: ch for ch, idx in stoi.items()}

## - Prepare Training Set

In [3]:
import torch

# Build the bigram training set: xs collects the index of each character,
# ys the index of the character that immediately follows it.
xs, ys = [], []

# Only the first word is used here.
for w in words[:1]:
    # Surround the word with '.' so the first and last transitions are included.
    chs = ['.', *w, '.']

    # Walk over every pair of consecutive characters.
    for ch1, ch2 in zip(chs[:-1], chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        print(ch1, ch2)

        xs.append(ix1)
        ys.append(ix2)

# Convert the collected index lists to tensors.
xs = torch.tensor(xs)
ys = torch.tensor(ys)


. e
e m
m m
m a
a .


In [4]:
# Display the encoded training pairs: input indices (xs) and target indices (ys).
xs, ys

(tensor([ 0,  5, 13, 13,  1]), tensor([ 5, 13, 13,  1,  0]))

## - Setup Neural Network

In [5]:
# Weights for 27 neurons, each receiving 27 inputs, drawn from a standard
# normal distribution. The seeded generator makes the draw reproducible, and
# requires_grad=True lets autograd compute gradients with respect to W.
g = torch.Generator().manual_seed(2147483647)

W = torch.randn(27, 27, generator=g, requires_grad=True)

In [52]:
import torch.nn.functional as F

# forward pass

# Input to the network: one-hot encoding of each input character index.
xenc = F.one_hot(xs, num_classes=27).float()

logits = xenc @ W      # Semantics: log-counts

counts = logits.exp()  # Semantics: counts, equivalent to N
# Normalize each row so it sums to 1: probability for next character.
probs = counts / counts.sum(1, keepdim=True)

# Average negative log likelihood of the correct next characters.
# The row index is derived from xs rather than hard-coded to 5, so this cell
# keeps working when the training set is built from more than one word.
loss = -probs[torch.arange(len(xs)), ys].log().mean()

In [53]:
# Display the scalar loss as a plain Python number.
loss.item()

3.5899956226348877

## - Calculate loss

The negative log likelihood is used here as the loss.

In [49]:
%%capture

nlls = torch.zeros(5)

for i in range(5):
    # i-th bigram:
    x = xs[i].item() # input character index
    y = ys[i].item() # label character index

    print('----------\n')
    print(f'bigram example {i+1}: {itos[x]}{itos[y]} (indexes {x}, {y})')
    print('input to the neural net: ', x)
    print('output probabilities from the neural net: ', probs[i])
    print('label (actual next character:)', y)
    p = probs[i, y]
    print('probability assigned by the net to the correct character', p.item())
    logp = torch.log(p)
    print('log likelihood:', logp.item())
    nll = -logp
    print('negative log likelihood:', nll.item())
    nlls[i] = nll

print('============')
print('average negative log likelihood, i.e. loss = ', nlls.mean().item())

In [50]:
# backward pass

# Clear any gradient left from a previous backward() call; backward()
# accumulates into W.grad, and None is treated as "no gradient yet".
W.grad = None # set to zero the gradient
# Backpropagate from the scalar loss; this populates W.grad.
loss.backward()

In [51]:
# Gradient-descent step with learning rate 0.1.
# The in-place update runs under torch.no_grad() so autograd does not record
# it; unlike writing through W.data, this keeps autograd's in-place version
# tracking intact.
with torch.no_grad():
    W -= 0.1 * W.grad