# Generating names using an LSTM

### Resources
#### Video
- https://www.youtube.com/watch?v=WujVlF_6h5A

#### Code
- https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Projects/text_generation_babynames/generating_names.py

In [1]:
import torch
from torch import nn, optim
# from 

In [2]:
# Pick the compute device. `is_available` has to be *called*: the bare
# function object is always truthy, which would select "cuda" even on a
# machine without a usable GPU.
a_device = "cuda" if torch.cuda.is_available() else "cpu"
print(a_device)
device = torch.device(a_device)
device

cuda


device(type='cuda')

In [3]:
import sys
import string
import random
import unidecode

In [4]:
# Vocabulary of the model: every printable ASCII character.
all_characters = string.printable
# Vocabulary size, used for both the embedding input and the output logits.
n_characters = len(string.printable)

In [5]:
all_characters

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [6]:
n_characters

100

In [7]:
# Load the whole corpus into memory. The `with` block closes the file handle
# deterministically instead of leaving it to the garbage collector.
# unidecode transliterates non-ASCII characters to ASCII so that every
# character can be looked up in `all_characters`.
with open("./data/names.txt") as names_fh:
    names_files = unidecode.unidecode(names_fh.read())

In [8]:
names_files[:5]

'Mary\n'

In [9]:
len(names_files)/256

28529.015625

## Let's break down the video
- creates the RNN model, but its tensor shapes do not match the ones from my previous examples
- creates a generator class that can
     - get a batch of data
     - train the RNN model
     - generate names

In [300]:
class RNN(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear.

    Args:
        vocab_size: number of distinct input symbols (rows of the embedding).
        embedding_size: width of each embedding vector.
        hidden_size: width of the LSTM hidden and cell states.
        num_layers: number of stacked LSTM layers.
        output_size: number of logits produced per time step.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.embedding_size = embedding_size
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        self.embedding_layer = nn.Embedding(self.vocab_size, self.embedding_size)
        # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.num_layers, batch_first=True)
        # Projects every LSTM output vector onto the output logits.
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden, cell):
        """Run one (possibly multi-step) pass through the network.

        Args:
            x: integer tensor of symbol indices, shape (batch, seq_len).
            hidden: LSTM hidden state, shape (num_layers, batch, hidden_size).
            cell: LSTM cell state, same shape as ``hidden``.

        Returns:
            ``(logits, (hidden, cell))``. ``logits`` has shape
            (batch * seq_len, output_size), flattened batch-major. The
            returned states are detached from the autograd graph so they can
            be carried into a later call without back-propagating through
            this one.
        """
        emb_x = self.embedding_layer(x)
        out, (hidden, cell) = self.lstm(emb_x, (hidden, cell))
        hidden = hidden.detach()
        cell = cell.detach()

        # reshape (rather than view) also works if the LSTM output is ever
        # non-contiguous.
        out = self.linear(out.reshape(-1, self.hidden_size))

        return out, (hidden, cell)

    def init_h0_c0(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

        The states are created on the same device and with the same dtype as
        the model's own parameters, so the method does not depend on any
        module-level ``device`` variable.
        """
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        reference = self.linear.weight
        return (
            reference.new_zeros(state_shape),
            reference.new_zeros(state_shape),
        )


## Creating the generator class for training and generating words

In [306]:
class Generator:
    """Trains the character-level ``RNN`` on ``names_files`` and samples from it.

    Relies on the module-level ``all_characters``, ``n_characters``,
    ``names_files`` and ``device`` objects.
    """

    def __init__(self):
        self.chunk_len = 250      # characters per training sequence
        self.embedding = 256      # embedding width passed to RNN
        self.num_epochs = 2000    # number of training iterations (one batch each)
        self.batch_size = 1
        self.hidden_size = 256
        self.num_layers = 2
        self.lr = 0.001

    def prepare_sequence(self, string):
        """Convert a string into a 1-D long tensor of character indices.

        Each character is mapped to its position in ``all_characters``.

        Raises:
            ValueError: if a character is not present in ``all_characters``.
        """
        # Explicit dtype keeps the result integral even for an empty string.
        return torch.tensor(
            [all_characters.index(char) for char in string], dtype=torch.long
        )

    def get_batch(self):
        """Return ``batch_size`` random training chunks from ``names_files``.

        Returns:
            ``(inputs, targets)``: two long tensors of shape
            (batch_size, chunk_len). ``targets`` is ``inputs`` shifted one
            character to the right, i.e. the next character to predict.

        Raises:
            ValueError: if the corpus holds fewer than ``chunk_len + 1``
                characters.
        """
        # Each sample needs chunk_len + 1 characters (input plus shifted
        # target), so the last valid start index is len - chunk_len - 1.
        # random.randint includes its upper bound.
        max_start = len(names_files) - self.chunk_len - 1
        if max_start < 0:
            raise ValueError(
                "corpus is shorter than chunk_len + 1 characters"
            )

        text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
        text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

        # Draw an independent chunk for every row of the batch.
        for row in range(self.batch_size):
            start_idx = random.randint(0, max_start)
            text_str = names_files[start_idx:start_idx + self.chunk_len + 1]
            text_input[row] = self.prepare_sequence(text_str[:-1])
            text_target[row] = self.prepare_sequence(text_str[1:])

        return text_input, text_target

    def generate_names(self, initial_str="A", temperature=0.85, prediction_len=100):
        """Sample ``prediction_len`` characters from the trained model.

        Args:
            initial_str: non-empty seed text; the result starts with it.
            temperature: positive divisor applied to the logits before
                sampling. Lower values make sampling greedier.
            prediction_len: number of characters to generate after the seed.

        Returns:
            ``initial_str`` followed by the sampled characters.

        Raises:
            ValueError: if ``initial_str`` is empty or ``temperature`` is not
                positive.
        """
        if not initial_str:
            raise ValueError("initial_str must contain at least one character")
        if temperature <= 0:
            raise ValueError("temperature must be positive")

        # Generation always feeds a single sequence, whatever batch size was
        # used for training.
        hidden, cell = self.model.init_h0_c0(1)
        initial_str_tensor = self.prepare_sequence(initial_str).to(device)

        predicted = [initial_str]  # result starts from initial_str
        with torch.no_grad():
            # Warm up the recurrent state on every seed character but the last.
            for char_idx in initial_str_tensor[:-1]:
                _, (hidden, cell) = self.model(char_idx.view(1, 1), hidden, cell)

            # The last seed character is the first input of the sampling loop.
            last_char_idx = initial_str_tensor[-1]
            for _ in range(prediction_len):
                output, (hidden, cell) = self.model(
                    last_char_idx.view(1, 1), hidden, cell
                )
                # softmax(logits / T) is proportional to exp(logits / T) but
                # cannot overflow for large logits.
                output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
                last_char_idx = torch.multinomial(output_dist, num_samples=1)[0]
                predicted.append(all_characters[last_char_idx.item()])

        return "".join(predicted)

    def train(self):
        """Build a fresh ``RNN`` in ``self.model`` and train it.

        Runs ``num_epochs`` iterations, each on one random batch from
        ``get_batch``, optimising cross-entropy with Adam. Every 500
        iterations the current loss and a generated sample are printed.
        """
        self.model = RNN(
            n_characters,
            self.embedding,
            self.hidden_size,
            self.num_layers,
            n_characters,
        ).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

        for epoch in range(1, self.num_epochs + 1):
            inputs, targets = self.get_batch()
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Every batch starts from a zero recurrent state.
            hidden, cell = self.model.init_h0_c0(self.batch_size)

            optimizer.zero_grad()

            outputs, _ = self.model(inputs, hidden, cell)

            # The model returns logits flattened to (batch * seq, vocab), so
            # flatten the targets the same way. Unlike squeeze(), this is
            # correct for any batch size.
            loss = criterion(outputs, targets.reshape(-1))
            loss.backward()

            optimizer.step()

            if epoch % 500 == 0:
                print(f"epochs {epoch}, loss {loss.item()}")
                print(self.generate_names())



In [307]:
name_generator = Generator()
name_generator.train()

epochs 500, loss 2.2552311420440674
AZwell
Cister
Clayna
Jegar
Lentelly
Shadna
Lone
Teag
Caabie
Samhameo
Cadie
Sosmina
Jana
Alaniele
Than
epochs 1000, loss 1.7891860008239746
Aburtie
Sapher
Dulty
Auliana
Deanar
Jamia
Vevan
Hathrir
Demanie
Micabe
Jeferi
Orian
Daverian
Jafrey
M
epochs 1500, loss 1.9733999967575073
Alvayah
Joannie
Sana
Gi
Kristi
Antonia
Aury
Linjandon
Bristine
Flay
Austallin
Irenee
Albor
Nataliol
E
epochs 2000, loss 1.3745120763778687
ACler
Branden
Mari
Joan
Janelle
Jenida
Debse
Jene
Robby
Isha
Brookanne
Leiliah
Maryan
Glendo
Jasem
Mi


In [310]:
print(name_generator.generate_names(initial_str="U"))

Udrine
Belis
Gabry
Baylen
Julie
Diana
Avia
Marisa
Migguel
Harrie
Trencence
Ashlis
Alis
Grossie
Charl



### Breaking down the code

In [51]:
print(names_files[:4])
# prepare_sequence is a method of Generator, not a module-level function,
# so it has to be called through the instance.
name_generator.prepare_sequence(names_files[:4])

Mary


tensor([48, 10, 27, 34])

In [18]:
# Stand-alone walk-through of Generator.get_batch for a single 250-character chunk.
# def get_random_batch(self):
# The slice needs 250 + 1 characters (input plus the shifted target) and
# random.randint includes its upper bound, hence the extra "- 1".
start_idx = random.randint(0, len(names_files) - 250 - 1)
end_idx = start_idx + 250 + 1
text_str = names_files[start_idx:end_idx]
text_input = torch.zeros(1, 250)
text_target = torch.zeros(1, 250)

for i in range(1):
    # The input is the chunk without its last character; the target is the
    # same chunk shifted by one.
    text_input[i, :] = name_generator.prepare_sequence(text_str[:-1])
    text_target[i, :] = name_generator.prepare_sequence(text_str[1:])

# return text_input.long(), text_target.long()

In [19]:
text_input.shape

torch.Size([1, 250])

In [20]:
text_target[:,0]

tensor([96.])

In [21]:
emb_x = torch.rand(250, 100)
emb_x.shape

torch.Size([250, 100])

In [22]:
output_dist = emb_x.data.view(-1).div(0.85).exp()

In [23]:
len(all_characters)

100

In [78]:
h0, c0 = name_generator.model.init_h0_c0(1)
h0.shape

torch.Size([2, 1, 256])

In [268]:
initial_str= "A"
initial_input = name_generator.prepare_sequence(initial_str)
initial_input

tensor([36])

In [215]:
predicted = initial_str
predicted

'Ha'

In [275]:
initial_input.view(1,1).shape

torch.Size([1, 1])

In [217]:
# Warm up the LSTM state on every character of the seed except the last one.
# NOTE: `hidden` and `cell` are not created in this cell; they must already
# exist from another cell (e.g. one calling name_generator.model.init_h0_c0(1)).
for p in range(len(initial_str) - 1):
    _, (hidden, cell) = name_generator.model(
        # Reshape the scalar index to (1, 1) before feeding it to the model.
        initial_input[p].view(1).unsqueeze(0).to(device), hidden, cell
    )
    print("loop")

loop


In [289]:
last_char = initial_input[-1]
last_char

tensor(10)

In [219]:
last_char.view(1).unsqueeze(1)

tensor([[10]])

In [290]:
# Sample 10 characters one at a time, feeding each sampled character back in.
# NOTE: `last_char`, `h0`, `c0` and `predicted` come from earlier cells.
for p in range(10):
    output, (h0, c0) = name_generator.model(
        last_char.view(1,1).to(device), h0, c0
    ) 
    # Divide the logits by 0.85 and exponentiate to get positive sampling weights.
    output_dist = output.data.view(-1).div(0.85).exp()
    # Draw one character index from those weights.
    top_char = torch.multinomial(output_dist, num_samples=1)[0]
    predicted_char = all_characters[top_char]
    predicted += predicted_char
    # Re-encode the sampled character as the next input.
    last_char = name_generator.prepare_sequence(predicted_char)


In [291]:
predicted

'Ha\nRanner\nKuke\nEler\nYeudon\nErgel'

### Experimenting with my generation method

In [286]:
hidden, cell = name_generator.model.init_h0_c0(1)
hidden.shape

torch.Size([2, 1, 256])

In [278]:
initial_str= "Ha"
initial_input = name_generator.prepare_sequence(initial_str)
initial_input
# initial_input.unsqueeze(0).shape

tensor([43, 10])

In [260]:
last_char_idx = initial_input[-1]
last_char_idx.view(1).unsqueeze(0)

tensor([[43]])

In [254]:
last_char_idx

tensor(43)

In [281]:
outputs, (hidden, cell) = name_generator.model(initial_input.view(1,2).to(device), hidden, cell)

In [282]:
outputs.shape

torch.Size([2, 100])

In [285]:
hidden.shape

torch.Size([2, 1, 256])

In [232]:
output_dist = output.data.view(-1).div(0.85).exp()
output_dist.shape

torch.Size([100])

In [238]:
torch.multinomial(output_dist, num_samples=1)

tensor([14], device='cuda:0')

In [255]:
predicted_n = initial_str
predicted_n

'H'

In [248]:
idx.view(1).unsqueeze(0)

tensor([[23]])

In [267]:
print(predicted_n)

He
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Ja


In [265]:
# Greedy generation: at every step take the most probable next character
# instead of sampling from the distribution.
# NOTE: `hidden` and `cell` are not reset in this cell; they carry over from
# whichever cell assigned them last.
initial_str= "H"
initial_input = name_generator.prepare_sequence(initial_str)
# initial_input
# initial_input.unsqueeze(0).shape

# Start from the last character of the seed.
last_char_idx = initial_input[-1]
# last_char_idx.view(1).unsqueeze(0)
predicted_n = initial_str

for i in range(100):
    print()
    print(f"From {last_char_idx.item()} --> {all_characters[last_char_idx.item()]}")
    # Feed the current character index as a (1, 1) tensor.
    outputs, (hidden, cell) = name_generator.model(last_char_idx.view(1).unsqueeze(0).to(device), hidden, cell)
    
    # Normalise the logits, then take the highest-probability index along dim 1.
    outputs = nn.functional.softmax(outputs, 1)
    py_max, last_char_idx = outputs.cpu().detach().max(1)
    
    print(f"To ; {last_char_idx.item()} --> {all_characters[last_char_idx.item()]}")
    
    # Append the chosen character to the running result.
    last_char = all_characters[last_char_idx.item()]
    predicted_n += last_char


From 43 --> H
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a
To ; 27 --> r

From 27 --> r
To ; 21 --> l

From 21 --> l
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a
To ; 27 --> r

From 27 --> r
To ; 21 --> l

From 21 --> l
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a
To ; 27 --> r

From 27 --> r
To ; 21 --> l

From 21 --> l
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a
To ; 27 --> r

From 27 --> r
To ; 21 --> l

From 21 --> l
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a
To ; 27 --> r

From 27 --> r
To ; 21 --> l

From 21 --> l
To ; 14 --> e

From 14 --> e
To ; 96 --> 


From 96 --> 

To ; 45 --> J

From 45 --> J
To ; 10 --> a

From 10 --> a

In [264]:
print(f"Direct : {predicted_n}")

Direct : Harle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
Jarle
