In [None]:
# %pip (unlike !pip) installs into the environment of the running kernel.
# The version is pinned to the one this notebook was last run with.
%pip install unidecode==1.3.6

Collecting unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/235.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.9/235.9 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.6


In [3]:
import torch
import torch.nn as nn
import string
import random
import sys
import unidecode



# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Vocabulary: a character's position in this string is its integer id.
all_characters = string.printable
n_characters = len(all_characters)

# Training text. names.txt must be present in the working directory
# (presumably it has to be re-uploaded for every new session).
# unidecode transliterates non-ASCII characters to ASCII before they are
# looked up in all_characters.
# NOTE(review): assumes names.txt is UTF-8 encoded; adjust `encoding` if not.
with open("names.txt", encoding="utf-8") as names_file:
    file = unidecode.unidecode(names_file.read())


class RNN(nn.Module):
    """Embedding -> stacked LSTM -> linear model that is advanced one step per call.

    Each call to ``forward`` consumes one index per batch row and returns the
    scores for that step together with the updated LSTM state.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the layers.

        Args:
            input_size: number of distinct input indices (rows of the embedding).
            hidden_size: width of the embedding vectors and of the LSTM state.
            num_layers: number of stacked LSTM layers.
            output_size: width of the score vector returned by ``forward``.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # index -> vector of size hidden_size
        self.embed = nn.Embedding(input_size, hidden_size)
        # LSTM input width and state width are both hidden_size
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        # final fully connected layer: hidden_size -> output_size
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Advance the model by one time step.

        Args:
            x: integer index tensor of shape (batch,).
            hidden: LSTM hidden state, shape (num_layers, batch, hidden_size).
            cell: LSTM cell state, same shape as ``hidden``.

        Returns:
            ``(out, (hidden, cell))`` where ``out`` has shape
            (batch, output_size) and the state tensors are the updated ones.
        """
        out = self.embed(x)
        # Insert a sequence dimension of length 1 (batch_first=True layout).
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        # Collapse the length-1 sequence dimension before the linear layer.
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` rows.

        The tensors are created directly on the device (and with the dtype) of
        the model's own weights, so they always match the model and do not
        depend on any module-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        cell = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return hidden, cell


In [4]:
## Generator trains the character-level RNN defined above on the loaded text and samples new text from it
class Generator:
    """Trains a character-level ``RNN`` on the loaded text and samples from it.

    Relies on names defined at notebook level: ``all_characters``,
    ``n_characters``, ``file``, ``device``, ``RNN`` and (for ``train``)
    ``SummaryWriter``.
    """

    def __init__(self):
        self.chunk_len = 250     # characters per training sequence
        self.num_epochs = 5000   # training iterations (one random batch each)
        self.batch_size = 1      # sequences per training batch
        self.print_every = 50    # print the loss and a sample every N iterations
        self.hidden_size = 256
        self.num_layers = 2
        self.lr = 0.003          # learning rate passed to Adam

    def char_tensor(self, string):
        """Encode ``string`` as a 1-D long tensor of ``all_characters`` indices.

        Raises:
            ValueError: if a character is not contained in ``all_characters``.
        """
        # NOTE: the parameter name shadows the ``string`` module in this scope.
        return torch.tensor(
            [all_characters.index(ch) for ch in string], dtype=torch.long
        )

    def get_random_batch(self):
        """Sample ``batch_size`` random chunks of the text.

        Returns:
            ``(text_input, text_target)``, two long tensors of shape
            (batch_size, chunk_len). ``text_target`` is ``text_input`` shifted
            one character to the right within the same chunk.

        Raises:
            ValueError: if the text is shorter than ``chunk_len + 1`` characters.
        """
        # Every sample needs chunk_len + 1 characters (input plus shifted
        # target). random.randint includes BOTH endpoints, so the last valid
        # start index is len(file) - chunk_len - 1.
        last_start = len(file) - self.chunk_len - 1
        if last_start < 0:
            raise ValueError(
                "text is too short: need at least chunk_len + 1 characters"
            )

        text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
        text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

        for i in range(self.batch_size):
            # Draw an independent chunk per row so a batch is not batch_size
            # copies of the same text.
            start_idx = random.randint(0, last_start)
            text_str = file[start_idx:start_idx + self.chunk_len + 1]
            # input does not contain the last character of the chunk
            text_input[i, :] = self.char_tensor(text_str[:-1])
            # target does not contain the first character of the chunk
            text_target[i, :] = self.char_tensor(text_str[1:])
        return text_input, text_target

    def generate(self, initial_str="A", predict_len=100, temperature=0.85):
        """Sample ``predict_len`` characters from ``self.rnn`` after priming it.

        Args:
            initial_str: non-empty priming text; it is the start of the result.
            predict_len: number of characters to sample.
            temperature: divisor applied to the scores before sampling; lower
                values make the sampling distribution more peaked.

        Returns:
            ``initial_str`` followed by the sampled characters.
        """
        initial_input = self.char_tensor(initial_str)
        predicted = initial_str

        # Sampling needs no gradients.
        with torch.no_grad():
            # Characters are fed one at a time, so the state must be sized for
            # a batch of 1 regardless of the training batch size.
            hidden, cell = self.rnn.init_hidden(batch_size=1)

            # Prime the state with every character except the last one.
            for p in range(len(initial_str) - 1):
                _, (hidden, cell) = self.rnn(
                    initial_input[p].view(1).to(device), hidden, cell
                )

            last_char = initial_input[-1]

            for _ in range(predict_len):
                output, (hidden, cell) = self.rnn(
                    last_char.view(1).to(device), hidden, cell
                )
                # softmax yields the same sampling distribution as normalising
                # exp(score / temperature), without overflowing on large scores.
                output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
                top_char = torch.multinomial(output_dist, 1).item()
                predicted_char = all_characters[top_char]
                predicted += predicted_char
                last_char = self.char_tensor(predicted_char)

        return predicted

    # RNN arguments: input_size, hidden_size, num_layers, output_size
    def train(self):
        """Create ``self.rnn`` and train it for ``num_epochs`` iterations.

        Each iteration draws one random batch, sums the cross-entropy loss over
        all ``chunk_len`` steps and applies one Adam update. The per-character
        loss is written to TensorBoard under ``runs/testing``. Every
        ``print_every`` iterations the loss and a generated sample are printed.

        ``SummaryWriter`` must already be imported when this method is called.
        """
        self.rnn = RNN(
            n_characters, self.hidden_size, self.num_layers, n_characters
        ).to(device)

        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter("runs/testing")  # for tensorboard

        print("=> Starting training")

        try:
            for epoch in range(1, self.num_epochs + 1):
                inp, target = self.get_random_batch()
                hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)

                optimizer.zero_grad()
                loss = 0
                inp = inp.to(device)
                target = target.to(device)

                # Feed the chunk one character at a time and sum the losses.
                for c in range(self.chunk_len):
                    output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
                    loss += criterion(output, target[:, c])

                loss.backward()
                optimizer.step()
                # Average loss per character, as a plain float for logging.
                loss = loss.item() / self.chunk_len

                if epoch % self.print_every == 0:
                    print(f"Loss: {loss}")
                    print(self.generate())

                writer.add_scalar("Training loss", loss, global_step=epoch)
        finally:
            # Flush pending events and release the log file, even when training
            # is interrupted.
            writer.close()


In [5]:
## setup for tensorboard (SummaryWriter is used by Generator.train, so run this cell before training)
from torch.utils.tensorboard import SummaryWriter

In [None]:
## Build the trainer and run the full training loop. train() prints the loss
## and a generated sample every Generator.print_every iterations.
gennames = Generator()
gennames.train()

=> Starting training
Loss: 2.269477294921875
Amqie
Manmei
Joreie
Acildorie
Arie
fonriesy
Janie
Aall
Jamish
Selhsa
ienhisok
Joriha
Erere
Qelhania
C
Loss: 2.196859619140625
Ah
Aldin
Derr
Oless
Clalir
Glinig
Brelie
Morla
Bettern
Lohy
Raco
Kakece
Jerin
Anciey
Jacbon
Slon
Lasi
Loss: 2.2196630859375
Aynh
Charlie
Maronaa
Lentia
Riltha
Anduer
Jesth
Annati
Buishe
Jober
Joveston
kadie
Lema
BaLrathisn
Fr
Loss: 1.97085546875
An
Jaie
Mardia
Shedtie
Alaie
Cayla
Harlica
Daserania
Keeette
Kenney
Bonelina
Jela
Malamey
Roforatha
L
Loss: 2.119510498046875
Aan
Eubrii
Kaylea
Kailua
Kardie
Malad
Jincia
Eylira
Ludy
Cellie
Vamian
Lasa
Prianna
Edicon
Kay
Barict
Loss: 1.8180987548828125
Alen
Shestie
Mardana
Jeanna
Lacee
Seltine
Mikandra
Dyllinah
Luie
Sue
Ramary
Ben
Marimon
Boima
Nandon

Loss: 1.75592236328125
Aliace
Jady
Shacei
Katrie
Bonnie
Chelly
Idena
Carana
Adueline
Evena
Carvara
Kaylee
Hacie
Carley
Dann

Loss: 1.793961669921875
Arann
Aveny
Jighan
Kalis
Tenah
Tuber
Dennie
Covey
Jobis
Dazahle
Trobbicch
Cara


In [None]:
## Show TensorBoard inline for the run logged under runs/testing.
## The notebook magic hands control back to the kernel, whereas
## `!tensorboard serve` blocks the cell until it is interrupted.
%load_ext tensorboard
%tensorboard --logdir runs/testing