# RNN Text Generation

## Imports

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

## Text File Import

In [2]:
with open('shakespeare.txt','r',encoding='utf8') as f:
    text = f.read()

In [3]:
type(text)

str

In [4]:
print(text[:670])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
    Pity the world, or else this glutton be,
    To eat the world's due, by the grave and thee.


  


In [5]:
len(text)

5445609

## Text Encoding

In [6]:
#all unique characters
all_characters = set(text)

In [7]:
len(all_characters)

84

In [8]:
# DECODER
# number to letter

'''
for pair in enumerate(all_characters):
    print(pair)
'''

decoder = dict(enumerate(all_characters))

In [9]:
# ENCODER
# letter to number

encoder = {char: ind for ind,char in decoder.items()}

In [10]:
# full text encoding (characters to numericall values)

encoded_text = np.array([encoder[char] for char in text])

In [11]:
len(encoded_text) == len(text)

True

In [12]:
encoded_text[:100]

array([62, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
       70, 70, 70, 70, 70, 19, 62, 70, 70, 12, 66, 51, 74, 70,  2, 18, 73,
       66, 64, 36, 24, 70, 39, 66, 64, 18, 24, 53, 66, 64, 36, 70, 61, 64,
       70, 77, 64, 36, 73, 66, 64, 70, 73, 32, 39, 66, 64, 18, 36, 64, 76,
       62, 70, 70, 54, 60, 18, 24, 70, 24, 60, 64, 66, 64,  7, 23, 70,  7,
       64, 18, 53, 24, 23,  9, 36, 70, 66, 51, 36, 64, 70, 74, 73])

## One Hot Encoding

In [13]:
#number of unique characters
num_uni_chars = len(set(text))

In [14]:
def one_hot_encoder(encoded_text , num_uni_chars):
    
    #encoded text - batch of encoded text
    #num_uni_chars - number of unique characters in whole text file
    
    one_hot = np.zeros((encoded_text.size , num_uni_chars)) #prepare array with correct dimensions
    
    one_hot = one_hot.astype(np.float32) #data type for PyTorch
    
    one_hot[np.arange(one_hot.shape[0]),encoded_text.flatten()] = 1.0 #put ones in the position which coresponds to encoded char value
    
    one_hot = one_hot.reshape(*encoded_text.shape,num_uni_chars)
    
    return one_hot

In [15]:
# testing one_hot_encoder on small array
arr = np.array([1,2,0])
arr

array([1, 2, 0])

In [16]:
one_hot_encoder(arr,3)

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

## Training Batches

In [17]:
example_text = np.arange(10)

In [18]:
example_text

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [19]:
example_text.reshape(5,-1)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [20]:
def generate_batches(encoded_text , sam_per_batch = 10 , seq_len=50):
    
    # X -> encoded text of length 'seq_len'
    # Y -> encoded text shifted by 1
    
    # how many characters per batch
    char_per_batch = sam_per_batch * seq_len
    
    # how many batches possible in entire text
    num_batches_avail = int(len(encoded_text)/char_per_batch)
    
    # cut off the end of the encoded text
    encoded_text = encoded_text[:num_batches_avail*char_per_batch]
    
    
    encoded_text = encoded_text.reshape(sam_per_batch,-1)
    
    for n in range(0,encoded_text.shape[1],seq_len):
        
        x = encoded_text[:,n:n+seq_len]
        
        y = np.zeros_like(x)
        
        try:
            
            y[:,:-1] = x[:,1:]
            y[:,-1] = encoded_text[:,n+seq_len]
            
        except:
            
            y[:,:-1] = x[:,1:]
            y[:,-1] = encoded_text[:,0]
            
        yield x,y         
            

In [21]:
# generator test
sample_text = np.arange(20)

In [22]:
sample_text

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [23]:
batch_generator = generate_batches(sample_text,sam_per_batch=2,seq_len=5)

In [24]:
type(batch_generator)

generator

In [25]:
x,y = next(batch_generator)

In [26]:
x

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

In [27]:
y

array([[ 1,  2,  3,  4,  5],
       [11, 12, 13, 14, 15]])

In [28]:
x,y = next(batch_generator)

In [29]:
x

array([[ 5,  6,  7,  8,  9],
       [15, 16, 17, 18, 19]])

In [30]:
y

array([[ 6,  7,  8,  9,  0],
       [16, 17, 18, 19, 10]])

## LSTM Model

In [31]:
class CharModel(nn.Module):
    
    def __init__(self, all_chars , num_hidden = 256 , num_layers = 4 , drop_prob = 0.5 , use_gpu = False):
        
        super().__init__()
        
        self.drop_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.use_gpu = use_gpu
        
        self.all_chars = all_chars
        self.decoder = dict(enumerate(all_chars))
        self.encoder = {char:ind for ind,char in decoder.items()}
        
        
        
        self.lstm = nn.LSTM(len(self.all_chars),num_hidden,num_layers,dropout=drop_prob,batch_first=True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc_linear = nn.Linear(num_hidden,len(self.all_chars))
    
    def forward(self , x , hidden):
        
        lstm_output , hidden = self.lstm(x,hidden)
        
        drop_output = self.dropout(lstm_output)
        
        drop_output = drop_output.contiguous().view(-1,self.num_hidden)
        
        final_out = self.fc_linear(drop_output)
        
        return final_out, hidden
    
    
    def hidden_state(self,batch_size):
        
        if self.use_gpu:
            
            hidden = (torch.zeros(self.num_layers , batch_size , self.num_hidden).cuda(),
                      torch.zeros(self.num_layers , batch_size , self.num_hidden).cuda())
        else:
            
            hidden = (torch.zeros(self.num_layers , batch_size , self.num_hidden),
                      torch.zeros(self.num_layers , batch_size , self.num_hidden))
            
        return hidden

In [32]:
model = CharModel(all_chars=all_characters,
                 num_hidden=512,
                 num_layers=3,
                 drop_prob=0.5,
                 use_gpu=True)

In [33]:
total_params = []

for p in model.parameters():
    total_params.append(int(p.numel()))

In [34]:
sum(total_params)

5470292

In [35]:
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
criterion = nn.CrossEntropyLoss()

## Train / Test Split

In [36]:
train_percent = 0.9
train_ind = int(len(encoded_text) * train_percent)

In [37]:
train_data = encoded_text[:train_ind]
val_data = encoded_text[train_ind:]

## Model Training

In [38]:
epochs = 15
batch_size = 100

seq_len = 100

tracker = 0

num_char = max(encoded_text) + 1

In [39]:
model.train()

if model.use_gpu:
    model.cuda()
    
    

    
for i in range(epochs):
    
    hidden = model.hidden_state(batch_size)
    
    for x,y in generate_batches(train_data,batch_size,seq_len):
        
        tracker += 1
        
        x = one_hot_encoder(x,num_char)
        
        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)
        
        
        
        if model.use_gpu:
            
            inputs = inputs.cuda()
            targets = targets.cuda()
        
        
        hidden = tuple([state.data for state in hidden])
        
        model.zero_grad()
        
        lstm_output,hidden = model.forward(inputs,hidden)
        
        loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())
        loss.backward()
        
        nn.utils.clip_grad_norm_(model.parameters(),max_norm=5)
        
        optimizer.step()
        
        if tracker % 25 == 0:
            
            val_hidden = model.hidden_state(batch_size)
            val_losses = []
            model.eval()
            
            for x,y in generate_batches(val_data,batch_size,seq_len):
                
                x = one_hot_encoder(x,num_char)
        
                imputs = torch.from_numpy(x)
                targets = torch.from_numpy(y)
        
        
        
                if model.use_gpu:
            
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                    
                val_hidden = tuple([state.data for state in val_hidden])
                
                lstm_out,val_hidden = model.forward(inputs,val_hidden)
                val_loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())
                
                val_losses.append(val_loss.item())
                
            model.train()
            
            print(f'Epoch: {i} Step: {tracker} Val loss: {val_loss.item()}')
            
            
            
            
            
            
      

Epoch: 0 Step: 25 Val loss: 3.232865333557129
Epoch: 0 Step: 50 Val loss: 3.211498737335205
Epoch: 0 Step: 75 Val loss: 3.218843936920166


KeyboardInterrupt: 

In [40]:
model_name = 'hidden512_layers3_shakes.net'

In [None]:
torch.save(model.state_dict(),model_name)