## Generating text using RNNs ###
- an RNN  is trained in seq2seq manner to make it learn to generate text
- with lots of text fed to the network it models the language
- it learns to model the conditional probability of having a character as next character, given its previous N characters
- This code does the unrolling of RNN explicitly using a for loop


<b>Acknowledgement :</b>- This code is almost completely copied from here https://gist.github.com/michaelklachko?direction=desc&sort=updated . 

In [3]:
import string
import random
import torch
import torch.nn as nn
from torch.autograd import Variable
import time, math
 
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

use_cuda = torch.cuda.is_available()

if use_cuda:
    print ('CUDA is available')
#use_cuda=False   #uncomment this if you dont want to use cuda variables

CUDA is available


In [5]:
printable = string.printable
 
#Input text is available here: https://sherlock-holm.es/stories/plain-text/cano.txt
text = open('../../../data/lab2/sh.txt', 'r').read().lower()



## remove non printable chars and other unnecessary punctuations
pruned_text = ''
 
for c in text:
	if c in printable and c not in '{}[]&_':
		pruned_text += c
 
text = pruned_text		  
file_len = len(text)
alphabet = sorted(list(set(text)))
n_chars = len(alphabet)

print "\nTraining RNN on The Complete Sherlock Holmes.\n"		 
print "\nFile length: {:d} characters\nUnique characters: {:d}".format(file_len, n_chars)
print "\nUnique characters:", alphabet		 
print ('no of uniq chars', n_chars)


Training RNN on The Complete Sherlock Holmes.


File length: 3867934 characters
Unique characters: 52

Unique characters: ['\n', ' ', '!', '"', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
('no of uniq chars', 52)


In [6]:
def time_since(since):
	s = time.time() - since
	m = math.floor(s / 60)
	s -= m * 60
	return '%dm %ds' % (m, s)
 

In [7]:
def random_chunk():
	start = random.randint(0, file_len - chunk_len)
	end = start + chunk_len + 1
	return text[start:end]
 
def chunk_vector(chunk):
	vector = torch.zeros(len(chunk)).long()
	for i, c in enumerate(chunk):
		vector[i] = alphabet.index(c)  #construct ASCII vector for chunk, one number per character
	return Variable(vector.cuda(), requires_grad=False) 

In [8]:
def random_training_batch():
	inputs = []
	targets = []
	#construct list of input vectors (chunk_len):
	for b in range(batch_size):    
		chunk = random_chunk()
		inp = chunk_vector(chunk[:-1])
		target = chunk_vector(chunk[1:])
		inputs.append(inp)
		targets.append(target)
	#construct batches from lists (chunk_len, batch_size):
	#need .view to handle batch_size=1
	#need .contiguous to allow .view later
	inp = torch.cat(inputs, 0).view(batch_size, chunk_len).t().contiguous()
	target = torch.cat(targets, 0).view(batch_size, chunk_len).t().contiguous()
	return inp, target

In [11]:
class RNN(nn.Module):
	def __init__(self, input_size, hidden_size, output_size, n_layers):
		super(RNN, self).__init__()
		 
		self.input_size = input_size
		self.hidden_size = hidden_size
		self.output_size = output_size
		self.n_layers = n_layers 
		self.batch_size = batch_size
		 
		self.encoder = nn.Embedding(input_size, hidden_size) #first arg is dictionary size
		self.GRU = nn.GRU(hidden_size, hidden_size, n_layers)  #(input_size, hidden_size, n_layers)
		self.decoder = nn.Linear(hidden_size, output_size)
		 
	def forward(self, input, hidden, batch_size):

		input = self.encoder(input.view(batch_size, seq_len)) 
		#need to reshape Input to (seq_len, batch, hidden_size)
		input = input.permute(1, 0, 2)

		output, hidden = self.GRU(input, hidden) 

		output = self.decoder(output.view(batch_size, hidden_size))  
		#now the output is (batch_size, output_size)
		return output, hidden
	def init_hidden(self, batch_size):
		#Hidden (num_layers * num_directions, batch, hidden_size), num_directions = 2 for BiRNN
		return Variable(torch.randn(self.n_layers, batch_size, self.hidden_size).cuda())
 

In [12]:
seq_len = 1		   #each character is encoded as a single integer
chunk_len = 128    #number of characters in a single text sample
batch_size = 64   #number of text samples in a batch
n_batches =800   #size of training dataset (total number of batches)
hidden_size = 256  #width of model
n_layers = 2	  #depth of model
LR = 0.005		   #learning rate
 
net = RNN(n_chars, hidden_size, n_chars, n_layers).cuda()
#net = RNN(n_chars, hidden_size, n_chars, n_layers)
optim = torch.optim.Adam(net.parameters(), LR)
cost = nn.CrossEntropyLoss().cuda()  
 
print "\nModel parameters:\n"
print "n_batches: {:d}\nbatch_size: {:d}\nchunk_len: {:d}\nhidden_size: {:d}\nn_layers: {:d}\nLR: {:.4f}\n".format(n_batches, batch_size, chunk_len, hidden_size, n_layers, LR)
print "\nRandom chunk of text:\n\n", random_chunk(), '\n'
	 
"""
Take input, target pairs of chunks (target is shifted forward by a single character)
convert them into chunk vectors
for each char pair (i, t) in chunk vectors (input, target), create embeddings with dim = hidden_size
feed input char vectors to GRU model, and compute error = output - target
update weights after going through all chars in the chunk
"""


Model parameters:

n_batches: 800
batch_size: 64
chunk_len: 128
hidden_size: 256
n_layers: 2
LR: 0.0050


Random chunk of text:

nally,
     remarkably quick-witted, for this whole ingenious story is of his
     concoction. yes, watson, we have come upon the 



'\nTake input, target pairs of chunks (target is shifted forward by a single character)\nconvert them into chunk vectors\nfor each char pair (i, t) in chunk vectors (input, target), create embeddings with dim = hidden_size\nfeed input char vectors to GRU model, and compute error = output - target\nupdate weights after going through all chars in the chunk\n'

In [13]:
def evaluate(prime_str = 'A', predict_len = 100, temp = 0.8, batch_size = 1):
	hidden = net.init_hidden(batch_size) 
	prime_input = chunk_vector(prime_str)
	predicted = prime_str
	 
	for i in range(len(prime_str)-1):
		_, hidden = net(prime_input[i], hidden, batch_size)
	  
	inp = prime_input[-1]
	 
	for i in range(predict_len):
		output, hidden = net(inp, hidden, batch_size)
		output_dist = output.data.view(-1).div(temp).exp()	
		top_i = torch.multinomial(output_dist, 1)[0]
		 
		predicted_char = alphabet[top_i]
		predicted +=  predicted_char
		inp = chunk_vector(predicted_char)
 
	return predicted


In [14]:
start = time.time()
for iter in range(0,10):
	training_set = []
 
	for i in range(n_batches):
		training_set.append((random_training_batch()))
 
	i = 0	
	for inp, target in training_set:
		#re-init hidden outputs, zero grads, zero loss:
		hidden = net.init_hidden(batch_size)
		net.zero_grad()
		loss = 0	   
		#for each char in a chunk:
		#compute output, error, loss:
		count=0
		for c, t in zip(inp, target):
			#print( 'size of c')
			#print(c.size())
			count=count+1
			output, hidden = net(c, hidden, batch_size)
			loss += cost(output, t)
		#calculate gradients, update weights:
		#print('count was')
		#print (count)
		loss.backward()
		optim.step()
 
		if i % 100 == 0:
			print "\n\nSample output:\n"
			print evaluate('you are', 100, 0.8), '\n'
			print('[%s (%d / %d) loss: %.4f]' % (time_since(start), i, n_batches, loss.data[0] / chunk_len))
 
		i += 1
	#print('i is')
	
	#print (i)
	print ( 'completed iteration no.=', iter)


RuntimeError: size '[64 x 128]' is invalid for input of with 8191 elements at /OCRData/minesh.mathew/pytorch-0.1.12/torch/lib/TH/THStorage.c:59