# This is a PyTorch version of Karpathy's char-rnn
    reference: https://gist.github.com/raphaelbastide/11ae4bb5e454e5c5239f



In [1]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.optim as optim

In [2]:
# data I/O
# Read the whole training corpus up front. The context manager closes the
# file handle even if read() raises.
with open('input.txt', 'r') as f:  # should be simple plain text file
    data = f.read()

# Sort the vocabulary so the char <-> index mapping is identical on every
# run; iterating a bare set of strings gives no stable order across
# interpreter runs.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has {} characters, {} unique.'.format(data_size, vocab_size))

# Lookup tables between characters and their integer ids.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = dict(enumerate(chars))

data has 1570 characters, 49 unique.


In [3]:
# hyperparameters
# Width of the recurrent hidden state.
hidden_size = 100
# Number of characters the RNN is unrolled over per training step.
seq_length = 25
# Step size handed to the optimizer.
learning_rate = 0.001

In [4]:
# Utility functions
def ix_to_tensor(ix):
    """Return a one-hot row vector for character index ``ix``.

    The result has shape ``(1, vocab_size)`` (``vocab_size`` is the
    module-level vocabulary size), is all zeros except for a 1.0 in
    column ``ix``, and is wrapped in ``Variable`` like the original.
    """
    one_hot = torch.zeros(1, vocab_size)
    one_hot[0, ix] = 1.0
    return Variable(one_hot)

def tensor_to_ix(t):
    """Return the column index of the largest entry in the first row of ``t``.

    Args:
        t: 2-D tensor of scores, one row per sample (the callers in this
           file pass a single row).

    Returns:
        A plain Python ``int``, usable directly as a dictionary key.
    """
    _, idx = torch.max(t, 1)
    # Flatten before indexing: depending on the PyTorch version the indices
    # come back as shape (rows,) or (rows, 1). The previous ``[0][0]``
    # double index only worked for the latter and raises on the former.
    return idx.view(-1)[0].item()

In [5]:
# nn module
class CharRNN(nn.Module):
    """One-step character RNN: a ReLU ``nn.RNNCell`` plus a linear readout.

    The module processes a single time step per call; the caller is
    responsible for looping over the sequence and threading the hidden
    state through successive calls.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the recurrent cell and the hidden-to-output projection."""
        super(CharRNN, self).__init__()
        self.input_size, self.hidden_size, self.output_size = (
            input_size, hidden_size, output_size)

        # Recurrent cell first, readout second (same construction order as
        # before, so parameter initialisation is unchanged for a given seed).
        self.rnn = nn.RNNCell(input_size, hidden_size, nonlinearity='relu')
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance one step.

        Returns a pair ``(output, hidden)``: the linear readout of the new
        hidden state, and the new hidden state itself.
        """
        next_hidden = self.rnn(input, hidden)
        return self.linear(next_hidden), next_hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size)."""
        return Variable(torch.zeros(1, self.hidden_size))

In [6]:
# Model, loss and optimizer shared by train() and sample() below.
# Input and output widths are both the vocabulary size: one-hot characters
# go in, one score per character comes out.
rnn = CharRNN(input_size=vocab_size,
              hidden_size=hidden_size,
              output_size=vocab_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), lr=learning_rate)

def train(inputs, targets, hprev):
    """Run one optimisation step over a window of characters.

    Feeds ``inputs`` through the module-level ``rnn`` one character at a
    time, sums the per-step cross-entropy against ``targets``,
    backpropagates through the whole window and applies one step of the
    module-level ``optimizer``.

    Args:
        inputs: sequence of character indices fed to the network.
        targets: sequence of character indices to predict; must have the
            same length as ``inputs``.
        hprev: hidden state to start the window from.

    Returns:
        ``(loss, hprev)``: the summed loss as a Python float, and the hidden
        state after the last step. The returned state is still attached to
        this window's graph; the caller should detach it before reuse.

    Raises:
        ValueError: if ``inputs`` and ``targets`` differ in length.
    """
    if len(inputs) != len(targets):
        raise ValueError(
            'inputs and targets must have the same length, got {} and {}'
            .format(len(inputs), len(targets)))
    if not inputs:
        # Nothing to learn from; avoid calling backward() on the int 0.
        return 0.0, hprev

    optimizer.zero_grad()

    loss = 0
    for in_ix, target_ix in zip(inputs, targets):
        x = ix_to_tensor(in_ix)
        target = torch.tensor([target_ix], dtype=torch.long)

        output, hprev = rnn(x, hprev)
        loss = loss + criterion(output, target)

    loss.backward()
    optimizer.step()

    # The summed loss is a scalar tensor; .item() converts it to a float
    # (indexing it with [0] fails on 0-dim tensors).
    return loss.item(), hprev

def sample(seed_ix, n):
    """Generate ``n`` character indices from the model, starting at a seed.

    Decoding is greedy: at every step the highest-scoring character (via
    ``tensor_to_ix``) is emitted and fed back in as the next input, so the
    output is deterministic for fixed weights.

    Args:
        seed_ix: index of the first input character.
        n: number of characters to generate.

    Returns:
        List of ``n`` generated character indices (the seed is not included).
    """
    h = rnn.init_hidden()
    x = ix_to_tensor(seed_ix)
    ixes = []

    # Inference only: no gradients are needed, so skip building an autograd
    # graph across the n forward steps.
    with torch.no_grad():
        for _ in range(n):
            output, h = rnn(x, h)
            ix = tensor_to_ix(output)
            ixes.append(ix)
            x = ix_to_tensor(ix)

    return ixes

In [7]:
n = 0  # iteration counter
p = 0  # data pointer: offset of the current window in `data`

# Runs until interrupted.
while True:
    # Start over with a zeroed hidden state on the very first iteration, or
    # once the next window plus its one-character-shifted targets would
    # reach the end of the data.
    if n == 0 or p + seq_length + 1 >= len(data):
        p = 0  # go from start of data
        hprev = rnn.init_hidden()

    window_end = p + seq_length
    inputs = [char_to_ix[c] for c in data[p:window_end]]
    targets = [char_to_ix[c] for c in data[p + 1:window_end + 1]]

    report = n % 1000 == 0

    # sample from the model now and then
    if report:
        sample_ix = sample(inputs[0], 200)
        txt = ''.join(map(ix_to_char.__getitem__, sample_ix))
        print('----\n {} \n----'.format(txt))

    # forward seq_length characters through the net and fetch gradient
    loss, hprev = train(inputs, targets, hprev)
    # Re-wrap the raw values so the next window does not backpropagate
    # into this one.
    hprev = Variable(hprev.data)

    if report:
        print('iter {}, loss: {}'.format(n, loss))  # print progress

    p += seq_length  # move data pointer
    n += 1

----
 SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS 
----
iter 0, loss: 98.09723663330078
----
 and the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the  
----
iter 1000, loss: 52.85036849975586
----
 mons and th the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the  
----
iter 2000, loss: 39.247779846191406
----
 derucan ingeringional deal world th trmation, anders andinge prom degl an systrmation, andering, anders anding cinsif and wirld in orderical in ted wisis of computer at on ers, and uns roage in or and 
----
iter 3000, loss: 23.58685302734375
----
 ders

KeyboardInterrupt: 