# Data

In [2]:
import torch.nn as nn
from torch.autograd import Variable

import data

In [3]:
# Build the corpus from ./data/brown/; its `train` split and `dictionary` are used in later cells.
corpus = data.Corpus('./data/brown/')

In [8]:
class AttrDict(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Point the instance namespace at the mapping itself, so `d.key`
        # and `d['key']` always refer to the same storage.
        self.__dict__ = self


# Notebook-wide settings collected in a single attribute-style namespace.
args = AttrDict(
    cuda=False,
    bptt=5,
    bsz=20,
    winSize=15,
)

In [9]:
# Reuse the configured batch size instead of repeating the literal, so the two cannot drift apart.
bsz = args.bsz

In [12]:
def batchify(data, bsz, cuda=None):
    """Arrange a token tensor into ``bsz`` parallel columns.

    The leading ``(data.size(0) // bsz) * bsz`` elements are kept, split into
    ``bsz`` consecutive chunks, and each chunk becomes one column of the result.
    For a 1-D input the result has shape ``(data.size(0) // bsz, bsz)``.

    Args:
        data: tensor to split along its first dimension
            (assumed to be a 1-D sequence of token ids -- TODO confirm).
        bsz: number of columns (batch size); must be positive.
        cuda: move the result to the GPU when true. When omitted (``None``),
            the notebook-level ``args.cuda`` flag decides.

    Returns:
        A contiguous tensor with ``bsz`` columns.

    Raises:
        ValueError: if ``bsz`` is not positive or ``data`` holds fewer than
            ``bsz`` elements along its first dimension.
    """
    if bsz <= 0:
        raise ValueError('bsz must be a positive integer, got {!r}'.format(bsz))
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    if nbatch == 0:
        # Without this guard the reshape below fails with an obscure error.
        raise ValueError('data has {} elements along dim 0, fewer than bsz={}'.format(data.size(0), bsz))
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if cuda is None:
        cuda = args.cuda
    if cuda:
        data = data.cuda()
    return data

In [14]:
# Use the configured batch size so train_data stays consistent with init_hidden(args.bsz).
train_data = batchify(corpus.train, args.bsz)
train_data.size()

torch.Size([54496, 20])

In [15]:
# Vocabulary size; passed to RNNModel as `ntoken` and used to flatten the logits for the loss.
ntokens = len(corpus.dictionary)
def get_batch(source, i, evaluation=False, bptt=None):
    """Cut one (input, target) window out of a batchified tensor.

    Args:
        source: tensor indexed along its first dimension, e.g. the output of
            ``batchify``.
        i: index of the first row of the window.
        evaluation: forwarded as ``volatile`` to the input ``Variable``.
        bptt: maximum number of rows in the window. When omitted (``None``),
            the notebook-level ``args.bptt`` is used.

    Returns:
        ``(data, target)``: ``data`` wraps ``source[i:i+seq_len]`` and
        ``target`` wraps the same window shifted forward by one row,
        flattened to 1-D.
    """
    if bptt is None:
        bptt = args.bptt
    # Shorten the window near the end so the one-row-shifted target still fits.
    seq_len = min(bptt, len(source) - 1 - i)
    data = Variable(source[i:i+seq_len], volatile=evaluation)
    target = Variable(source[i+1:i+1+seq_len].view(-1))
    return data, target
# Take the window starting at row 0 of the training data as a sample batch for the cells below.
batch,target = get_batch(train_data,0)

In [17]:
# batch has shape (sequence length, batch size)
batch.size()

torch.Size([5, 20])

# Model

In [20]:
import torch.nn as nn
from torch.autograd import Variable

class RNNModel(nn.Module):
    """Language model built from an embedding, a recurrent stack and a linear projection.

    Token ids are embedded (``encoder``), run through an LSTM/GRU/plain RNN
    (``rnn``) and projected onto the vocabulary (``decoder``). Dropout is
    applied to the embeddings and to the recurrent output.
    """

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        """Create the sub-modules.

        Args:
            rnn_type: one of 'LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU'.
            ntoken: vocabulary size (embedding rows and decoder outputs).
            ninp: embedding size.
            nhid: hidden size of the recurrent module.
            nlayers: number of recurrent layers.
            dropout: dropout probability, also passed to the recurrent module.
            tie_weights: share the decoder weight with the embedding weight.

        Raises:
            ValueError: on an unknown ``rnn_type``, or when ``tie_weights`` is
                set and ``nhid != ninp``.
        """
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)

        gated_types = ('LSTM', 'GRU')
        if rnn_type in gated_types:
            # nn.LSTM / nn.GRU are looked up by name.
            recurrent_cls = getattr(nn, rnn_type)
            self.rnn = recurrent_cls(ninp, nhid, nlayers, dropout=dropout)
        else:
            activations = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}
            try:
                activation = activations[rnn_type]
            except KeyError:
                raise ValueError( """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=activation, dropout=dropout)

        self.decoder = nn.Linear(nhid, ntoken)

        # Weight tying, following
        #   Press & Wolf 2016, "Using the Output Embedding to Improve Language Models"
        #   https://arxiv.org/abs/1608.05859
        #   Inan et al. 2016, "Tying Word Vectors and Word Classifiers:
        #   A Loss Framework for Language Modeling"
        #   https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        # Plain bookkeeping attributes read by init_hidden.
        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

        self.init_weights()

    def init_weights(self):
        """Draw encoder/decoder weights from U(-0.1, 0.1) and zero the decoder bias."""
        bound = 0.1
        # Encoder is drawn before the decoder weight; the order fixes RNG consumption.
        self.encoder.weight.data.uniform_(-bound, bound)
        self.decoder.weight.data.uniform_(-bound, bound)
        self.decoder.bias.data.fill_(0)

    def forward(self, input, hidden):
        """Run one step of the model.

        Args:
            input: token ids fed to the embedding.
            hidden: recurrent state, e.g. from ``init_hidden``.

        Returns:
            ``(decoded, hidden)`` where ``decoded`` keeps the first two
            dimensions of the recurrent output and has the decoder's output
            size as its last dimension.
        """
        embedded = self.encoder(input)
        embedded = self.drop(embedded)
        rnn_out, hidden = self.rnn(embedded, hidden)
        rnn_out = self.drop(rnn_out)
        # The linear layer is applied to a 2-D view, then the result is reshaped back.
        flat = rnn_out.view(rnn_out.size(0) * rnn_out.size(1), rnn_out.size(2))
        logits = self.decoder(flat)
        logits = logits.view(rnn_out.size(0), rnn_out.size(1), logits.size(1))
        return logits, hidden

    def init_hidden(self, bsz):
        """Return a zero initial state of shape (nlayers, bsz, nhid).

        For 'LSTM' a tuple of two such tensors is returned; otherwise a single one.
        """
        # Allocate from an existing parameter so type/device match the model.
        reference = next(self.parameters()).data

        def zero_state():
            return Variable(reference.new(self.nlayers, bsz, self.nhid).zero_())

        if self.rnn_type == 'LSTM':
            return (zero_state(), zero_state())
        return zero_state()


In [41]:
# Model hyper-parameters; nhid equals emsize, which the tied-weights option requires.
args.emsize = 32
args.nhid = 32
args.nlayers = 1
args.dropout = 0.2
args.tied = True

In [42]:
# Single-layer LSTM language model configured from args.
model = RNNModel(
    'LSTM',
    ntokens,
    args.emsize,
    args.nhid,
    args.nlayers,
    dropout=args.dropout,
    tie_weights=args.tied,
)

In [46]:
# For an LSTM, init_hidden returns a tuple containing two tensors.
# Each tensor has shape (nlayers, bsz, nhid).
hidden = model.init_hidden(args.bsz)
hidden[0].size()

torch.Size([1, 20, 32])

In [49]:
# Call the module itself rather than .forward(), so nn.Module.__call__ runs any registered hooks.
forward_output,hidden_output = model(batch,hidden)

In [50]:
# forward_output has shape (sequence length, batch size, ntokens)
forward_output.size()

torch.Size([5, 20, 10002])

In [51]:
# Cross-entropy loss; applied in the next cell to the flattened model output and the flattened target.
criterion = nn.CrossEntropyLoss()

In [54]:
# Flatten the logits to (seq_len * bsz, ntokens) so they line up with the 1-D target from get_batch.
criterion(forward_output.view(-1,ntokens),target)

Variable containing:
 9.2140
[torch.FloatTensor of size 1]

In [63]:
# Walk through forward() by hand: embed the token ids. The dropout that forward() applies is skipped here.
emb = model.encoder(batch)

In [67]:
# NOTE: this rebinds `hidden` to the state returned by the recurrent module, replacing the
# value created earlier with init_hidden; no initial state is passed to model.rnn here.
output,hidden = model.rnn(emb)

In [69]:
# Collapse the first two dimensions of the recurrent output and project onto the vocabulary,
# as forward() does before reshaping the result back.
model.decoder(output.view(output.size(0)*output.size(1), output.size(2)))

Variable containing:
 1.6637e-02  1.4585e-02 -6.1130e-03  ...  -8.6721e-03  3.0399e-03  1.0336e-02
 1.1854e-02  1.5390e-02  5.6073e-03  ...  -1.4014e-02  7.0404e-03  8.6890e-03
 1.5075e-02  1.5155e-02 -4.9548e-03  ...  -6.7117e-03  3.1056e-04  1.1977e-02
                ...                   ⋱                   ...                
 2.6354e-02  3.8109e-02  1.8322e-03  ...  -1.7862e-02  8.3338e-04  1.3056e-02
 3.1757e-02  3.5251e-02 -1.1830e-03  ...  -1.6032e-02  6.7099e-03  1.0645e-02
 2.4437e-02  4.0196e-02  1.4637e-03  ...  -2.1643e-02  9.2480e-03  9.1781e-03
[torch.FloatTensor of size 100x10002]