In [1]:
'''CS231n assignment 3: training an LSTM image-captioning model with minpy.'''
import minpy.numpy as np
from minpy.nn import layers
from minpy.nn.model import ModelBase
from minpy.nn.solver import Solver
from minpy.nn.io import NDArrayIter
from cs231n.rnn_layers_minpy import word_embedding_forward
from cs231n.coco_utils import load_coco_data, sample_coco_minibatch, decode_captions

class LSTMNet(ModelBase):
    """LSTM captioning network built on minpy's ``ModelBase``.

    The image feature vector is projected to the initial hidden state, the
    input caption tokens are embedded and fed through an LSTM one step at a
    time, and every hidden state is mapped to a score over the vocabulary.
    """

    def __init__(self,
                 word_to_idx,
                 batch_size=128,
                 input_size=128,  # input dimension
                 hidden_size=512,
                 wordvec_dim=256,
                 caption_size=16):
        """Register the model parameters.

        Args:
            word_to_idx: mapping from token to index. Must contain
                ``'<NULL>'``; ``'<START>'`` and ``'<END>'`` are optional.
                Its length is used as the vocabulary size.
            batch_size: unused by the model; kept so existing callers that
                pass it keep working.
            input_size: number of feature columns (rows of ``W_proj``).
            hidden_size: width of the LSTM hidden and cell state.
            wordvec_dim: width of a word embedding.
            caption_size: number of leading columns of the packed input
                that hold caption tokens (see ``forward``).
        """
        super(LSTMNet, self).__init__()
        vocab_size = len(word_to_idx)
        self._null = word_to_idx['<NULL>']
        self._start = word_to_idx.get('<START>', None)
        self._end = word_to_idx.get('<END>', None)
        self.caption_size = caption_size
        self.input_size = input_size
        # The four LSTM gates share one weight matrix, hence 4 * hidden_size.
        self.add_param(name='Wx', shape=(wordvec_dim, 4 * hidden_size))\
            .add_param(name='Wh', shape=(hidden_size, 4 * hidden_size))\
            .add_param(name='b', shape=(4 * hidden_size,))\
            .add_param(name='W_vocab', shape=(hidden_size, vocab_size))\
            .add_param(name='b_vocab', shape=(vocab_size,))\
            .add_param(name='W_proj', shape=(input_size, hidden_size))\
            .add_param(name='b_proj', shape=(hidden_size,))\
            .add_param(name='W_embed', shape=(vocab_size, wordvec_dim,))

    def forward(self, data, mode):
        """Compute vocabulary scores for every caption position.

        Args:
            data: 2-D array whose first ``caption_size`` columns are the
                input caption tokens and whose remaining columns are the
                image features.
            mode: not used by this model.

        Returns:
            Scores of shape (N, T, V): batch size, number of caption
            positions, vocabulary size. This is the layout ``loss`` expects.
        """
        # Undo the column-wise packing of captions and features.
        captions_in = data[:, :self.caption_size]
        features = data[:, self.caption_size:]

        # The projected image features act as the initial hidden state.
        h = np.dot(features, self.params['W_proj']) + self.params['b_proj']

        # NOTE(review): captions arrive with the dtype of the packed array;
        # confirm word_embedding_forward accepts them as embedding indices.
        X = word_embedding_forward(captions_in, self.params['W_embed'])
        batch_size, seq_len = X.shape[0], X.shape[1]
        hidden_size = self.params['Wh'].shape[0]
        vocab_size = self.params['W_vocab'].shape[1]

        c = np.zeros((batch_size, hidden_size))
        step_scores = []
        for t in range(seq_len):
            h, c = layers.lstm_step(X[:, t, :], h, c,
                                    self.params['Wx'],
                                    self.params['Wh'],
                                    self.params['b'])
            # Score each timestep, not only the last one: the loss compares
            # the prediction at every position with the next caption token.
            score_t = layers.affine(h,
                                    self.params['W_vocab'],
                                    self.params['b_vocab'])
            step_scores.append(
                np.reshape(score_t, (batch_size, 1, vocab_size)))

        # NOTE(review): assumes minpy can differentiate through
        # np.concatenate -- confirm against the installed minpy version.
        return np.concatenate(step_scores, axis=1)

    def loss(self, predict, captions_out):
        """Masked softmax cross-entropy over all caption positions.

        Args:
            predict: scores of shape (N, T, V) as returned by ``forward``.
            captions_out: target token indices of shape (N, T).

        Returns:
            The summed negative log-likelihood of the target tokens at
            positions whose target is not ``<NULL>``, divided by N.
        """
        # Work on `predict` directly. Converting it with asnumpy() would take
        # the computation off the autograd tape and no gradient would reach
        # the parameters.
        N, T, V = predict.shape
        x_flat = np.reshape(predict, (N * T, V))
        y_flat = np.reshape(captions_out, (N * T,))
        mask_flat = np.reshape(captions_out != self._null, (N * T,))

        # Subtract the row maximum before exponentiating for stability.
        shifted = x_flat - np.max(x_flat, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        # Out-of-place division: in-place updates are not autograd friendly.
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # NOTE(review): indexing with y_flat needs integer targets and
        # gradient support for fancy indexing in minpy -- confirm both.
        target_probs = probs[np.arange(N * T), y_flat]
        return -np.sum(mask_flat * np.log(target_probs)) / N


IndentationError: unexpected indent (rnn_layers_minpy.py, line 194)

In [None]:
# ---- Configuration -------------------------------------------------------
num_train = 10000
num_val = 2000
batch_size = 128
hidden_size = 512
wordvec_dim = 256

data = load_coco_data(max_train=num_train + num_val)

# ---- Training set --------------------------------------------------------
minibatch = sample_coco_minibatch(data, batch_size=num_train, split='train')
captions_train, features_train, urls_train = minibatch
# Inputs drop the last token and targets drop the first, so position t of
# the input is paired with token t + 1 of the caption.
captions_in = captions_train[:, :-1]
y_train = captions_train[:, 1:]
# Captions and features are packed side by side into one 2-D array;
# LSTMNet.forward splits them again at `caption_size`.
x_train = np.hstack((captions_in, features_train))
print('%s %s %s' % (x_train.shape, captions_in.shape, features_train.shape))

model = LSTMNet(word_to_idx=data['word_to_idx'],
                batch_size=batch_size,
                input_size=features_train.shape[1],  # input dimension
                hidden_size=hidden_size,
                wordvec_dim=wordvec_dim,
                caption_size=captions_in.shape[1])

train_dataiter = NDArrayIter(x_train,
                             y_train,
                             batch_size=batch_size,
                             shuffle=True)

# ---- Validation set ------------------------------------------------------
minibatch = sample_coco_minibatch(data, batch_size=num_val, split='val')
captions_test, features_test, urls_test = minibatch
captions_tin = captions_test[:, :-1]
y_test = captions_test[:, 1:]
# Pack the *input* captions (captions_tin), exactly as for training. Packing
# the full captions would add one column and shift the caption/feature split
# that LSTMNet.forward makes at `caption_size`.
x_test = np.hstack((captions_tin, features_test))
test_dataiter = NDArrayIter(x_test,
                            y_test,
                            batch_size=batch_size,
                            shuffle=False)

# ---- Training ------------------------------------------------------------
solver = Solver(model,
                train_dataiter,
                test_dataiter,
                num_epochs=10,
                init_rule='xavier',
                update_rule='adam',
                verbose=True,
                print_every=40)
solver.init()
solver.train()






# Draw the loss values the solver recorded during training.
# NOTE(review): `plt` is not imported in the visible cells -- confirm that
# matplotlib.pyplot is imported as `plt` before this runs.
loss_curve = solver.loss_history
plt.plot(loss_curve)
plt.title('Training loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()