In [1]:
import pickle

import numpy as np
import pandas as pd

with open('/home/rui/blacksid/Rui/normalized_data.plkz', 'rb') as f:
    X_train = pickle.load(f, encoding='latin1')
    y_train = pickle.load(f, encoding='latin1')
    X_test = pickle.load(f, encoding='latin1')
    y_test = pickle.load(f, encoding='latin1')

In [2]:
import theano
import theano.tensor as T
import lasagne

Using gpu device 0: GeForce GTX 750 Ti (CNMeM is disabled, cuDNN 5103)


In [3]:
#Lasagne Seed for Reproducibility
lasagne.random.set_rng(np.random.RandomState(1))

# Sequence Length
SEQ_LENGTH = 100

# Number of units in the two hidden (LSTM) layers
N_HIDDEN = 512

# Optimization learning rate
LEARNING_RATE = .01

# All gradients above this will be clipped
GRAD_CLIP = 100

# How often should we check the output?
PRINT_FREQ = 300

# Number of epochs to train the net
NUM_EPOCHS = 50

# Batch Size
BATCH_SIZE = 100

FEATURE_SIZE = 5
PREDICT_SIZE = 4

In [4]:
y_train[0]

array([ 1.14433157, -0.21922381,  0.8256126 , -0.82660353], dtype=float32)

In [5]:
X_train[0]

array([ 2.16789198,  0.96389335,  0.39143583,  0.82193977,  0.87597954], dtype=float32)

In [6]:
def gen_data(p, batch_size = BATCH_SIZE, data=X_train, return_target=True):
    x = np.zeros((batch_size,SEQ_LENGTH,FEATURE_SIZE), dtype=np.float32)
    y = np.zeros((batch_size, PREDICT_SIZE), dtype=np.float32)

    for n in range(batch_size):
        ptr = n
        x[n,:,:] = data[(p+n):(p+n+SEQ_LENGTH),:]
        if(return_target):
            y[n,:] = y_train[p+n+SEQ_LENGTH-1,:]
    return x, y

In [7]:
batch_data = gen_data(0)

In [8]:
batch_data[1].shape

(100, 4)

In [23]:
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")
   
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, SEQ_LENGTH, num_features)

    l_in = lasagne.layers.InputLayer(shape=(None, None, FEATURE_SIZE))

    
    # We now build the LSTM layer which takes l_in as the input layer
    # We clip the gradients at GRAD_CLIP to prevent the problem of exploding gradients. 

    l_forward_1 = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)

    l_forward_2 = lasagne.layers.LSTMLayer(
        l_forward_1, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True)

    # The output of l_forward_2 of shape (batch_size, N_HIDDEN) is then passed through the softmax nonlinearity to 
    # create probability distribution of the prediction
    # The output of this stage is (batch_size, vocab_size)
    l_out = lasagne.layers.DenseLayer(l_forward_2, num_units=PREDICT_SIZE, W = lasagne.init.Normal(), nonlinearity=None)

    # Theano tensor for the targets
    target_values = T.matrix('target_output')
    
    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)

    # The loss function is calculated as the mean of the (categorical) cross-entropy between the prediction and target.
    cost = lasagne.objectives.squared_error(network_output,target_values).mean()

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out,trainable=True)

    # Compute AdaGrad updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adadelta(cost, all_params, LEARNING_RATE)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values], cost, updates=updates, allow_input_downcast=True)
    compute_cost = theano.function([l_in.input_var, target_values], cost, allow_input_downcast=True)

    # In order to generate text from the network, we need the probability distribution of the next character given
    # the state of the network and the input (a seed).
    # In order to produce the probability distribution of the prediction, we compile a function called probs. 
    
    pred = theano.function([l_in.input_var],network_output,allow_input_downcast=True)

    # The next function generates text given a phrase of length at least SEQ_LENGTH.
    # The phrase is set using the variable generation_phrase.
    # The optional input "N" is used to set the number of characters of text to predict. 

    
    print("Training ...")
    p = 0
    try:
        for it in range(X_train.shape[0] * num_epochs // BATCH_SIZE-1):
            
            avg_cost = 0;
            for _ in range(PRINT_FREQ):
                x,y = gen_data(p)
                #print(p)
                p += BATCH_SIZE 
                if(p+BATCH_SIZE+SEQ_LENGTH >= X_train.shape[0]):
                    print('Carriage Return')
                    p = 0;
                avg_cost += train(x, y)
            print("Epoch {} average loss = {}".format(it*1.0*PRINT_FREQ/X_train.shape[0]*BATCH_SIZE, avg_cost / PRINT_FREQ))
            test_loss = 0
            test_p = 0
            for index in range(X_test.shape[0]//BATCH_SIZE-1):
                x,_ = gen_data(test_p, data=X_test, return_target=False)
                test_p += BATCH_SIZE
                test_loss += compute_cost(x,y)
            print("Epoch {} test loss = {}".format(it*1.0*PRINT_FREQ/X_train.shape[0]*BATCH_SIZE, test_loss / (X_test.shape[0]//BATCH_SIZE-1)))
    except KeyboardInterrupt:
        pass


In [None]:
main()

Building network ...
Computing updates ...
Compiling functions ...
Training ...
Carriage Return
Epoch 0.0 average loss = 0.9928983535120884
Epoch 0.0 test loss = 0.6482882614087577
Carriage Return
Epoch 1.0 average loss = 0.9875595457106828
Epoch 1.0 test loss = 1.2340823989925962
Carriage Return
Epoch 2.0 average loss = 0.980946093176802
Epoch 2.0 test loss = 1.2831533749898274
Carriage Return
Epoch 3.0 average loss = 0.9706784320871035
Epoch 3.0 test loss = 0.6773978050308999
Carriage Return
Epoch 4.0 average loss = 0.9638860825200876
Epoch 4.0 test loss = 0.6879052122433981
Carriage Return
Epoch 5.0 average loss = 0.9784950837741295
Epoch 5.0 test loss = 6.583910908361878
Carriage Return
Epoch 6.0 average loss = 0.9534197646131118
Epoch 6.0 test loss = 1.5420988689769397
Carriage Return
Epoch 7.000000000000001 average loss = 0.9462688105801741
Epoch 7.000000000000001 test loss = 1.8764306427252413
Carriage Return
Epoch 8.0 average loss = 0.9377489638825257
Epoch 8.0 test loss = 1.92