In [1]:
__author__ = "Yicheng Li"
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import statsmodels.api as sm
import random
from sklearn import preprocessing
import tensorflow as tf

  from pandas.core import datetools


In [2]:
df = pd.read_pickle('df_daily_interpolated.pickle')
df = df.dropna()

In [20]:
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

In [23]:
# function to create train, validation, test data given sequence length
def load_data(df, seq_len):
    labels = df['BTC_weighted_mean'].as_matrix()
    feature_set = [4]
    
    data_raw = df.as_matrix() # convert to numpy array
#     data_raw = preprocessing.minmax_scale(data_raw[:, feature_set]) # normalizing features
    data_raw = scaler.fit_transform(data_raw[:, feature_set])
    data = []
    
    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - seq_len + 1): 
        data.append(data_raw[index: index + seq_len, :])
    
    data = np.array(data)
    
    valid_set_size_percentage = 20 
    test_set_size_percentage = 10 
    
    valid_set_size = int(np.round(valid_set_size_percentage/100*data.shape[0]));  
    test_set_size = int(np.round(test_set_size_percentage/100*data.shape[0]));
    train_set_size = data.shape[0] - (valid_set_size + test_set_size);
    
    x_train = data[:train_set_size,:-1,:] # cannot see last day, which we aim to predict
    y_train = labels[seq_len-1:train_set_size+seq_len-1]
    
    x_valid = data[train_set_size:train_set_size+valid_set_size,:-1,:]
    y_valid = labels[seq_len-1+train_set_size:train_set_size+valid_set_size+seq_len-1]
    
    x_test = data[train_set_size+valid_set_size:,:-1,:]
    y_test = labels[seq_len-1+train_set_size+valid_set_size:]
    
    return [x_train, y_train, x_valid, y_valid, x_test, y_test]

In [24]:
# create train, test data
seq_len = 10 # choose sequence length
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data(df, seq_len)
y_train = y_train.reshape([-1,1])
y_valid = y_valid.reshape([-1,1])
y_test = y_test.reshape([-1,1])
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)

x_train.shape =  (410, 9, 1)
y_train.shape =  (410, 1)
x_valid.shape =  (117, 9, 1)
y_valid.shape =  (117, 1)
x_test.shape =  (58, 9, 1)
y_test.shape =  (58, 1)


In [86]:
tmp = scaler.transform(y_train)
print(tmp[:3])
print(np.isnan(tmp).sum())
tmp2 = scaler.inverse_transform(tmp)
tmp2[:3]

[[1.27170531e-04]
 [6.40153354e-05]
 [1.48272860e-04]]
0


array([[573.84267864],
       [572.64966359],
       [574.24130613]])

In [45]:
tmp.max()

1.0

In [6]:
# shuffle training data
s = np.arange(x_train.shape[0])
np.random.shuffle(s)
x_train = x_train[s]
y_train = y_train[s]

In [7]:
def SMAPE(y, y_pred):
    if len(y) != len(y_pred):
        raise ValueError('Length of prediction array is not equal to length of y array.')
    return np.mean(np.abs(y-y_pred)*2/(np.abs(y)+np.abs(y_pred)))

## LSTM model graph

In [120]:
index_in_epoch = 0;
perm_array  = np.arange(x_train.shape[0])
np.random.shuffle(perm_array)

# function to get the next batch
def get_next_batch(batch_size):
    global index_in_epoch, x_train, perm_array   
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > x_train.shape[0]:
        np.random.shuffle(perm_array) # shuffle permutation array
        start = 0 # start next epoch
        index_in_epoch = batch_size
        
    end = index_in_epoch
    return x_train[perm_array[start:end]], scaler.transform(y_train[perm_array[start:end]])

# parameters
n_steps = seq_len-1 
n_inputs = x_train.shape[-1]
n_neurons = 200  # cell.state_size
n_outputs = 1
n_layers = 1
learning_rate = 1e-5
batch_size = 50
n_epochs = 200 
train_set_size = x_train.shape[0]
test_set_size = x_test.shape[0]
keep_prob = 1
max_gradient_norm = 10

tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_outputs])

# use LSTM Cell
layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons, \
                                  initializer=tf.contrib.layers.xavier_initializer(), \
                                  activation=tf.nn.elu)
         for layer in range(n_layers)]

# use Basic LSTM Cell 
# layers = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons, activation=tf.nn.elu)
#           for layer in range(n_layers)]

# use LSTM Cell with peephole connections
#layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons, 
#                                  activation=tf.nn.leaky_relu, use_peepholes = True)
#          for layer in range(n_layers)]

# use GRU cell
#layers = [tf.contrib.rnn.GRUCell(num_units=n_neurons, activation=tf.nn.leaky_relu)
#          for layer in range(n_layers)]

multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)

outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
outputs = tf.nn.dropout(outputs, keep_prob)

# 'outputs' is a tensor of shape [batch_size, n_steps, n_neurons(cell.state_size)]
stacked_outputs = tf.reshape(outputs, [-1, n_neurons]) 
stacked_outputs = tf.layers.dense(stacked_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence

loss = tf.reduce_mean(tf.square(outputs - y)) # loss function = mean squared error 
# loss = tf.reduce_mean(tf.square(outputs - y)) # loss function = mean squared error 
#loss = tf.reduce_mean(tf.abs(y-outputs)*2/(tf.abs(y)+tf.abs(outputs))) # SMAPE
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
gradient_norm = tf.global_norm(gradients)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
clipped_norm = tf.global_norm(clipped_gradients)
param_norm = tf.global_norm(params)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 
# training_op = optimizer.minimize(loss)
training_op = optimizer.apply_gradients(zip(clipped_gradients, params))

# initialize parameters
sess = tf.Session()
global_step = tf.Variable(0, name='global_step', trainable=False)
saver = tf.train.Saver(max_to_keep=2)

In [116]:
experiment_name = 'LSTM_1_feature_200' # type your experiment name here before running the code below

In [None]:
# run graph
ckpt = tf.train.get_checkpoint_state(experiment_name)
v2_path = ckpt.model_checkpoint_path + ".index" if ckpt else ""
if ckpt and (tf.gfile.Exists(ckpt.model_checkpoint_path) or tf.gfile.Exists(v2_path)):
#     iteration = tf.get_variable('iteration',[1])
    saver.restore(sess, ckpt.model_checkpoint_path)
    iteration = global_step.eval(session=sess) # get last global_step
    print("Start from iteration:", iteration)
else:
    print('There is not saved parameters. Creating model with fresh parameters.')
#     iteration = tf.get_variable('iteration',[1], initializer = tf.zeros_initializer)
    sess.run(tf.global_variables_initializer())
    iteration = 0
#     sess.run(iteration)
    
while True:
    iteration = iteration + 1
    x_batch, y_batch = get_next_batch(batch_size) # fetch the next training batch 
     
    # output_feed contains the things we want to fetch.
    output_feed = [training_op, loss, param_norm]
    [_, loss_val, param_norm_val] \
        = sess.run(output_feed, feed_dict={X: x_batch, y: y_batch})
    
    if iteration % int(50*train_set_size/batch_size) == 0:
#             loss_train = loss.eval(feed_dict={X: x_train, y: y_train}) 
#             loss_valid = loss.eval(feed_dict={X: x_valid, y: y_valid}) 
        y_train_pred = scaler.inverse_transform(sess.run(outputs, feed_dict={X: x_train}))
        y_valid_pred = scaler.inverse_transform(sess.run(outputs, feed_dict={X: x_valid}))
        print('%.2f epochs: loss = %.7f, param_norm = %.3f, SMAPE train/valid = %.6f/%.6f' \
              %(iteration*batch_size/train_set_size, loss_val, param_norm_val, \
                SMAPE(y_train, y_train_pred), \
                SMAPE(y_valid, y_valid_pred)))
#             print('%.2f epochs: loss train/valid = %.6f/%.6f'%(
#                 iteration*batch_size/train_set_size, loss_train, loss_valid))
    if iteration % int(100*train_set_size/batch_size) == 0:
        global_step.assign(iteration).eval(session=sess) # set and update(eval) global_step with index, i
        save_path = saver.save(sess, "./"+experiment_name+"/model.ckpt", global_step=global_step)
        print('Saved parameters to %s' % save_path)

y_train_pred = sess.run(outputs, feed_dict={X: x_train})
y_valid_pred = sess.run(outputs, feed_dict={X: x_valid})
y_test_pred = sess.run(outputs, feed_dict={X: x_test})

There is not saved parameters. Creating model with fresh parameters.
50.00 epochs: loss = 0.0011946, param_norm = 18.033, SMAPE train/valid = 0.380471/0.521423
100.00 epochs: loss = 0.0001283, param_norm = 18.099, SMAPE train/valid = 0.141018/0.137317
Saved parameters to ./LSTM_1_feature_200/model.ckpt-820
150.00 epochs: loss = 0.0000809, param_norm = 18.117, SMAPE train/valid = 0.057489/0.115894
200.00 epochs: loss = 0.0000674, param_norm = 18.112, SMAPE train/valid = 0.056074/0.114925
Saved parameters to ./LSTM_1_feature_200/model.ckpt-1640
250.00 epochs: loss = 0.0000613, param_norm = 18.106, SMAPE train/valid = 0.055889/0.113671
300.00 epochs: loss = 0.0000843, param_norm = 18.099, SMAPE train/valid = 0.055809/0.112461
Saved parameters to ./LSTM_1_feature_200/model.ckpt-2460
350.00 epochs: loss = 0.0000710, param_norm = 18.093, SMAPE train/valid = 0.055484/0.111175
400.00 epochs: loss = 0.0001224, param_norm = 18.087, SMAPE train/valid = 0.054436/0.109971
Saved parameters to ./LSTM