# Development Training fit

Model research script. This is a testing ground for the production functions.

In [1]:
#Import libraries
from data_proc import *
from model import *
import datetime
import pandas as pd
import numpy as np
import time

In [2]:
#Import Keras module
import tensorflow as tf

In [3]:
#Establish Cloud SQL connection
import MySQLdb
conn = MySQLdb.connect(host="35.194.155.252", user="root", passwd="60761271", db="HSI_DATA")

In [4]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [5]:
#Check GPU
#from tensorflow.python.client import device_lib
#print(device_lib.list_local_devices())

In [6]:
#pretty plots
%matplotlib inline

## Fetch Data

In [7]:
#Define forward return step size
step = 1
upper_cutoff = 0.0005
lower_cutoff = -0.0005

In [8]:
#Define Training, Validation, test date
training_start = datetime.date(2007, 8, 31)
training_end = datetime.date(2015, 12, 30)
validation_start = datetime.date(2015, 12, 31)
validation_end = datetime.date(2016, 5, 31)
test_start = datetime.date(2017, 6, 1)
latest_close = datetime.date.today() + datetime.timedelta(days=-4)

In [9]:
latest_close

datetime.date(2017, 12, 15)

In [10]:
#Fetch training data
X = pd.concat([get_data(conn, training_start, latest_close, 'GLOBAL_INDICES', '*'), 
                      get_data(conn, training_start, latest_close, 'hsi_data', '*')], axis=1)

In [11]:
#Fetch forward returns
y = get_fwd_ret(conn, training_start, latest_close, step)
y = get_labels_from_fwd_ret(y, upper_cutoff=upper_cutoff, lower_cutoff=lower_cutoff)

In [12]:
#Close connection
conn.close()

## Preprocess the input and labels

In [13]:
#Split data into train, validation and test
_, X_train, X_val, X_test = train_val_test_split(X, training_start, validation_start, test_start, latest_close, 'RobustScaler')
_, y_train, y_val, y_test = train_val_test_split(y, training_start, validation_start, test_start, latest_close, None)

In [14]:
y_train, y_val, y_test = to_categorical(y_train, 3), to_categorical(y_val, 3), to_categorical(y_test, 3)

### Fit GRU Model

Fit GRU model based on previous research

In [None]:
#Model Parameters
#DROP_OUT = (0.26036828997037875, 0.10581996841419966)
DROP_OUT = 0.5
LEARN_RATE = 0.0005
GRU_SIZE = 100
DENSE_SIZE = 90
N_FEATURES = X.shape[1]
TIMESTEP = 11
BATCH_SIZE = 128
EPOCHS = 500
NUM_LAYERS = 5

In [None]:
# Save every N iterations
#save_every_n = 200

model = DirectionLSTM(n_class=3, batch_size=BATCH_SIZE, num_steps=TIMESTEP,
                gru_size=GRU_SIZE, dense_size=DENSE_SIZE, num_layers=NUM_LAYERS, 
                learning_rate=LEARN_RATE)

#saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    
    for e in range(EPOCHS):
        # Train network
        val_acc = []
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batch(X_train, y_train, BATCH_SIZE, TIMESTEP):
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: DROP_OUT,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            
            if counter % 50 == 0:
                print('Epoch: {}/{}   '.format(e+1, EPOCHS),
                      'Training Step: {}  '.format(counter),
                      'Training loss: {:.4f}  '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
                           
                val_state = sess.run(model.initial_state)
                
                for xx, yy in get_batch(X_val, y_val, BATCH_SIZE, TIMESTEP):
                    feed = {model.inputs:xx, 
                            model.targets:yy,
                            model.keep_prob: 1.0,
                            model.initial_state: val_state}
                    
                    batch_loss, batch_acc, val_state = sess.run([model.loss, model.acc, model.final_state], feed_dict=feed)
                    val_acc.append(batch_acc)
                    #print(batch_acc)
                    
                print("Val acc: {:.3f}".format(np.mean(val_acc)),
                      "Val Loss: {:.4f}".format(batch_loss))
            counter +=1
        
            #if (counter % save_every_n == 0):
                #saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    #saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

Epoch: 1/500    Training Step: 0   Training loss: 1.0974   0.6289 sec/batch
Val acc: 0.266 Val Loss: 1.1230
Epoch: 4/500    Training Step: 50   Training loss: 0.7081   0.0777 sec/batch
Val acc: 0.490 Val Loss: 0.7093
Epoch: 7/500    Training Step: 100   Training loss: 0.7098   0.0831 sec/batch
Val acc: 0.490 Val Loss: 0.7005
Epoch: 11/500    Training Step: 150   Training loss: 0.7677   0.0791 sec/batch
Val acc: 0.490 Val Loss: 0.7107
Epoch: 14/500    Training Step: 200   Training loss: 0.6992   0.0786 sec/batch
Val acc: 0.490 Val Loss: 0.6982
Epoch: 17/500    Training Step: 250   Training loss: 0.6954   0.0793 sec/batch
Val acc: 0.490 Val Loss: 0.6998
Epoch: 21/500    Training Step: 300   Training loss: 0.7285   0.0986 sec/batch
Val acc: 0.491 Val Loss: 0.6970
Epoch: 24/500    Training Step: 350   Training loss: 0.6987   0.0776 sec/batch
Val acc: 0.495 Val Loss: 0.6957
Epoch: 27/500    Training Step: 400   Training loss: 0.6940   0.0781 sec/batch
Val acc: 0.517 Val Loss: 0.6936
Epoch: 