In [None]:
# ---- Data selection ----
# The ticker has to be given in full (source, symbol, year and quarter).
ticker = 'seekingalpha-XOXO-2016-Q1'
# Tags of the saved feature arrays to load; each tag names one saved file.
featureTags = ['MFCC', 'Pitches']

# ---- Training schedule ----
batchSize, epochs = 50, 20
learningRate = 1e-3

# ---- Model shape ----
maxNumIntervals = 250    # Time-step length declared for the model input.
recurrentType = 'LSTM'   # 'LSTM' or 'GRU'.
recurrentSize = 128
denseSize = 128

# ---- Task ----
regression = False       # False: targets are bucketed into classBins classes.
classBins = 3

# Run name used for log, checkpoint and TensorBoard paths. It appears to encode
# the settings above but is not derived from them, so update it by hand.
logName = 'XOXO-B50-E20-LR1E3-LSTM-R128-D128-Class-3-MFCC-Pitches'

In [None]:
# Optional cleanup of artifacts left by earlier runs. These commands are
# destructive (they delete everything under checkpoints/ and tensorboard/ and
# every *.log file in the working directory), so they stay commented out;
# uncomment only when a clean slate is really wanted.
# !rm -rf checkpoints/*
# !rm -rf tensorboard/*
# !rm *.log

In [None]:
from keras.layers.recurrent import LSTM, GRU
from keras.layers import Input, Activation, Flatten, Dense, Masking
from keras.callbacks import CSVLogger, ModelCheckpoint, TensorBoard
from keras import regularizers, optimizers
from keras.models import Model, load_model
import numpy as np

In [None]:
# Show the names of the GPU devices TensorFlow reports (empty list if none).
from tensorflow.python.client import device_lib
localDeviceProtos = device_lib.list_local_devices()
gpuNames = []
for device in localDeviceProtos:
    if device.device_type == 'GPU':
        gpuNames.append(device.name)
gpuNames

In [None]:
# Load one saved array per feature tag and join them along axis 2.
featureArrays = []
for tag in featureTags:
    featureArrays.append(np.load('features/' + tag + '-' + ticker + '.npy'))
features = np.concatenate(featureArrays, axis = 2)

# Interval counts are the divisor below, so replace zeros with ones first.
intervals = np.load('features/intervals-' + ticker + '.npy')
zeroIntervalMask = (intervals == 0)
intervals[zeroIntervalMask] = 1

# Target per example: sum of the raw targets over axis 1, divided by its interval count.
targetsInt = np.load('targets/' + ticker + '.npy')
targets = targetsInt.sum(axis = 1) / intervals

# Quick look at the resulting shapes.
features.shape, targets.shape

In [None]:
# For classification, replace the continuous targets by class labels 0..classBins-1
# such that every class holds (nearly) the same number of examples.
if not regression:
    # Sort examples by target value, keeping the features aligned.
    sortIndices = np.argsort(targets)
    features, targets = features[sortIndices], targets[sortIndices] # Still floats.
    binSize = int(float(len(targets)) / classBins)

    # Bin k starts at k * binSize. The last bin is open-ended so it also takes
    # the remainder when the example count is not a multiple of classBins.
    binStarts = [label * binSize for label in range(classBins)]
    binStops = binStarts[1:] + [None]
    for label, (start, stop) in enumerate(zip(binStarts, binStops)):
        targets[start:stop] = label

In [None]:
# Shuffle the examples with a fixed seed so the split is repeatable.
np.random.seed(224)
indices = list(range(len(targets)))
np.random.shuffle(indices)
shuffledFeatures, shuffledTargets = features[indices], targets[indices]

# 70% train, 20% validation, whatever is left over is the test set.
total = float(len(targets))
trainBoundary = int(0.7 * total)
valBoundary = trainBoundary + int(0.2 * total)

trainPart = slice(None, trainBoundary)
valPart = slice(trainBoundary, valBoundary)
testPart = slice(valBoundary, None)

trainFeatures, trainTargets = shuffledFeatures[trainPart], shuffledTargets[trainPart]
valFeatures, valTargets = shuffledFeatures[valPart], shuffledTargets[valPart]
testFeatures, testTargets = shuffledFeatures[testPart], shuffledTargets[testPart]

In [None]:
# Resolve the configured recurrent layer class by name.
recurrentLayers = {'GRU': GRU, 'LSTM': LSTM}
if recurrentType not in recurrentLayers:
    # Fail here with a clear message. Otherwise an unknown name would leave
    # `Recurrent` undefined (NameError when the model is built) or, in a kernel
    # that already ran this cell, silently reuse the previously selected layer.
    raise ValueError('Unsupported recurrentType %r; expected one of %s'
                     % (recurrentType, sorted(recurrentLayers)))
Recurrent = recurrentLayers[recurrentType]

# Width of the final layer: a single unit for regression, one per class otherwise.
numBins = 1 if regression else classBins

In [None]:
# Feature width comes from the data (axis 2); the batch axis is implicit in Input.
featureDim = trainFeatures.shape[2]
exInput = Input(shape = (maxNumIntervals, featureDim))

# Masking (skips zeroed positions) -> recurrent layer -> dense + ReLU -> output projection.
hidden = exInput
for layer in (Masking(),
              Recurrent(recurrentSize),
              Dense(denseSize),
              Activation('relu'),
              Dense(numBins)):
    hidden = layer(hidden)

# Regression uses the raw projection; classification adds a softmax on top.
output = hidden if regression else Activation('softmax')(hidden)

In [None]:
# Wrap the graph in a Model and compile it with Adam at the configured learning rate.
model = Model(inputs = exInput, outputs = output)
adam = optimizers.Adam(lr = learningRate)

# Loss (and metrics) depend on the task type.
if regression:
    compileArgs = {'loss': 'mean_squared_error'}
else:
    compileArgs = {'loss': 'sparse_categorical_crossentropy', 'metrics': ['accuracy']}
model.compile(optimizer = adam, **compileArgs)
model.summary()

In [None]:
import os
import h5py # Not used directly; presumably imported so missing HDF5 support fails before training starts.

# All artifacts of this run share one name derived from logName.
fullName = 'IntraFile-' + logName
checkpointPath = 'checkpoints/weights' + fullName + '.hdf5'
tensorboardDir = 'tensorboard/logs' + fullName

# Make sure the checkpoint directory exists before training. Older Keras
# releases may not create it, in which case the first checkpoint save would
# fail only after a full epoch has already been trained.
checkpointDir = os.path.dirname(checkpointPath)
if checkpointDir and not os.path.isdir(checkpointDir):
    os.makedirs(checkpointDir)

# Callbacks: per-epoch CSV log, best-only weight checkpoint, TensorBoard events.
logger = CSVLogger(fullName + '.log')
checkpointer = ModelCheckpoint(filepath = checkpointPath, save_best_only = True, verbose = 1)
tensorboarder = TensorBoard(log_dir = tensorboardDir)
# model = load_model('checkpoints/weights.hdf5')
model.fit(trainFeatures, trainTargets,
          batch_size = batchSize, epochs = epochs,
          validation_data = (valFeatures, valTargets),
          callbacks = [logger, checkpointer, tensorboarder])

In [None]:
# Model output for the first 30 validation examples: raw values for
# regression, the index of the highest-scoring class for classification.
sampleOutputs = model.predict(valFeatures[:30])
if regression:
    predict = sampleOutputs
else:
    predict = np.argmax(sampleOutputs, axis = 1)
predict

In [None]:
# Targets of the first 30 validation examples, shown for comparison with the
# model's predictions on that same slice.
valTargets[:30]

In [None]:
# Number of the 30 sampled predictions that equal their target; this is only
# meaningful for classification, so regression yields None.
correct = None if regression else np.sum(valTargets[:30] == predict)
correct