## Adversarial LSTM

Implementing Adversarial LSTM, using a TensorFlow computational graph, as described in "Enhancing Stock Movement Prediction with Adversarial Training".

Imports follow...

In [1]:
! pip install tensorflow
! pip install scikit-learn



In [2]:
import argparse
import numpy as np
import os
import random

from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.initializers import glorot_uniform
from time import time
from datetime import datetime


# The Data



The authors of this paper used two datasets -- ACL18 & KDD17 -- as benchmarked in prior research conducted in this field. This notebook uses the KDD17 dataset, which is stored as:

> * kdd17
  *   tickers
   *   AMZ.csv (etc.)
  *   training_dates.csv

The loaded data is temporally split into training/validation/testing datasets, and each split into three further components: the present value (pv), a weekday indicator vector (wd), and the ground truth (gt).

Instances (timeframes) without enough history, which can be defined by **seq**, are dropped.



In [5]:
def load_data_temp(data_path, tra_date, val_date, tes_date,
                   seq=2, date_format='%Y-%m-%d'):
  file_names = [f for f in os.listdir(data_path)
            if os.path.isfile(os.path.join(data_path, f))]
  print(file_names)

  data_EOD = []

  # Obtain all file names under the data path
  for index, fname in enumerate(file_names):
    single_EOD = np.genfromtxt(
        os.path.join(data_path, fname), dtype=np.float64, delimiter=',',
        skip_header=False
    )
    data_EOD.append(single_EOD)
  fea_dim = data_EOD[0].shape[1] - 2

  trading_dates = np.genfromtxt(
    'kdd17/trading_dates.csv', dtype=str,
    delimiter=',', skip_header=False
  )

  # One-hot encode the weekdays of training dates
  dates_index = {}
  data_wd = np.zeros([len(trading_dates), 5], dtype=np.float64)
  wd_encodings = np.identity(5, dtype=np.float64)
  for index, date in enumerate(trading_dates):
      dates_index[date] = index
      data_wd[index] = wd_encodings[datetime.strptime(date, date_format).weekday()]

  # Separate into tra/val/tes instances
  tra_ind = dates_index[tra_date]
  val_ind = dates_index[val_date]
  tes_ind = dates_index[tes_date]

  # count training, validation, and testing instances
  tra_num = 0
  val_num = 0
  tes_num = 0

  # training
  for date_ind in range(tra_ind, val_ind):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
      if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8:
        if data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
          tra_num += 1
  #print(tra_num, ' training instances')

  # validation
  for date_ind in range(val_ind, tes_ind):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
      if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8:
        if data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
          val_num += 1
  #print(val_num, ' validation instances')

  # testing
  for date_ind in range(tes_ind, len(trading_dates)):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
      if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8:
        if data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
          tes_num += 1
  #print(tes_num, ' testing instances')


  # generate training, validation, and testing instances
  # training
  tra_pv = np.zeros([tra_num, seq, fea_dim], dtype=np.float64)
  tra_wd = np.zeros([tra_num, seq, 5], dtype=np.float64)
  tra_gt = np.zeros([tra_num, 1], dtype=np.float64)
  ins_ind = 0

  for date_ind in range(tra_ind, val_ind):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
        if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8 and \
                data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
          tra_pv[ins_ind] = data_EOD[tic_ind][date_ind - seq: date_ind, : -2]
          tra_wd[ins_ind] = data_wd[date_ind - seq: date_ind, :]
          tra_gt[ins_ind, 0] = (data_EOD[tic_ind][date_ind][-2] + 1) / 2
          ins_ind += 1

  # validation
  val_pv = np.zeros([val_num, seq, fea_dim], dtype=np.float64)
  val_wd = np.zeros([val_num, seq, 5], dtype=np.float64)
  val_gt = np.zeros([val_num, 1], dtype=np.float64)
  ins_ind = 0

  for date_ind in range(val_ind, tes_ind):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
      if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8 and \
                      data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
        val_pv[ins_ind] = data_EOD[tic_ind][date_ind - seq: date_ind, :-2]
        val_wd[ins_ind] = data_wd[date_ind - seq: date_ind, :]
        val_gt[ins_ind, 0] = (data_EOD[tic_ind][date_ind][-2] + 1) / 2
        ins_ind += 1

  # testing
  tes_pv = np.zeros([tes_num, seq, fea_dim], dtype=np.float64)
  tes_wd = np.zeros([tes_num, seq, 5], dtype=np.float64)
  tes_gt = np.zeros([tes_num, 1], dtype=np.float64)
  ins_ind = 0

  for date_ind in range(tes_ind, len(trading_dates)):
    # filter out instances without length enough history
    if date_ind < seq:
      continue
    for tic_ind in range(len(file_names)):
      if abs(data_EOD[tic_ind][date_ind][-2]) > 1e-8 and \
                        data_EOD[tic_ind][date_ind - seq: date_ind, :].min() > -123320:
        tes_pv[ins_ind] = data_EOD[tic_ind][date_ind - seq: date_ind, :-2]
        # # for the momentum indicator
        # tes_pv[ins_ind, -1, -1] = data_EOD[tic_ind][date_ind - 1, -1] - data_EOD[tic_ind][date_ind - 11, -1]
        tes_wd[ins_ind] = data_wd[date_ind - seq: date_ind, :]
        tes_gt[ins_ind, 0] = (data_EOD[tic_ind][date_ind][-2] + 1) / 2
        ins_ind += 1

  # pv: present_value, wd: weekday, gt: ground_truth
  return tra_pv, tra_wd, tra_gt, val_pv, val_wd, val_gt, tes_pv, tes_wd, tes_gt


## ALSTM class

In [3]:
data_path="kdd17/tickers"
tra_date='2014-01-02'
val_date='2015-08-03'
tes_date='2015-10-01'
seq=5
date_format='%Y-%m-%d'

In [4]:
DEVICE_NAME = '/gpu:0'
tf.random.set_seed(20241029)
print('device name:', DEVICE_NAME)

device name: /gpu:0


The following block contains the ALSTM class.

This AttentionLayer is a wrapper class for the attention layer in the AdvLSTM class.

AdvLSTM is the main class for the model.

In [6]:
class AttentionLayer(layers.Layer):

  """
  A custom Layer to process the attention mechanism described by the authors.
  As the Keras computational graph requires wrappers around tensors, due to its
  incompatability with eager execution, the corresponding layers have been
  moved into here.

  This layer computes a custom attention "score" for the LSTM output.
  """

  def __init__(self, units, **kwargs):
      super(AttentionLayer, self).__init__(**kwargs)
      # Set up the required variables for processing, formerly part of the
      # AdvLSTM class.
      self.units = units
      self.av_W = tf.Variable(initial_value=glorot_uniform()(shape=(units, units)), trainable=True)
      self.av_b = tf.Variable(initial_value=tf.zeros((units,)), trainable=True)
      self.av_u = tf.Variable(initial_value=glorot_uniform()(shape=(units,)), trainable=True)

  def call(self, inputs):
      a_laten = tf.tanh(tf.tensordot(inputs, self.av_W, axes=1) + self.av_b)
      a_scores = tf.tensordot(a_laten, self.av_u, axes=1) # Dot product the scores
      a_alphas = tf.nn.softmax(a_scores, axis=1)  # Apply softmax along time axis

      # Obtain context vector
      a_con = tf.reduce_sum(inputs * tf.expand_dims(a_alphas, -1), 1)
      return a_con

  # This is needed for saving and loading the model with custom layers
  def get_config(self):
      config = super(AttentionLayer, self).get_config()
      config.update({'units': self.units})
      return config

In [13]:
class AdvLSTM():

  """
  AdvLSTM class.
  """

  def __init__(self, data_path, model_path, save_path,
                parameters,
                steps=1,
                epochs=50,
                batch_size=256,
                gpu=True,
                tra_date='2014-01-02', val_date='2015-08-03', tes_date='2015-10-01',
                date_format='%Y-%m-%d',
                att=0, hinge=0, fix_init=0, adv=0, reload=0):

    self.data_path = data_path
    self.model_path = model_path
    self.save_path = save_path

    self.parameters = parameters
    self.steps = steps
    self.epochs = epochs
    self.batch_size = batch_size

    self.tra_date = tra_date
    self.val_date = val_date
    self.tes_date = tes_date
    self.date_format = date_format

    self.att = att
    self.hinge = hinge
    self.fix_init = fix_init
    self.adv = adv
    self.reload = reload

    self.fea_dim = None

    # Load data
    self.tra_pv, self.tra_wd, self.tra_gt, \
    self.val_pv, self.val_wd, self.val_gt, \
    self.tes_pv, self.tes_wd, self.tes_gt = load_data_temp(
      data_path=self.data_path,
      tra_date=self.tra_date,
      val_date=self.val_date,
      tes_date=self.tes_date,
      seq=self.parameters['seq'],
      date_format=self.date_format
    )
    self.fea_dim = self.tra_pv.shape[2]  # Number of features
    self.model = self.construct_model()

    self.attention_score_layer = layers.Dense(self.parameters['unit'], activation='tanh')  # Create Dense layer here
    self.attention_weights_layer = layers.Dense(1, activation='softmax') # Create Dense layer here
    self.model = self.construct_model()


  def get_batch(self, sta_ind=None):

    """
    Standard-issue batching.
    """

    if sta_ind is None:
      sta_ind = random.randrange(0, self.tra_pv.shape[0])
    if sta_ind + self.batch_size < self.tra_pv.shape[0]:
      end_ind = sta_ind + self.batch_size
    else:
      sta_ind = self.tra_pv.shape[0] - self.batch_size
      end_ind = self.tra_pv.shape[0]
    return (
      self.tra_pv[sta_ind:end_ind, :, :],
      self.tra_wd[sta_ind:end_ind, :, :],
      self.tra_gt[sta_ind:end_ind, :]
    )

  def compute_loss(self, pred, gt, hinge_loss=False):

    """
    Computes the loss for the model. If I remember correctly, the authors prefer
    Hinge loss.
    """

    if hinge_loss:
      return tf.reduce_mean(tf.maximum(0.0, 1 - gt * pred))  # Hinge loss for binary classification
    else:
      return tf.keras.losses.BinaryCrossentropy()(gt, pred)  # Cross-entropy for probabilities

  def construct_model(self):

    """
    Builds the Keras model for the adversarial LSTM.
    - Incorporates the Dense + LSTM layers, as well as custom AttentionLayer
    - If this is successful, the model is compiled and returned.
    - Sets up the graph but does not train the model.
    - Adversarial examples are generated outside of it. (tbd if valid)
    """

    pv_input = layers.Input(shape=(self.parameters['seq'], self.fea_dim))
    wd_input = layers.Input(shape=(self.parameters['seq'], 5))
    gt_input = layers.Input(shape=(1,))

    units = self.parameters['unit']

    # LSTM and Attention Layer
    dense_layer = layers.TimeDistributed(layers.Dense(self.fea_dim, activation='tanh'))(pv_input)
    lstm_output = layers.LSTM(units, return_sequences=True)(dense_layer)
    a_con = AttentionLayer(units)(lstm_output) # Obtain results of the attention layer.

    # Extract the last timestep's LSTM output
    lstm_last_step = layers.Lambda(lambda x: x[:, -1, :])(lstm_output)

    # Concatenate features
    feature_concat = layers.Concatenate()([lstm_last_step, a_con])

    # Prediction layer
    predictions = layers.Dense(1, activation='linear')(feature_concat)

    model = tf.keras.Model(inputs=[pv_input, wd_input, gt_input], outputs=predictions)
    return model


  def train(self):

    """
    Trains the entire model.
    """

    self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.parameters['lr']),
                       loss=self.compute_loss,
                       metrics=['accuracy'])

    for epoch in range(self.epochs):
      for batch in range(self.tra_pv.shape[0] // self.batch_size):
        pv_b, wd_b, gt_b = self.get_batch(batch * self.batch_size)

        with tf.GradientTape(persistent=True) as tape:  # Tape must be persistent. this is a strange nuance.
          predictions = self.model([pv_b, wd_b, gt_b], training=True) # First generate predictions.
          loss = self.compute_loss(predictions, gt_b, hinge_loss=self.hinge)

          # Compute adversarial loss if enabled
          if self.adv:
            tape.watch(predictions)
            adv_loss = self.compute_loss(predictions, gt_b, hinge_loss=self.hinge)
            # Calculate the gradients.
            delta_adv = tape.gradient(adv_loss, predictions)
            delta_adv = tf.stop_gradient(delta_adv)
            delta_adv = tf.nn.l2_normalize(delta_adv, axis=1)
            perturbed_inputs = predictions + self.parameters['eps'] * delta_adv

            adv_loss = self.compute_loss(perturbed_inputs, gt_b, hinge_loss=self.hinge)

        # Compute gradients for normal training
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.model.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        # Compute gradients for adversarial loss (if enabled)
        if self.adv:
          adv_gradients = tape.gradient(adv_loss, self.model.trainable_variables)
          self.model.optimizer.apply_gradients(zip(adv_gradients, self.model.trainable_variables))

        del tape  # free memory

      # Evaluate on validation data
      val_perf = self.model.evaluate([self.val_pv, self.val_wd, self.val_gt], self.val_gt, verbose=0)
      print(f"Epoch {epoch + 1}/{self.epochs}, Validation Loss: {val_perf[0]}, Accuracy: {val_perf[1]}")


In [16]:
# Parameters for your model, adjust based on your setup
parameters = {
    'seq': 5,  # Sequence length
    'unit': 64,  # Number of units in LSTM
    'lr': 0.001,  # Learning rate
    'eps': 0.01,  # Epsilon for adversarial perturbations
    'alp': 0.1,   # Regularization term
    'bet': 0.1,   # Adversarial loss weight
    # Add any other parameters required by your model
}


# Initialize your model
tttttttt = AdvLSTM(
    data_path=data_path,
    model_path=None,
    save_path=None,
    parameters=parameters,
    steps=1,  # Example: steps for processing
    epochs=50,
    batch_size=256,
    gpu=False,
    tra_date='2014-01-02',
    val_date='2015-08-03',
    tes_date='2015-10-01',
    date_format='%Y-%m-%d',
    att=0, hinge=0, fix_init=0, reload=0,
    adv=0, # enable adversarial training
)

# Just a simple demonstration to ensure that this works.
model = tttttttt.construct_model()
predictions = model.predict(x=[tttttttt.tes_pv, tttttttt.tes_wd, tttttttt.tes_gt])
print(predictions.shape)  # Should print (num_test_samples, 1)

['DCM.csv', 'BAC.csv', 'BA.csv', 'CHL.csv', 'CMCSA.csv', 'CVX.csv', 'AAPL.csv', 'D.csv', 'BRK-B.csv', 'BHP.csv', 'AMZN.csv']
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
(2268, 1)


Train the model and obtain results.

In [17]:
tttttttt.train()

Epoch 1/50, Validation Loss: 7.451866149902344, Accuracy: 0.5692771077156067
Epoch 2/50, Validation Loss: 7.736703872680664, Accuracy: 0.5271084308624268
Epoch 3/50, Validation Loss: 7.860823631286621, Accuracy: 0.5030120611190796
Epoch 4/50, Validation Loss: 7.907081604003906, Accuracy: 0.4969879388809204
Epoch 5/50, Validation Loss: 7.912779808044434, Accuracy: 0.4939759075641632
Epoch 6/50, Validation Loss: 7.904788970947266, Accuracy: 0.5512048006057739
Epoch 7/50, Validation Loss: 7.896721363067627, Accuracy: 0.5240963697433472
Epoch 8/50, Validation Loss: 7.892404556274414, Accuracy: 0.5210843086242676
Epoch 9/50, Validation Loss: 7.891769886016846, Accuracy: 0.5240963697433472
Epoch 10/50, Validation Loss: 7.8938398361206055, Accuracy: 0.5271084308624268
Epoch 11/50, Validation Loss: 7.897712230682373, Accuracy: 0.5301204919815063
Epoch 12/50, Validation Loss: 7.902705669403076, Accuracy: 0.5271084308624268
Epoch 13/50, Validation Loss: 7.908337593078613, Accuracy: 0.51807230710

In [18]:
predicted_labels = (predictions > 0.2).astype(int)

In [19]:
from sklearn.metrics import accuracy_score, precision_score

accuracy = accuracy_score(tttttttt.tes_gt, predicted_labels)
precision = precision_score(tttttttt.tes_gt, predicted_labels)
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")

Accuracy: 0.48500881834215165
Precision: 0.5399449035812672
