# Algo Trading Example

Based on examples providied by ...

## Pycon DE Karlsruhe
### Why Python Has Taken Over Finance

General Thoughts and Practical Examples

Dr. Yves J. Hilpisch | The Python Quants GmbH
Karlsuhe, 26. October 2017

original source: https://gist.github.com/yhilpisch/eb6c8978cf9a752e38529d57ed34b7cc


In [1]:
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn')
%matplotlib inline

In [2]:
# data as provided by Thomson Reuters Eikon API
file_location = './data/tr_eikon_eod_data.csv'

In [3]:
data = pd.read_csv(file_location, index_col=0, parse_dates=True)
symbol = 'EUR='
data = pd.DataFrame(data[symbol])
data.head()

Unnamed: 0_level_0,EUR=
Date,Unnamed: 1_level_1
2010-01-01,1.4323
2010-01-04,1.4411
2010-01-05,1.4368
2010-01-06,1.4412
2010-01-07,1.4318


### the features

In [4]:
# calulate the daily return : R(t) = ln( P(t)/P(t-1) )
data['Returns'] = np.log(data / data.shift(1))
data.head()

Unnamed: 0_level_0,EUR=,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-01,1.4323,
2010-01-04,1.4411,0.006125
2010-01-05,1.4368,-0.002988
2010-01-06,1.4412,0.003058
2010-01-07,1.4318,-0.006544


In [5]:
# Note: Need to add an explanation of why we are calulating these directional values for the returns
# lag_1 tells you whether the return was positive (ie. 1) 1 dag ago, 0 otherwise; and so on for lag2-5
# 
lags = 5
cols = []
for lag in range(1, lags+1):
    col = 'lag_%s' % lag
    data[col] = np.where(data['Returns'].shift(lag) > 0, 1, 0)
    cols.append(col)

In [6]:
data.head(7)

Unnamed: 0_level_0,EUR=,Returns,lag_1,lag_2,lag_3,lag_4,lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-01,1.4323,,0,0,0,0,0
2010-01-04,1.4411,0.006125,0,0,0,0,0
2010-01-05,1.4368,-0.002988,1,0,0,0,0
2010-01-06,1.4412,0.003058,0,1,0,0,0
2010-01-07,1.4318,-0.006544,1,0,1,0,0
2010-01-08,1.4412,0.006544,0,1,0,1,0
2010-01-11,1.4513,0.006984,1,0,1,0,1


In [7]:
# use the data starting from 6 rows down onward becasue thats when the lag_1 thru lag_5 all become meaningfull.
data = data.iloc[lags+1:]
data.head()

Unnamed: 0_level_0,EUR=,Returns,lag_1,lag_2,lag_3,lag_4,lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-11,1.4513,0.006984,1,0,1,0,1
2010-01-12,1.4494,-0.00131,1,1,0,1,0
2010-01-13,1.451,0.001103,0,1,1,0,1
2010-01-14,1.4502,-0.000551,1,0,1,1,0
2010-01-15,1.4382,-0.008309,0,1,0,1,1


In [26]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2210 entries, 2010-01-11 to 2018-06-29
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   EUR=     2210 non-null   float64
 1   Returns  2210 non-null   float64
 2   lag_1    2210 non-null   int64  
 3   lag_2    2210 non-null   int64  
 4   lag_3    2210 non-null   int64  
 5   lag_4    2210 non-null   int64  
 6   lag_5    2210 non-null   int64  
dtypes: float64(2), int64(5)
memory usage: 138.1 KB


### Use Tensorflow to train a model

In [23]:
# Updateing to use TF 2.0 with Kera api
# https://www.tensorflow.org/guide/keras/overview
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras

print(tf.__version__)

2.1.0


### Explanation of the DNN that we are putting together.
We are going to use the returns from the past 5 days to predict a positive/negative return for today
* Our dataset has 2210 records/observations ( ie. number of days in the timeseries data )
* Each observation is a 5 x 1 array ( ie. the profit indicator for 5 days of lag ) (x)
* There is a returns indicator (1=positive/0=negative)  for each day - which is the label (y)


### Getting the Data Ready

In [10]:
# Create the feature columns (fc)
# tf.contrib is not supported in 2.0
# fc = [tf.contrib.layers.real_valued_column('lags', dimension=lags)]   <== Wont work on 2.x

AttributeError: module 'tensorflow' has no attribute 'contrib'

In [52]:
# function to return the features columns (fc) & lable array (la)
# The lags columns are used for feature columns
# the returns array is used as label

def get_data():
    fc = {'lags': tf.constant(data[cols].values, dtype=tf.int32 )}
    la = tf.constant((data['Returns'].values > 0).astype(int),  dtype=tf.int32)
    return fc, la

In [53]:
fc, la = get_data()

In [57]:
fc['lags']

<tf.Tensor: shape=(2210, 5), dtype=int32, numpy=
array([[1, 0, 1, 0, 1],
       [1, 1, 0, 1, 0],
       [0, 1, 1, 0, 1],
       ...,
       [0, 1, 1, 1, 0],
       [0, 0, 1, 1, 1],
       [1, 0, 0, 1, 1]], dtype=int32)>

In [44]:
la

<tf.Tensor: shape=(2210,), dtype=int32, numpy=array([1, 0, 1, ..., 0, 1, 1], dtype=int32)>

### Creating the Model - DNN

In [89]:
# 2 hidden layers of 50 nodes each
# The last (output) layer has 2 nodes as the returns label has only 2 values (1/)
model = keras.Sequential([
    keras.layers.Dense(40, activation='relu'),
    keras.layers.Dense(40, activation='relu'),
    keras.layers.Dense(2, activation='relu', name='output')
])


In [96]:

model = tf.estimator.DNNClassifier(hidden_units=[50, 50],
                                feature_columns=fc)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/rm/p5jrbh0n66jfgz2rfkz4_zcw0000gn/T/tmp6k6ne7xc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [90]:
# Compile the model
# refer to https://www.tensorflow.org/tutorials/keras/classification
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

### Do the learning

In [97]:
# use 20% of data for validation
model.fit(x=fc['lags'], y=la, validation_split=0.2, epochs=20 )

AttributeError: 'DNNClassifierV2' object has no attribute 'fit'

In [92]:
model.evaluate(x=fc['lags'], y=la, steps=1)



[0.6913245916366577, 0.49864253]

### Algorithmic Trading

In [93]:
pred = model.predict( x=fc['lags'])

In [95]:
pred

array([[0.        , 0.        ],
       [0.2147379 , 0.        ],
       [0.        , 0.        ],
       ...,
       [0.        , 0.        ],
       [0.        , 0.        ],
       [0.39506862, 0.        ]], dtype=float32)

In [94]:
np.where(np.array(list(pred)) > 0, 1, -1)

array([[-1, -1],
       [ 1, -1],
       [-1, -1],
       ...,
       [-1, -1],
       [-1, -1],
       [ 1, -1]])