# Prototyping

Creates a neural network which evaluates a time series and produce a set of predicted values for the time series
Predicted values may be used in a policy to make a trade. This policy may be modeled by simple multiple regression or a neural network.

## Data
Test data is taken as most recent to avoid lookahead bias. Train data is split into a validation and training set during fitting.


## TODO
- Convert feature percentages to stdev
- Adding VIX as a signal
- Adding High/Low as signals
- Multiple securities/ aggregate samples
- Policy network
- Regularization (l2)
- Dilated convolution

In [112]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import pandas_datareader as pdr
from datetime import datetime

def from_network(symbol):
    return pdr.get_data_yahoo(symbols=symbol, start=datetime(1900, 1, 1))

def from_file(symbol):
    dataset_path = keras.utils.get_file("{}.csv".format(symbol), "http://localhost:8000/data/daily/{}.csv".format(symbol))
    column_names = ['Date','Open','High','Low','Close','Adj Close','Volume'] 
    return pd.read_csv(dataset_path, 
                              names=column_names, 
                              dtype={'Close': np.float64,'Open': np.float64,'High': np.float64,'Adj Close': np.float64, 'Volume': np.float64},
                              header=0,
                              na_values = "?", 
                              comment='\t',
                              sep=",",
                              skipinitialspace=True)

#dataset = raw_dataset.copy()
dataset = from_network('IBM')
dataset = dataset.sort_values(by=['Date'],ascending=False)




In [113]:
print dataset

                  High         Low        Open       Close      Volume  \
Date                                                                     
2019-01-22  123.800003  121.540001  123.300003  122.519997   9932800.0   
2019-01-18  124.720001  122.709999  123.269997  123.820000   6008500.0   
2019-01-17  122.410004  120.550003  120.559998  122.190002   5029900.0   
2019-01-16  122.000000  120.830002  121.580002  121.620003   3841100.0   
2019-01-15  121.930000  120.820000  120.959999  121.730003   3507500.0   
2019-01-14  120.650002  119.760002  120.510002  120.389999   5228700.0   
2019-01-11  121.620003  120.199997  121.580002  121.459999   3722400.0   
2019-01-10  121.860001  119.949997  120.080002  121.790001   3910000.0   
2019-01-09  121.400002  119.870003  120.910004  120.690002   3633700.0   
2019-01-08  120.570000  118.980003  119.660004  119.830002   4763600.0   
2019-01-07  118.830002  116.669998  117.500000  118.150002   3751200.0   
2019-01-04  117.489998  114.440002  11

In [None]:

dataset_stats = dataset.describe()
dataset_stats = dataset_stats.transpose()
NUM_INPUT_NEURONS = 64
NUM_OUTPUT_NEURONS = 3
NUM_SAMPLES = len(dataset)
NUM_TEST_SAMPLES = int(.33 * NUM_SAMPLES)
print NUM_SAMPLES
# Create features (only close price for now)
def convert_to_percentage(old, new):
    return (old - new) / old


def convert_labels_to_category(labels): 
    # Simplification - If positive return, 1, else 0
    # return map(lambda arr: 1 if arr[0] > 1 else 0, labels)
    # rounding simpliciation, 10th of percentage
    return map(lambda arr: map(lambda val: round(val,2),arr), labels)

def convert_to_train(raw_dataset):
    dataset = raw_dataset.copy()
    features = []
    labels = []
    for i in range(5, len(dataset) - NUM_INPUT_NEURONS):

        feature_dataset = dataset[i:i+NUM_INPUT_NEURONS].copy()
        latest_close = feature_dataset['Close'].iloc[0]
        
        features.append(
            feature_dataset['Close']
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            dataset['Close'].iloc[i-1] / latest_close, # 1 day trade
            dataset['Close'].iloc[i-3] / latest_close, # 3 day trade
            dataset['Close'].iloc[i-5] / latest_close, # 5 day trade
        ])
        
    # Without converting labels the precision is hard to determine accuracy. 
    # Rather than crude 0/1, maybe this can be more sophisticated
    labels = convert_labels_to_category(labels)
    
    return [features,labels]
converted_feature_set = convert_to_train(dataset)
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[len(converted_feature_set[0][0])]),
    layers.Dense(32, activation=tf.nn.relu),
    layers.Dense(16, activation=tf.nn.relu),
    layers.Dense(NUM_OUTPUT_NEURONS)
  ])

  optimizer = tf.train.RMSPropOptimizer(0.001)

  model.compile(loss='mse',
                optimizer='sgd',
                metrics=[
                    'mae',
                #    'accuracy'
                ])
  return model

tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0,
                          write_graph=True, write_images=False)

model = build_model()
model.summary()

In [83]:
#len(converted_feature_set[0][0])
print NUM_TEST_SAMPLES
train_data = np.array(converted_feature_set[0][NUM_TEST_SAMPLES:])
train_labels = np.array(converted_feature_set[1][NUM_TEST_SAMPLES:])

test_data = np.array(converted_feature_set[0][:NUM_TEST_SAMPLES])
test_labels = np.array(converted_feature_set[1][:NUM_TEST_SAMPLES])


4739


In [84]:
print train_data
print train_labels

[[ 0.          0.02263494 -0.00290192 ... -0.01450958 -0.01871714
  -0.01218804]
 [ 0.         -0.02612827 -0.00950119 ... -0.04230976 -0.03562945
  -0.07719715]
 [ 0.          0.0162037   0.0462963  ... -0.00925926 -0.04976852
  -0.09519626]
 ...
 [ 0.          0.02063341  0.01631476 ... -0.06717851 -0.05470251
  -0.07485604]
 [ 0.         -0.00440963 -0.02988732 ... -0.07692311 -0.09750124
  -0.11954927]
 [ 0.         -0.02536583 -0.03951219 ... -0.09268291 -0.11463414
  -0.12585361]]
[[1.01 1.01 1.05]
 [1.02 1.02 1.05]
 [0.97 1.01 1.01]
 ...
 [1.   1.   0.98]
 [1.02 1.02 1.01]
 [1.   1.02 1.02]]


In [85]:

history = model.fit(
    train_data, train_labels,
    epochs=80, validation_split = 0.2, verbose=1,
    callbacks=[tensorboard]
)

Train on 7643 samples, validate on 1911 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80


Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [86]:
outputs = model.predict(test_data)

In [87]:
# one day return
actual = map(lambda arr: arr[0],outputs)
# signal step for our policy network
signals = map(lambda arr: sum(arr) / len(arr),outputs)
# primitive policy temporarily in place of a RL policy network
trades = map(lambda signal: 1 if round(signal,2) > 1 else 0,signals)

In [88]:
df = pd.DataFrame({
    'signal':signals,
    'actual': actual,
    'trade':trades,
})

In [89]:
df['entry_success'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 1) else 0,axis=1)
df['entry_failure'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 1) else 0,axis=1)
df['avoid_success'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 0) else 0,axis=1)
df['avoid_failure'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 0) else 0,axis=1)


In [90]:
# primitive policy - replace with a policy network which maximizes reward
def label_success (row):
    return 0 if (row['entry_failure'] == 1 or row['entry_failure'] == 1) else 1

success = df.apply (lambda row: label_success (row),axis=1)
df['success'] = success;
df

Unnamed: 0,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,1.009045,1.004949,0,0,0,0,1,1
1,1.006607,1.000724,0,0,0,0,1,1
2,1.015358,1.011751,1,1,0,0,0,1
3,1.023943,1.010005,1,1,0,0,0,1
4,1.010803,0.997734,0,0,0,0,1,1
5,1.007053,0.988832,0,0,0,0,1,1
6,1.000050,0.982295,0,0,0,0,1,1
7,0.995832,0.982412,0,0,0,1,0,1
8,1.008703,0.995042,0,0,0,0,1,1
9,1.010177,1.005058,1,1,0,0,0,1


In [91]:
print '\nNon-loss events'
print sum(df['success'])
print sum(df['success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nLose trades'
print sum(df['entry_failure'])
print sum(df['entry_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nWin trades'
print sum(df['entry_success'])
print sum(df['entry_success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nMissed opportunities'
print sum(df['avoid_failure'])
print sum(df['avoid_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nBullets dodged'
print sum(df['avoid_success'])
print sum(df['avoid_success']) / (NUM_TEST_SAMPLES * 1.00)


Non-loss events
4615
0.973834142224

Lose trades
124
0.0261658577759

Win trades
1377
0.290567630302

Missed opportunities
1070
0.225786030808

Bullets dodged
2168
0.457480481114


In [121]:
df1 = dataset.copy().head(NUM_TEST_SAMPLES)
df2 = df.copy()

In [122]:
pd.concat([df1.reset_index(),df2], axis=1)

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,2019-01-22,123.800003,121.540001,123.300003,122.519997,9932800.0,122.519997,1.009045,1.004949,0,0,0,0,1,1
1,2019-01-18,124.720001,122.709999,123.269997,123.820000,6008500.0,123.820000,1.006607,1.000724,0,0,0,0,1,1
2,2019-01-17,122.410004,120.550003,120.559998,122.190002,5029900.0,122.190002,1.015358,1.011751,1,1,0,0,0,1
3,2019-01-16,122.000000,120.830002,121.580002,121.620003,3841100.0,121.620003,1.023943,1.010005,1,1,0,0,0,1
4,2019-01-15,121.930000,120.820000,120.959999,121.730003,3507500.0,121.730003,1.010803,0.997734,0,0,0,0,1,1
5,2019-01-14,120.650002,119.760002,120.510002,120.389999,5228700.0,120.389999,1.007053,0.988832,0,0,0,0,1,1
6,2019-01-11,121.620003,120.199997,121.580002,121.459999,3722400.0,121.459999,1.000050,0.982295,0,0,0,0,1,1
7,2019-01-10,121.860001,119.949997,120.080002,121.790001,3910000.0,121.790001,0.995832,0.982412,0,0,0,1,0,1
8,2019-01-09,121.400002,119.870003,120.910004,120.690002,3633700.0,120.690002,1.008703,0.995042,0,0,0,0,1,1
9,2019-01-08,120.570000,118.980003,119.660004,119.830002,4763600.0,119.830002,1.010177,1.005058,1,1,0,0,0,1
