# Prototyping

Creates a neural network which evaluates a time series and produce a set of predicted values for the time series
Predicted values may be used in a policy to make a trade. This policy may be modeled by simple multiple regression or a neural network.

## Data
Test data is taken as most recent to avoid lookahead bias. Train data is split into a validation and training set during fitting.


## TODO
- Convert feature percentages to stdev
- Adding VIX as a signal
- Adding High/Low as signals
- Multiple securities/ aggregate samples
- Policy network
- Regularization (l2)
- Dilated convolution

In [59]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import pandas_datareader as pdr
from datetime import datetime

def from_network(symbol):
    return pdr.get_data_yahoo(symbols=symbol, start=datetime(1900, 1, 1))

def from_file(symbol):
    dataset_path = keras.utils.get_file("{}.csv".format(symbol), "http://localhost:8000/data/daily/{}.csv".format(symbol))
    column_names = ['Date','Open','High','Low','Close','Adj Close','Volume'] 
    return pd.read_csv(dataset_path, 
                              names=column_names, 
                              dtype={'Close': np.float64,'Open': np.float64,'High': np.float64,'Adj Close': np.float64, 'Volume': np.float64},
                              header=0,
                              na_values = "?", 
                              comment='\t',
                              sep=",",
                              skipinitialspace=True)

#dataset = raw_dataset.copy()
dataset = from_network('SPY')
dataset = dataset.sort_values(by=['Date'],ascending=False)

dataset_stats = dataset.describe()
dataset_stats = dataset_stats.transpose()
NUM_INPUT_NEURONS = 64
NUM_OUTPUT_NEURONS = 3
NUM_SAMPLES = len(dataset)
NUM_TEST_SAMPLES = int(.33 * NUM_SAMPLES)
#NUM_TEST_SAMPLES = 1000
print NUM_SAMPLES
# Create features (only close price for now)
def convert_to_percentage(old, new):
    return (old - new) / old


def convert_labels_to_category(labels): 
    # Simplification - If positive return, 1, else 0
    # return map(lambda arr: 1 if arr[0] > 1 else 0, labels)
    # rounding simpliciation, 10th of percentage
    return map(lambda arr: map(lambda val: round(val,2),arr), labels)

def convert_to_train(raw_dataset):
    dataset = raw_dataset.copy()
    features = []
    labels = []
    for i in range(5, len(dataset) - NUM_INPUT_NEURONS):

        feature_dataset = dataset[i:i+NUM_INPUT_NEURONS].copy()
        latest_close = feature_dataset['Close'].iloc[0]
        
        features.append(
            feature_dataset['Close']
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            dataset['Close'].iloc[i-1] / latest_close, # 1 day trade
            dataset['Close'].iloc[i-3] / latest_close, # 3 day trade
            dataset['Close'].iloc[i-5] / latest_close, # 5 day trade
        ])
        
    # Without converting labels the precision is hard to determine accuracy. 
    # Rather than crude 0/1, maybe this can be more sophisticated
    labels = convert_labels_to_category(labels)
    
    return [features,labels]
converted_feature_set = convert_to_train(dataset)
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[len(converted_feature_set[0][0])]),
    layers.Dense(32, activation=tf.nn.relu),
    layers.Dense(16, activation=tf.nn.relu),
    layers.Dense(NUM_OUTPUT_NEURONS)
  ])

  optimizer = tf.train.RMSPropOptimizer(0.001)

  model.compile(loss='mse',
                optimizer='sgd',
                metrics=[
                    'mae',
                #    'accuracy'
                ])
  return model

tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0,
                          write_graph=True, write_images=False)

model = build_model()
model.summary()



6542
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_17 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_18 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_19 (Dense)             (None, 3)                 51        
Total params: 6,819
Trainable params: 6,819
Non-trainable params: 0
_________________________________________________________________


In [60]:
#len(converted_feature_set[0][0])
print NUM_TEST_SAMPLES
train_data = np.array(converted_feature_set[0][NUM_TEST_SAMPLES:])
train_labels = np.array(converted_feature_set[1][NUM_TEST_SAMPLES:])

test_data = np.array(converted_feature_set[0][:NUM_TEST_SAMPLES])
test_labels = np.array(converted_feature_set[1][:NUM_TEST_SAMPLES])


2158


In [61]:
print train_data
print train_labels

[[ 0.          0.00160514  0.00124843 ... -0.03968251 -0.03415375
  -0.04369539]
 [ 0.         -0.00035728  0.02188279 ... -0.03581638 -0.04537336
  -0.04590925]
 [ 0.          0.02223212  0.02071428 ... -0.04500001 -0.0455357
  -0.03937503]
 ...
 [ 0.          0.00280309  0.00630583 ... -0.00840926 -0.00911114
  -0.00490651]
 [ 0.          0.00351259  0.00983838 ... -0.01194772 -0.00773127
   0.00281097]
 [ 0.          0.00634809  0.00775852 ... -0.0112835  -0.0007041
   0.00141044]]
[[1.   0.98 0.96]
 [1.   1.   0.98]
 [1.   1.   0.98]
 ...
 [1.   1.   1.  ]
 [1.   1.   1.  ]
 [1.   1.   1.  ]]


In [73]:

history = model.fit(
    train_data, train_labels,
    epochs=80, validation_split = 0.2, verbose=1,
    callbacks=[tensorboard]
)

Train on 3452 samples, validate on 863 samples
Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90


Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
Epoch 85/90
Epoch 86/90
Epoch 87/90
Epoch 88/90
Epoch 89/90
Epoch 90/90


In [74]:
outputs = model.predict(test_data)

In [75]:
# one day return
actual = map(lambda arr: arr[0],outputs)
# signal step for our policy network
signals = map(lambda arr: sum(arr) / len(arr),outputs)
# primitive policy temporarily in place of a RL policy network
trades = map(lambda signal: 1 if round(signal,2) > 1 else 0,signals)

In [76]:
df = pd.DataFrame({
    'signal':signals,
    'actual': actual,
    'trade':trades,
})

In [77]:
df['entry_success'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 1) else 0,axis=1)
df['entry_failure'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 1) else 0,axis=1)
df['avoid_success'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 0) else 0,axis=1)
df['avoid_failure'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 0) else 0,axis=1)


In [78]:
# primitive policy - replace with a policy network which maximizes reward
def label_success (row):
    return 0 if (row['entry_failure'] == 1 or row['entry_failure'] == 1) else 1

success = df.apply (lambda row: label_success (row),axis=1)
df['success'] = success;
df

Unnamed: 0,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,0.994061,1.001055,0,0,0,1,0,1
1,1.005111,1.005983,1,1,0,0,0,1
2,1.001842,1.006654,1,1,0,0,0,1
3,1.001131,1.001591,0,0,0,0,1,1
4,0.982471,0.992030,0,0,0,1,0,1
5,0.993998,0.997984,0,0,0,1,0,1
6,0.979161,0.994006,0,0,0,1,0,1
7,0.986842,1.004810,0,0,0,1,0,1
8,0.984634,1.005040,1,0,1,0,0,0
9,0.983880,1.000018,0,0,0,1,0,1


In [79]:
print '\nNon-loss events'
print sum(df['success'])
print sum(df['success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nLose trades'
print sum(df['entry_failure'])
print sum(df['entry_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nWin trades'
print sum(df['entry_success'])
print sum(df['entry_success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nMissed opportunities'
print sum(df['avoid_failure'])
print sum(df['avoid_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nBullets dodged'
print sum(df['avoid_success'])
print sum(df['avoid_success']) / (NUM_TEST_SAMPLES * 1.00)


Non-loss events
2128
0.98609823911

Lose trades
30
0.0139017608897

Win trades
198
0.0917516218721

Missed opportunities
707
0.327618164968

Bullets dodged
1223
0.566728452271
