# Prototyping

Creates a neural network which evaluates a time series and produce a set of predicted values for the time series
Predicted values may be used in a policy to make a trade. This policy may be modeled by simple multiple regression or a neural network.

## Data
Test data is taken as most recent to avoid lookahead bias. Train data is split into a validation and training set during fitting.


## TODO
- Convert feature percentages to stdev
- Adding VIX as a signal
- Adding High/Low as signals
- Multiple securities/ aggregate samples
- Policy network
- Regularization (l2)
- Dilated convolution

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import pandas_datareader as pdr
from datetime import datetime

def from_network(symbol):
    return pdr.get_data_yahoo(symbols=symbol, start=datetime(1900, 1, 1))

def from_file(symbol):
    dataset_path = keras.utils.get_file("{}.csv".format(symbol), "http://localhost:8000/data/daily/{}.csv".format(symbol))
    column_names = ['Date','Open','High','Low','Close','Adj Close','Volume'] 
    return pd.read_csv(dataset_path, 
                              names=column_names, 
                              dtype={'Close': np.float64,'Open': np.float64,'High': np.float64,'Adj Close': np.float64, 'Volume': np.float64},
                              header=0,
                              na_values = "?", 
                              comment='\t',
                              sep=",",
                              skipinitialspace=True)

#dataset = raw_dataset.copy()
dataset = from_network('SPY')
dataset = dataset.sort_values(by=['Date'],ascending=False)




In [2]:
print dataset

                  High         Low        Open       Close       Volume  \
Date                                                                      
2019-01-22  265.059998  261.059998  264.820007  262.859985  115355500.0   
2019-01-18  266.980011  263.000000  264.980011  266.459991  127900300.0   
2019-01-17  263.920013  259.959991  260.010010  262.959991   96118400.0   
2019-01-16  261.970001  260.600006  260.829987  260.980011   77636700.0   
2019-01-15  260.700012  257.809998  257.820007  260.350006   85208300.0   
2019-01-14  258.299988  256.410004  256.859985  257.399994   70908200.0   
2019-01-11  259.010010  257.029999  257.679993  258.980011   73858100.0   
2019-01-10  259.160004  255.500000  256.260010  258.880005   96823900.0   
2019-01-09  258.910004  256.190002  257.559998  257.970001   95006600.0   
2019-01-08  257.309998  254.000000  256.820007  256.769989  102512600.0   
2019-01-07  255.949997  251.690002  252.690002  254.380005  103139100.0   
2019-01-04  253.110001  2

In [3]:

dataset_stats = dataset.describe()
dataset_stats = dataset_stats.transpose()
NUM_INPUT_NEURONS = 64
NUM_OUTPUT_NEURONS = 1
NUM_SAMPLES = len(dataset)
NUM_TEST_SAMPLES = int(.25 * NUM_SAMPLES)
NUM_TEST_SAMPLES = 1000
DAY_OFFSET = 5
print NUM_SAMPLES
# Create features (only close price for now)
def convert_to_percentage(old, new):
    return (old - new) / old

def convert_labels_to_category(labels): 
    # Simplification - If positive return, 1, else 0
    # return map(lambda arr: 1 if arr[0] > 1 else 0, labels)
    # rounding simpliciation
    return map(lambda arr: map(lambda val: round(val,4),arr), labels)

def convert_to_train(raw_dataset):
    dataset = raw_dataset.copy()
    features = []
    labels = []
    for i in range(DAY_OFFSET, len(dataset) - NUM_INPUT_NEURONS):

        feature_dataset = dataset[i:i+NUM_INPUT_NEURONS].copy()
        latest_close = feature_dataset['Close'].iloc[0]
        
        features.append(
            feature_dataset['Close']
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            dataset['Close'].iloc[i-1] / latest_close, # 1 day trade
#             dataset['Close'].iloc[i-2] / latest_close, # 2 day trade
#             dataset['Close'].iloc[i-3] / latest_close, # 3 day trade
#             dataset['Close'].iloc[i-4] / latest_close, # 4 day trade
#             dataset['Close'].iloc[i-5] / latest_close, # 5 day trade
        ])
        
    # Without converting labels the precision is hard to determine accuracy. 
    # Rather than crude 0/1, maybe this can be more sophisticated
    labels = convert_labels_to_category(labels)
    
    return [features,labels]
converted_feature_set = convert_to_train(dataset)
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[len(converted_feature_set[0][0])]),
    layers.Dense(32, kernel_regularizer=keras.regularizers.l1_l2(l1=0.0001, l2=0.0001), activation=tf.nn.relu),
    layers.Dense(16, kernel_regularizer=keras.regularizers.l1_l2(l1=0.0001, l2=0.0001), activation=tf.nn.relu),
    #layers.Dense(16, activation=tf.nn.relu),
    layers.Dense(NUM_OUTPUT_NEURONS)
  ])

  model.compile(loss='mse',
                optimizer='sgd',
                metrics=[
                    'mae',
                ])
  return model

tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0,
                          write_graph=True, write_images=False)

model = build_model()
model.summary()

6542
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 6,785
Trainable params: 6,785
Non-trainable params: 0
_________________________________________________________________


In [4]:
#len(converted_feature_set[0][0])
print NUM_TEST_SAMPLES
train_data = np.array(converted_feature_set[0][NUM_TEST_SAMPLES:])
train_labels = np.array(converted_feature_set[1][NUM_TEST_SAMPLES:])

test_data = np.array(converted_feature_set[0][:NUM_TEST_SAMPLES])
test_labels = np.array(converted_feature_set[1][:NUM_TEST_SAMPLES])


1000


In [5]:
print train_data
print train_labels

[[ 0.         -0.00551303  0.00922086 ...  0.04166468  0.04898282
   0.0599112 ]
 [ 0.          0.0146531   0.01960218 ...  0.05419706  0.06506552
   0.05836972]
 [ 0.          0.00502267  0.00714003 ...  0.0511621   0.04436672
   0.06293086]
 ...
 [ 0.          0.00280309  0.00630583 ... -0.00840926 -0.00911114
  -0.00490651]
 [ 0.          0.00351259  0.00983838 ... -0.01194772 -0.00773127
   0.00281097]
 [ 0.          0.00634809  0.00775852 ... -0.0112835  -0.0007041
   0.00141044]]
[[1.0023]
 [0.9945]
 [1.0149]
 ...
 [0.9965]
 [1.0028]
 [1.0035]]


In [6]:

history = model.fit(
    train_data, train_labels,
    epochs=120, validation_split = 0.2, verbose=1,
    callbacks=[tensorboard]
)

Train on 4378 samples, validate on 1095 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120


Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120


Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120


In [7]:
outputs = model.predict(test_data)

In [8]:
print test_data[0]
print test_labels[0]
print outputs[0]

[ 0.         -0.00613837 -0.00574985 -0.00221448  0.00244757  0.01173267
  0.01946385  0.05124315  0.02804973  0.02905981  0.03749026  0.03624703
  0.04358975  0.08958818  0.06487955  0.03974357  0.02385392  0.00901318
  0.00792538 -0.01192699 -0.03096349 -0.03131312 -0.02614612 -0.02591303
 -0.02397053 -0.04832946 -0.04992232 -0.08508156 -0.07090132 -0.06441343
 -0.06674434 -0.04273504 -0.03923856 -0.0227273  -0.02960371 -0.02610723
 -0.04545459 -0.06344218 -0.06068374 -0.04972812 -0.05695417 -0.05893556
 -0.07909874 -0.08974362 -0.09172501 -0.06884228 -0.06212129 -0.05629379
 -0.06258748 -0.05139865 -0.04028747 -0.02509709 -0.03080805 -0.04926182
 -0.03076928 -0.06297588 -0.06841498 -0.07323235 -0.07381508 -0.08954941
 -0.08935509 -0.06604507 -0.07206689 -0.05738158]
[1.0115]
[1.0039079]


In [9]:
# primitive policy temporarily in place of a RL policy network

# signal step for our policy network
signals = map(lambda arr: sum(arr) / len(arr),outputs)
# signals = map(lambda arr: arr[0],outputs)
trades = map(lambda signal: 1 if round(signal,4) > 1 else 0,signals)

In [10]:
outputs[1:]

array([[0.99778545],
       [0.9984807 ],
       [0.9933344 ],
       [0.997865  ],
       [0.99797845],
       [1.0011455 ],
       [1.0101322 ],
       [1.0135732 ],
       [1.0064023 ],
       [1.0093355 ],
       [1.007729  ],
       [1.0001848 ],
       [1.0032508 ],
       [0.9969384 ],
       [1.0025123 ],
       [1.0028467 ],
       [0.99518824],
       [1.0001926 ],
       [1.0020008 ],
       [1.0014293 ],
       [1.001884  ],
       [0.9967786 ],
       [0.9981499 ],
       [0.9974891 ],
       [0.9962426 ],
       [0.99673676],
       [1.0008991 ],
       [1.001658  ],
       [0.99867004],
       [1.0004447 ],
       [1.0047042 ],
       [1.0065296 ],
       [1.0082805 ],
       [1.0050966 ],
       [1.0007591 ],
       [1.0069189 ],
       [1.0022867 ],
       [1.0015996 ],
       [1.0016139 ],
       [0.9962689 ],
       [1.000705  ],
       [0.99953663],
       [0.99604785],
       [0.9970583 ],
       [0.99840474],
       [1.0000813 ],
       [0.9979477 ],
       [0.999

In [11]:
# one day return
actual = map(lambda arr: arr[0],test_labels)

df = pd.DataFrame({
    'signal':signals,
    'actual': actual,
    'trade':trades,
})

In [12]:
df['entry_success'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 1) else 0,axis=1)
df['entry_failure'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 1) else 0,axis=1)
df['avoid_success'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 0) else 0,axis=1)
df['avoid_failure'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 0) else 0,axis=1)


In [13]:
# primitive policy - replace with a policy network which maximizes reward
def label_success (row):
    return 0 if (row['entry_failure'] == 1 or row['entry_failure'] == 1) else 1

success = df.apply (lambda row: label_success (row),axis=1)
df['success'] = success;
df

Unnamed: 0,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,1.0115,1.003908,1,1,0,0,0,1
1,0.9939,0.997785,0,0,0,1,0,1
2,1.0004,0.998481,0,0,0,0,1,1
3,1.0035,0.993334,0,0,0,0,1,1
4,1.0047,0.997865,0,0,0,0,1,1
5,1.0094,0.997978,0,0,0,0,1,1
6,1.0079,1.001145,1,1,0,0,0,1
7,1.0335,1.010132,1,1,0,0,0,1
8,0.9761,1.013573,1,0,1,0,0,0
9,1.0010,1.006402,1,1,0,0,0,1


In [14]:
print '\nPrecision' # optimize for this since we can increase discovery, so long as we find enough trades
print sum(df['entry_success']) * 1.00 / (sum(df['entry_success']) + sum(df['entry_failure'])) 

print '\nRecall'
print sum(df['entry_success']) * 1.00 / (sum(df['entry_success']) + sum(df['avoid_failure']))

print '\nAccuracy'
print sum(df['entry_success']) * 1.00 / (NUM_TEST_SAMPLES)

print '\nNon-loss events'
print sum(df['success'])
print sum(df['success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nLose trades'
print sum(df['entry_failure'])
print sum(df['entry_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nWin trades'
print sum(df['entry_success'])
print sum(df['entry_success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nMissed opportunities'
print sum(df['avoid_failure'])
print sum(df['avoid_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nBullets dodged'
print sum(df['avoid_success'])
print sum(df['avoid_success']) / (NUM_TEST_SAMPLES * 1.00)



Precision
0.545454545455

Recall
0.731428571429

Accuracy
0.384

Non-loss events
680
0.68

Lose trades
320
0.32

Win trades
384
0.384

Missed opportunities
141
0.141

Bullets dodged
141
0.141


In [15]:
df1 = dataset.copy().iloc[DAY_OFFSET-1:].head(NUM_TEST_SAMPLES)
df2 = df.copy()

In [16]:
pd.concat([df1.reset_index(),df2], axis=1).drop(['High','Low','Open','Volume','Adj Close'], axis=1)

Unnamed: 0,Date,Close,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,2019-01-15,260.350006,1.0115,1.003908,1,1,0,0,0,1
1,2019-01-14,257.399994,0.9939,0.997785,0,0,0,1,0,1
2,2019-01-11,258.980011,1.0004,0.998481,0,0,0,0,1,1
3,2019-01-10,258.880005,1.0035,0.993334,0,0,0,0,1,1
4,2019-01-09,257.970001,1.0047,0.997865,0,0,0,0,1,1
5,2019-01-08,256.769989,1.0094,0.997978,0,0,0,0,1,1
6,2019-01-07,254.380005,1.0079,1.001145,1,1,0,0,0,1
7,2019-01-04,252.389999,1.0335,1.010132,1,1,0,0,0,1
8,2019-01-03,244.210007,0.9761,1.013573,1,0,1,0,0,0
9,2019-01-02,250.179993,1.0010,1.006402,1,1,0,0,0,1
