# Prototyping trades
Creates a neural network which evaluates a time series and produce a set of future predictions
Signal values/ may be used as when to make a trade or inputs into a policy

In [335]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras import layers


dataset_path = keras.utils.get_file("XLK.csv", "http://localhost:8000/data/daily/XLK.csv")

column_names = ['Date','Open','High','Low','Close','Adj Close','Volume'] 
raw_dataset = pd.read_csv(dataset_path, 
#                           names=column_names, 
                          dtype={'Close': np.float64,'Open': np.float64,'High': np.float64,'Adj Close': np.float64, 'Volume': np.float64},
                          header=0,
                          na_values = "?", 
                          comment='\t',
                          sep=",",
                          skipinitialspace=True)


dataset = raw_dataset.copy()
dataset = dataset.sort_values(by=['Date'],ascending=False)

dataset_stats = dataset.describe()
dataset_stats = dataset_stats.transpose()
NUM_INPUT_NEURONS = 64
NUM_OUTPUT_NEURONS = 3
NUM_SAMPLES = len(dataset)
NUM_TEST_SAMPLES = round(.3 * NUM_SAMPLES,0)
print NUM_SAMPLES
# Create features (only close price for now)
def convert_to_percentage(old, new):
    return (old - new) / old


def convert_labels_to_category(labels): 
    # Simplification - If positive return, 1, else 0
    # return map(lambda arr: 1 if arr[0] > 1 else 0, labels)
    # rounding simpliciation, 10th of percentage
    return map(lambda arr: map(lambda val: round(val,2),arr), labels)

def convert_to_train(raw_dataset):
    dataset = raw_dataset.copy()
    features = []
    labels = []
    for i in range(5, len(dataset) - NUM_INPUT_NEURONS):

        feature_dataset = dataset[i:i+NUM_INPUT_NEURONS].copy()
        latest_close = feature_dataset['Close'].iloc[0]
        
        features.append(
            feature_dataset['Close']
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            dataset['Close'].iloc[i-1] / latest_close, # 1 day trade
            dataset['Close'].iloc[i-3] / latest_close, # 3 day trade
            dataset['Close'].iloc[i-5] / latest_close, # 5 day trade
        ])
        
    # Without converting labels the precision is hard to determine accuracy. 
    # Rather than crude 0/1, maybe this can be more sophisticated
    labels = convert_labels_to_category(labels)
    
    return [features,labels]
converted_feature_set = convert_to_train(dataset)
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init)
def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[len(converted_feature_set[0][0])]),
    layers.Dense(32, activation=tf.nn.relu),
    layers.Dense(16, activation=tf.nn.relu),
    layers.Dense(NUM_OUTPUT_NEURONS)
  ])

  optimizer = tf.train.RMSPropOptimizer(0.001)

  model.compile(loss='mse',
                optimizer='sgd',
                metrics=[
                    'mae',
                #    'accuracy'
                ])
  return model

tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0,
                          write_graph=True, write_images=False)

model = build_model()
model.summary()



Downloading data from http://localhost:8000/data/daily/XLK.csv
4955
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_94 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_95 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_96 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_97 (Dense)             (None, 3)                 51        
Total params: 6,819
Trainable params: 6,819
Non-trainable params: 0
_________________________________________________________________


In [345]:
#len(converted_feature_set[0][0])
train_data = np.array(converted_feature_set[0][NUM_TEST_SAMPLES:])
train_labels = np.array(converted_feature_set[1][NUM_TEST_SAMPLES:])

test_data = np.array(converted_feature_set[0][:NUM_TEST_SAMPLES])
test_labels = np.array(converted_feature_set[1][:NUM_TEST_SAMPLES])


In [346]:
print train_data
print train_labels

[[ 0.         -0.01425326 -0.02073207 ...  0.09459021  0.08713965
   0.09264661]
 [ 0.         -0.00638777 -0.00926225 ...  0.09996803  0.10539761
   0.10891086]
 [ 0.         -0.00285624 -0.00476039 ...  0.11107585  0.1145668
   0.09615995]
 ...
 [ 0.         -0.00168634  0.03457133 ...  0.1163575   0.11298482
   0.11551568]
 [ 0.          0.03619663  0.03114613 ...  0.11447811  0.11700471
   0.11952862]
 [ 0.         -0.00524018  0.01790331 ...  0.08384291  0.08646161
   0.08296815]]
[[0.99 1.   1.  ]
 [0.99 0.99 0.98]
 [0.99 0.97 0.98]
 ...
 [0.99 1.04 1.04]
 [1.   1.   1.04]
 [1.04 1.02 1.07]]


In [347]:

history = model.fit(
    train_data, train_labels,
    epochs=60, validation_split = 0.2, verbose=1,
    callbacks=[tensorboard]
)

Train on 2719 samples, validate on 680 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60


Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [348]:
outputs = model.predict(test_data)


In [349]:

# one day return
actual = map(lambda arr: arr[0],outputs)
# signal step for our policy network
signals = map(lambda arr: sum(arr) / len(arr),outputs)
# primitive policy temporarily in place of a RL policy network
trades = map(lambda signal: 1 if round(signal,2) > 1 else 0,signals)

In [350]:
df = pd.DataFrame({
    'signal':signals,
#    'actual':test_labels,
    'actual': actual,
    'trade':trades,
})

In [351]:
df['entry_success'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 1) else 0,axis=1)
df['entry_failure'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 1) else 0,axis=1)
df['avoid_success'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 0) else 0,axis=1)
df['avoid_failure'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 0) else 0,axis=1)


In [352]:
# primitive policy - replace with a policy network which maximizes reward
def label_success (row):
    return 0 if (row['entry_failure'] == 1) else 1

success = df.apply (lambda row: label_success (row),axis=1)
df['success'] = success;
df

Unnamed: 0,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,0.998989,0.997559,0,0,0,1,0,1
1,1.005388,1.005238,1,1,0,0,0,1
2,1.004635,1.006961,1,1,0,0,0,1
3,1.000928,1.005100,1,1,0,0,0,1
4,1.003784,1.011030,1,1,0,0,0,1
5,0.998332,1.010496,1,0,1,0,0,0
6,0.998609,1.013737,1,0,1,0,0,0
7,0.994338,1.002572,0,0,0,1,0,1
8,0.994328,0.999333,0,0,0,1,0,1
9,0.991576,0.991536,0,0,0,1,0,1


In [354]:
print '\nLoss trades'
print sum(df['entry_failure'])
print sum(df['entry_failure']) / (NUM_SAMPLES * 1.00)

print '\nNon-loss trades'
print sum(df['success'])
print sum(df['success']) / (NUM_SAMPLES * 1.00)

print '\nWin trades'
print sum(df['entry_success'])
print sum(df['entry_success']) / (NUM_SAMPLES * 1.00)

print '\nLoss opportunities'
print sum(df['avoid_failure'])
print sum(df['avoid_failure']) / (NUM_SAMPLES * 1.00)


Loss trades
32
0.00645812310797

Non-loss trades
1455
0.293642785066

Win trades
426
0.0859737638749

Loss opportunities
308
0.0621594349142
