In [134]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras import layers


# Fetch the daily SPY price history CSV.
dataset_path = keras.utils.get_file("SPY.csv", "http://localhost:8000/data/daily/SPY.csv")

# Columns expected in the file. The names themselves are read from the CSV
# header row (header=0), so this list is informational only.
column_names = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
raw_dataset = pd.read_csv(
    dataset_path,
    # Pin every numeric column to float64. 'Low' was missing from the original
    # mapping and is included here so all price columns are treated alike.
    dtype={
        'Open': np.float64,
        'High': np.float64,
        'Low': np.float64,
        'Close': np.float64,
        'Adj Close': np.float64,
        'Volume': np.float64,
    },
    header=0,
    na_values="?",
    comment='\t',
    sep=",",
    skipinitialspace=True,
)

# Work on a copy ordered newest-first: row 0 is the most recent session and
# row i-1 is the session that follows row i.
dataset = raw_dataset.copy()
dataset = dataset.sort_values(by=['Date'], ascending=False)

# print() with a single argument prints the same text on Python 2 and Python 3;
# the bare `print len(dataset)` statement was a syntax error on Python 3.
print(len(dataset))

# Not rendered: a notebook only displays the final expression of a cell.
dataset.head(5)

# Per-column summary statistics, one row per column.
dataset_stats = dataset.describe()
dataset_stats = dataset_stats.transpose()


# Create features (only close price for now)
def convert_to_percentage(old, new):
    """Return how far ``new`` sits below ``old``, as a fraction of ``old``.

    Despite the name the result is a fraction, not a percent: it is not
    multiplied by 100. The value is positive when ``new`` is smaller than
    ``old`` and negative when it is larger.
    """
    difference = old - new
    return difference / old


def convert_labels_to_category(labels):
    """Bucket each label by rounding its first element to two decimals.

    Args:
        labels: iterable of sequences; only element 0 of each is used
            (in this notebook, the next-close / current-close ratio).

    Returns:
        A list with one rounded float per input label. Two decimals on a
        price ratio means buckets one percentage point wide (0.99, 1.00,
        1.01, ...).

    A real list is returned rather than ``map(...)`` because callers slice
    the result, and ``map`` yields a non-sliceable iterator on Python 3.
    A cruder up/down encoding (1 if the ratio is above 1, else 0) was
    considered earlier and is intentionally not used here.
    """
    return [round(label[0], 2) for label in labels]

def convert_to_train(raw_dataset, window_size=50, first_index=5):
    """Turn a price table into sliding-window features and next-step labels.

    Rows are expected newest-first (the caller sorts by date descending), so
    row ``i - 1`` is the session that follows row ``i``.

    For every start row ``i`` in ``range(first_index, len - window_size)``:
      * the feature vector is the ``window_size`` closes starting at row
        ``i``, each expressed relative to the close at row ``i`` via
        ``convert_to_percentage`` (element 0 is therefore always 0);
      * the label is ``close[i - 1] / close[i]``, i.e. the one-step-ahead
        close relative to the window's latest close.

    Args:
        raw_dataset: DataFrame with a 'Close' column; it is not modified.
        window_size: number of consecutive closes per feature vector.
        first_index: first window start row. Must be at least 1 so that row
            ``i - 1`` exists. The default of 5 leaves room for the 3- and
            5-step-ahead labels that were tried previously.

    Returns:
        ``[features, labels]`` where ``features`` is a list of float lists
        and ``labels`` is whatever ``convert_labels_to_category`` returns for
        the raw one-element label lists.

    Raises:
        ValueError: if ``first_index`` is smaller than 1.
    """
    if first_index < 1:
        # With i == 0, iloc[i - 1] would silently wrap around to the last row.
        raise ValueError("first_index must be >= 1")

    # Only 'Close' is needed, so select it once instead of copying the whole
    # frame up front and again for every window.
    closes = raw_dataset['Close']

    features = []
    labels = []
    for i in range(first_index, len(closes) - window_size):
        latest_close = closes.iloc[i]

        features.append(
            closes.iloc[i:i + window_size]
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            closes.iloc[i - 1] / latest_close,  # 1 day trade
        ])

    # Raw ratios are continuous, which makes exact-match accuracy meaningless;
    # bucket them so predictions can be compared against discrete targets.
    labels = convert_labels_to_category(labels)

    return [features, labels]
converted_feature_set = convert_to_train(dataset)

# `sess` is not defined anywhere in the visible notebook, so `sess.run` raised
# NameError on a fresh kernel. Bind it to the session the Keras backend uses.
sess = keras.backend.get_session()
init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init)
def build_model(input_dim=None):
  """Build and compile the dense regression network.

  Architecture: Dense(64) -> Dense(32) -> Dense(16), all ReLU, followed by a
  single linear output unit.

  Args:
    input_dim: number of features per sample. When omitted it is taken from
      the first feature vector of the module-level ``converted_feature_set``,
      which matches the original zero-argument behaviour.

  Returns:
    A compiled ``keras.Sequential`` model using MSE loss and the 'sgd'
    optimizer.
  """
  if input_dim is None:
    input_dim = len(converted_feature_set[0][0])

  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[input_dim]),
    layers.Dense(32, activation=tf.nn.relu),
    layers.Dense(16, activation=tf.nn.relu),
    layers.Dense(1)
  ])

  # The previous body created tf.train.RMSPropOptimizer(0.001) but never passed
  # it to compile(), so training has always used 'sgd'. The dead local is
  # removed; pass an optimizer instance here to switch.
  # 'mae' is added because the target is a continuous ratio. 'accuracy' is kept
  # only for compatibility and is not an informative metric for regression.
  model.compile(loss='mse',
                optimizer='sgd',
                metrics=['mae', 'accuracy'])
  return model

# TensorBoard callback: writes the graph to ./logs, with histograms and image
# summaries turned off.
tensorboard = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=0,
    write_graph=True,
    write_images=False,
)

# Instantiate the network and print its layer / parameter summary.
model = build_model()
model.summary()



6445
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_34 (Dense)             (None, 64)                3264      
_________________________________________________________________
dense_35 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_36 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_37 (Dense)             (None, 1)                 17        
Total params: 5,889
Trainable params: 5,889
Non-trainable params: 0
_________________________________________________________________


In [135]:

# Samples are ordered newest-first, so the first HOLDOUT_SIZE samples (the most
# recent ones) form the test set and everything older is used for training.
HOLDOUT_SIZE = 900
all_features, all_labels = converted_feature_set

train_data = np.array(all_features[HOLDOUT_SIZE:])
train_labels = np.array(all_labels[HOLDOUT_SIZE:])

test_data = np.array(all_features[:HOLDOUT_SIZE])
test_labels = np.array(all_labels[:HOLDOUT_SIZE])


In [136]:
# print() with a single argument prints the same text on Python 2 and Python 3;
# the bare `print x` statement form was a syntax error on Python 3.
print(train_data)
print(train_labels)

[[ 0.         -0.01299094 -0.02653142 ... -0.0270311  -0.02113519
  -0.02048569]
 [ 0.         -0.01336683 -0.01099929 ... -0.0080398  -0.00739864
  -0.007152  ]
 [ 0.          0.00233632 -0.00316383 ...  0.00588947  0.00613285
   0.00725232]
 ...
 [ 0.          0.         -0.00139082 ... -0.0006943  -0.00139082
   0.00278164]
 [ 0.         -0.00139082  0.00069652 ... -0.00139082  0.00278164
   0.01321391]
 [ 0.          0.00208444  0.01597333 ...  0.00416667  0.01458444
   0.01666667]]
[1.01 0.99 0.99 ... 1.   1.   1.  ]


In [137]:

# Train for 50 epochs, holding out 20% of the training rows for validation and
# logging to TensorBoard through the callback.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=50,
    validation_split=0.2,
    verbose=1,
    callbacks=[tensorboard],
)

Train on 4392 samples, validate on 1098 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [138]:
# Predict on the held-out test features. Each prediction row is indexed with
# [0] downstream to obtain the scalar signal.
outputs = model.predict(test_data)


In [139]:

# Materialise both sequences as lists. On Python 3, map() is lazy, so `trades`
# would consume the `signals` iterator and leave it exhausted (and `trades`
# itself unevaluated) by the time the DataFrame is built.
signals = [prediction[0] for prediction in outputs]

# Enter a trade (1) only when the predicted ratio, rounded to two decimals,
# is above 1; otherwise stay out (0).
trades = [1 if round(signal, 2) > 1 else 0 for signal in signals]

In [140]:
# One row per test sample: the raw model output, the bucketed actual ratio and
# the 0/1 trade decision derived from the output.
df = pd.DataFrame(
    data={
        'signal': signals,
        'actual': test_labels,
        'trade': trades,
    }
)

In [141]:
def label_success(row):
    """Return 1 when the trade decision in ``row`` matched the outcome, else 0.

    ``row`` must provide 'actual' (the realised ratio) and 'trade' (1 for
    entered, 0 for skipped). Entering counts as a success only when 'actual'
    is above 1.00; skipping counts only when 'actual' is at most 1.00. Any
    other 'trade' value is scored 0.
    """
    actual = row['actual']
    trade = row['trade']

    if trade == 1:
        # Entered: correct only if the price actually went up.
        return 1 if actual > 1.00 else 0
    if trade == 0:
        # Skipped: correct only if the price was flat or went down.
        return 1 if actual <= 1.00 else 0
    return 0

# Score every row (axis=1 hands each row to label_success) and store the 0/1
# result alongside the inputs it was computed from.
success = df.apply(label_success, axis=1)
df['success'] = success

In [142]:
# Display the per-sample results table (signal, actual, trade, success).
df

Unnamed: 0,actual,signal,trade,success
0,1.01,0.999848,0,0
1,1.00,1.001239,0,1
2,1.00,1.002118,0,1
3,1.00,1.002127,0,1
4,1.00,1.004922,0,1
5,1.00,1.004544,0,1
6,1.01,1.004546,0,0
7,0.99,1.005382,1,0
8,1.01,1.005768,1,1
9,1.00,1.001553,0,1


In [143]:
# `success` holds 0/1 flags, so the sum is the number of test rows where the
# trade decision matched the outcome.
sum(success)

619