# Value + error

Use valuation model and error model in order to determine when to trade

In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
import keras
import seaborn as sns
import matplotlib.pyplot as plt
from keras import layers
import pandas_datareader as pdr
from datetime import datetime
from keras.models import load_model
from keras.utils import CustomObjectScope
from keras.initializers import glorot_uniform
import itertools
from keras.utils import CustomObjectScope
from keras.initializers import glorot_uniform


In [5]:

NUM_INPUT_NEURONS = 64
NUM_OUTPUT_NEURONS = 1
DAY_OFFSET = 5

def from_network(symbol):
    return pdr.get_data_yahoo(symbols=symbol, start=datetime(1900, 1, 1)).sort_values(by=['Date'],ascending=False)

# Create features (only close price for now)
def convert_to_percentage(old, new):
    return (old - new) / old

def convert_labels_to_category(labels): 
    # Simplification - If positive return, 1, else 0
    # return map(lambda arr: 1 if arr[0] > 1 else 0, labels)
    # rounding simpliciation
    return map(lambda arr: map(lambda val: round(val,4),arr), labels)

def convert_to_train(raw_dataset, offset=5):
    dataset = raw_dataset.copy()
    features = []
    labels = []
    for i in range(offset, len(dataset) - NUM_INPUT_NEURONS):

        feature_dataset = dataset[i:i+NUM_INPUT_NEURONS].copy()
        latest_close = feature_dataset['Close'].iloc[0]
        
        features.append(
            feature_dataset['Close']
                .map(lambda current: convert_to_percentage(latest_close, current))
                .tolist()
        )
        labels.append([
            dataset['Close'].iloc[i-1] / latest_close, # 1 day trade
        ])
        
    # Without converting labels the precision is hard to determine accuracy. 
    # Rather than crude 0/1, maybe this can be more sophisticated
    labels = convert_labels_to_category(labels)
    
    return [features,labels]

def split_data(symbol):
    fetched = from_network(symbol)
    converted = convert_to_train(fetched)
    features = converted[0]
    labels = converted[1]
    training = [ # since we decide a model, use all data for training
        features[1:],
        labels[1:]
    ]
    validation = [
        features[5:1000],
        labels[5:1000]
    ]
    prediction = [
        features[:5],
        labels[:5],
    ]
    
    return {
        'symbol': symbol,
        'prediction': prediction,
        'validation': validation,
        'training': training,
    }

    
def combine_all(accum, prep):
    return {   
        'prediction':[
            accum['prediction'][0] + prep['prediction'][0],
            accum['prediction'][1] + prep['prediction'][1],
        ],
        'validation':[
            accum['validation'][0] + prep['validation'][0],
            accum['validation'][1] + prep['validation'][1],
        ],
        'training':[
            accum['training'][0] + prep['training'][0],
            accum['training'][1] + prep['training'][1],
        ],
    }



In [6]:
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):
    error_model = load_model('sector_model_error.h5')
    valuation_model = load_model('sector_model.h5')



In [None]:
prepped_data = map(split_data, [
    'QQQ',
    'SPY',
    'IWM',
    'XLK',
    'XLF',
    'XLE',
    'XLP',
    'XLV',
    'XLY',
    'XLI',
    'XLU',
])


In [12]:
for i in range(len(prepped_data)):
    print prepped_data[i]['symbol']
combined = reduce(combine_all, prepped_data,{
    'prediction':[[],[]],
    'validation':[[],[]],
    'training':[[],[]],
})

#len(converted_feature_set[0][0])
# print len(combined['training'][0])
# train_data = np.array(combined['training'][0])
# train_labels = np.array(combined['training'][1])

print len(combined['validation'][0])
validation_data = np.array(combined['validation'][0])
validation_labels = np.array(combined['validation'][1])

# print len(combined['prediction'][0])
# prediction_data = np.array(combined['prediction'][0])
# prediction_labels = np.array(combined['prediction'][1])


SPY
995


In [16]:
value_predictions = valuation_model.predict(validation_data)
error_data = np.array(map(lambda inputs: list(itertools.chain(inputs[0],inputs[1])), zip(validation_data, validation_labels)))
error_predictions = error_model.predict(error_data)




In [20]:
# value_predictions

In [21]:
# error_predictions

In [22]:
# validation_labels

In [31]:
# primitive policy temporarily in place of a RL policy network

# signal step for our policy network
signals = map(lambda arr: (arr[0] - arr[1])[0],zip(value_predictions, error_predictions))
trades = map(lambda signal: 1 if round(signal,4) > 1 else 0,signals)

In [32]:
# one day return
actual = map(lambda arr: arr[0],validation_labels)

df = pd.DataFrame({
    'signal':signals,
    'actual': actual,
    'trade':trades,
})
df['entry_success'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 1) else 0,axis=1)
df['entry_failure'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 1) else 0,axis=1)
df['avoid_success'] = df.apply (lambda row: 1 if (row['actual'] < 1.00 and row['trade'] == 0) else 0,axis=1)
df['avoid_failure'] = df.apply (lambda row: 1 if (row['actual'] > 1.00 and row['trade'] == 0) else 0,axis=1)

# primitive policy - replace with a policy network which maximizes reward
def label_success (row):
    return 0 if (row['entry_failure'] == 1 or row['entry_failure'] == 1) else 1

success = df.apply (lambda row: label_success (row),axis=1)
df['success'] = success;
df

Unnamed: 0,actual,signal,trade,entry_success,entry_failure,avoid_success,avoid_failure,success
0,0.9924,1.006126,1,0,1,0,0,0
1,1.0085,1.000891,1,1,0,0,0,1
2,1.0005,1.003101,1,1,0,0,0,1
3,1.0021,1.002105,1,1,0,0,0,1
4,0.9865,1.000315,1,0,1,0,0,0
5,1.0133,1.001057,1,1,0,0,0,1
6,1.0076,1.003917,1,1,0,0,0,1
7,1.0024,1.005774,1,1,0,0,0,1
8,1.0115,1.006273,1,1,0,0,0,1
9,0.9939,1.004409,1,0,1,0,0,0


In [33]:
print df['actual'].corr(df['signal'])
print df['actual'].corr(df['trade'])
print df['actual'].corr(df['entry_success'])
print df['actual'].corr(df['success'])

0.15049251377887793
0.09348664724763191
0.4291298930222943
0.3595118730771406


In [35]:
NUM_TEST_SAMPLES = len(validation_data)
print NUM_TEST_SAMPLES

print '\nPrecision' # optimize for this since we can increase discovery, so long as we find enough trades
print sum(df['entry_success']) * 1.00 / (sum(df['entry_success']) + sum(df['entry_failure'])) 

print '\nRecall'
print sum(df['entry_success']) * 1.00 / (sum(df['entry_success']) + sum(df['avoid_failure']))

print '\nAccuracy'
print sum(df['entry_success']) * 1.00 / (NUM_TEST_SAMPLES)

print '\nNon-loss events'
print sum(df['success'])
print sum(df['success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nLose trades'
print sum(df['entry_failure'])
print sum(df['entry_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nWin trades'
print sum(df['entry_success'])
print sum(df['entry_success']) / (NUM_TEST_SAMPLES * 1.00)

print '\nMissed opportunities'
print sum(df['avoid_failure'])
print sum(df['avoid_failure']) / (NUM_TEST_SAMPLES * 1.00)

print '\nBullets dodged'
print sum(df['avoid_success'])
print sum(df['avoid_success']) / (NUM_TEST_SAMPLES * 1.00)


995

Precision
0.575371549894

Recall
0.517175572519

Accuracy
0.272361809045

Non-loss events
795
0.798994974874

Lose trades
200
0.201005025126

Win trades
271
0.272361809045

Missed opportunities
253
0.254271356784

Bullets dodged
257
0.258291457286
