In [21]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np 
import pandas as pd 
#Pure technical time series analysis on stocks
from collections import defaultdict
from bbg.sapi import BbgSapi
import pickle as pkl 

In [25]:
company = pd.read_excel('company.xlsx')

In [22]:
%%capture
total = defaultdict()
sapi = BbgSapi()
stock_names= ['atus', 'amzn','armk','anet','azo']
stock_tickers= ['ATUS US Equity', 'AMZN US Equity', 'ARMK US Equity','ANET US Equity','AZO US Equity']
atus = sapi.bdh('ATUS US Equity', start_date ='1970-01-01')
atus.set_index ('asofdate', inplace = True)

In [23]:
for i, j in zip(stock_names, stock_tickers): 
    total[i] = sapi.bdh(j,start_date = '1970-01-01')

In [24]:
with open('total.pkl','wb') as picklefile :
    pkl.dump(total,picklefile)

# Pure LSTM time series

In [5]:
#load dataframe into properly shaped numpy arrays
"""LSTM work by taking in a numpy array of 3 dimensions (N,M,F), 
N is the number of training sequence,
W is the sequence length (the size of the window),
F is the number of features of each sequence
"""

def load_data(df, seq_len, normalise_window):
    result = np.array(df)
    row = round(0.9* result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]
    

In [12]:
load_data(atus[['data_value']],50,10)

[array([], shape=(272, 0, 1), dtype=float64),
 array([22.59, 33.44, 17.79, 27.3 , 22.76, 16.95, 19.03, 32.01, 18.42,
        30.96, 18.41, 19.97, 30.01, 20.1 , 21.24, 32.35, 27.3 , 18.87,
        32.27, 18.19, 31.05, 18.36, 19.21, 20.19, 20.9 , 22.75, 21.23,
        29.5 , 25.15, 21.8 , 18.6 , 16.89, 21.3 , 17.92, 34.3 , 25.52,
        20.23, 32.68, 30.69, 21.44, 31.02, 28.66, 21.67, 17.1 , 33.88,
        16.57, 31.91, 20.97, 31.2 , 31.49, 19.25, 17.66, 20.48, 18.56,
        32.91, 21.9 , 23.06, 17.51, 16.79, 18.71, 17.06, 18.71, 19.15,
        17.32, 16.89, 27.93, 18.78, 21.47, 20.21, 19.81, 19.7 , 27.02,
        31.9 , 18.41, 28.24, 30.89, 17.7 , 25.6 , 19.86, 31.34, 27.67,
        21.9 , 19.56, 17.9 , 19.55, 22.83, 24.94, 17.9 , 31.09, 21.86,
        29.69, 30.68, 27.  , 23.27, 20.08, 17.6 , 23.52, 30.  , 18.13,
        32.86, 18.79, 19.33, 22.38, 18.67, 19.63, 20.25, 24.58, 20.34,
        29.25, 27.53, 18.5 , 19.19, 31.04, 32.71, 21.01, 20.57, 18.33,
        32.48, 26.75, 16.98, 25

In [21]:
def build_model(layers):
    model = Sequential()

    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

In [22]:
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

epochs  = 1
seq_len = 50

print('> Loading data... ')

X_train, y_train, X_test, y_test = load_data(atus[['data_value']], seq_len, True)

print('> Data Loaded. Compiling...')

model = lstm.build_model([1, 50, 100, 1])

model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epoch=epochs,
    validation_split=0.05)

predicted = predict_point_by_point(model, X_test)

> Loading data... 


TypeError: expected str, bytes or os.PathLike object, not DataFrame

In [17]:
def load_data(filename, seq_len, normalise_window):
    f = open(filename, 'rb').read()
    data = f.decode().split('\n')

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])
    
    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

def build_model(layers):
    model = Sequential()

    model.add(LSTM(
        input_shape=(layers[1], layers[0]),
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

def predict_point_by_point(model, data):
    #Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

def predict_sequence_full(model, data, window_size):
    #Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    #Predict sequence of 50 steps before shifting prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(int(len(data)/prediction_len)):
        curr_frame = data[i*prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs