In [1]:
from config import CONFIG
from utils import series_to_supervised

CONFIG

{'pair': 'BTC_ETH',
 'period': 300,
 'input_size': 500,
 'output_size': 10,
 'lstm_hidden_size': 200,
 'columns': ['Close', 'Volume', 'Low', 'High'],
 'csv_src_file': 'BTC_ETH',
 'name': 'lstm',
 'folder': {'data': 'data/', 'weights': 'weights/'},
 'filename': 'BTC_ETH_lstm_i500_o10_Close_Volume_Low_High'}

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
 
# Location of the CSV price history: <data folder><csv_src_file>.csv
dfp = '{}{}.csv'.format(CONFIG['folder']['data'], CONFIG['csv_src_file'])

# Price columns to use, in the order given by the config
columns = CONFIG['columns']
dataset = pd.read_csv(dfp)

# Keep only rows whose Timestamp is after 1483228800 (2017-01-01 00:00:00 UTC).
# Alternative cut-off timestamps: 1498867200 (July 2017),
# 1514764800 (January 2018), 1519862400 (March 2018).
dataset = dataset.loc[dataset['Timestamp'] > 1483228800]
dataset.head()

Unnamed: 0,Close,Timestamp,High,Low,Open,Volume
147395,0.0084,1483229100,0.008412,0.008373,0.008379,23.101541
147396,0.00845,1483229400,0.008463,0.008388,0.0084,42.334102
147397,0.008511,1483229700,0.008511,0.008444,0.00846,70.720062
147398,0.008522,1483230000,0.008522,0.008511,0.008511,7.221969
147399,0.00853,1483230300,0.00853,0.008522,0.008522,20.96574


In [3]:
# Raw feature matrix: every dataset row, restricted to the configured columns
values = dataset[columns].values

In [4]:
# Window sizes used to frame the series for learning:
# n_lag past steps in, n_out future steps out, n_features columns per step
n_lag, n_out = CONFIG['input_size'], CONFIG['output_size']
n_features = len(columns)
(n_lag, n_features, n_out)

(500, 4, 10)

In [5]:
# Rescale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split, so the test period's min/max leak into training.
# Consider fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values)
scaled = scaler.transform(values)

In [6]:
# Frame the scaled series as a supervised-learning table using the project
# helper. Judging by the columns displayed for this cell, each row holds the
# lagged observations var*(t-n_lag) .. var*(t-1) followed by the forecast
# steps var*(t) .. var*(t+n_out-1), with one varK column per feature.
reframed = series_to_supervised(scaled, n_lag, n_out)
reframed.head(10)

Unnamed: 0,var1(t-500),var2(t-500),var3(t-500),var4(t-500),var1(t-499),var2(t-499),var3(t-499),var4(t-499),var1(t-498),var2(t-498),...,var3(t+7),var4(t+7),var1(t+8),var2(t+8),var3(t+8),var4(t+8),var1(t+9),var2(t+9),var3(t+9),var4(t+9)
500,0.003928,0.003895,0.003878,0.003777,0.004276,0.007137,0.003985,0.004123,0.004701,0.011923,...,0.002208,0.002176,0.002375,0.002927,0.002134,0.002313,0.002431,0.000452,0.002447,0.002279
501,0.004276,0.007137,0.003985,0.004123,0.004701,0.011923,0.004377,0.004457,0.004778,0.001218,...,0.002134,0.002313,0.002431,0.000452,0.002447,0.002279,0.002431,0.002936,0.002454,0.002442
502,0.004701,0.011923,0.004377,0.004457,0.004778,0.001218,0.004852,0.004532,0.004832,0.003535,...,0.002447,0.002279,0.002431,0.002936,0.002454,0.002442,0.002501,0.002153,0.002517,0.002348
503,0.004778,0.001218,0.004852,0.004532,0.004832,0.003535,0.004929,0.004587,0.004832,0.002484,...,0.002454,0.002442,0.002501,0.002153,0.002517,0.002348,0.002542,0.001085,0.002447,0.00232
504,0.004832,0.003535,0.004929,0.004587,0.004832,0.002484,0.004762,0.004587,0.004833,0.000836,...,0.002517,0.002348,0.002542,0.001085,0.002447,0.00232,0.002523,0.000341,0.002575,0.00232
505,0.004832,0.002484,0.004762,0.004587,0.004833,0.000836,0.004984,0.004587,0.004833,0.001794,...,0.002447,0.00232,0.002523,0.000341,0.002575,0.00232,0.002576,0.000571,0.002646,0.002388
506,0.004833,0.000836,0.004984,0.004587,0.004833,0.001794,0.004984,0.004587,0.004832,0.000966,...,0.002575,0.00232,0.002576,0.000571,0.002646,0.002388,0.002608,0.00061,0.002659,0.002388
507,0.004833,0.001794,0.004984,0.004587,0.004832,0.000966,0.004852,0.004587,0.004624,0.002151,...,0.002646,0.002388,0.002608,0.00061,0.002659,0.002388,0.002605,0.000516,0.002729,0.002423
508,0.004832,0.000966,0.004852,0.004587,0.004624,0.002151,0.00472,0.004586,0.004729,0.003743,...,0.002659,0.002388,0.002605,0.000516,0.002729,0.002423,0.00257,8e-05,0.002588,0.002403
509,0.004624,0.002151,0.00472,0.004586,0.004729,0.003743,0.004774,0.004484,0.004795,0.000671,...,0.002729,0.002423,0.00257,8e-05,0.002588,0.002403,0.002353,0.003495,0.002473,0.002368


In [7]:
# Drop the forecast columns we don't want to predict.
# We're only concerned with estimating the Close value, so Close must be the
# first entry in the config file's column list (it becomes var1).
#
# The drop is guarded by the expected column count so that re-running this
# cell is a no-op. Without the guard, a second run would compute offsets
# against the already-reduced frame and silently remove lagged input columns.
n_full_cols = (n_lag + n_out) * n_features  # expected width before the drop
n_kept_cols = n_lag * n_features + n_out    # expected width after the drop

if reframed.shape[1] == n_full_cols:
    # Counting back from the last column, forecast step i occupies a group of
    # n_features columns; offsets 1 .. n_features-1 within each group are the
    # non-Close features (var2, var3, ...).
    cols_to_drop = [
        reframed.shape[1] - (i * n_features + j)
        for i in range(n_out)
        for j in range(1, n_features)
    ]
    reframed = reframed.drop(reframed.columns[cols_to_drop], axis=1)
elif reframed.shape[1] != n_kept_cols:
    raise ValueError(
        "reframed has %d columns; expected %d (before dropping) or %d (after dropping)"
        % (reframed.shape[1], n_full_cols, n_kept_cols)
    )

reframed.head()

Unnamed: 0,var1(t-500),var2(t-500),var3(t-500),var4(t-500),var1(t-499),var2(t-499),var3(t-499),var4(t-499),var1(t-498),var2(t-498),...,var1(t),var1(t+1),var1(t+2),var1(t+3),var1(t+4),var1(t+5),var1(t+6),var1(t+7),var1(t+8),var1(t+9)
500,0.003928,0.003895,0.003878,0.003777,0.004276,0.007137,0.003985,0.004123,0.004701,0.011923,...,0.002842,0.002807,0.002716,0.002222,0.002152,0.00225,0.002368,0.002187,0.002375,0.002431
501,0.004276,0.007137,0.003985,0.004123,0.004701,0.011923,0.004377,0.004457,0.004778,0.001218,...,0.002807,0.002716,0.002222,0.002152,0.00225,0.002368,0.002187,0.002375,0.002431,0.002431
502,0.004701,0.011923,0.004377,0.004457,0.004778,0.001218,0.004852,0.004532,0.004832,0.003535,...,0.002716,0.002222,0.002152,0.00225,0.002368,0.002187,0.002375,0.002431,0.002431,0.002501
503,0.004778,0.001218,0.004852,0.004532,0.004832,0.003535,0.004929,0.004587,0.004832,0.002484,...,0.002222,0.002152,0.00225,0.002368,0.002187,0.002375,0.002431,0.002431,0.002501,0.002542
504,0.004832,0.003535,0.004929,0.004587,0.004832,0.002484,0.004762,0.004587,0.004833,0.000836,...,0.002152,0.00225,0.002368,0.002187,0.002375,0.002431,0.002431,0.002501,0.002542,0.002523


In [8]:
# Split into train and test sets: the first 80% of the rows are used for
# training and the remaining rows for testing (row order is preserved).
reframed_values = reframed.values
training_size = int(0.8 * reframed_values.shape[0])
train, test = reframed_values[:training_size], reframed_values[training_size:]

In [9]:
# Column bookkeeping:
#   n_obs     - number of lagged-input columns in each row
#   n_outputs - target width if every feature were predicted; only referenced
#               in the model-definition comments, since just Close is estimated
n_obs = n_lag * n_features
n_outputs = n_out * n_features

# Inputs: the leading n_obs columns, reshaped to 3D [samples, timesteps, features].
# Targets: the trailing n_out columns (Close only, which must be first in the
# config column list), reshaped to 3D [samples, timesteps, 1].
train_x = train[:, :n_obs].reshape((train.shape[0], n_lag, n_features))
train_y = train[:, -n_out:].reshape(-1, n_out, 1)

test_x = test[:, :n_obs].reshape((test.shape[0], n_lag, n_features))
test_y = test[:, -n_out:].reshape(-1, n_out, 1)

print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

(120948, 500, 4) (120948, 10, 1) (30237, 500, 4) (30237, 10, 1)


In [10]:
# Output paths inside the weights folder: the serialized architecture (.json)
# and the checkpointed weights (.h5), both prefixed with the configured filename
model_name = '{}{}_model.json'.format(CONFIG['folder']['weights'], CONFIG['filename'])
model_weights_name = '{}{}_model_weights.h5'.format(CONFIG['folder']['weights'], CONFIG['filename'])

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from keras.layers import LSTM, CuDNNLSTM, GRU,CuDNNGRU
from keras.layers import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers import Dropout, Flatten
from keras.layers import Activation, BatchNormalization
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.layers import RepeatVector
from keras.callbacks import ModelCheckpoint

# Hyper-parameters
units = CONFIG['lstm_hidden_size']
# NOTE(review): Keras Dropout takes the fraction of units to DROP, so .8
# discards 80% of the activations. Confirm this is intended and not a
# keep-probability.
dropout = .8

# Design network: a bidirectional LSTM encodes the input window into a single
# vector, RepeatVector copies it once per output step, and a second LSTM plus
# a per-timestep Dense layer decode it into the forecast sequence.
model = Sequential([
    Bidirectional(LSTM(units), input_shape=(train_x.shape[1], train_x.shape[2])),
    Dropout(dropout),
    RepeatVector(n_out),
    LSTM(units, return_sequences=True),
    Dropout(dropout),
    # We're only concerned with estimating the close value;
    # otherwise use n_outputs instead of 1, i.e. Dense(n_outputs, ...
    TimeDistributed(Dense(1, activation='relu')),
])

model.compile(loss='mse', optimizer='adam')

# Store the model: serialize the architecture to JSON
with open(model_name, "w") as json_file:
    json_file.write(model.to_json())

model.summary()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 400)               328000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 400)               0         
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 10, 400)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 10, 200)           480800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 200)           0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 10, 1)             201       
Total params: 809,001
Trainable params: 809,001
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Training hyper-parameters passed to model.fit:
# number of passes over the training set and samples per gradient update
epochs=5
batch_size=100

In [None]:
# Fit the network.
# shuffle=False keeps the training samples in their original order.
# The checkpoint callback writes the weights to model_weights_name whenever
# val_loss improves. save_best_only must be the boolean True: the previous
# string 'true' only worked because any non-empty string is truthy.
# NOTE(review): the test set doubles as the validation set here, so the
# "best" weights are selected on the same data they are later evaluated on.
history = model.fit(
    train_x, train_y,
    epochs=epochs, batch_size=batch_size,
    validation_data=(test_x, test_y),
    verbose=1, shuffle=False,
    callbacks=[ModelCheckpoint(model_weights_name, monitor='val_loss', verbose=1,
                               save_best_only=True, save_weights_only=True)])

Train on 120948 samples, validate on 30237 samples
Epoch 1/5

In [None]:
# Load the best weights saved at model_weights_name by the training checkpoint,
# recompile with the same loss/optimizer used for training, and report the
# MSE loss on the test set.
model.load_weights(model_weights_name)
model.compile(loss='mse', optimizer='adam')
model.evaluate(test_x, test_y)

In [None]:
from matplotlib import pyplot

In [None]:
# Plot the per-epoch training and validation loss recorded by model.fit
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.show()

In [None]:
# Run the model on the test inputs; per the model summary the output has
# shape (samples, n_out, 1), i.e. one forecast sequence per test sample
y = model.predict(test_x)

In [None]:
# Step 0 of every sample is the value at time t of that sample.
a = test_y[:,0]  # actual values
b = y[:,0]       # predicted values
# Extend the step-0 predictions with the remaining steps (t+1 .. t+n_out-1)
# of the last sample's forecast. Step 0 of that forecast is already b[-1], so
# it is skipped; appending all of y[-1] would plot it twice and shift the
# forecast tail one step to the right.
c = np.append(b, y[-1, 1:], axis=0)

# Show how the model fits the test data
pyplot.plot(a[:100], label='original')
pyplot.plot(b[:100], label='model')
pyplot.legend()
pyplot.show()

# Show how the model predicts data: the last n_out*4 test points followed by
# the forecast that runs past the end of the test set
pos = int(a.shape[0]-n_out*4)
pyplot.plot(a[pos:], label='original')
pyplot.plot(c[pos:], label='model')
pyplot.legend()
pyplot.show()


In [None]:
#Prediction on public data!
period = CONFIG['period']
import time
from urllib.request import urlopen
import json

# Download a live bitcoin price data set
def dl_X(now = None, points = n_lag, period = period, pair=CONFIG['pair']):
    """Download recent chart data for `pair` from the Poloniex public API.

    Parameters:
        now:    Unix timestamp marking the end of the requested window;
                defaults to the current time.
        points: number of rows to return (the last `points` entries of the
                response are kept).
        period: period in seconds, passed to the API and used to round `now`
                down to a period boundary.
        pair:   currency pair identifier passed to the API.

    Returns:
        A DataFrame with columns Close, Timestamp, High, Low, Open, Volume
        (the API fields close, date, high, low, open, volume, renamed).

    Raises:
        ValueError: if the API response is not a JSON list (for example an
        error object).
    """
    if now is None:
        now = time.time()
    # Round down to the most recent period boundary, then go back `points` periods
    end = now - now % period
    start = end - points*period
    url = "https://poloniex.com/public?command=returnChartData&currencyPair=%s&start=%d&end=%d&period=%d" % (pair, start, end, period)

    # The context manager closes the connection even if read() raises
    with urlopen(url) as response:
        payload = response.read()

    candles = json.loads(payload.decode())
    if not isinstance(candles, list):
        raise ValueError("Unexpected chart data response for %s: %r" % (pair, candles))

    # Honour the `points` argument rather than the global n_lag
    df = pd.DataFrame(candles[-points:])

    original_columns=[u'close', u'date', u'high', u'low', u'open',u'volume']
    new_columns = ['Close','Timestamp','High','Low','Open','Volume']
    df = df.loc[:,original_columns]
    df.columns = new_columns

    return df

In [None]:
def predict(when=None):
    """Download the latest input window ending at `when` and run the model on it.

    `when` is forwarded to dl_X as its `now` argument (None means "now").
    Returns a tuple of the scaled 2D feature array and the model's output for
    that window.
    """
    live_frame = dl_X(when)
    live_features = live_frame[columns].values
    # Reuse the scaler fitted on the training data so inputs share its range
    rt_scaled = scaler.transform(live_features)
    # The model expects a batch: a single sample of n_lag steps by n_features
    rt_x = rt_scaled.reshape((1, n_lag, n_features))
    print(rt_x.shape)
    return rt_scaled, model.predict(rt_x)

In [None]:
# Do some predictions for now and for several points in the past,
# expressed as a number of periods before the current time
for t in [0, 100, 200, 300, 500, 1000, 2000]:
    rt_x, predicted = predict(time.time()-t*period)

    # First feature column of the scaled inputs, and the single forecast sequence
    current = rt_x[:, 0]
    prediction = predicted[0]

    pyplot.plot(current, label='current')

    # Pad with NaN over the input window so the forecast is drawn after it
    padding = np.full_like(current, np.nan)
    predictPlot = np.concatenate((np.ravel(padding), np.ravel(prediction)))

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

In [None]:
#Prediction on live data!
# Runs until the kernel is interrupted: once per period it prints the latest
# period boundary, fetches fresh data, and plots the inputs with the forecast.
starttime = time.time()
while True:
    now = time.time()
    end = now - now % period
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(end)))

    rt_x, predicted = predict()

    # First feature column of the scaled inputs, and the single forecast sequence
    current = rt_x[:, 0]
    prediction = predicted[0]

    pyplot.plot(current, label='current')

    # Pad with NaN over the input window so the forecast is drawn after it
    padding = np.full_like(current, np.nan)
    predictPlot = np.concatenate((np.ravel(padding), np.ravel(prediction)))

    pyplot.plot(predictPlot, label='prediction')
    pyplot.legend()
    pyplot.show()

    # Sleep until the next multiple of `period` counted from starttime
    time.sleep(period - ((time.time() - starttime) % period))