In [1]:
import pandas as pd

Get dataframe from json and prepare it for normzlization by removing timestamp from it

In [2]:
source_df = pd.read_json('chart_data.json')[1:]
timestamps_col = source_df.pop('date')
source_df.head()

Unnamed: 0,close,high,low,open,quoteVolume,volume,weightedAverage
1,225.0,225.0,225.0,225.0,0.0,0.0,225.0
2,225.0,225.0,225.0,225.0,0.0,0.0,225.0
3,225.0,225.0,225.0,225.0,0.0,0.0,225.0
4,225.0,225.0,225.0,225.0,0.0,0.0,225.0
5,225.0,225.0,225.0,225.0,0.0,0.0,225.0


Normalize dataframe

In [3]:
from sklearn import preprocessing
scaled_values = preprocessing.MinMaxScaler().fit_transform(source_df.values)
scaled_df = pd.DataFrame(scaled_values)
scaled_df.columns = source_df.columns
scaled_df['index'] = scaled_df.index
source_df['index'] = source_df.index

In [4]:
import numpy as np
 
class PastSampler(object):
    '''
    Forms training samples for predicting future values from past value
    '''
     
    def __init__(self, P, F, sliding_window = True):
        # Predict K future sample using N previous samples
        self.F = F
        self.P = P
        self.sliding_window = sliding_window
 
    def transform(self, A):
        M = self.P + self.F     #Number of samples per row (sample + target)
        # I - list of indexes of K and N
        if self.sliding_window:
            # dynamic samples location
            # M slides per each sample
            I = np.arange(M) + np.arange(A.shape[0] - M + 1).reshape(-1, 1)
        else:
            # static samples location
            # 1 slide per each sample
            if A.shape[0] % M == 0:
                I = np.arange(M) + np.arange(0, A.shape[0], M).reshape(-1,1)
            else:
                I = np.arange(M) + np.arange(0, A.shape[0] -M, M).reshape(-1,1)
        # A[I]: https://docs.scipy.org/doc/numpy/user/quickstart.html#indexing-with-arrays-of-indices
        # B is list of num_of_features-dimenstional charts
        B = A[I].reshape(-1, M * A.shape[1], A.shape[2])
        ci = self.P * A.shape[1]    #Number of features per sample
        return B[:, :ci], B[:, ci:] #Sample matrix, Target matrix

In [5]:
NPS, NFS = 50, 5         #Number of past and future samples
ps = PastSampler(NPS, NFS, sliding_window=True)
X, Y = ps.transform(scaled_df.values[:, None, :])
input_times, output_times = ps.transform(timestamps_col.values[:,None,None])
original_X, original_Y = ps.transform(source_df.values[:, None, :])

In [6]:
original_X.shape, original_Y.shape

((364143, 50, 8), (364143, 5, 8))

In [7]:
target_Y_column_index = scaled_df.columns.get_loc('weightedAverage')
Y = Y[:,:, target_Y_column_index]
X.shape, Y.shape

((364143, 50, 8), (364143, 5))

In [8]:
sample_X = pd.DataFrame(X[0])
sample_X.columns = scaled_df.columns
sample_Y = Y[0]
sample_X

Unnamed: 0,close,high,low,open,quoteVolume,volume,weightedAverage,index
0,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,0.0
1,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,1.0
2,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,2.0
3,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,3.0
4,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,4.0
5,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,5.0
6,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,6.0
7,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,7.0
8,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,8.0
9,0.006364,0.006362,0.006376,0.006364,0.0,0.0,0.011313,9.0


In [9]:
sample_Y

array([0.0122687, 0.0122687, 0.0122687, 0.0122687, 0.0122687])

In [10]:
training_size = int(0.8 * X.shape[0])
test_size = X.shape[0] - training_size

X_train, X_test = X[:training_size], X[training_size:]
Y_train, Y_test = Y[:training_size], Y[training_size:]

In [11]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((291314, 50, 8), (72829, 50, 8), (291314, 5), (72829, 5))

In [12]:
from tensorflow.keras.layers import GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation

model = Sequential()
model.add(GRU(units=50, input_shape=X_train.shape[1:], return_sequences=False))
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(NFS))
model.add(Activation('relu'))
model.compile(loss='mse', optimizer='adam')
model.fit(X_train, Y_train, batch_size=50, validation_data=(X_test, Y_test), epochs=2)

  from ._conv import register_converters as _register_converters


Train on 291314 samples, validate on 72829 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f5347b9ff28>

In [13]:
prediction = model.predict(X_test)

In [14]:
Y_test

array([[0.48139444, 0.48157061, 0.48265471, 0.4826994 , 0.48246644],
       [0.48157061, 0.48265471, 0.4826994 , 0.48246644, 0.48305537],
       [0.48265471, 0.4826994 , 0.48246644, 0.48305537, 0.48406718],
       ...,
       [0.35471328, 0.35463578, 0.35465086, 0.35412296, 0.35398454],
       [0.35463578, 0.35465086, 0.35412296, 0.35398454, 0.35373859],
       [0.35465086, 0.35412296, 0.35398454, 0.35373859, 0.35434309]])

In [21]:
prediction = prediction.reshape((prediction.shape[0] * prediction.shape[1], 1)).T

In [26]:
prediction[prediction>0]

array([], dtype=float32)