In [11]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')
# sns.set()

In [4]:
# Load the daily GOOG quotes; the first CSV column holds the trading date.
df = pd.read_csv('../dataset/GOOG-year.csv')
# Keep the parsed dates as a plain Python list so later cells can append to it.
date_ori = list(pd.to_datetime(df.iloc[:, 0]))
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-11-02,778.200012,781.650024,763.450012,768.700012,768.700012,1872400
1,2016-11-03,767.25,769.950012,759.030029,762.130005,762.130005,1943200
2,2016-11-04,750.659973,770.359985,750.560974,762.02002,762.02002,2134800
3,2016-11-07,774.5,785.190002,772.549988,782.52002,782.52002,1585100
4,2016-11-08,783.400024,795.632996,780.190002,790.51001,790.51001,1350800


In [5]:
# Rescale every column after the date column with MinMaxScaler (default
# feature range). The cast is done once and the scaler is fitted once; the
# original fitted it twice (`fit` followed by `fit_transform`) on the same data.
features = df.iloc[:, 1:].astype('float32')
minmax = MinMaxScaler().fit(features)
# NOTE: despite its name, `df_log` holds min-max scaled values, not logarithms.
# Its columns are positional (0..n-1) because it is rebuilt from a bare array.
df_log = pd.DataFrame(minmax.transform(features))
df_log.head()

Unnamed: 0,0,1,2,3,4,5
0,0.123285,0.093923,0.12408,0.112708,0.112708,0.280549
1,0.083119,0.05418,0.108808,0.090008,0.090008,0.296007
2,0.022265,0.055573,0.079545,0.089628,0.089628,0.337838
3,0.109713,0.105948,0.155523,0.160459,0.160459,0.217824
4,0.142359,0.141421,0.181922,0.188066,0.188066,0.16667


In [8]:
class Model:
    """Stacked-LSTM sequence regressor built as a TensorFlow 1.x graph.

    The graph consumes one window of feature rows plus an explicit recurrent
    state and emits one prediction row per time step of that window.

    Parameters
    ----------
    learning_rate : float
        Step size handed to ``tf.train.AdamOptimizer``.
    num_layers : int
        Number of LSTM cells stacked inside the ``MultiRNNCell``.
    size : int
        Number of input features per time step (last axis of ``X``).
    layer_unit : int
        Number of units in each LSTM cell.
    output_size : int
        Number of values predicted per time step (last axis of ``Y``).
    forget_bias : float, default 0.1
        Despite its name, this value is used as the dropout *keep*
        probability applied to the RNN outputs (``output_keep_prob``); it is
        not the LSTM forget-gate bias. The name is kept for compatibility
        with existing callers.

    Attributes
    ----------
    X : placeholder, shape (batch, time, size)
    Y : placeholder, shape (time, output_size)
    hidden_layer : placeholder, shape (batch, num_layers * 2 * layer_unit)
        Initial recurrent state; feed zeros to start a fresh sequence or the
        previous ``last_state`` to continue one.
    keep_prob : scalar placeholder defaulting to ``forget_bias``
        Feed ``1.0`` to disable dropout (e.g. when predicting). When it is not
        fed, the graph applies dropout with ``forget_bias`` as before.
    outputs, last_state : results of ``tf.nn.dynamic_rnn``
    logits : per-time-step predictions for the last sequence in the batch
    cost : mean squared error between ``Y`` and ``logits``
    optimizer : Adam training op minimising ``cost``
    """

    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        layer_unit,
        output_size,
        forget_bias = 0.1,
    ):
        def lstm_cell(layer_unit):
            # state_is_tuple=False keeps the cell state as a single tensor so
            # it can be fed through the flat `hidden_layer` placeholder below.
            return tf.nn.rnn_cell.LSTMCell(layer_unit, state_is_tuple = False)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(layer_unit) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        # Expose the keep probability as an overridable scalar instead of a
        # constant baked into the graph, so dropout can be turned off at
        # inference time by feeding 1.0. Left unfed, it equals `forget_bias`.
        self.keep_prob = tf.placeholder_with_default(
            tf.constant(forget_bias, dtype = tf.float32), shape = ()
        )
        drop = tf.contrib.rnn.DropoutWrapper(
            rnn_cells, output_keep_prob = self.keep_prob
        )
        self.hidden_layer = tf.placeholder(
            tf.float32, (None, num_layers * 2 * layer_unit)
        )
        # outputs: [batch_size, window_size, layer_unit]
        # last_state: [batch_size, num_layers * 2 * layer_unit]
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop, self.X, initial_state = self.hidden_layer, dtype = tf.float32
        )
        # NOTE: `outputs[-1]` indexes the *batch* axis, i.e. it selects every
        # time step of the last sequence in the batch. That lines up with `Y`
        # only when a single sequence is fed (batch size 1).
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)

In [9]:
# --- network architecture ---
num_layers = 1       # LSTM cells stacked in the model
layer_unit = 128     # units per LSTM cell
# NOTE: despite the name, this value is passed to Model as `forget_bias`,
# which the model uses as the dropout *keep* probability on the RNN outputs.
dropout_rate = 0.7

# --- training / forecasting schedule ---
timestamp = 5        # rows per window fed to the network in one step
epoch = 100          # full passes over the series during training
future_day = 50      # extra rows reserved after the history in the prediction buffer

In [12]:
# Start from an empty default graph so re-running this cell does not stack a
# second copy of the model on top of the first.
tf.reset_default_graph()

# The network predicts every scaled column from every scaled column, so the
# input and output widths are both the number of columns in df_log.
n_features = df_log.shape[1]
modelnn = Model(
    learning_rate=0.01,
    num_layers=num_layers,
    size=n_features,
    layer_unit=layer_unit,
    output_size=n_features,
    forget_bias=dropout_rate,
)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())



In [13]:
# Train one window at a time (batch size 1). Each window feeds `timestamp`
# consecutive rows and is scored against the same rows shifted one step ahead:
#   X0: t, t+1, t+2      Y0: t+1, t+2, t+3
#   X1: t+3, t+4, t+5    Y1: t+4, t+5, t+6
# The recurrent state returned for one window is fed back in as the initial
# state of the next one, and is reset to zeros at the start of every epoch.
n_rows = df_log.shape[0]
# Width of the flat state fed through modelnn.hidden_layer. The original cell
# used `size_layer`, which is never defined in this notebook and raises a
# NameError on a fresh kernel; the configured name is `layer_unit`.
state_width = num_layers * 2 * layer_unit

for i in range(epoch):
    init_value = np.zeros((1, state_width))
    total_loss = 0
    n_batches = 0
    for k in range(0, n_rows - 1, timestamp):
        # Stop one row short of the end so a next-step target always exists.
        index = min(k + timestamp, n_rows - 1)
        batch_x = np.expand_dims(df_log.iloc[k : index, :].values, axis = 0)
        batch_y = df_log.iloc[k + 1 : index + 1, :].values
        last_state, _, loss = sess.run(
            [modelnn.last_state, modelnn.optimizer, modelnn.cost],
            feed_dict = {
                modelnn.X: batch_x,
                modelnn.Y: batch_y,
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        total_loss += loss
        n_batches += 1
    # Average over the windows actually processed. The previous divisor
    # (n_rows // timestamp) differs from the real window count whenever
    # (n_rows - 1) is not a multiple of `timestamp`.
    total_loss /= max(n_batches, 1)
    if (i + 1) % 10 == 0:
        print('epoch:', i + 1, 'avg loss:', total_loss)

epoch: 10 avg loss: 0.010562663848977536
epoch: 20 avg loss: 0.0072811601415742186
epoch: 30 avg loss: 0.007155357702577021
epoch: 40 avg loss: 0.007435436437372118
epoch: 50 avg loss: 0.0057987177616450935
epoch: 60 avg loss: 0.005212539696367457
epoch: 70 avg loss: 0.006234557470306754
epoch: 80 avg loss: 0.00696694890037179
epoch: 90 avg loss: 0.006126180127030239
epoch: 100 avg loss: 0.007490287029650062


In [None]:
# Replay the trained network over the whole history, window by window, and
# store its one-step-ahead predictions. Row 0 has no prediction, so it is
# seeded with the first observed row; prediction j lands in row j + 1.
n_rows = df_log.shape[0]
output_predict = np.zeros((n_rows + future_day, df_log.shape[1]))
output_predict[0] = df_log.iloc[0]

# Largest multiple of `timestamp` that fits in the history; rows past it form
# a shorter final window handled after the loop.
upper_b = (n_rows // timestamp) * timestamp
# The original used `size_layer`, which is never defined in this notebook
# (NameError on a fresh kernel); the configured name is `layer_unit`.
init_value = np.zeros((1, num_layers * 2 * layer_unit))

for k in range(0, upper_b, timestamp):
    out_logits, last_state = sess.run(
        [modelnn.logits, modelnn.last_state],
        feed_dict = {
            modelnn.X: np.expand_dims(df_log.iloc[k : k + timestamp].values, axis = 0),
            modelnn.hidden_layer: init_value,
        },
    )
    init_value = last_state
    output_predict[k + 1 : k + timestamp + 1] = out_logits

# Run the leftover rows only when there are any: if n_rows is an exact
# multiple of `timestamp` the slice is empty and must not be fed to the model.
if upper_b < n_rows:
    out_logits, last_state = sess.run(
        [modelnn.logits, modelnn.last_state],
        feed_dict = {
            modelnn.X: np.expand_dims(df_log.iloc[upper_b:].values, axis = 0),
            modelnn.hidden_layer: init_value,
        },
    )
    init_value = last_state
    output_predict[upper_b + 1 : n_rows + 1] = out_logits

# Append the newest prediction to the history and extend the date axis by one
# calendar day. NOTE: this mutates df_log and date_ori in place, so re-running
# the cell without re-running the earlier cells appends another row each time.
df_log.loc[n_rows] = out_logits[-1]
date_ori.append(date_ori[-1] + timedelta(days = 1))