In [1]:
import sys
import warnings

# Silence every warning for the rest of the session, but only when the
# interpreter was started without explicit -W options, so a user-supplied
# warning policy is left untouched.
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action='ignore')

In [6]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
import yfinance as yf

# Fixed seed handed to TensorFlow's global random generator below.
RANDOM_SEED = 1234

# Switch matplotlib to seaborn's default theme for every later figure.
sns.set()
# Seed TensorFlow through the v1-compat entry point.
tf.compat.v1.random.set_random_seed(RANDOM_SEED)

In [18]:
# Daily price bars for one ticker over calendar year 2020, fetched via yfinance.
# NOTE(review): the frame shown below has 252 rows, so `end` looks exclusive
# (2020-12-31 itself is missing) — confirm that this is intended.
TICKER = 'AAPL'
df = yf.download(TICKER, start='2020-01-01', end='2020-12-31')
df.head()



[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.249016,135480400
2020-01-03,74.287498,75.144997,74.125,74.357498,72.53688,146322800
2020-01-06,73.447502,74.989998,73.1875,74.949997,73.114891,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.771034,108872000
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.94165,132079200


## Scale the price series to [0, 1]

In [23]:
# Select the modelled price column by label rather than by position: position 4
# is 'Adj Close' in the frame displayed above, but a positional slice would
# silently pick a different column if the download ever returned another
# column order. A missing label fails loudly with a KeyError instead.
adj_close = df[['Adj Close']].astype('float32')

# Min-max scale the series (MinMaxScaler's default range) and keep the fitted
# scaler so predictions can be mapped back to prices with inverse_transform().
# NOTE(review): the scaler is fitted on the whole year, including the rows that
# are later held out as the test window — confirm this look-ahead is acceptable.
minmax = MinMaxScaler().fit(adj_close)

# `df_log` keeps its historical name; it holds the scaled values in a single
# column labelled 0 with a fresh 0..n-1 index (no log transform is applied).
df_log = pd.DataFrame(minmax.transform(adj_close))
df_log.head()

Unnamed: 0,0
0,0.231023
1,0.222082
2,0.229339
3,0.225022
4,0.23972


## Split train and test


In [24]:
# test_size: number of trailing rows held out for evaluation.
# simulation_size: how many independent train/forecast runs are performed.
test_size, simulation_size = 30, 10

# Chronological split: everything except the last `test_size` rows is used for
# training, the last `test_size` rows form the test window.
df_train, df_test = df_log.iloc[:-test_size], df_log.iloc[-test_size:]
df.shape, df_train.shape, df_test.shape

((252, 6), (222, 1), (30, 1))

In [25]:
import tensorflow as tf
import numpy as np

class Model:
    def __init__(self,
                 learning_rate,
                 num_layers,
                 size,
                 size_layer,
                 output_size,
                 forget_bias=0.1):
        
        def lstm_cell(size_layer):
            return tf.keras.layers.LSTMCell(size_layer)

        rnn_cells = tf.keras.layers.StackedRNNCells(
            [lstm_cell(size_layer) for _ in range(num_layers)]
        )
        self.X = tf.keras.Input(shape=(None, size))
        self.Y = tf.keras.Input(shape=(output_size,))
        
        drop = tf.keras.layers.Dropout(rate=1 - forget_bias)
        rnn_cells = tf.keras.layers.RNN(rnn_cells, return_state=True)
        
        self.hidden_layer = tf.keras.Input(shape=(num_layers * 2 * size_layer,))
        
        self.outputs, _ = rnn_cells(self.X, initial_state=self.hidden_layer)
        self.logits = tf.keras.layers.Dense(output_size)(self.outputs)
        
        self.model = tf.keras.Model(inputs=[self.X, self.hidden_layer], outputs=self.logits)
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate), loss='mean_squared_error')

def calculate_accuracy(real, predict):
    real = np.array(real) + 1
    predict = np.array(predict) + 1
    percentage = 1 - np.sqrt(np.mean(np.square((real - predict) / real)))
    return percentage * 100

def anchor(signal, weight):
    buffer = []
    last = signal[0]
    for i in signal:
        smoothed_val = last * weight + (1 - weight) * i
        buffer.append(smoothed_val)
        last = smoothed_val
    return buffer


In [27]:
num_layers = 1
size_layer = 128
timestamp = 5
epoch = 300
dropout_rate = 0.8
future_day = test_size
learning_rate = 0.01

In [29]:
def forecast():
    tf.reset_default_graph()
    modelnn = Model(
        learning_rate, num_layers, df_log.shape[1], size_layer, df_log.shape[1], dropout_rate
    )
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    date_ori = pd.to_datetime(df.iloc[:, 0]).tolist()
    
    pbar = tqdm(range(epoch), desc = 'train loop')
    for i in pbar:
        init_value = np.zeros((1, num_layers * 2 * size_layer))
        total_loss, total_acc = [], []
        for k in range(0, df_train.shape[0] - 1, timestamp):
            index = min(k + timestamp, df_train.shape[0] - 1)
            batch_x = np.expand_dims(
                df_train.iloc[k : index, :].values, axis = 0
            )
            batch_y = df_train.iloc[k + 1: index + 1, :].values
            logits, last_state, _, loss = sess.run(
                [modelnn.logits, modelnn.last_state, modelnn.optimizer, modelnn.cost],
                feed_dict = {
                    modelnn.X: batch_x,
                    modelnn.Y: batch_y,
                    modelnn.hidden_layer: init_value,
                },
            )
            init_value = last_state
            total_loss.append(loss)
            total_acc.append(calculate_accuracy(batch_y[:, 0], logits[:, 0]))
        pbar.set_postfix(cost = np.mean(total_loss), acc = np.mean(total_acc))
        
    future_day = test_size
    output_predict = np.zeros((df_train.shape[0] + future_day, df_train.shape[1]))
    output_predict[0] = df_train.iloc[0]
    upper_b = (df_train.shape[0] // timestamp) * timestamp
    init_value = np.zeros((1, num_layers * 2 * size_layer))
    
    for k in range(0, (df_train.shape[0] // timestamp) * timestamp, timestamp):
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(
                    df_train.iloc[k : k + timestamp], axis = 0
                ),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[k + 1 : k + timestamp + 1] = out_logits
        
    if upper_b != df_train.shape[0]:
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(df_train.iloc[upper_b], axis=0),
                modelnn.hidden_layer: init_value,
            },
        )
        output_predict[upper_b + 1 : df_train.shape[0] + 1] = out_logits
        future_day -= 1
        date_ori.append(date_ori[-1] + timedelta(days = 1))
    
    init_value = last_state
    
    for i in range(future_day):
        o = output_predict[-future_day - timestamp + i : -future_day + i]
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(o, axis=0),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[-future_day + i] = out_logits[-1]
        date_ori.append(date_ori[-1] + timedelta(days = 1))
        
    output_predict = minmax.inverse_transform(output_predict)
    deep_future = anchor(output_predict[:, 0], 0.3)
    
    return deep_future[-test_size:]

In [30]:
# Run `simulation_size` independent train-and-forecast passes and collect the
# forecast returned by each one.
result = []
for sim_no in range(1, simulation_size + 1):
    print('simulation %d' % sim_no)
    result.append(forecast())

simulation 1


AttributeError: module 'tensorflow' has no attribute 'reset_default_graph'