In [1]:
# https://www.kdnuggets.com/2018/11/keras-long-short-term-memory-lstm-model-predict-stock-prices.html

import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Plotly
import plotly.plotly as py
import plotly.tools as tls
import tensorflow as tf
from requests_html import HTMLSession
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential

In [25]:
def getStockInfo(stockCode, api_key=None):
    """Download the full daily price history for ``stockCode`` from Alpha Vantage.

    Parameters
    ----------
    stockCode : str
        Ticker sent as the ``symbol`` query parameter (e.g. ``'0700.HK'``).
    api_key : str, optional
        Alpha Vantage API key. When omitted, the ``ALPHAVANTAGE_API_KEY``
        environment variable is used, falling back to the key that was
        originally embedded in this notebook.

    Returns
    -------
    pandas.DataFrame
        One row per trading day, indexed by ``Date`` in ascending order, with
        float columns ``Open``/``High``/``Low``/``Close``, an int64 ``Volume``
        column and ``Date_int`` (the date as a proleptic Gregorian ordinal).

    Raises
    ------
    KeyError
        If the JSON response has no ``'Time Series (Daily)'`` entry.
    """
    if api_key is None:
        # NOTE(review): the fallback key is a credential committed to source;
        # it is kept only so existing calls keep working. Prefer the
        # environment variable and rotate/remove this literal.
        api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'TIR873DLX4ZC9WTV')

    # Let the HTTP library encode the query string instead of formatting the
    # symbol straight into the URL.
    query = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': stockCode,
        'outputsize': 'full',
        'apikey': api_key,
    }

    # Close the session when done and never wait forever on a stalled server.
    with HTMLSession() as session:
        response = session.get('https://www.alphavantage.co/query',
                               params=query, timeout=30)
        response.raise_for_status()
        payload = response.json()

    if 'Time Series (Daily)' not in payload:
        # Surface whatever the service sent back instead of a bare KeyError.
        raise KeyError(
            "no 'Time Series (Daily)' in response for {!r}: {}".format(
                stockCode, payload))
    stock_detail = payload['Time Series (Daily)']

    # The payload maps date -> {field: value}; transpose so dates become rows.
    df = pd.DataFrame.from_dict(stock_detail).T
    df.index = pd.to_datetime(df.index, format='%Y-%m-%d')
    df.index.names = ['Date']
    df = df.sort_index(ascending=True)
    df = df.rename(columns={'1. open': 'Open', '2. high': 'High',
                            '3. low': 'Low', '4. close': 'Close',
                            '5. volume': 'Volume'})

    # Values arrive as strings. Volume is cast to an explicit 64-bit integer
    # because a platform-default ``int`` may be 32-bit, which daily volumes
    # above ~2.1e9 would overflow.
    df = df.astype({'Open': float, 'High': float, 'Low': float,
                    'Close': float, 'Volume': 'int64'})

    # Integer form of each date (proleptic Gregorian ordinal).
    df['Date_int'] = [day.toordinal() for day in df.index]
    return df

In [26]:
# Ticker to download and model.
stockCode = '0700.HK'

# Later cells format `name` into plot labels and output file names, but no
# cell defined it, so a fresh kernel failed with NameError. Use the ticker as
# the label.
# NOTE(review): replace with a friendlier display name if one was intended.
name = stockCode

df = getStockInfo(stockCode)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Date_int
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-06-16,0.875,0.925,0.815,0.83,2198875000,731748
2004-06-17,0.83,0.875,0.825,0.845,419007500,731749
2004-06-18,0.84,0.85,0.79,0.805,182990000,731750
2004-06-21,0.82,0.825,0.79,0.8,114085000,731753
2004-06-22,0.0,0.0,0.0,0.0,0,731754


In [27]:
# Keep only the first 2000 rows of the frame for the rest of the notebook,
# then report the resulting shape and preview the leading rows.
df = df.head(2000)
print(df.shape)
df.head()

(2000, 6)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Date_int
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-06-16,0.875,0.925,0.815,0.83,2198875000,731748
2004-06-17,0.83,0.875,0.825,0.845,419007500,731749
2004-06-18,0.84,0.85,0.79,0.805,182990000,731750
2004-06-21,0.82,0.825,0.79,0.8,114085000,731753
2004-06-22,0.0,0.0,0.0,0.0,0,731754


In [22]:
# Share of rows (in percent) held out for validation and for testing.
valid_set_size_percentage = test_set_size_percentage = 10

# Closing prices as an (n, 1) array, printed for a quick sanity check.
print(df.loc[:, ['Close']].values)

# Index dates as a column vector: reshape(-1, 1) means "as many rows as
# needed, one column".
date = df.index.values.reshape(-1, 1)

print(date)

[[391.6  ]
 [395.6  ]
 [393.6  ]
 ...
 [  0.805]
 [  0.845]
 [  0.83 ]]
[['2019-04-18T00:00:00.000000000']
 ['2019-04-17T00:00:00.000000000']
 ['2019-04-16T00:00:00.000000000']
 ...
 ['2004-06-18T00:00:00.000000000']
 ['2004-06-17T00:00:00.000000000']
 ['2004-06-16T00:00:00.000000000']]


In [None]:
# Scale every column to [0, 1] relative to the TRAINING rows only.
# Fitting the scaler on the whole frame would leak the min/max of the
# validation and test periods into the training data.
sc = MinMaxScaler(feature_range=(0, 1))

# Number of trailing rows reserved for validation + test, computed with the
# same rounding rule the split cell uses.
_holdout_rows = sum(
    int(np.round(pct / 100 * len(df)))
    for pct in (valid_set_size_percentage, test_set_size_percentage)
)

sc.fit(df.iloc[:len(df) - _holdout_rows])
# Hold-out rows are transformed with the training statistics, so their scaled
# values may fall outside [0, 1].
df_scaled = sc.transform(df)
len(df)


In [None]:
# Row counts for the validation and test splits, derived from the configured
# percentages; whatever remains is used for training.
valid_set_size, test_set_size = (
    int(np.round(pct / 100 * len(df)))
    for pct in (valid_set_size_percentage, test_set_size_percentage)
)
train_set_size = len(df) - valid_set_size - test_set_size

# Chronological split of the scaled array: train | validation | test.
training_set_scaled, valid_set_scaled, test_set_scaled = np.split(
    df_scaled, [train_set_size, train_set_size + valid_set_size]
)



In [None]:
# Number of past time steps fed to the LSTM for each sample.
LOOKBACK = 60
# Column of the scaled array used as both input and target. Column 0 is
# 'Open' given the column order produced by getStockInfo.
# NOTE(review): confirm that 'Open' (not 'Close') is the intended target.
FEATURE_COL = 0


def make_windows(scaled, lookback=LOOKBACK, feature_col=FEATURE_COL):
    """Turn one scaled split into LSTM inputs and next-step targets.

    Parameters
    ----------
    scaled : array-like, shape (n_rows, n_columns)
        Scaled values of a single split, in chronological order.
    lookback : int
        Length of the history window used to predict the next value.
    feature_col : int
        Column of ``scaled`` to model.

    Returns
    -------
    (X, y)
        ``X`` has shape (n_rows - lookback, lookback, 1): samples, time steps,
        one feature per step. ``y`` has shape (n_rows - lookback,) and holds
        the value that immediately follows each window.

    Raises
    ------
    ValueError
        If the split has too few rows to form even one window.
    """
    series = np.asarray(scaled)[:, feature_col]
    if len(series) <= lookback:
        raise ValueError(
            'need more than {} rows to build a window, got {}'.format(
                lookback, len(series)))
    windows = np.array([series[end - lookback:end]
                        for end in range(lookback, len(series))])
    targets = series[lookback:].copy()
    # Keras LSTM layers expect 3-D input: (samples, time steps, features).
    return windows.reshape(len(windows), lookback, 1), targets


X_train, y_train = make_windows(training_set_scaled)
X_valid, y_valid = make_windows(valid_set_scaled)
X_test, y_test = make_windows(test_set_scaled)

print(X_train.shape, X_valid.shape, X_test.shape)


In [None]:
# The learning rate is passed positionally: the keyword was `lr` in older
# Keras and is `learning_rate` in newer releases (recent Keras rejects `lr`),
# while the first positional argument is the learning rate in both.
optimizer = tf.keras.optimizers.RMSprop(0.0001)

# Four stacked LSTM layers, each followed by 20% dropout, and a single-unit
# dense output that emits the next scaled value.
# `units` is the size of each LSTM layer's output (hidden state) per time step.
# `return_sequences=True` makes a layer emit its output at every time step so
# the next LSTM layer receives a sequence; the last LSTM emits only its final
# output.
regressor = Sequential([
    LSTM(units=50, return_sequences=True,
         input_shape=(X_train.shape[1], 1),
         kernel_initializer='random_uniform'),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

regressor.summary()

In [None]:
# Timestamp used to give this run its own TensorBoard log directory.
# Despite the variable name, it is taken before training starts. It uses only
# digits and dashes because spaces and colons are not valid in directory
# names on every platform.
finishTime = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

tb_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/{}'.format(finishTime))

regressor.compile(optimizer=optimizer, loss='mean_squared_error')

# validation_data is passed as an (inputs, targets) tuple, the form Keras
# documents; a list is not interpreted as a pair by every Keras version.
regressor.fit(X_train, y_train, epochs=100, batch_size=32,
              validation_data=(X_valid, y_valid), callbacks=[tb_callback])

In [None]:
def _unscale_target(scaled_values, column=0):
    """Map scaled model output back to original units for one scaler column.

    `sc` was fitted on every column of the frame, so calling
    `sc.inverse_transform` on the single-column prediction array fails with a
    shape mismatch. MinMaxScaler computes `X * scale_ + min_` per column, so
    that formula is inverted here for the modelled column only (column 0, the
    column the windowing cell reads).
    """
    return (scaled_values - sc.min_[column]) / sc.scale_[column]


# Predictions for the test and validation windows, in original price units.
predicted_stock_price = _unscale_target(regressor.predict(X_test))
valid_stock_price = _unscale_target(regressor.predict(X_valid))

# Loss on the test windows (still in scaled units).
regressor.evaluate(X_test, y_test)

In [None]:
# First test row that has a prediction: the test split starts after the
# training and validation rows, and its first 60 rows only serve as history
# for the first window.
_first_predicted_row = train_set_size + valid_set_size + 60

# Ground truth for the plot. Only column 0 is kept, the column the model is
# trained on; keeping the whole frame would also plot Volume and Date_int
# against the single-column prediction.
real_stock_price = df.iloc[_first_predicted_row:, 0]

date_predict = date[_first_predicted_row:]

In [None]:
# Actual versus predicted price over the test period. `fig` is kept because a
# later cell saves it to disk.
fig = plt.figure(figsize=(18, 9))
plt.plot(date_predict, real_stock_price,
         label='{} Stock Price'.format(name), color='black')
plt.plot(date_predict, predicted_stock_price,
         label='Predicted {} Stock Price'.format(name), color='green')
plt.gca().set(
    title='{} Stock Price Prediction'.format(name),
    xlabel='Time',
    ylabel='{} Stock Price'.format(name),
)
plt.legend()
plt.show()

In [None]:
# Local-time stamp (digits and dashes only) used in the output file names
# written by the following cells. strftime defaults to the current local time.
finishTime = time.strftime("%Y-%m-%d-%H-%M-%S")

In [None]:
# Make sure the output folders exist; saving into a missing directory fails.
os.makedirs('trained_model/model', exist_ok=True)
os.makedirs('trained_model/model_weight', exist_ok=True)

# Save the whole model to an HDF5 file.
regressor.save('trained_model/model/{}_model_{}.h5'.format(name, finishTime))
# Save only the weights to a separate HDF5 file (no model architecture).
regressor.save_weights('trained_model/model_weight/{}_model_weights_{}.h5'.format(name, finishTime))

In [None]:
# Write the prediction plot to img/<name>_<timestamp>.png, creating the
# folder first because savefig does not create missing directories.
os.makedirs('img', exist_ok=True)
fig.savefig('img/{}_{}.png'.format(name, finishTime))

In [None]:
# Hand the Matplotlib figure to Plotly via `plot_mpl` from `plotly.plotly`
# (imported as `py`), under the given filename.
# NOTE(review): this presumably uploads the figure to the Plotly online
# service and returns its URL, which would need Plotly credentials and
# network access - confirm before relying on it in a Run All.
# Alternative that only converts the figure and is not currently used:
# plotly_fig = tls.mpl_to_plotly(fig)
plotly_fig = py.plot_mpl(fig, filename="my first plotly plot")