In [1]:
# https://www.kdnuggets.com/2018/11/keras-long-short-term-memory-lstm-model-predict-stock-prices.html

import numpy as np
import matplotlib.pyplot as plt
# Plotly
import plotly.plotly as py
import plotly.tools as tls
import pandas as pd
import datetime
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
import time
import tensorflow as tf

In [2]:
name = '0700.HK'
filename = 'daily_{}'.format(name)
df = pd.read_csv('dataset/{}.csv'.format(filename))
df = df.dropna()

df['Date'] = pd.to_datetime(df.Date,format='%Y-%m-%d').sort_values(ascending=True)
date = df['Date']
# date = df['Date'].map(datetime.datetime.toordinal)

valid_set_size_percentage = 10 
test_set_size_percentage = 10 

df = df.iloc[:, 4:5].values

# date = np.array(date)
# (-1,1) unknown row, 1 column
date = date.values
date = date.reshape(-1,1)



In [3]:
sc = MinMaxScaler(feature_range = (0, 1))
df_scaled = sc.fit_transform(df)
len(df)


2471

In [4]:
valid_set_size = int(np.round(valid_set_size_percentage/100*len(df)));  
test_set_size = int(np.round(test_set_size_percentage/100*len(df)));
train_set_size = len(df) - (valid_set_size + test_set_size);
  
training_set_scaled = df_scaled[:train_set_size]
valid_set_scaled = df_scaled[train_set_size:train_set_size+valid_set_size]
test_set_scaled = df_scaled[train_set_size+valid_set_size:]

real_stock_price = df[train_set_size+valid_set_size:]
real_stock_price = real_stock_price[60:]

date_predict = date[train_set_size+valid_set_size+60:]


In [5]:
X_train = []
y_train = []
X_valid = []
y_valid = []
X_test = []
y_test = []

# LSTMs expect our data to be in a specific format, 
# usually a 3D array. We start by creating data in 60 timesteps 
# and converting it into an array using NumPy.
# Next, we convert the data into a 3D dimension array with X_train samples, 
# 60 timestamps, and one feature at each step.
    
for i in range(60, len(training_set_scaled)):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
    
for i in range(60, len(valid_set_scaled)):
    X_valid.append(valid_set_scaled[i-60:i, 0])
    y_valid.append(valid_set_scaled[i, 0])
    
for i in range(60, len(test_set_scaled)):
    X_test.append(test_set_scaled[i-60:i, 0])
    y_test.append(test_set_scaled[i, 0])

X_train, y_train = np.array(X_train), np.array(y_train)
X_valid, y_valid = np.array(X_valid), np.array(y_valid)
X_test, y_test = np.array(X_test), np.array(y_test)


X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_valid = np.reshape(X_valid, (X_valid.shape[0], X_valid.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

print(X_train.shape,X_valid.shape,X_test.shape)


(1917, 60, 1) (187, 60, 1) (187, 60, 1)


In [29]:
regressor = Sequential()

# units means that how many output nodes of dense layer should be returned.
# Because the fully connected layer(dense layer) should consist of input and output.
# Then , the mean of dimensionality of the output space could be translated to the number of ouput nodes.

regressor.add(LSTM(units = 100, return_sequences = True, input_shape = (X_train.shape[1], 1), kernel_initializer='random_uniform'))
regressor.add(Dropout(0.5))

regressor.add(Dense(20,activation='relu'))

regressor.add(Dense(1,activation='sigmoid'))

optimizer = tf.keras.optimizers.RMSprop(lr=0.0001)

finishTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 

tb_callback = tf.keras.callbacks.TensorBoard(log_dir='logs/{}'.format(finishTime))

regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

# print(X_train[1].shape)

regressor.fit(X_train, y_train, epochs = 20, batch_size = 32,validation_data=[X_valid,y_valid],callbacks=[tb_callback])

ValueError: A target array with shape (1917, 1) was passed for an output of shape (None, 60, 1) while using as loss `mean_squared_error`. This loss expects targets to have the same shape as the output.

In [None]:
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
print(predicted_stock_price.shape)
print(real_stock_price.shape)

In [None]:
fig = plt.figure(figsize = (18,9))
plt.plot(date_predict,real_stock_price, color = 'black', label = '{} Stock Price'.format(name))
plt.plot(date_predict,predicted_stock_price, color = 'green', label = 'Predicted {} Stock Price'.format(name))
plt.title('{} Stock Price Prediction'.format(name))
plt.xlabel('Time')
plt.ylabel('{} Stock Price'.format(name))
plt.legend()
plt.show()

In [None]:
finishTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 

In [None]:
regressor.save('trained_model/model/{}_model_{}.h5'.format(name,finishTime))  # creates a HDF5 file 'my_model.h5'
# 將參數儲存至 HDF5 檔案（不含模型）
regressor.save_weights('trained_model/model_weight/{}_model_weights_{}.h5'.format(name,finishTime))

In [None]:
fig.savefig('img/{}_{}.png'.format(name,finishTime))

In [None]:
# Converting to Plotly's Figure object..
# plotly_fig = tls.mpl_to_plotly(fig)
plotly_fig = py.plot_mpl(fig, filename="my first plotly plot")