## Using a Long Short-Term Memory (LSTM) model to predict a company's closing stock price from the previous 60 days of closing prices

In [None]:
import math
import pandas_datareader as web
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
import datetime as dt
#Use matplotlib's 'fivethirtyeight' style sheet for the figures drawn after this call
plt.style.use('fivethirtyeight')

In [None]:
#Download the price history of the chosen ticker between the two dates (one year)
company = 'TSLA'
start = dt.datetime(year=2020, month=3, day=11)
end = dt.datetime(year=2021, month=3, day=11)

df = web.DataReader(name=company, data_source='yahoo', start=start, end=end)

#Show the downloaded frame as the cell output
df

In [None]:
#Line chart of the closing price over the downloaded period
close_history = df['Close']

plt.figure(figsize=(16, 8))
plt.plot(close_history)

plt.title(f"{company} Close Price History")
plt.xlabel('Date', fontsize=18)
plt.ylabel(f"{company} Close Price USD ($)", fontsize=18)
plt.show()

In [None]:
#Keep only the 'Close' column; the rest of the notebook works on closing prices alone
data = df.filter(items=['Close'])

#Underlying values of that single-column frame as a numpy array
dataset = data.values

#Size of the training split: the first 80% of the rows, rounded up
training_data_len = math.ceil(0.8 * len(dataset))

training_data_len

In [None]:
#Scale the closing prices for the LSTM
scaler = MinMaxScaler(feature_range=(0, 1))

#Learn the min and max from the training rows only, so that no information about
#the held-out validation prices leaks into training through the scaling parameters
scaler.fit(dataset[:training_data_len])

#Apply the training-derived scaling to the whole series; validation prices outside
#the training range map outside [0, 1], which is expected
scaled_data = scaler.transform(dataset)

scaled_data

In [None]:
#Create the training data set
#Number of past closing prices in each input window
prediction_days = 60

#Scaled training rows: the first training_data_len rows (80% of the data)
train_data = scaled_data[0:training_data_len, :]

#Each sample needs prediction_days inputs plus one target value
if len(train_data) <= prediction_days:
    raise ValueError(
        f"Need more than {prediction_days} training rows, got {len(train_data)}"
    )

#Row positions of the targets; each one is preceded by a full window of history
target_positions = range(prediction_days, len(train_data))

#Independent training variable: for every target, the prediction_days scaled closing
#prices that come right before it (window length follows prediction_days, not a literal 60)
x_train = np.array([train_data[i - prediction_days:i, 0] for i in target_positions])

#Dependent training variable: the scaled closing price that follows each window
y_train = np.array([train_data[i, 0] for i in target_positions])

In [None]:
#The LSTM needs 3-dimensional input, so add a trailing axis of size 1 to the 2-D window array
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_train.shape

In [None]:
#Stacked LSTM regressor: three 50-unit LSTM layers, each followed by 20% dropout,
#then a single dense unit that outputs the next (scaled) closing value
model = Sequential([
    #The first two LSTM layers return full sequences so the next LSTM layer can consume them
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    #The last LSTM layer uses the default return_sequences and feeds the dense output
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

#Train with the Adam optimizer against mean squared error
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
#Fit the network on the training windows: 100 epochs, batches of 32 samples
model.fit(x=x_train, y=y_train, epochs=100, batch_size=32)

In [None]:
#Build the testing data set

#Scaled rows starting prediction_days before the split, so the first test
#target already has a full window of history
test_data = scaled_data[training_data_len - prediction_days:, :]

#Unscaled closing prices that the model is expected to predict
y_test = dataset[training_data_len:, :]

#One window of the previous prediction_days scaled values per target row,
#collected straight into a numpy array for the LSTM
x_test = np.array([
    test_data[stop - prediction_days:stop, 0]
    for stop in range(prediction_days, len(test_data))
])

In [None]:
#Add a trailing axis of size 1 so x_test is 3-dimensional
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
x_test.shape

In [None]:
#Run the model on the test windows, then map its scaled outputs back to prices
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
#Root mean squared error between the predicted and the actual closing prices
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(np.mean(squared_errors))
rmse

In [None]:
#Plot the data
train = data[:training_data_len]
#Copy the slice before adding a column: assigning onto a slice of `data`
#triggers pandas' SettingWithCopyWarning and may or may not write through to `data`
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

#Visualise the model: training prices, actual validation prices and predictions
plt.figure(figsize=(16, 8))
plt.title(f"Model on {company} Close Price")
plt.xlabel('Date', fontsize=18)
plt.ylabel(f"{company} Close Price USD ($)", fontsize=18)

plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
#Render the figure explicitly, as the closing price history plot does
plt.show()

In [None]:
#Display the validation rows: actual 'Close' values next to the model's 'Predictions'
valid

In [None]:
#Setting up the data before predicting future price

#Get quote
quote = web.DataReader(company, data_source='yahoo', start='2021-01-01', end='2022-03-20')

#Create a new dataframe with only the 'Close' column
new_df = quote.filter(['Close'])

#Get the most recent prediction_days closing prices as an array; the window length
#follows prediction_days (the window size used for training) instead of a literal 60
last_60_days = new_df[-prediction_days:].values

#The model was trained on windows of exactly prediction_days values
if len(last_60_days) < prediction_days:
    raise ValueError(
        f"Need at least {prediction_days} closing prices, got {len(last_60_days)}"
    )

#Scale with the already-fitted scaler (transform only, no re-fitting)
last_60_days_scaled = scaler.transform(last_60_days)

#Wrap the single window in a numpy array so it forms a batch of one sample
X_test = np.array([last_60_days_scaled])

#Reshape the data to the 3-dimensional input the LSTM expects
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

#Get the predicted scaled price
pred_price = model.predict(X_test)

#Undo the scaling
pred_price = scaler.inverse_transform(pred_price)

print(pred_price)

In [None]:
#Fetch the quote for 2022-03-21, the day after the end of the window used for the
#prediction above, and print its closing price

quote2 = web.DataReader(company, data_source='yahoo', start='2022-03-21', end='2022-03-21')
#Print from quote2; the previous name tsla_quote2 was never defined and raised NameError
print(quote2['Close'])