In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# Seed NumPy's global random generator with a fixed value.
# NOTE(review): the Keras backend may draw from its own generator, in which
# case this call alone would not make training repeatable - confirm.
np.random.seed(7)

# Data crunch

In [None]:
# load dataset
# Only the 'Close' column is read; its values are pulled out as a 2-D
# (rows, 1) array and cast to float32 in one step.
dataframe = pd.read_csv('clean_sm_day.csv', usecols=['Close'])
dataset = dataframe.values.astype('float32')

In [None]:
# Visualizing
# Line plot of the float32 `dataset` array against its row position; in a
# top-to-bottom run the values are still unscaled at this point.
plt.plot(dataset) #TODO: need to convert to returns

In [None]:
# normalize the dataset
# The scaler is fitted on the training portion only, so the min/max of the
# held-out rows never influence what the model sees during training (held-out
# values may therefore fall slightly outside [0, 1]).
# The raw values are re-read from `dataframe` every time, so re-running this
# cell never re-fits the scaler on data that has already been scaled.
scaler = MinMaxScaler(feature_range=(0, 1))
raw_values = dataframe.values.astype('float32')
fit_size = int(len(raw_values) * 0.8)  # keep in sync with the 80% train/test split
scaler.fit(raw_values[:fit_size])
dataset = scaler.transform(raw_values)

In [None]:
# split into train and test sets
# Ordered split: the leading 80% of rows form `train`, the remainder `test`.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train, test = dataset[:train_size, :], dataset[train_size:, :]

In [None]:
# convert an array of values into a dataset matrix with X=t, Y=t+1
def create_dataset(dataset, look_back=1, include_last=False):
    """Turn a series into (window, next value) pairs for supervised learning.

    Parameters
    ----------
    dataset : 2-D array
        Only column 0 is used.
    look_back : int
        Number of consecutive values in each input window.
    include_last : bool
        By default the final possible pair is left out (the historical
        behaviour of this helper, which code that relies on the sample count
        depends on). Pass True to use every available pair.

    Returns
    -------
    (dataX, dataY)
        dataX has shape (n_samples, look_back) and dataY has shape
        (n_samples,), where dataY[i] is the value that directly follows
        window dataX[i]. When the series is too short to yield any pair, the
        arrays are empty but keep those shapes, so `dataX.shape[1]` stays
        valid for callers.
    """
    series = dataset[:, 0]
    dropped = 0 if include_last else 1
    n_samples = max(len(series) - look_back - dropped, 0)

    windows = [series[start:start + look_back] for start in range(n_samples)]
    # The explicit reshape keeps the array 2-D even when `windows` is empty.
    dataX = np.array(windows, dtype=series.dtype).reshape(n_samples, look_back)
    # Copy so the targets are not a view into the caller's array.
    dataY = series[look_back:look_back + n_samples].copy()
    return dataX, dataY

# Build (window, next value) pairs separately for each split, using
# windows of `look_back` consecutive values.
look_back = 1
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

In [None]:
# reshape input to be [sample, time steps, features]
# A middle axis of length 1 is inserted: every sample is presented as a single
# time step whose features are the values of its window.
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
testX = testX.reshape(testX.shape[0], 1, testX.shape[1])

# Fitting model

In [None]:
# train and fit the LSTM network
model = Sequential([
    # 4 is the number of LSTM units; input_shape is (time steps, features) of
    # one sample, matching the [sample, 1, look_back] layout of trainX.
    LSTM(4, input_shape=(1, look_back)),
    # Fully connected layer with a single output unit: the predicted next value.
    Dense(1),
])
# Minimise mean squared error using the Adam optimiser.
model.compile(loss='mean_squared_error', optimizer='adam')
# epochs is the number of passes over the training data; batch_size=1 updates
# the weights after every sample; verbose=2 logs one line per epoch.
model.fit(trainX, trainY, epochs=10, batch_size=1, verbose=2)

In [None]:
# make predictions
# The model output is still expressed in the scaler's normalised units.
forecast_normalized = model.predict(testX)

# inverting the normalization to original scale
# testY is wrapped in a list so the scaler receives a 2-D input with a single
# row; the result keeps that (1, n_samples) layout, hence the `testY[0]`
# indexing used when scoring.
# NOTE: testY is rebound here, so this cell cannot simply be run twice without
# re-creating testY first.
testY = scaler.inverse_transform([testY])
forecast = scaler.inverse_transform(forecast_normalized)

# Evaluating

In [None]:
# Score only the first five test targets against their forecasts
# (root of the mean squared error, in original units).
test_ahead = testY[0, :5]
forecast_ahead = forecast[:5, 0]

testScore = math.sqrt(mean_squared_error(test_ahead, forecast_ahead))
print(f"RMSE: {round(testScore,3)}")

In [None]:
# Plotting predictions
# `dataset` holds scaled values at this point while `forecast` has been mapped
# back to original units, so the series is inverse-transformed before plotting
# to put both curves on the same scale.
plt.plot(scaler.inverse_transform(dataset))

# shifting predictions
# forecast[i] estimates test[i + look_back], i.e. dataset[train_size + i + look_back],
# so the forecasts start `look_back` rows into the test portion. The slice is
# sized from `forecast` itself so the assignment always matches its length.
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
forecast_start = train_size + look_back
testPredictPlot[forecast_start:forecast_start + len(forecast), :] = forecast
plt.plot(testPredictPlot)