In [None]:
def addMovingAvg(data, stockName, windows=(30, 50)):
  """Add simple moving-average columns of the closing price to ``data``.

  For every window size ``w`` in ``windows`` a column ``MovingAvg_<w>`` is
  written. Rows that have fewer than ``w`` observations behind them use the
  mean of all closes seen so far, so the columns are populated from the
  first row onwards.

  The columns are added to ``data`` in place; its index and its other
  columns are left untouched. The same frame is returned for convenience.

  Parameters
  ----------
  data : pandas.DataFrame
      Frame containing a 'Close' column.
  stockName : str
      Only used in the progress messages.
  windows : iterable of int, optional
      Window lengths in rows. Defaults to (30, 50).

  Returns
  -------
  pandas.DataFrame
      ``data`` itself, now containing the moving-average columns.
  """
  print("Adding moving avg to {name}".format(name = stockName))
  for ma in windows:
      # min_periods=1 yields the expanding mean during the warm-up rows and
      # never adds rows, even when the frame is shorter than the window.
      data[f"MovingAvg_{ma}"] = data['Close'].rolling(ma, min_periods=1).mean()
  print("Adding moving avg to {name}, {name} head:".format(name = stockName))
  # A bare expression is not displayed from inside a function, so print it.
  print(data.head(10))
  return data

In [None]:
def transform(data, stockName):
  """Add price-spread columns and moving averages to ``data``.

  Six difference columns are written to ``data`` in place ('Open-high',
  'Open-low', 'Close-high', 'Close-low', 'High-low', 'Open-close'), then
  ``addMovingAvg`` is called on the same frame.

  Parameters
  ----------
  data : pandas.DataFrame
      Frame with 'Open', 'High', 'Low' and 'Close' columns.
  stockName : str
      Forwarded to ``addMovingAvg``.

  Returns
  -------
  pandas.DataFrame
      The frame returned by ``addMovingAvg``, or ``data`` itself when that
      call returns nothing.
  """
  # (new column, minuend, subtrahend) - kept in the original column order
  spreads = (
      ('Open-high', 'Open', 'High'),
      ('Open-low', 'Open', 'Low'),
      ('Close-high', 'Close', 'High'),
      ('Close-low', 'Close', 'Low'),
      ('High-low', 'High', 'Low'),
      ('Open-close', 'Open', 'Close'),
  )
  for column, left, right in spreads:
      data[column] = data[left] - data[right]
  enriched = addMovingAvg(data, stockName)
  # Fall back to the frame that was passed in if the helper returns None.
  return data if enriched is None else enriched

In [None]:
def getLSTMTrainData(predictionDays: int, scaled_data, trainLength:int):
  """Build sliding-window training samples from the first column of ``scaled_data``.

  Only the first ``trainLength`` rows are used. Each sample holds
  ``predictionDays`` consecutive values; its target is the value that
  immediately follows the window.

  Returns a tuple ``(x_train, y_train)`` where ``x_train`` has shape
  (samples, predictionDays, 1) and ``y_train`` has shape (samples,).
  """
  series = scaled_data[0:trainLength, :]
  # One (window, next value) pair per position that has a full window behind it
  samples = [
      (series[stop - predictionDays:stop, 0], series[stop, 0])
      for stop in range(predictionDays, trainLength)
  ]
  x_train = np.array([window for window, _ in samples])
  y_train = np.array([target for _, target in samples])
  # The LSTM layer consumes (samples, timesteps, features); add the feature axis
  x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
  return x_train, y_train

In [None]:
def getLSTMTestData(prediction_days:int, dataset, scaled_data, trainLength:int):
  """Build sliding-window test samples and the matching actual values.

  The scaled series is taken from ``trainLength - prediction_days`` onwards so
  that the first test sample already has a full window of history. The
  targets come from the unscaled ``dataset``, starting at row ``trainLength``.

  Returns a tuple ``(x_test, y_test)`` where ``x_test`` has shape
  (samples, prediction_days, 1) and ``y_test`` is ``dataset[trainLength:, :]``.
  """
  series = scaled_data[trainLength - prediction_days: , :]
  # Values the model is asked to predict, in the original (unscaled) units
  y_test = dataset[trainLength: , :]
  windows = [
      series[stop - prediction_days:stop, 0]
      for stop in range(prediction_days, len(series))
  ]
  # The LSTM layer consumes (samples, timesteps, features); add the feature axis
  x_test = np.array(windows)
  x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
  return x_test, y_test

In [None]:
def trainLSTM(x_train, y_train, x_test, scaler=None, epochs=100, batch_size=32):
  """Fit a two-layer LSTM on the training windows and predict the test windows.

  Parameters
  ----------
  x_train : array of shape (samples, timesteps, 1)
      Training windows.
  y_train : array of shape (samples,)
      Target value for each training window.
  x_test : array of shape (samples, timesteps, 1)
      Windows to predict.
  scaler : object with ``inverse_transform``, optional
      When given, the model output is passed through
      ``scaler.inverse_transform`` before it is returned. When omitted, the
      raw model output is returned unchanged.
  epochs : int, optional
      Passed to ``model.fit``; defaults to 100.
  batch_size : int, optional
      Passed to ``model.fit``; defaults to 32.

  Returns
  -------
  The predictions for ``x_test`` (inverse-transformed if ``scaler`` is given).
  """
  model = Sequential()
  # First LSTM layer returns the full sequence because another LSTM follows
  model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
  # Second LSTM layer collapses the sequence to its final state
  model.add(LSTM(50, return_sequences=False))
  model.add(Dense(25))
  # Single output unit: one predicted value per window
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mean_squared_error')
  model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
  predicted = model.predict(x_test)
  if scaler is None:
      return predicted
  return scaler.inverse_transform(predicted)

In [None]:
  
def evalPredicted(predicted, actual, modelName):
  """Print and return the RMSE and R squared of ``predicted`` against ``actual``.

  ``modelName`` is only used to label the two printed lines.
  Returns the tuple ``(RMSE, R2)``.
  """
  # RMSE is the square root of the mean squared error
  root_mse = math.sqrt(met.mean_squared_error(actual, predicted))
  r_squared = met.r2_score(actual,predicted)
  report = (
      '{modelName} Root Mean Squared Error is {rmse}'.format(modelName=modelName,rmse=root_mse),
      '{modelName} R Squared is {r2}'.format(modelName=modelName,r2=r_squared),
  )
  for line in report:
      print(line)
  return root_mse, r_squared
  

In [None]:
def plotModelResult(closingPrices, trainLength, predicted, name):
  """Plot actual versus predicted closing prices for the hold-out rows.

  Parameters
  ----------
  closingPrices : pandas.DataFrame
      Frame with a 'Close' column; it is not modified.
  trainLength : int
      Number of leading rows that belong to the training set. Only the rows
      after them are plotted.
  predicted : array-like
      One predicted value per hold-out row.
  name : str
      Stock name used in the title and the y-axis label.
  """
  # Work on a copy of the hold-out slice so that adding a column neither
  # touches closingPrices nor triggers a SettingWithCopyWarning.
  data = closingPrices.iloc[trainLength:].copy()
  data['Predicted'] = predicted
  data = data.rename(columns={'Close':'Actual'})
  plt.figure(figsize=(16,8))
  plt.title("{name} Close Price".format(name=name))
  plt.xlabel('Date',fontsize=18)
  # fontsize belongs to ylabel(), not to str.format()
  plt.ylabel("{name} Close Price USD ($)".format(name=name), fontsize=18)
  plt.plot(data[['Actual','Predicted']])
  plt.legend(['Actual Price','Predicted Price'])

In [None]:
def runLSTM(data, stockName, trainLength, prediction_days=60):
  """
  Train an LSTM on the closing prices of one stock and evaluate it on the
  rows after trainLength.

  @input (data: df/df-like data of one stock with a 'Close' column,
          stockName: string of that stock,
          trainLength: number of leading rows used for training,
          prediction_days: look-back window length in rows, default 60)
  @output (RMSE value of predicted against actual values,
           copy of the rows after trainLength with 'Close' renamed to
           'Actual' and a 'Predicted' column added)
  """
  closingPrices = data.filter(['Close'])
  dataset = closingPrices.values
  # Fit the min/max on the closing prices and scale them into [0, 1]
  scaler = MinMaxScaler(feature_range=(0,1))
  scaled_data = scaler.fit_transform(dataset)
  x_train, y_train = getLSTMTrainData(prediction_days, scaled_data, trainLength)
  x_test, y_test = getLSTMTestData(prediction_days, dataset, scaled_data, trainLength)
  # NOTE(review): trainLSTM is called without a scaler, so its output is
  # assumed to still be in the scaled range; it is mapped back to prices
  # here with the scaler fitted above, before being compared with y_test.
  predicted = scaler.inverse_transform(trainLSTM(x_train, y_train, x_test))
  plotModelResult(closingPrices, trainLength, predicted, stockName)
  RMSE, _ = evalPredicted(predicted, y_test, "LSTM")
  # Copy the hold-out rows so the caller's frame is not modified
  results = data.iloc[trainLength:].copy()
  results['Predicted'] = predicted
  results = results.rename(columns={'Close':'Actual'})
  return RMSE, results

In [None]:
def trainARIMA(train_data, test_data, stockName):
  """Walk-forward one-step ARIMA(4,1,0) forecast over ``test_data``.

  For each test position a model is fitted on everything observed so far,
  the first element of its forecast output is recorded, and the true test
  value is then appended to the history before the next step.

  ``stockName`` is accepted but not used. Returns the list of forecasts,
  one per element of ``test_data``.
  """
  observed = list(train_data)
  forecasts = []
  for step in range(len(test_data)):
      fitted = ARIMA(observed, order=(4,1,0)).fit(disp=0)
      forecasts.append(fitted.forecast()[0])
      # Reveal the actual value so the next fit can use it
      observed.append(test_data[step])
  return forecasts

In [None]:
def runARIMA(data, stockName, trainLength):
  """Run the walk-forward ARIMA forecast on one stock and evaluate it.

  Parameters
  ----------
  data : pandas.DataFrame
      Frame with a 'Close' column.
  stockName : str
      Forwarded to ``trainARIMA`` and used as the plot label.
  trainLength : int
      Number of leading rows used as training history; the remaining rows
      are forecast and evaluated.

  Returns
  -------
  tuple
      ``(RMSE, results)`` where ``results`` is a new frame with 'Actual' and
      'Predicted' columns, one row per test observation.
  """
  closes = data['Close'].values
  train_data, test_data = closes[:trainLength], closes[trainLength:]
  predicted = trainARIMA(train_data, test_data, stockName)
  plotModelResult(data.filter(['Close']), trainLength, predicted, stockName)
  RMSE, _ = evalPredicted(predicted, test_data, "ARIMA")
  # Named `results` rather than ARIMA so the model class name is not shadowed
  results = pd.DataFrame({"Actual": test_data})
  results["Predicted"] = pd.DataFrame(predicted)
  return RMSE, results

In [None]:
  
def getOptimalModel(data, stockName, trainFraction=0.8):
  """Run the LSTM and ARIMA pipelines and return the results of the better one.

  The model with the lower RMSE wins; on a tie the LSTM results are returned.

  Parameters
  ----------
  data : df/df-like data of one stock
      Passed to ``transform`` first and then to both model runners.
      NOTE(review): ``transform`` is called for its effect on ``data`` and its
      return value is ignored, so the caller's frame is expected to be
      modified - confirm this is intended.
  stockName : str
      Name of the stock, forwarded to the helpers.
  trainFraction : float, optional
      Share of the rows used for training; defaults to 0.8.

  Returns
  -------
  The result frame returned by the winning runner.
  """
  # Transform data to include useful columns to use later on
  transform(data, stockName)
  trainLength = int(len(data)*trainFraction)
  lstm_rmse, lstm_results = runLSTM(data, stockName, trainLength)
  arima_rmse, arima_results = runARIMA(data, stockName, trainLength)
  if arima_rmse < lstm_rmse:
      print("ARIMA is Optimal")
      return arima_results
  print("LSTM is Optimal")
  return lstm_results