# Data preprocessing

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [None]:
## Import the data set

# Read the training CSV into a DataFrame.
dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')

# Select column 1 with the slice 1:2 rather than a bare 1 so the selection
# stays two-dimensional: .values then yields a NumPy array of shape
# (n_rows, 1) instead of a flat vector holding the same numbers.
# (Column 1 is presumably the 'Open' price - confirm against the CSV header.)
open_price_column = dataset_train.iloc[:, 1:2]
training_set = open_price_column.values


# Feature scaling 

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the training values into the range [0, 1]. The fitted scaler `sc`
# is kept because later cells reuse it to transform the test inputs and to
# invert the scaling of the predictions.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

# Creating timesteps

Timesteps are the windows of past observations fed into the RNN for each training example. In this example we use 60 timesteps per input: for each day, the model looks at the previous 60 days of data to predict that day's output.

In [None]:
# Build the sliding-window training samples: each input is the `timesteps`
# scaled values preceding position i, and its target is the value at i.
timesteps = 60

# Derive the upper bound from the data instead of hard-coding the row count,
# so the cell keeps working if the training CSV gains or loses rows.
n_observations = len(training_set_scaled)

x_train = []
y_train = []

for i in range(timesteps, n_observations):
    x_train.append(training_set_scaled[i - timesteps:i, 0])
    y_train.append(training_set_scaled[i, 0])

# Convert the Python lists to arrays: x_train is (n_samples, timesteps),
# y_train is (n_samples,).
x_train, y_train = np.array(x_train), np.array(y_train)


In [None]:
# Reshaping

# Add a trailing axis of length 1 so x_train becomes three-dimensional:
# axis 0 indexes the samples, axis 1 the timesteps inside each window, and
# axis 2 the values per timestep (a single one here: the Google open price).
sample_count, window_length = x_train.shape[0], x_train.shape[1]
x_train = np.reshape(x_train, (sample_count, window_length, 1))

# Building the RNN

In [None]:
import tensorflow as tf
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential

In [None]:
# Start from an empty Sequential model; the following cells append its layers one by one.
regressor = Sequential()

In [None]:
# First LSTM layer: 50 units, fed windows of shape (timesteps, 1).
# return_sequences=True makes the layer emit an output for every timestep,
# which the LSTM layer stacked on top of it needs as its input.
input_window_shape = (x_train.shape[1], 1)
first_lstm = LSTM(units=50, return_sequences=True, input_shape=input_window_shape)
regressor.add(first_lstm)

# Dropout with rate 0.2 after the recurrent layer, for regularisation.
regressor.add(Dropout(rate=0.2))

In [None]:
# Second LSTM layer: 50 units, still returning the full output sequence
# because another LSTM layer follows it.
second_lstm = LSTM(units=50, return_sequences=True)
regressor.add(second_lstm)

# Dropout with rate 0.2, as after the previous layer.
regressor.add(Dropout(rate=0.2))

In [None]:
# Third LSTM layer: same configuration as the second one (50 units,
# full output sequence passed on to the next LSTM layer).
third_lstm = LSTM(units=50, return_sequences=True)
regressor.add(third_lstm)

# Dropout with rate 0.2, as after the previous layers.
regressor.add(Dropout(rate=0.2))

In [None]:
# Final LSTM layer: 50 units. return_sequences is left at its default, so
# only the output for the last timestep is passed on to the next layer.
last_lstm = LSTM(units=50)
regressor.add(last_lstm)

# Dropout with rate 0.2, as after the previous layers.
regressor.add(Dropout(rate=0.2))

In [None]:
# Output layer: a single dense unit producing one predicted value per window.
output_layer = Dense(units=1)
regressor.add(output_layer)

## Compiling 

In [None]:
# Configure training: mean squared error as the loss, Adam as the optimiser.
regressor.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train on the windowed data for 102 epochs in mini-batches of 32 samples.
regressor.fit(x_train, y_train, batch_size=32, epochs=102)

In [None]:
# Persist the trained model to disk. The path carries an explicit `.keras`
# extension because Keras 3 raises a ValueError from `save()` when the path
# has neither a `.keras` nor a `.h5` extension.
# NOTE(review): the artifact name changes from 'google_stock' to
# 'google_stock.keras'; update any code that loads the old path.
regressor.save('google_stock.keras')

# Making the prediction

In [None]:
# Read the test CSV and keep column 1 as a two-dimensional NumPy array
# (n_rows, 1); these are the real values the predictions are plotted against.
dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')
test_price_column = dataset_test.iloc[:, 1:2]
real_stock_price = test_price_column.values

In [None]:
# Window length the network was trained on (axis 1 of x_train), used here
# instead of repeating the literal 60.
timesteps = x_train.shape[1]

# Join the train and test 'Open' prices so that every test day has a full
# window of preceding values available, then keep only the tail that is
# needed: the test rows plus the `timesteps` rows before them.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - timesteps:].values
inputs = inputs.reshape(-1, 1)

# Only transform here: the scaler keeps the fit obtained on the training set.
inputs = sc.transform(inputs)

# One window per test row. The upper bound follows the data length rather
# than a hard-coded 80, so test files with a row count other than 20 work too.
X_test = []
for i in range(timesteps, len(inputs)):
    X_test.append(inputs[i - timesteps:i, 0])
X_test = np.array(X_test)

# Same 3-D layout as the training input: (samples, timesteps, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict in scaled space, then map the predictions back to price units.
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

In [None]:
# Draw the real and the predicted price series on the same axes.
fig, ax = plt.subplots()
ax.plot(real_stock_price, color='red', label='Real Google Stock Price')
ax.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')

# Title, axis labels and legend so the figure can be read on its own.
ax.set_title('Google Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Google Stock Price')
ax.legend()
plt.show()