In [None]:
# Silence all Python warnings for the rest of the notebook.
# NOTE(review): this also hides deprecation notices raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')

### The First Step | Import library

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn import preprocessing
from sklearn import metrics 
import tensorflow as tf
from tensorflow import keras
import joblib

### The Second Step | Preparing the Dataset

In [None]:
# Read the GOOG CSV from the Kaggle input directory and display the resulting frame
Data = pd.read_csv("/kaggle/input/google-stock-prediction/GOOG.csv")
Data

In [None]:
# Summary statistics of the imported dataset
Data.describe()

In [None]:
# Column dtypes and non-null counts
Data.info()

In [None]:
# Work on an independent copy of the raw frame. pd.DataFrame(Data) does not
# guarantee a copy, so the column edits below could otherwise leak into Data.
df = Data.copy()
df

In [None]:
# Drop the time-of-day part of the 'date' column: parse it to datetime, then
# format it as a 'dd-mm-YYYY' string (the column holds strings after this cell)
df['date'] = pd.to_datetime(df['date'])
df['date'] = df['date'].dt.strftime('%d-%m-%Y')
df

In [None]:
# Check the dtype of the 'date' column (strings after the strftime step)
df.info()

In [None]:
# Parse the day-first 'dd-mm-YYYY' strings back into datetime values
df['date'] = pd.to_datetime(df['date'], dayfirst=True)

In [None]:
# Remove the 'symbol' column. Rebinding df (instead of inplace=True) together
# with errors='ignore' keeps this cell safe to re-run after the column is gone.
df = df.drop(columns='symbol', errors='ignore')

In [None]:
# Confirm that 'date' is now datetime64 and that the 'symbol' column is gone
df.info()

### The Third Step | Visualizing the Dataset

In [None]:
# Distribution (density histogram + KDE curve) of every numeric feature, one panel each.
# sns.distplot is deprecated; histplot(kde=True, stat='density', kde_kws=dict(cut=3))
# is its documented replacement. The data is passed once, as x=, instead of
# passing both the whole frame and a column.

features = ['close', 'high', 'low', 'open', 'volume', 'adjClose', 'adjHigh', 'adjLow', 'adjOpen', 'adjVolume']
sns.set_palette("PiYG")
plt.figure(figsize=(16,26))
for idx, column in enumerate(features):
    plt.subplot(5, 2, idx + 1)
    sns.histplot(x=df[column], kde=True, stat='density', kde_kws=dict(cut=3), color='#75f8f2')
    plt.title(column, backgroundcolor='black', color='orange', fontsize=25)
    plt.xlabel(column, fontsize=16)
    plt.ylabel('Density', fontsize=16)
    plt.grid()

plt.tight_layout()
plt.show()

In [None]:
# Scatter each feature against 'close' to see how it moves with the closing price
features = ['high', 'low', 'open', 'volume', 'adjClose', 'adjHigh', 'adjLow', 'adjOpen']
sns.set_palette("PiYG")
plt.figure(figsize=(16, 26))
for position, column in enumerate(features, start=1):
    plt.subplot(5, 2, position)
    sns.scatterplot(data=df, x='close', y=column, color='orange')
    plt.title(column, backgroundcolor='black', color='#75f8f2', fontsize=25)
    plt.xlabel('Close', fontsize=16)
    plt.ylabel(column, fontsize=16)
    plt.grid()
plt.tight_layout()
plt.show()

In [None]:
# Box plot of every numeric feature, one panel each.
# The grid is created once as 2x5 (ten panels for ten features) and each plot is
# drawn on its own Axes; previously a 3x3 grid was created and a second 2x5 grid
# was then laid over the same figure with plt.subplot.
features = ['close', 'high', 'low', 'open', 'volume', 'adjClose', 'adjHigh', 'adjLow', 'adjOpen', 'adjVolume']
fig, axes = plt.subplots(nrows = 2, ncols = 5, figsize = (15,10))
for ax, column in zip(axes.flat, features):
    sns.boxplot(df[column], color="#75f8f2", ax=ax)

plt.show()

In [None]:
# Frame used for the line plots: everything except 'date', 'divCash' and 'splitFactor'
df1 = df.drop(columns=['date', 'divCash', 'splitFactor'])
df1

In [None]:
# Plot every column of df1 against the row index, one panel per column
plt.figure(figsize=(15, 25))
for panel, name in enumerate(df1.columns, start=1):
    plt.subplot(8, 2, panel)
    plt.plot(df1.index.values, df1[name], color='#75f8f2')
    plt.title(name, backgroundcolor='black', color='orange', fontsize=25)
    plt.xlabel(name, size=16)
plt.tight_layout()
plt.show()

In [None]:
# Switch matplotlib to the 'fivethirtyeight' style for the following plots
plt.style.use("fivethirtyeight")

In [None]:
# Rebind df1 to a fresh copy of df; the moving-average columns are added to this copy
df1= df.copy()

In [None]:
# Compare moving averages of 'close' over several window lengths to help choose a time step
ma_days = [5, 10, 20, 30, 60]

plt.figure(figsize=(12, 10))
plt.plot(df1['date'], df1['close'], label='close')

for MA in ma_days:
    # Add the rolling-mean column, then draw it on the same axes
    column_name = f"MA in {MA} days"
    df1[column_name] = df1['close'].rolling(window=MA).mean()
    plt.plot(df1['date'], df1[column_name], label=f'{MA} days')

plt.legend()
plt.show()

In [None]:
# Checking the DataFrame, which now includes the moving-average columns
df1

### The Fourth Step | Preparing the Dataset (Train, Test) for Use in RNN Models

#### Normalizing

In [None]:
# Copy df1 into df2, the frame used for scaling and for the train/test split
df2= df1.copy()

In [None]:
# Normalize the 'close' column to build the train and test sets.
# The scaler is fit on the training rows only (the first 80%, the same split used
# below), so the min/max of the test period do not leak into training. The fitted
# scaler is then applied to the whole column, so scaled test values may fall
# outside [0, 1].
close_prices = df2.filter(['close'])
n_train_rows = int(np.ceil(len(close_prices) * .8))
scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
scaler.fit(close_prices.iloc[:n_train_rows])
scaled = scaler.transform(close_prices)
# scaled 'close' column as a DataFrame
df3=pd.DataFrame(scaled, columns=['close'])

In [None]:
# Summary statistics of the scaled column, transposed with '.T' for horizontal display
df3.describe().T

#### Examining the divisions of Test and Train

In [None]:
# Number of rows in the training split: 80% of the 'close' column, rounded up
total_rows = len(df2.filter(['close']).values)
new_dataset_length = int(np.ceil(total_rows * .8))
print('Length of %80 of Dataset is ',new_dataset_length, ' Therefore, the test data length is ',len(df.close) - new_dataset_length)

#### Creating Train and Test from the column of 'close'

In [None]:
# Build the training windows with a time step of 20 rows
# (20 was judged the most suitable window in the moving-average comparison)

train = scaled[:new_dataset_length, :]

time_step = 20

X_train = []
y_train = []

for end in range(time_step, len(train)):
    # Each sample is the time_step values before row `end`; the target is row `end`
    X_train.append(train[end - time_step:end, 0])
    y_train.append(train[end, 0])
    # Print the first two windows as a sanity check
    if end - time_step <= 1:
        print(X_train)
        print(y_train)
        print()

In [None]:
# Converting the X_train and y_train to numpy arrays 
X_train, y_train = np.array(X_train), np.array(y_train)

In [None]:
# Reshape the X_train 
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [None]:
# Test slice of the scaled data; it starts time_step rows before the split point
# so that the first test window has a full history
test = scaled[new_dataset_length-time_step:, :]

In [None]:
# y_test: the rows after the split point, inverse-transformed back to the original scale
y_test =  np.array(scaler.inverse_transform(df3))[new_dataset_length:, :]

In [None]:
# Build the test windows: each holds the time_step values preceding one test row
X_test = [test[end - time_step:end, 0] for end in range(time_step, len(test))]

In [None]:
# Convert X_test (a list of windows) and y_test to numpy arrays
X_test = np.array(X_test)
y_test = np.array(y_test)

In [None]:
# Add a trailing axis of size 1 so X_test is 3-D, matching the reshaped X_train
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1 ))

### The Fifth Step | Creating RNN Models Based on LSTM, GRU and Bidirectional Layers

#### Creating Model by LSTM

In [None]:
# Create the RNN as an empty Sequential model
RNN1 = tf.keras.models.Sequential()

In [None]:
# First LSTM layer: 130 units, returns the full sequence for the next recurrent layer
RNN1.add(tf.keras.layers.LSTM(units=130, return_sequences=True, input_shape=(X_train.shape[1],1)))

In [None]:
# Second LSTM layer: 65 units, returns only its final output
RNN1.add(tf.keras.layers.LSTM(units=65, return_sequences=False))

In [None]:
# Fully connected layer with 30 units
RNN1.add(tf.keras.layers.Dense(units=30))

In [None]:
# Output layer: a single unit for the predicted value
RNN1.add(tf.keras.layers.Dense(units=1))

In [None]:
# Compile the RNN with the Adam optimizer and MSE loss. Mean absolute error is
# tracked instead of 'accuracy', which is a classification metric and carries no
# meaning for this price regression.
RNN1.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Fit the model on the training windows. Validation uses the *scaled* targets of
# the test rows so val_loss is on the same scale as the training loss; y_test holds
# prices in the original scale and is kept for the error metrics further down.
Model_1 = RNN1.fit(X_train, y_train, validation_data=(X_test, scaled[new_dataset_length:]), batch_size=32, epochs=10)

In [None]:
# Predict on the test windows and map the scaled outputs back to the original scale
predictions1 = scaler.inverse_transform(RNN1.predict(X_test))

In [None]:
# Error metrics and R2 score of model 1 against y_test
mse_1 = metrics.mean_squared_error(y_test, predictions1)
for label, value in (
    ('Mean Absolute Error', metrics.mean_absolute_error(y_test, predictions1)),
    ('Mean Squared Error', mse_1),
    ('Root Mean Squared Error', np.sqrt(mse_1)),
    ('R2_Score', metrics.r2_score(y_test, predictions1)),
):
    print(f'{label}: {value}')

In [None]:
# Switch matplotlib to the 'seaborn-v0_8-muted' style for the prediction plots
plt.style.use("seaborn-v0_8-muted")

In [None]:
# Split the 'close' column at the train/test boundary for plotting.
# .copy() makes `valid` an independent frame before a column is added to it.
train = df2.filter(['close']).iloc[:new_dataset_length]
valid = df2.filter(['close']).iloc[new_dataset_length:].copy()
valid['Predictions1'] = predictions1

# Visualizing the data; the date axis is split at new_dataset_length rather than
# at a hard-coded row number, so it follows the actual split
plt.figure(figsize=(16,6))
plt.title('Model_1')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.plot(df1['date'].iloc[:new_dataset_length],train['close'])
plt.plot(df1['date'].iloc[new_dataset_length:],valid[['close', 'Predictions1']])
plt.legend(['Train', 'Val', 'Predictions'], loc='best')
plt.show()

#### Creating Model by GRU

In [None]:
# Create the RNN as an empty Sequential model
RNN2 = tf.keras.models.Sequential()

In [None]:
# First GRU layer: 130 units, tanh activation, returns the full sequence
RNN2.add(tf.keras.layers.GRU(units=130, return_sequences=True, input_shape=(X_train.shape[1],1), activation='tanh'))

In [None]:
# Second GRU layer: 65 units, tanh activation, returns only its final output
RNN2.add(tf.keras.layers.GRU(units=65, return_sequences=False, activation='tanh'))

In [None]:
# Fully connected layer with 30 units
RNN2.add(tf.keras.layers.Dense(units=30))

In [None]:
# Output layer: a single unit for the predicted value
RNN2.add(tf.keras.layers.Dense(units=1))

In [None]:
# Compile the RNN with the Adam optimizer and MSE loss. Mean absolute error is
# tracked instead of 'accuracy', which is a classification metric and carries no
# meaning for this price regression.
RNN2.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Compile RNN2 (Adam optimizer, MSE loss). Mean absolute error is tracked instead
# of 'accuracy', which is a classification metric and carries no meaning for
# this price regression.
RNN2.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Fit the model on the training windows. Validation uses the *scaled* targets of
# the test rows so val_loss is on the same scale as the training loss; y_test holds
# prices in the original scale and is kept for the error metrics further down.
Model_2 = RNN2.fit(X_train, y_train, validation_data=(X_test, scaled[new_dataset_length:]), batch_size=32, epochs=10)

In [None]:
# Predict on the test windows and map the scaled outputs back to the original scale
predictions2 = scaler.inverse_transform(RNN2.predict(X_test))

In [None]:
# Error metrics and R2 score of model 2 against y_test
mse_2 = metrics.mean_squared_error(y_test, predictions2)
for label, value in (
    ('Mean Absolute Error', metrics.mean_absolute_error(y_test, predictions2)),
    ('Mean Squared Error', mse_2),
    ('Root Mean Squared Error', np.sqrt(mse_2)),
    ('R2_Score', metrics.r2_score(y_test, predictions2)),
):
    print(f'{label}: {value}')

In [None]:
# Split the 'close' column at the train/test boundary for plotting.
# .copy() makes `valid` an independent frame before a column is added to it.
train = df2.filter(['close']).iloc[:new_dataset_length]
valid = df2.filter(['close']).iloc[new_dataset_length:].copy()
valid['Predictions2'] = predictions2

# Visualizing the data; the date axis is split at new_dataset_length rather than
# at a hard-coded row number, so it follows the actual split
plt.figure(figsize=(16,6))
plt.title('Model_2')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.plot(df1['date'].iloc[:new_dataset_length],train['close'])
plt.plot(df1['date'].iloc[new_dataset_length:],valid[['close', 'Predictions2']])
plt.legend(['Train', 'Val', 'Predictions'], loc='best')
plt.show()

#### Creating Model by LSTM & Bidirectional

In [None]:
# Create the RNN as an empty Sequential model
RNN3 = tf.keras.models.Sequential()

In [None]:
# First Bidirectional LSTM layer: 130 units, returns the full sequence.
# NOTE(review): input_shape is given to the inner LSTM, not to the Bidirectional
# wrapper, unlike the first layer of the other models — confirm this is intended.
RNN3.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=130, return_sequences=True, input_shape=(X_train.shape[1],1))))

In [None]:
# Second Bidirectional LSTM layer: 65 units, returns only its final output
RNN3.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=65, return_sequences=False)))

In [None]:
# Fully connected layer with 30 units
RNN3.add(tf.keras.layers.Dense(units=30))

In [None]:
# Output layer: a single unit for the predicted value
RNN3.add(tf.keras.layers.Dense(units=1))

In [None]:
# Compile the RNN with the Adam optimizer and MSE loss. Mean absolute error is
# tracked instead of 'accuracy', which is a classification metric and carries no
# meaning for this price regression.
RNN3.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Fit RNN3 (the Bidirectional model) on the training windows. This cell previously
# called RNN2.fit, which left RNN3 untrained and trained RNN2 a second time.
# Validation uses the *scaled* targets of the test rows so val_loss is on the same
# scale as the training loss; y_test holds prices in the original scale.
Model_3 = RNN3.fit(X_train, y_train, validation_data=(X_test, scaled[new_dataset_length:]), batch_size=32, epochs=10)

In [None]:
# Predict on the test windows and map the scaled outputs back to the original scale
predictions3 = scaler.inverse_transform(RNN3.predict(X_test))

In [None]:
# Error metrics and R2 score of model 3 against y_test
mse_3 = metrics.mean_squared_error(y_test, predictions3)
for label, value in (
    ('Mean Absolute Error', metrics.mean_absolute_error(y_test, predictions3)),
    ('Mean Squared Error', mse_3),
    ('Root Mean Squared Error', np.sqrt(mse_3)),
    ('R2_Score', metrics.r2_score(y_test, predictions3)),
):
    print(f'{label}: {value}')

In [None]:
# Split the 'close' column at the train/test boundary for plotting.
# .copy() makes `valid` an independent frame before a column is added to it.
train = df2.filter(['close']).iloc[:new_dataset_length]
valid = df2.filter(['close']).iloc[new_dataset_length:].copy()
valid['Predictions3'] = predictions3

# Visualizing the data; the date axis is split at new_dataset_length rather than
# at a hard-coded row number, so it follows the actual split
plt.figure(figsize=(16,6))
plt.title('Model_3')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.plot(df1['date'].iloc[:new_dataset_length],train['close'])
plt.plot(df1['date'].iloc[new_dataset_length:],valid[['close', 'Predictions3']])
plt.legend(['Train', 'Val', 'Predictions'], loc='best')
plt.show()

#### Creating Model by LSTM & GRU

In [None]:
# Create the RNN as an empty Sequential model
RNN4 = tf.keras.models.Sequential()

In [None]:
# First layer (LSTM): 130 units, returns the full sequence for the next recurrent layer
RNN4.add(tf.keras.layers.LSTM(units=130, return_sequences=True, input_shape=(X_train.shape[1],1)))

In [None]:
# Second layer (GRU): 65 units, tanh activation, returns only its final output
RNN4.add(tf.keras.layers.GRU(units=65, return_sequences=False, activation='tanh'))

In [None]:
# Fully connected layer with 30 units
RNN4.add(tf.keras.layers.Dense(units=30))

In [None]:
# Output layer: a single unit for the predicted value
RNN4.add(tf.keras.layers.Dense(units=1))

In [None]:
# Compile the RNN with the Adam optimizer and MSE loss. Mean absolute error is
# tracked instead of 'accuracy', which is a classification metric and carries no
# meaning for this price regression.
RNN4.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Fit the model on the training windows. Validation uses the *scaled* targets of
# the test rows so val_loss is on the same scale as the training loss; y_test holds
# prices in the original scale and is kept for the error metrics further down.
Model_4 = RNN4.fit(X_train, y_train, validation_data=(X_test, scaled[new_dataset_length:]), batch_size=32, epochs=10)

In [None]:
# Predict on the test windows and map the scaled outputs back to the original scale
predictions4 = scaler.inverse_transform(RNN4.predict(X_test))

In [None]:
# Error metrics and R2 score of model 4 against y_test
mse_4 = metrics.mean_squared_error(y_test, predictions4)
for label, value in (
    ('Mean Absolute Error', metrics.mean_absolute_error(y_test, predictions4)),
    ('Mean Squared Error', mse_4),
    ('Root Mean Squared Error', np.sqrt(mse_4)),
    ('R2_Score', metrics.r2_score(y_test, predictions4)),
):
    print(f'{label}: {value}')

In [None]:
# Split the 'close' column at the train/test boundary for plotting.
# .copy() makes `valid` an independent frame before a column is added to it.
train = df2.filter(['close']).iloc[:new_dataset_length]
valid = df2.filter(['close']).iloc[new_dataset_length:].copy()
valid['Predictions4'] = predictions4

# Visualizing the data; the date axis is split at new_dataset_length rather than
# at a hard-coded row number, so it follows the actual split
plt.figure(figsize=(16,6))
plt.title('Model_4')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price', fontsize=18)
plt.plot(df1['date'].iloc[:new_dataset_length],train['close'])
plt.plot(df1['date'].iloc[new_dataset_length:],valid[['close', 'Predictions4']])
plt.legend(['Train', 'Val', 'Predictions'], loc='best')
plt.show()

In [None]:
# Save the trained models for the WebApp or other predictions.
# Model_1/2/4 are the History objects returned by fit(), not the networks, so the
# Keras models themselves (RNN1/2/4) are saved with Model.save in the native
# '.keras' format; reload them with tf.keras.models.load_model.
RNN1.save('model1.keras')
RNN2.save('model2.keras') # The best model
RNN4.save('model4.keras')