In [None]:
import math # Mathematical functions 
import numpy as np # Fundamental package for scientific computing with Python
import pandas as pd # Additional functions for analysing and manipulating data
from datetime import date, timedelta, datetime # Date Functions
from pandas.plotting import register_matplotlib_converters # This function adds plotting functions for calender dates
import matplotlib.pyplot as plt # Important package for visualization - we use this to plot the market data
import matplotlib.dates as mdates # Formatting dates
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error # Packages for measuring model performance / errors
from tensorflow.keras import Sequential # Deep learning library, used for neural networks
from tensorflow.keras.layers import LSTM, Dense, Dropout # Deep learning classes for recurrent and regular densely-connected layers
from tensorflow.keras.callbacks import EarlyStopping # EarlyStopping during model training
from sklearn.preprocessing import RobustScaler, MinMaxScaler # This Scaler removes the median and scales the data according to the quantile range to normalize the price data 
import seaborn as sns # Visualization
sns.set_style('white', { 'axes.spines.right': False, 'axes.spines.top': False})

# Report the TensorFlow version and how many GPUs TensorFlow can see
print('Tensorflow Version: ' + tf.__version__)
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

import yfinance as yf 

# Download parameters, named so the instrument and date range can be changed in one place
TICKER = "AAPL"
START_DATE = "2008-08-08"
END_DATE = "2014-06-08"

# Daily price history for TICKER.
# auto_adjust=False is passed explicitly so the frame keeps the separate 'Adj Close'
# column shown in this notebook's outputs.
# NOTE(review): newer yfinance releases reportedly changed this default - confirm
# against the installed version.
df = yf.download(TICKER, start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False)

Tensorflow Version: 2.12.0
Num GPUs: 1
[*********************100%***********************]  1 of 1 completed


In [None]:
#first=[0.0, 0.2, -0.04, 0.16, 0.04, -0.08, 0.08, 0.24, 0.08, 0.04, 0.04, 0.12, 0.0, -0.16, 0.24, -0.08, 0.24, 0.08, 0.32, 0.12, 0.12, -0.16, 0.08, 0.08, 0.04, 0.12, 0.12, 0.12, 0.24, -0.04, 0.08, 0.04, 0.08, 0.12, 0.0, -0.08, 0.12, 0.2, 0.12, 0.0, -0.08, -0.12, 0.04, 0.12, -0.08, 0.32, -0.04, -0.16, -0.08, 0.08, 0.08, -0.2, -0.2, -0.08, -0.16, 0.0, 0.12, 0.0, -0.04, 0.0, -0.2, 0.0, -0.04, -0.12, 0.0, -0.08, -0.16, -0.16, -0.16, -0.2, 0.0, -0.04, 0.0, -0.04, -0.04, -0.2, -0.16, -0.04, -0.04, -0.12, 0.0, -0.04, -0.08, 0.0, -0.04, -0.2, 0.0, -0.16, -0.04, -0.04, 0.0, -0.08, -0.2, -0.12, -0.2, -0.08, 0.0, -0.08, -0.08, -0.16, -0.16, -0.12, -0.12, -0.12, -0.04, -0.12, -0.04, 0.0, -0.12, -0.16, -0.04, 0.0, -0.04, -0.04, -0.12, -0.04, 0.04, -0.08, -0.08, 0.04, 0.0, -0.04, 0.0, -0.04, -0.32, -0.04, -0.08, -0.04, -0.12, -0.04, 0.04, -0.16, 0.0, 0.12, -0.12, -0.16, 0.0, -0.12, -0.08, -0.08, -0.08, 0.0, -0.12, -0.2, -0.08, -0.08, -0.12, -0.12, -0.08, 0.04, -0.16, -0.08, -0.2, -0.04, -0.16, 0.08, -0.12, -0.2, -0.04, -0.2, -0.12, -0.08, 0.0, -0.16, 0.0, -0.2, -0.12, -0.08, 0.04, -0.16, -0.08, -0.08, -0.12, -0.12, -0.04, -0.04, 0.0, -0.04, -0.04, 0.08, -0.16, -0.04, -0.28, -0.04, -0.04, -0.08, -0.04, -0.04, -0.08, 0.0, -0.04, -0.16, -0.08, -0.12, 0.0, -0.08, -0.24, -0.24, -0.12, -0.2, 0.0, -0.04, -0.2, -0.08, 0.0, -0.04, -0.08, 0.0, -0.04, -0.08, 0.04, -0.04, -0.04, -0.16, 0.0, 0.08, -0.08, 0.04, -0.16, -0.2, -0.2, 0.0, 0.0, -0.2, -0.04, 0.0, -0.04, -0.28, -0.24, -0.12, -0.08, -0.08, -0.12, -0.08, -0.12, -0.08, -0.16, -0.12, -0.12, -0.12, -0.24, -0.08, 0.04, -0.16, -0.08, -0.04, -0.12, -0.12, -0.04, -0.08, -0.2, -0.12, -0.04, -0.16, -0.08, 0.08, -0.04, -0.12, 0.0, -0.04, -0.2, 0.0, -0.08, -0.12, 0.0, 0.04, -0.16, -0.12, -0.12, -0.12, -0.24, -0.08, 0.04, -0.16, -0.08, -0.04, -0.12, -0.12, -0.04, -0.08, -0.2, -0.12, -0.04, -0.16, -0.08, 0.08, -0.04, -0.12, 0.0, -0.04, -0.2, 0.0, -0.08, -0.12, 0.0, 0.04, -0.16, -0.12, -0.12, -0.12, 0.0, -0.2, 0.04, -0.04, -0.24, -0.08, -0.2, 
#-0.16, -0.12, -0.2, -0.28, 0.0, -0.04, -0.12, -0.2, 0.0, -0.08, -0.2, -0.04, -0.16, 0.08, -0.04, -0.08, -0.08, -0.08, -0.08, -0.16, -0.2, -0.16, -0.2, -0.12, -0.32, -0.08, -0.08, -0.12, -0.24, -0.2, -0.08, -0.08, 0.0, -0.08, -0.28, 0.08, -0.08, -0.28, -0.16, -0.2, 0.04, -0.04, 0.0, -0.04, -0.2, -0.2, -0.08, -0.16, -0.08, 0.08, -0.28, -0.08, -0.12]


In [None]:
# Sentiment scores that a later cell attaches to the price frame as the
# 'Sentiment Score' column, one value per row.
# Provenance of these values is not recorded in this notebook - TODO document the source.
# NOTE(review): the name says "four years", but the notebook's outputs show the series
# aligned with price rows from 2008-08-08 to 2014-04-29 - confirm the intended span.
four_years= [-0.28, 0.0, -0.24, -0.16, 0.0, -0.12, -0.12, 0.0, -0.12, -0.2, -0.16, -0.08, -0.08, -0.04, 0.08, -0.24, -0.2, -0.08, -0.16, -0.08, -0.08, -0.24, -0.24, -0.16, -0.12, -0.24, -0.24, -0.2, -0.16, -0.2, -0.24, -0.16, -0.32, -0.2, -0.12, -0.12, -0.2, -0.16, -0.08, -0.08, -0.24, -0.16, -0.16, -0.16, -0.28, -0.2, -0.16, -0.12, -0.24, 0.0, 0.0, -0.16, -0.04, -0.12, -0.52, -0.2, -0.2, -0.24, -0.2, -0.24, -0.08, -0.2, -0.12, -0.28, -0.12, 0.04, -0.08, -0.28, -0.08, -0.12, -0.08, -0.2, 0.08, -0.12, -0.2, -0.24, -0.16, -0.12, -0.08, -0.12, 0.04, -0.12, -0.08, -0.12, -0.08, -0.28, -0.28, -0.2, -0.12, -0.04, -0.08, 0.0, -0.2, -0.2, -0.36, -0.08, -0.04, -0.12, -0.16, -0.24, -0.16, -0.04, -0.04, -0.2, -0.2, -0.08, -0.08, -0.08, -0.12, -0.28, -0.12, -0.12, -0.16, -0.12, -0.16, -0.16, -0.12, -0.04, -0.2, -0.04, -0.16, -0.08, -0.16, -0.16, -0.2, -0.04, -0.08, -0.08, -0.2, -0.08, -0.12, -0.2, -0.24, -0.16, -0.16, -0.16, -0.08, -0.16, -0.12, -0.04, 0.0, -0.12, -0.12, -0.2, 0.0, -0.12, -0.04, -0.12, -0.16, -0.12, -0.2, -0.16, -0.12, -0.08, -0.08, -0.16, -0.2, -0.2, -0.16, -0.12, -0.24, -0.16, -0.12, -0.12, -0.2, -0.24, -0.08, -0.08, -0.16, -0.12, -0.2, -0.12, 0.12, -0.12, -0.2, -0.04, 0.04, -0.04, -0.08, 0.04, -0.24, -0.08, -0.28, -0.08, -0.04, -0.2, -0.04, -0.08, -0.12, -0.08, 0.0, -0.08, -0.2, 0.04, -0.08, -0.08, -0.2, -0.28, -0.16, -0.12, -0.24, -0.08, -0.2, -0.12, -0.12, -0.32, -0.2, -0.04, -0.08, -0.2, -0.08, -0.08, 0.04, -0.2, -0.12, -0.24, -0.24, 0.0, -0.12, -0.4, -0.2, 0.0, -0.2, -0.2, -0.16, -0.16, -0.04, -0.24, -0.08, -0.12, -0.2, -0.24, -0.08, -0.28, -0.08, -0.12, -0.16, -0.24, -0.28, 0.0, -0.2, -0.2, -0.24, -0.16, -0.24, -0.16, -0.36, -0.16, -0.2, 0.04, -0.16, -0.12, -0.28, -0.04, 0.04, -0.12, 0.0, -0.16, -0.24, -0.08, -0.04, -0.28, -0.08, -0.16, -0.08, -0.12, -0.16, 0.0, -0.24, -0.04, -0.2, -0.2, -0.04, -0.04, -0.08, -0.12, -0.2, -0.08, -0.08, -0.12, -0.28, -0.24, -0.2, -0.08, -0.08, -0.08, -0.12, -0.04, -0.24, -0.12, -0.16, -0.24, -0.12, -0.16, -0.08, -0.12, 
-0.04, -0.28, -0.24, -0.32, -0.08, -0.24, 0.04, 0.04, -0.32, -0.16, -0.08, -0.08, -0.24, -0.32, -0.16, 0.0, -0.24, -0.04, -0.16, -0.2, -0.08, -0.2, -0.04, -0.32, -0.2, -0.16, -0.32, 0.0, -0.24, 0.04, -0.2, -0.16, -0.2, -0.08, -0.12, -0.24, -0.12, -0.12, -0.32, -0.28, -0.04, 0.0, -0.24, -0.16, -0.04, -0.24, -0.12, -0.32, -0.2, 0.0, -0.28, -0.04, -0.04, -0.08, -0.16, -0.16, -0.12, -0.08, -0.24, -0.12, -0.16, -0.2, -0.04, -0.28, -0.2, 0.04, -0.16, 0.08, -0.12, -0.16, -0.04, -0.16, -0.16, -0.12, -0.24, -0.2, 0.0, -0.04, -0.36, -0.16, -0.2, -0.28, -0.16, -0.36, 0.0, -0.2, -0.12, 0.0, -0.12, -0.24, -0.08, -0.08, -0.12, -0.2, -0.2, -0.08, -0.16, -0.08, 0.04, -0.2, -0.04, -0.12, -0.24, -0.12, -0.08, -0.08, -0.04, -0.12, 0.0, 0.0, -0.2, -0.2, 0.0, 0.04, -0.36, -0.08, -0.04, -0.12, -0.12, -0.08, -0.24, -0.04, -0.12, -0.12, -0.08, -0.08, -0.12, -0.24, -0.12, -0.12, -0.2, -0.08, 0.04, -0.16, -0.16, -0.2, -0.16, -0.24, 0.0, -0.04, -0.24, -0.16, 0.0, -0.08, -0.16, -0.24, -0.16, -0.32, 0.0, -0.16, -0.2, -0.12, -0.12, -0.12, -0.12, -0.16, -0.12, -0.08, -0.04, -0.36, -0.08, -0.2, -0.2, -0.24, -0.08, -0.24, -0.2, -0.08, -0.12, -0.28, -0.4, -0.2, -0.16, -0.16, -0.12, -0.28, -0.2, -0.16, -0.04, 0.04, -0.12, -0.48, -0.2, -0.2, -0.04, -0.24, -0.4, -0.2, 0.0, -0.12, -0.16, -0.08, 0.04, -0.12, -0.12, -0.2, 0.0, -0.2, -0.16, -0.16, -0.28, -0.32, -0.2, -0.08, -0.28, -0.2, -0.16, -0.28, -0.24, -0.24, -0.12, -0.2, -0.08, -0.16, -0.12, -0.16, -0.2, -0.4, -0.2, -0.24, -0.36, -0.16, -0.24, -0.2, -0.36, -0.04, -0.2, -0.2, -0.24, -0.28, -0.16, -0.04, -0.04, -0.12, -0.12, -0.04, -0.32, -0.28, -0.2, -0.2, -0.16, -0.44, -0.2, -0.2, -0.08, -0.24, -0.36, -0.08, -0.08, -0.04, -0.24, -0.32, -0.24, -0.08, -0.08, -0.32, -0.24, -0.16, -0.12, -0.32, -0.4, -0.32, -0.28, -0.24, -0.24, -0.04, -0.2, -0.4, -0.12, -0.28, -0.36, -0.2, -0.2, -0.12, -0.24, -0.32, -0.16, 0.04, -0.28, -0.24, -0.16, -0.2, -0.2, -0.32, -0.32, -0.24, -0.12, -0.08, -0.2, -0.24, 0.0, -0.12, -0.16, -0.2, -0.24, -0.28, -0.28, -0.4, -0.2, 
-0.12, -0.12, -0.04, -0.04, -0.16, -0.12, -0.08, -0.08, -0.16, -0.04, -0.16, -0.2, -0.44, -0.16, -0.04, -0.2, -0.04, -0.08, -0.08, -0.12, -0.24, -0.16, -0.08, -0.12, -0.32, -0.2, -0.16, -0.16, -0.24, -0.16, -0.04, -0.32, -0.12, -0.2, -0.08, -0.2, -0.24, -0.2, -0.12, -0.4, -0.2, -0.28, -0.24, -0.16, -0.16, -0.16, -0.04, -0.04, -0.08, -0.28, -0.16, -0.2, -0.08, -0.24, -0.24, -0.2, -0.28, -0.28, -0.12, -0.4, -0.28, -0.04, -0.28, -0.16, -0.2, -0.32, -0.2, -0.28, -0.2, -0.24, -0.08, -0.12, -0.28, -0.08, -0.28, -0.24, -0.08, -0.2, -0.24, -0.12, -0.16, -0.24, -0.04, -0.2, -0.16, -0.08, -0.24, -0.24, -0.08, -0.32, -0.2, -0.08, -0.24, -0.16, -0.24, -0.16, 0.0, -0.16, -0.16, -0.32, -0.16, -0.16, -0.12, -0.36, -0.36, -0.32, -0.2, -0.28, -0.2, -0.16, -0.16, -0.2, -0.36, -0.32, -0.24, -0.16, -0.16, -0.2, -0.2, -0.28, -0.2, -0.4, -0.32, -0.2, -0.32, -0.24, -0.32, -0.12, -0.32, -0.12, -0.16, -0.24, -0.32, -0.04, 0.0, -0.36, -0.2, -0.2, -0.32, -0.36, -0.4, -0.12, -0.08, -0.24, -0.24, -0.16, -0.12, -0.16, -0.2, -0.16, -0.16, -0.04, -0.24, -0.16, -0.4, -0.36, -0.24, -0.24, -0.32, -0.36, -0.2, -0.24, -0.04, -0.24, -0.16, -0.08, -0.36, -0.16, -0.32, -0.2, -0.16, -0.16, -0.12, -0.36, -0.28, -0.32, -0.28, -0.16, -0.2, -0.24, -0.16, -0.48, -0.24, -0.2, -0.36, -0.2, -0.32, -0.12, -0.4, -0.16, -0.32, -0.24, -0.28, -0.2, -0.16, -0.08, -0.32, -0.08, -0.28, -0.16, -0.2, -0.28, -0.2, -0.4, -0.04, -0.12, -0.4, -0.48, -0.12, -0.4, -0.08, -0.28, -0.04, -0.08, -0.16, -0.24, -0.28, -0.2, -0.12, -0.16, -0.2, -0.12, -0.08, -0.32, -0.24, -0.2, -0.12, -0.36, -0.28, -0.2, -0.32, -0.24, -0.12, -0.32, -0.28, -0.2, -0.28, -0.16, -0.16, -0.28, -0.24, -0.32, -0.36, -0.2, -0.28, -0.2, -0.28, -0.2, -0.32, -0.32, -0.32, -0.44, -0.4, -0.2, -0.2, -0.36, -0.2, -0.32, -0.2, -0.2, -0.32, -0.32, -0.24, -0.32, -0.28, -0.2, -0.04, -0.4, -0.2, -0.08, 0.04, -0.28, -0.12, -0.12, -0.36, -0.24, -0.36, -0.4, -0.28, -0.28, -0.24, -0.36, -0.24, -0.28, -0.28, -0.36, -0.08, -0.2, -0.08, -0.32, -0.2, -0.2, -0.28, -0.36, -0.4, 
-0.24, -0.32, -0.16, -0.2, -0.08, -0.2, -0.16, -0.12, -0.24, -0.24, -0.28, -0.12, -0.4, -0.28, -0.2, -0.16, -0.28, -0.16, -0.08, -0.24, -0.12, -0.16, -0.2, -0.12, -0.16, -0.08, -0.2, -0.2, -0.16, -0.16, -0.4, -0.2, -0.24, 0.04, -0.24, -0.2, -0.28, -0.32, -0.16, -0.08, -0.32, -0.12, -0.16, -0.28, -0.2, -0.08, -0.24, -0.28, -0.2, -0.2, -0.2, -0.2, -0.16, -0.32, -0.16, -0.24, -0.2, -0.24, -0.28, -0.12, -0.24, -0.28, -0.36, -0.28, -0.12, -0.2, -0.24, -0.16, -0.16, -0.28, -0.28, -0.16, -0.16, -0.16, -0.16, -0.2, -0.32, -0.16, -0.16, -0.04, -0.08, -0.08, -0.12, -0.16, -0.12, -0.28, -0.16, -0.36, -0.28, -0.24, -0.28, -0.4, -0.2, -0.16, -0.12, -0.32, -0.24, -0.28, -0.28, -0.28, -0.12, -0.12, -0.2, -0.2, -0.16, -0.24, -0.2, -0.08, -0.36, -0.12, -0.12, -0.36, -0.2, 0.0, -0.12, -0.28, -0.2, -0.16, -0.36, -0.2, -0.12, -0.32, -0.28, -0.32, -0.4, -0.2, 0.08, -0.24, -0.2, -0.16, -0.28, -0.2, -0.24, -0.08, -0.28, -0.2, -0.2, -0.12, -0.28, -0.16, -0.28, -0.2, -0.32, -0.2, -0.2, -0.12, -0.24, -0.36, -0.32, -0.24, -0.24, -0.24, 0.0, 0.0, -0.44, -0.32, -0.2, -0.2, -0.08, -0.28, -0.12, -0.12, -0.24, -0.04, -0.12, -0.28, -0.24, -0.16, -0.12, -0.2, -0.04, -0.08, -0.08, -0.12, -0.36, -0.24, 0.0, -0.28, -0.04, -0.04, -0.44, -0.04, -0.32, -0.24, -0.16, -0.24, -0.32, -0.28, -0.24, -0.12, -0.2, -0.28, -0.12, -0.04, -0.12, -0.24, -0.32, -0.08, -0.44, -0.32, -0.24, -0.2, -0.12, -0.08, -0.24, -0.16, -0.08, -0.12, -0.12, -0.04, -0.16, -0.36, -0.2, -0.08, -0.16, -0.2, -0.24, -0.12, -0.36, 0.0, -0.08, -0.24, -0.2, -0.2, 0.0, -0.12, -0.16, 0.04, -0.28, -0.24, -0.2, -0.16, -0.16, -0.44, -0.08, -0.12, -0.16, -0.24, -0.2, -0.2, -0.16, -0.12, -0.2, -0.04, -0.32, -0.24, -0.12, -0.12, -0.16, -0.16, -0.28, -0.32, -0.16, -0.12, -0.24, -0.2, -0.2, 0.04, -0.44, -0.16, -0.16, -0.08, -0.24, -0.12, -0.08, 0.04, -0.24, -0.08, -0.24, -0.16, -0.4, -0.04, -0.12, -0.24, -0.2, -0.04, -0.08, -0.24, -0.24, -0.12, -0.2, -0.2, -0.12, 0.0, -0.12, 0.04, -0.16, -0.36, -0.12, -0.04, -0.28, -0.2, -0.2, -0.2, -0.44, -0.2, 
-0.28, -0.36, -0.16, -0.12, -0.16, -0.08, -0.24, -0.12, -0.2, -0.28, -0.32, -0.08, -0.24, -0.12, -0.16, -0.08, -0.04, -0.04, -0.28, -0.24, -0.16, -0.2, -0.2, -0.16, -0.36, -0.28, -0.32, -0.16, -0.12, -0.08, -0.12, -0.2, -0.2, -0.04, -0.2, -0.2, -0.24, -0.2, -0.24, -0.36, -0.24, -0.12, -0.12, -0.24, -0.24, -0.32, -0.36, -0.2, -0.16, -0.28, -0.2, -0.08, -0.08, -0.2, -0.24, -0.16, -0.36, -0.04, -0.24, -0.32, -0.4, -0.12, -0.08, -0.08, -0.36, -0.24, -0.16, -0.32, -0.28, -0.16, -0.28, -0.08, -0.24, -0.2, -0.08, -0.12, -0.32, -0.2, -0.16, -0.16, -0.32, -0.28, -0.28, -0.2, -0.2, 0.0, -0.16, -0.36, -0.16, -0.08, -0.16, -0.16, -0.2, -0.16, -0.12, 0.0, -0.08, -0.16, -0.2, -0.2, -0.16, -0.24, -0.16, -0.36, -0.12, -0.24, -0.08, -0.08, -0.24, -0.2, -0.2, -0.24, -0.08, -0.16, -0.08, -0.24, -0.32, -0.12, -0.04, -0.12, -0.16, -0.32, -0.2, -0.24, -0.28, -0.16, -0.2, -0.16, -0.28, -0.2, -0.24, -0.2, -0.12, -0.32, -0.16, -0.2, -0.32, -0.2, -0.2, -0.12, -0.04, -0.16, -0.2, -0.24, 0.0, -0.16, -0.28, -0.48, -0.2, -0.32, -0.08, -0.36, -0.08, -0.16, -0.16, -0.08, -0.2, -0.08, -0.16, -0.24, -0.12, -0.08, -0.08, -0.16, 0.08, -0.2, -0.28, -0.08, -0.2, -0.2, -0.2, 0.04, -0.16, 0.04, -0.28, -0.08, -0.12, -0.16, -0.28, -0.36, -0.08, 0.04, -0.16, -0.24, -0.04, -0.32, -0.24, 0.0, -0.16, -0.12, -0.36, -0.16, -0.2, -0.36, -0.08, -0.12, -0.24, -0.32, -0.2, -0.16, -0.2, -0.12, -0.12, -0.2, -0.16, -0.28, -0.24, 0.0, -0.12, -0.24, -0.04, -0.2, -0.24, -0.16, -0.2, -0.16, -0.28, -0.12, -0.16, -0.08, 0.0, -0.24, -0.32, -0.12, -0.16, -0.28, -0.2, -0.16, -0.2, -0.32, -0.36, -0.08, -0.08, -0.2, -0.28, 0.0, -0.16, -0.32, -0.12, -0.2, -0.2, -0.24, -0.16, -0.12, -0.16, -0.32, -0.32, -0.16, -0.16, -0.4, -0.36, -0.16, -0.2, -0.12]

In [None]:
#df = pd.read_csv("/content/upload_DJIA_table.csv")

In [None]:
# Row-order reversal, kept disabled.
# NOTE(review): this looks like a leftover from the commented-out CSV loading path -
# confirm before re-enabling or deleting it.
#df = df.loc[::-1].reset_index(drop=True)
# Display the price frame
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2008-08-08,5.852143,6.058929,5.848214,6.055357,5.153826,713997200
2008-08-11,6.073929,6.303571,6.059643,6.198571,5.275718,891304400
2008-08-12,6.197143,6.403214,6.196786,6.311786,5.372077,836278800
2008-08-13,6.356429,6.428571,6.282143,6.403571,5.450197,842346400
2008-08-14,6.368929,6.444643,6.351429,6.404286,5.450805,711300800
...,...,...,...,...,...,...
2014-06-02,22.641430,22.672501,22.232143,22.451786,19.992477,369350800
2014-06-03,22.445000,22.812143,22.437500,22.769285,20.275200,292709200
2014-06-04,22.765715,23.138929,22.718214,23.029285,20.506723,335482000
2014-06-05,23.078571,23.191786,22.950357,23.119642,20.587179,303805600


In [None]:
# Keep only the leading rows that have a sentiment score. Slicing by
# len(four_years) instead of dropping a fixed number of trailing rows makes this
# cell idempotent: re-running it no longer removes further rows.
# .copy() turns the slice into an independent frame, so adding a column to it
# afterwards does not raise pandas' SettingWithCopyWarning.
df = df.iloc[:len(four_years)].copy()

In [None]:
# Attach the sentiment series as a new column. assign() returns a new frame instead
# of writing into `df` in place, so it does not raise SettingWithCopyWarning even
# when `df` was produced by slicing. four_years must hold exactly one value per row.
df = df.assign(**{'Sentiment Score': four_years})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Sentiment Score'] = four_years


In [None]:
# Display the frame again to check that the 'Sentiment Score' column is present
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Sentiment Score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008-08-08,5.852143,6.058929,5.848214,6.055357,5.153826,713997200,-0.28
2008-08-11,6.073929,6.303571,6.059643,6.198571,5.275718,891304400,0.00
2008-08-12,6.197143,6.403214,6.196786,6.311786,5.372077,836278800,-0.24
2008-08-13,6.356429,6.428571,6.282143,6.403571,5.450197,842346400,-0.16
2008-08-14,6.368929,6.444643,6.351429,6.404286,5.450805,711300800,0.00
...,...,...,...,...,...,...,...
2014-04-23,18.895000,18.968929,18.730356,18.741072,16.595537,394940000,-0.40
2014-04-24,20.293215,20.357143,20.026072,20.277500,17.956064,759911600,-0.36
2014-04-25,20.161785,20.428213,20.141430,20.426430,18.087946,390275200,-0.16
2014-04-28,20.457144,21.276787,20.448214,21.217501,18.788452,669485600,-0.20


In [None]:
# Work on a date-ordered copy so the source frame itself stays untouched
train_df = df.sort_values(by=['Date']).copy()

# Columns that are fed to the model.
# NOTE(review): 'Sentiment Score' (like 'Month', 'Year' and 'Adj Close') is left out
# of this list, so the sentiment column on `df` is not a model input - confirm that
# this is intended.
FEATURES = ['High', 'Low', 'Open', 'Close', 'Volume']

print('FEATURE LIST')
print(list(FEATURES))

data = pd.DataFrame(train_df)
data_filtered = data.loc[:, FEATURES]

# Same feature columns plus a 'Prediction' column initialised with the close price
data_filtered_ext = data_filtered.assign(Prediction=data_filtered['Close'])

# Show the last rows of the extended frame
data_filtered_ext.tail()

FEATURE LIST
['High', 'Low', 'Open', 'Close', 'Volume']


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-04-23,18.968929,18.730356,18.895,18.741072,394940000,18.741072
2014-04-24,20.357143,20.026072,20.293215,20.2775,759911600,20.2775
2014-04-25,20.428213,20.14143,20.161785,20.42643,390275200,20.42643
2014-04-28,21.276787,20.448214,20.457144,21.217501,669485600,21.217501
2014-04-29,21.285,21.053928,21.205,21.154642,337377600,21.154642


In [None]:
# Number of observations (rows) in the feature table
nrows = len(data_filtered)

# NumPy copy of the feature table; the reshape keeps it as (rows, features)
np_data_unscaled = np.array(data_filtered)
np_data = np_data_unscaled.reshape(nrows, -1)
print(np_data.shape)

# Scale every feature column to the range 0..1.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test split
# that happens in a later cell, so test-period minima/maxima influence the training
# inputs - consider fitting on the training rows only.
scaler = MinMaxScaler()
np_data_scaled = scaler.fit_transform(np_data_unscaled)

# Second scaler fitted on the 'Close' column alone; used later to un-scale predictions
scaler_pred = MinMaxScaler()
df_Close = data_filtered_ext['Close'].to_frame()
np_Close_scaled = scaler_pred.fit_transform(df_Close)

(1440, 5)


In [None]:
# Number of consecutive time steps (rows) the model sees to make a single prediction
sequence_length = 100

# Column position of the prediction target inside the scaled feature matrix.
# That matrix is built from data_filtered (the FEATURES columns), so the position is
# looked up there. Looking it up in the full `data` frame is only correct while
# 'Close' happens to sit at the same position in both frames.
index_Close = data_filtered.columns.get_loc("Close")

# Split the data into training and test sets:
# the first 80% of the rows (rounded up) are used for training
train_data_len = math.ceil(np_data_scaled.shape[0] * 0.8)

# The test slice starts sequence_length rows before the split point so that the
# first test target has a full window of history in front of it
train_data = np_data_scaled[0:train_data_len, :]
test_data = np_data_scaled[train_data_len - sequence_length:, :]

# The RNN needs data with the format of [samples, time steps, features]
# Here, we create N samples with sequence_length time steps per sample and one
# value per feature column of `data` at every time step
def partition_dataset(sequence_length, data, target_index=None):
    """Cut a 2-D array into sliding input windows and single-step targets.

    Args:
        sequence_length: number of consecutive rows in each input window.
        data: 2-D array indexed as [row, column].
        target_index: column whose value in the row right after a window is
            that window's target. When omitted, the notebook-level
            ``index_Close`` is used, which keeps existing two-argument calls
            working unchanged.

    Returns:
        A tuple ``(x, y)``: ``x`` has shape
        (n_windows, sequence_length, n_columns) and ``y`` has shape
        (n_windows,), where n_windows = max(rows - sequence_length, 0).
    """
    if target_index is None:
        # Backward-compatible default: read the global set up earlier in the notebook
        target_index = index_Close

    window_ends = range(sequence_length, data.shape[0])
    windows = [data[end - sequence_length:end, :] for end in window_ends]
    targets = [data[end, target_index] for end in window_ends]

    if not windows:
        # Too few rows for even one window: np.array([]) would be 1-D, so return
        # empty arrays that still carry the expected dimensions
        return np.empty((0, sequence_length, data.shape[1])), np.empty((0,))

    return np.array(windows), np.array(targets)

# Build the windowed inputs and single-step targets for both splits
x_train, y_train = partition_dataset(sequence_length, train_data)
x_test, y_test = partition_dataset(sequence_length, test_data)

# Shapes are (samples, time steps, features) for inputs and (samples,) for targets
for features, targets in ((x_train, y_train), (x_test, y_test)):
    print(features.shape, targets.shape)

# Sanity check: the final close value inside the second training window is the
# row that the first window has to predict, so both printed numbers must be equal
print(x_train[1, sequence_length - 1, index_Close])
print(y_train[0])

(1052, 100, 5) (1052,)
(288, 100, 5) (288,)
0.01146016420088214
0.01146016420088214


In [None]:
# Size the recurrent layers from the input window: time steps x features per step
n_timesteps, n_features = x_train.shape[1], x_train.shape[2]
n_neurons = n_timesteps * n_features
print(n_neurons, n_timesteps, n_features)

# Two stacked LSTM layers followed by a small dense head with a single output value
model = Sequential([
    LSTM(n_neurons, return_sequences=True, input_shape=(n_timesteps, n_features)),
    LSTM(n_neurons, return_sequences=False),
    Dense(5),
    Dense(1),
])

# Adam optimizer, mean squared error loss
model.compile(optimizer='adam', loss='mse')

500 100 5


In [None]:
# Training configuration
epochs = 100
batch_size = 64

# NOTE(review): this callback is created but not passed to fit() below, so it has
# no effect on training.
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=1)

# Fit on the training windows; the test windows are evaluated as validation data
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Plot the per-epoch training loss and, when it was recorded, the validation loss.
# Every legend entry is attached to the curve it names, so the legend cannot list a
# curve that is not drawn.
loss_curves = {"Train": history.history["loss"]}
if "val_loss" in history.history:
    # Only present when fit() was given validation data
    loss_curves["Test"] = history.history["val_loss"]

fig, ax = plt.subplots(figsize=(16, 5))
for curve_name, curve_values in loss_curves.items():
    ax.plot(curve_values, label=curve_name)
ax.set_title("Model loss")
ax.set_ylabel("Loss")
ax.set_xlabel("Epoch")
ax.xaxis.set_major_locator(plt.MaxNLocator(epochs))
ax.legend(loc="upper left")
ax.grid(True)
plt.show()

In [None]:
# Predict on the test windows (results are in scaled units)
y_pred_scaled = model.predict(x_test)

# Map predictions and targets back to price units with the Close-only scaler
y_pred = scaler_pred.inverse_transform(y_pred_scaled)
y_test_unscaled = scaler_pred.inverse_transform(y_test.reshape(-1, 1))

# Mean Absolute Error (MAE); the printed label now names the statistic that is
# actually computed (mean, not median)
MAE = mean_absolute_error(y_test_unscaled, y_pred)
print(f'Mean Absolute Error (MAE): {np.round(MAE, 2)}')

# Absolute percentage error of every test sample, shared by the two metrics below
abs_pct_error = np.abs(np.subtract(y_test_unscaled, y_pred) / y_test_unscaled)

# Mean Absolute Percentage Error (MAPE)
MAPE = np.mean(abs_pct_error) * 100
print(f'Mean Absolute Percentage Error (MAPE): {np.round(MAPE, 2)} %')

# Median Absolute Percentage Error (MDAPE)
MDAPE = np.median(abs_pct_error) * 100
print(f'Median Absolute Percentage Error (MDAPE): {np.round(MDAPE, 2)} %')

In [None]:
# Start date for an optional zoomed view.
# NOTE(review): this value is not used anywhere in this cell and lies outside the
# 2008-2014 range of the downloaded data - confirm whether it is still needed.
display_start_date = "2019-01-01"

# Close prices split at the train/test boundary; the training part runs one row
# further, so it shares the boundary row with the test part
close_prices = data_filtered_ext['Close']
train = close_prices[:train_data_len + 1].to_frame(name='y_train')
valid = close_prices[train_data_len:].to_frame(name='y_test')

# Add the predictions and their difference to the actual prices
valid.insert(1, "y_pred", y_pred, True)
valid.insert(1, "residuals", valid["y_pred"] - valid["y_test"], True)
df_union = pd.concat([train, valid])

# Line plot of the training prices, the test prices and the predictions
fig, ax1 = plt.subplots(figsize=(16, 8))
ax1.set_title("y_pred vs y_test")
ax1.set_ylabel("AAPL", fontsize=18)
sns.set_palette(["#090364", "#1960EF", "#EF5919"])
sns.lineplot(
    data=df_union[['y_pred', 'y_train', 'y_test']],
    linewidth=1.0,
    dashes=False,
    ax=ax1,
)

# Bars for the residuals: green where the prediction is above the actual price,
# red otherwise
residuals = df_union["residuals"].dropna()
df_sub = ["#2BC97A" if value > 0 else "#C92B2B" for value in residuals]
ax1.bar(x=residuals.index, height=residuals, width=3, label='residuals', color=df_sub)
plt.legend()
plt.show()

In [None]:
#y_pred

In [None]:
# Show the last rows of the price frame
df.tail()

In [None]:
### Root Mean Squared Error (RMSE) of the un-scaled predictions
# math and mean_squared_error are already imported in the notebook's first cell,
# so they are not imported again here
math.sqrt(mean_squared_error(y_test_unscaled, y_pred))