#Stock Price Forecasting using ARIMA model
* We used 3 stocks to test our forecasting model- EXX5, IQQE and IUS4.
* Data sets are collected from [Yahoo Finance](https://finance.yahoo.com/)
* Time Window: May 2021 to May 2022

#Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import scipy.stats as scs
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from scipy.stats import normaltest
from statsmodels.tsa.stattools import acf,pacf
from pmdarima.arima import auto_arima
import scipy.interpolate as sci
import scipy.optimize as sco
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import scipy.optimize as sco
import ta
from ta.volatility import AverageTrueRange

#Importing Data Sets

#Formatting Data Sets

##Setting Date as the index of the DataFrame.

In [3]:
df = pd.read_csv("../src/daily_data/Si_230101_240831.csv", sep=";")
# df = pd.read_excel("../src/stock_data.xlsx", sheet_name="Si")

In [4]:
df.rename(
    columns = {col: col.replace("<", "").replace(">", "") for col in df.columns},
    inplace=True
)

In [5]:
df.head()

Unnamed: 0,TICKER,PER,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOL
0,Si,D,230103,0,70105,72280,69873,72236,920700
1,Si,D,230104,0,72227,72600,71310,72530,846368
2,Si,D,230105,0,72510,72844,71675,72563,708928
3,Si,D,230106,0,72530,72707,71560,72108,848286
4,Si,D,230109,0,72100,72100,70001,70350,1354959


In [6]:
df["DATE"] = pd.to_datetime(df["DATE"], format="%y%m%d")

In [7]:
df["DATE"].min()

Timestamp('2023-01-03 00:00:00')

In [8]:
test_start_index = df.loc[df["DATE"] == "2024-01-03"].index[0]

In [11]:
# обучение модели и предикт на 1 день

train_data = df.loc[df["DATE"] < "2024-05-01" ]
test_data = df.loc[df["DATE"] >= "2024-05-01"]
# train_period = 44000
# predict_periods = 168

# for i in range(test_start_index, len(df), predict_periods):
train_data = train_data["CLOSE"]

scaler = MinMaxScaler(feature_range=(0, 1))
train_values = scaler.fit_transform(np.array(train_data).reshape(-1, 1))
model = auto_arima(
    train_values,
    trace=True,
    error_action='ignore',
    suppress_warnings=True
)
forecast = model.predict(n_periods=len(test_data))
forecast = scaler.inverse_transform(forecast.reshape(-1, 1))
test_data["prediction"] = forecast

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-1522.154, Time=1.13 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-1510.706, Time=0.04 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-1514.902, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-1514.235, Time=0.15 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-1510.702, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-1514.599, Time=0.82 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-1512.870, Time=0.42 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=-1519.336, Time=0.50 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=-1518.890, Time=0.79 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-1513.034, Time=0.16 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=-1520.883, Time=0.42 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=-1521.308, Time=0.43 sec
 ARIMA(3,1,3)(0,0,0)[0] intercept   : AIC=-1517.597, Time=0.53 sec
 ARIMA(2,1,2)(0,0,0)[0]             : AIC=-1524.001, Time=0.34 sec
 ARIMA(1,1,2)(0,0,0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data["prediction"] = forecast


In [12]:
mae = mean_absolute_error(test_data["CLOSE"], test_data["prediction"])
rmse = np.sqrt(mean_squared_error(test_data["CLOSE"], test_data["prediction"]))
print(mae)
print(rmse)

5428.567054199845
5840.3644553402755


In [13]:
test_data["prev_prediction"] = test_data["prediction"].shift()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data["prev_prediction"] = test_data["prediction"].shift()


In [14]:
test_data

Unnamed: 0,TICKER,PER,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOL,prediction,prev_prediction
338,Si,D,2024-05-02,0,94167,94167,93121,93450,752635,94237.083986,
339,Si,D,2024-05-03,0,93451,93480,93047,93193,617359,94227.495948,94237.083986
340,Si,D,2024-05-06,0,93263,93465,92345,92846,677472,94205.216481,94227.495948
341,Si,D,2024-05-07,0,92848,92990,92400,92799,532861,94250.468081,94205.216481
342,Si,D,2024-05-08,0,92902,93475,92685,93199,778232,94198.904156,94250.468081
...,...,...,...,...,...,...,...,...,...,...,...
418,Si,D,2024-08-26,0,88694,89651,88213,89113,659823,94229.696712,94221.526045
419,Si,D,2024-08-27,0,89105,89789,88760,89600,526971,94216.695203,94229.696712
420,Si,D,2024-08-28,0,89600,89967,88934,89412,522477,94229.800571,94216.695203
421,Si,D,2024-08-29,0,89413,90125,89303,89977,653571,94221.195442,94229.800571


In [15]:
test_data.shape

(85, 11)

In [16]:
test_data.loc[(
    (test_data["prediction"] > test_data["prev_prediction"]) &
    (test_data["CLOSE"] > test_data["OPEN"])
)].shape[0]

15

In [17]:
test_data.loc[(
    (test_data["prediction"] < test_data["prev_prediction"]) &
    (test_data["CLOSE"] < test_data["OPEN"])
)].shape[0]

22

In [90]:
def get_atr(data, window=14):
    atr = AverageTrueRange(high=data["HIGH"], low=data["LOW"], close=data["CLOSE"], window=window,).average_true_range()
    return atr

In [91]:
myatr = get_atr(df)
df["ATR"] = round(myatr, 2)

In [None]:
df

In [93]:
test_df = df.loc[df["DATE"] >= "2024-07-01"].reset_index(drop=True)

In [None]:
test_df.head()

In [95]:
test_df.to_csv(f"test_data_{train_period}.csv", sep=";", encoding="cp1251", index=False)