# Objective

The objective of this notebook is to port and refine all functions from `spy_stock_eda.ipynb` and build them into the `arima_tools.py` module. The notebook itself is a scratch space for building and testing those functions.

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
import src.tda_api_tools as tda
from src.arima_tools import arima_tools
from statsmodels.tsa.stattools import acf, pacf

import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
# Silence noisy model-fitting warnings so the cell outputs below stay readable.
# NOTE: the UserWarning filter is broad — it hides every UserWarning raised in
# this kernel, not only the ones coming from statsmodels.
for ignored_category in (ConvergenceWarning, UserWarning):
    warnings.simplefilter('ignore', category=ignored_category)


## Test Data

In [2]:
# Load the daily SPY price history from the local CSV file.
symbol = "SPY"
file_path = "../data/{}_daily01.csv".format(symbol)

# Index the frame by its "datetime" column, then convert that index from
# strings to real timestamps so time-series operations work on it.
data_df = pd.read_csv(file_path).set_index("datetime")
data_df.index = pd.to_datetime(data_df.index)

# Preview the first rows as a sanity check on the load.
data_df.head()

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-10-14 01:00:00,200.18,200.87,198.94,199.29,99106161
2015-10-15 01:00:00,200.08,202.36,199.64,202.35,134142195
2015-10-16 01:00:00,202.83,203.29,201.92,203.27,114580052
2015-10-19 01:00:00,202.5,203.37,202.13,203.37,76523897
2015-10-20 01:00:00,202.85,203.84,202.5471,203.09,78448484


## Data Manipulation


In [3]:
# Instantiate the helper class imported from src.arima_tools; the cells below
# exercise its methods one at a time.
atl = arima_tools()

In [4]:
# Moving averages of the "close" column; the result has MA(10) and MA(100) columns.
# NOTE(review): the displayed result starts at 2016-03-08 rather than 2015-10-14,
# so rows before the longest window is filled appear to be dropped — TODO confirm
# in arima_tools.
atl.moving_averages(data_df, [10, 100], "close")

Unnamed: 0_level_0,close,MA(10),MA(100)
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-03-08 01:00:00,198.40,197.36942,200.046192
2016-03-09 01:00:00,199.38,197.98742,200.047092
2016-03-10 01:00:00,199.54,198.38742,200.018992
2016-03-11 01:00:00,202.76,199.15450,200.013892
2016-03-14 01:00:00,202.50,200.04850,200.005192
...,...,...,...
2020-12-24 01:00:00,369.00,368.38100,347.275400
2020-12-28 01:00:00,372.17,368.96800,347.676000
2020-12-29 01:00:00,371.46,369.64800,348.047300
2020-12-30 01:00:00,371.99,369.88800,348.421500


In [5]:
# Returns the frame with an extra `<column>_lag_1` column per input column.
# NOTE(review): despite the "_lag_1" suffix, the displayed values are first
# differences (current row minus previous row, e.g. open 200.08 - 200.18 = -0.10),
# not lagged values; the first row is NaN because it has no predecessor.
atl.differences(data_df)

Unnamed: 0_level_0,open,high,low,close,volume,open_lag_1,high_lag_1,low_lag_1,close_lag_1,volume_lag_1
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-10-14 01:00:00,200.18,200.87,198.9400,199.29,99106161,,,,,
2015-10-15 01:00:00,200.08,202.36,199.6400,202.35,134142195,-0.10,1.49,0.7000,3.06,35036034.0
2015-10-16 01:00:00,202.83,203.29,201.9200,203.27,114580052,2.75,0.93,2.2800,0.92,-19562143.0
2015-10-19 01:00:00,202.50,203.37,202.1300,203.37,76523897,-0.33,0.08,0.2100,0.10,-38056155.0
2015-10-20 01:00:00,202.85,203.84,202.5471,203.09,78448484,0.35,0.47,0.4171,-0.28,1924587.0
...,...,...,...,...,...,...,...,...,...,...
2020-12-24 01:00:00,368.08,369.03,367.4500,369.00,26457853,-0.20,-0.59,0.2328,1.43,-19743549.0
2020-12-28 01:00:00,371.74,372.59,371.0700,372.17,39000402,3.66,3.56,3.6200,3.17,12542549.0
2020-12-29 01:00:00,373.81,374.00,370.8300,371.46,53680451,2.07,1.41,-0.2400,-0.71,14680049.0
2020-12-30 01:00:00,372.34,373.10,371.5700,371.99,49455259,-1.47,-0.90,0.7400,0.53,-4225192.0


## Stationarity with ADF and Integration Order Determination

In [6]:
# ADF stationarity test on the raw (undifferenced) closing prices.
# In the recorded output the p-value (0.869) is well above common significance
# levels and "reject null hypothesis" is False, so the raw series is not shown
# to be stationary.
atl.adf_test(data_df.close)

series name                       close
Test Statistic                -0.607381
p-value                        0.869305
#Lags Used                            9
Number of Observations Used        1304
reject null hypothesis            False
Critical Value (1%)            -3.43537
Critical Value (5%)            -2.86376
Critical Value (10%)           -2.56795
dtype: object

In [9]:
# Presumably returns the number of differencing steps needed before the series
# passes the stationarity test (the integration order d) — TODO confirm in
# arima_tools. The recorded result for the closing prices is 1.
atl.find_lag_stationary(data_df.close)

1

## PACF and AR Order Determination

In [7]:
# Presumably selects the AR order (p) from the partial autocorrelation
# function — TODO confirm in arima_tools.
# NOTE(review): the whole frame is passed, but the recorded result only has a
# 'close' key; verify whether the other columns are skipped intentionally.
atl.get_pacf_lag(data_df)

{'close': 2}