In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import datetime
from statsmodels.tsa.arima.model import ARIMA
import yfinance as yf
from statsmodels.tools.sm_exceptions import ValueWarning, ConvergenceWarning
import warnings

In [2]:
# Suppress the warning categories that are expected noise in this notebook.
for _ignored_category in (
    # market data is not always available on weekdays, so we cannot set a
    # frequency on the index
    ValueWarning,
    # the best parameters are found by brute force, so not every model converges
    ConvergenceWarning,
    # not every set of starting parameters is going to be valid either
    UserWarning,
):
    warnings.filterwarnings('ignore', category=_ignored_category)

In [3]:
# --- Notebook configuration ---

# Ticker symbols handed to yfinance when the price data is downloaded
TICKERS = ['^GSPC', 'COST', 'AC.TO', 'BTC-USD']
# Initial investment amount (not read by any of the cells visible here)
INITIAL_INVESTMENT = 100

# Training window
TRAIN_START_DATE, TRAIN_END_DATE = '2023-01-01', '2023-12-31'

# Test window: from the start of 2024 up to the day the notebook is executed
TEST_START_DATE = '2024-01-01'
TEST_END_DATE = f'{datetime.today():%Y-%m-%d}'

In [None]:
# prepare data
def _download_close_prices(ticker: str, start: str, end: str) -> pd.DataFrame:
    ''' Download prices for one ticker and keep only the close, named `price`.

        Args:
            ticker (str): ticker symbol passed to yfinance.
            start (str): first date of the window, 'YYYY-MM-DD'.
            end (str): last date of the window (inclusive), 'YYYY-MM-DD'.

        Returns:
            pd.DataFrame: datetime-indexed frame with a single `price` column.
    '''
    # yfinance treats `end` as exclusive, so ask for one extra day; otherwise the
    # configured end date itself is silently left out (this matters for markets
    # that trade every day, e.g. BTC-USD on 2023-12-31)
    exclusive_end = (pd.Timestamp(end) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

    # drop incomplete rows before narrowing the frame, so a row with a gap in
    # any downloaded column is excluded
    raw = yf.download(ticker, start=start, end=exclusive_end).dropna()

    # select the one column we need rather than dropping a fixed list of others:
    # columns such as 'Adj Close' are not guaranteed to be present
    prices = raw[['Close']]

    # NOTE(review): newer yfinance releases appear to return two-level
    # (field, ticker) columns even for a single ticker; keep only the field
    # level so `price` is a plain column. Confirm against the installed version.
    if isinstance(prices.columns, pd.MultiIndex):
        prices = prices.set_axis(prices.columns.get_level_values(0), axis=1)

    prices = prices.rename(columns={'Close': 'price'})

    # set index as datetime
    prices.index = pd.to_datetime(prices.index)
    return prices


train_data, test_data = {}, {}

for ticker in TICKERS:
    train_data[ticker] = _download_close_prices(ticker, TRAIN_START_DATE, TRAIN_END_DATE)
    test_data[ticker] = _download_close_prices(ticker, TEST_START_DATE, TEST_END_DATE)

# NOTE: no frequency is inferred or set on the index (asfreq); don't do that
# when you have daily prices.

In [4]:
def run_simulation(data: pd.Series, thresh: float, strategy: str, buy_cond: str, sell_cond: str) -> dict:
    ''' Simulate an investment strategy over the given data.

        NOTE: this function is an unfinished stub. The body only creates an
        empty table of buy conditions and then ends, so it currently returns
        None instead of the dictionary described under "Returns". None of the
        arguments are read yet.

        Args:
            data (pd.Series): The data to run the simulation on.
            thresh (float): The threshold to use for the strategy.
            strategy (str): The strategy to simulate. TODO: document the
                accepted values once the body is implemented.
            buy_cond (str): The buy condition to use for the strategy.
            sell_cond (str): The sell condition to use for the strategy.

        Returns:
            dict: intended layout of the results:
                {'returns': float, 'transactions': list}
    '''
    # TODO: no buy conditions have been defined yet
    buy_conditions = dict()

In [13]:
def forecast_data(data: pd.Series, model: tuple) -> float:
    ''' One-step-ahead ARIMA forecast.

        Fits an ARIMA model with the given order to `data` and returns the
        first (and only) value of a one-step forecast.

        Args:
            data (pd.Series): time series the model is fitted on
            model (tuple): value passed to ARIMA as `order`

        Returns:
            float: the forecasted value
    '''
    return float(
        ARIMA(data, order=model)
        .fit()
        .forecast(steps=1)  # a single step ahead
        .iloc[0]            # the only element of that forecast
    )

In [14]:
def optimize_order(data: pd.Series, verbose: bool = True) -> tuple:
    ''' Finds optimal order for ARIMA model with minimum AIC

    Brute-force search: every candidate (p, d, q) is fitted to `data` and the
    order with the lowest AIC is returned. A candidate whose fit raises is
    skipped, so one bad order does not abort the whole search.

    Args:
        data (pd.Series): training data to fit ARIMA model
        verbose (bool): print the AIC of every fit, every skipped order and
            the final result (default True)

    Returns:
        tuple: optimal order for ARIMA(p, d, q) model

    Raises:
        ValueError: if no candidate order produced a usable AIC
    '''
    # generate candidate orders
    ar_terms = range(0, 5)
    diff_terms = range(0, 2)
    ma_terms = range(0, 5)
    # orders left out of the search; (0, 2, 1) can only come up if the
    # differencing range above is widened
    ignore_orders = {(0, 0, 0), (0, 1, 0), (0, 2, 1)}

    orders = [
        (p, d, q)
        for p in ar_terms
        for d in diff_terms
        for q in ma_terms
        if (p, d, q) not in ignore_orders
    ]

    # fit ARIMA models
    min_order = None
    min_aic = float('inf')
    last_error = None

    for order in orders:
        try:
            fit = ARIMA(data, order=order).fit()
        except (ValueError, np.linalg.LinAlgError) as err:
            # remember the failure so it can be reported if nothing fits at all
            last_error = err
            if verbose:
                print(f'Skipping order: {order} ({type(err).__name__}: {err})')
            continue

        if verbose:
            print(f'Fit model of order: {order} AIC: {fit.aic}')

        # a NaN AIC compares False here, so it can never be selected
        if fit.aic < min_aic:
            min_aic = fit.aic
            min_order = order

    if min_order is None:
        # fail loudly instead of handing None to the caller as an "order"
        raise ValueError('no ARIMA order produced a usable AIC for the given data') from last_error

    if verbose:
        print(f'Minimum AIC: {min_aic} with order: {min_order}')
    return min_order