In [None]:
from prophet import Prophet
from neuralprophet import NeuralProphet
import xgboost as xgb

import sys, os, yaml, ta
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

import yfinance as yf
from dateutil.relativedelta import relativedelta

from stock_forecast_module import *

import warnings

# Allow up to 200 rows and 200 columns before pandas truncates a displayed frame.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 200)

# Blanket-ignore all warnings, then register the library-specific UserWarning
# filters in the same order as before (prophet first, neuralprophet second).
warnings.filterwarnings("ignore")
for quiet_module in ("prophet", "neuralprophet"):
    warnings.filterwarnings("ignore", category=UserWarning, module=quiet_module)

import logging

# Only surface errors from the forecasting libraries.
logging.getLogger("prophet").setLevel(logging.ERROR)
logging.getLogger("neuralprophet").setLevel(logging.ERROR)
# NeuralProphet registers its loggers under the "NP" namespace (e.g. "NP.forecaster")
# rather than under its package name, so quiet that root logger as well.
logging.getLogger("NP").setLevel(logging.ERROR)
# The flag the logging module actually checks is `disabled`; the previous
# `.disable = True` only created an unused attribute, so cmdstanpy kept logging.
logging.getLogger("cmdstanpy").disabled = True

In [None]:
# Read the run configuration from inputs.yml
with open('inputs.yml', 'r') as file:
    inputs = yaml.safe_load(file)

# Pull the individual settings out of the 'params' section
start_dt, split_dt, real_fc_end_dt, stock_sticker = (
    inputs['params'][key]
    for key in ('start_dt', 'split_dt', 'real_fc_end_dt', 'stock_sticker')
)
# The real forecast starts today (YYYY-MM-DD); the commented-out alternative
# was inputs['params']['real_fc_start_dt'].
real_fc_start_dt = datetime.today().date().isoformat()

# Echo the effective settings, one per line
print(
    f"Stock sticker: {stock_sticker}",
    f"Start date: {start_dt}",
    f"Split date: {split_dt}",
    f"Real forecast start date: {real_fc_start_dt}",
    f"Real forecast end date: {real_fc_end_dt}",
    sep="\n",
)

In [None]:
# Fetch S&P 500, NASDAQ Composite and VIX index data from start_dt,
# passing today's date (YYYY-MM-DD) as the `end` argument.
today_date = datetime.today().date()

sp500_data, nasdaq_data, vix_data = (
    yf.download(index_symbol, start=start_dt, end=today_date.strftime('%Y-%m-%d'))
    for index_symbol in ('^GSPC', '^IXIC', '^VIX')
)

In [None]:
# Fetch AAPL, NVDA and MSFT price data with the same start/end arguments
# as the market indices (relies on `today_date` from the previous cell).
aapl_data, nvda_data, msft_data = (
    yf.download(ticker, start=start_dt, end=today_date.strftime('%Y-%m-%d'))
    for ticker in ('AAPL', 'NVDA', 'MSFT')
)

# Show both ends of the AAPL frame
display(aapl_data.head(), aapl_data.tail())

In [None]:
# Load the configured ticker's data from start_dt; the helper also returns
# the indicator features that are passed on to preprocessing.
stock_data, stocks_indicator_features = import_stock_data(stock_sticker, start_dt)
# Show both ends of the frame
display(stock_data.head(), stock_data.tail())

In [None]:
### DATA PREPROCESSING & FEATURE ENGINEERING ###
# Builds the modelling frame `df` step by step: every external source is
# left-merged onto `df` via the 'ds' column, and `exo_vars` collects the names
# of the columns intended as exogenous features.

# base frame and initial exogenous variable list for the selected ticker
df, exo_vars = preprocess_data(stock_data, 
                               stocks_additional_features=stocks_indicator_features, 
                               sticker=stock_sticker)

# get stock market data (SP500, NASDAQ, VIX)
df_market = clean_market_data(sp500_data, nasdaq_data, vix_data)
df = df.merge(df_market, on='ds', how='left')
# add market data columns to exogenous variables
df_market_cols = df_market.columns.tolist()
df_market_cols.remove('ds')  # 'ds' is the merge key, not a feature
exo_vars.extend(df_market_cols)

# get tech stocks data (AAPL, NVDA, MSFT)
df_tech = clean_tech_stocks_data(aapl_data, nvda_data, msft_data)
df = df.merge(df_tech, on='ds', how='left')
# add tech stock columns to exogenous variables
df_tech_cols = df_tech.columns.tolist()
df_tech_cols.remove('ds')  # 'ds' is the merge key, not a feature
exo_vars.extend(df_tech_cols)

# get earnings dates (might be helpful for training/predictions - right now, not used)
# NOTE: eps_cols is not added to exo_vars in this cell.
df_earnings, eps_cols = get_earnings_dates(sticker=stock_sticker)
df = df.merge(df_earnings, on='ds', how='left')

# get federal reserve data (the helper returns both the updated df and exo_vars)
df, exo_vars = get_federal_reserve_data(df, exo_vars, start_date=start_dt)

# get daily news sentiment data (the helper returns the updated exo_vars as well)
df_news_sentiment, exo_vars = extract_historical_news_sentiment(exo_vars, folder_path="gdelt_news_sentiment")
df = df.merge(df_news_sentiment, on='ds', how='left')

# create train and test sets from overall dataset
df, df_train, df_test = create_train_test_sets(df, split_date=split_dt, start_date=start_dt)

print("using exogeneous features:", exo_vars)

In [None]:
# Preview the first rows of the combined frame returned by create_train_test_sets
df.head()

In [None]:
# Number of calendar days spanned by the test set, both endpoints included
n_days = 1 + (
    pd.to_datetime(df_test['ds'].max()) - pd.to_datetime(df_test['ds'].min())
).days

# Only the built-in yearly seasonality is enabled.
# Options left switched off: n_forecasts=n_days, n_changepoints=10, n_lags=5
m = NeuralProphet(
    daily_seasonality=False,
    weekly_seasonality=False,
    yearly_seasonality=True,
)
# Register the custom monthly and quarterly seasonalities (fourier_order 5 each)
for cycle_name, cycle_period in (('monthly', 30.5), ('quarterly', 91.25)):
    m.add_seasonality(name=cycle_name, period=cycle_period, fourier_order=5)

# Fit on the training slice, using only the date and target columns
metrics = m.fit(df_train[['ds', 'y']])

# Predict over every row of `df` (an independent copy of its 'ds'/'y' columns)
future = df[['ds', 'y']].copy()
forecast = m.predict(future)

In [None]:
# Switch the model's plotting backend to "plotly-static", then plot the forecast frame
m.set_plotting_backend("plotly-static")
m.plot(forecast)

In [None]:
# Inspect the last rows of the prediction frame
forecast.tail()

In [None]:
# NOTE(review): this cell repeats the add_seasonality calls that were already
# applied to `m` before it was fitted earlier in the notebook. Re-registering the
# same seasonality names on the fitted model looks like a leftover and may
# raise — confirm, then remove this cell or build a fresh NeuralProphet
# instance before these calls.
# Add monthly seasonality
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)

# Add quarterly seasonality
m.add_seasonality(name='quarterly', period=91.25, fourier_order=5)