In [None]:
from prophet import Prophet
from neuralprophet import NeuralProphet
import xgboost as xgb

import sys, os, yaml, ta
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

import yfinance as yf
from dateutil.relativedelta import relativedelta

from stock_forecast_module import *

# Allow up to 200 rows and 200 columns when a DataFrame is rendered in a cell output.
pd.options.display.max_rows = 200
pd.options.display.max_columns = 200

import warnings

# Silence every warning first, then register explicit UserWarning filters for
# the two forecasting packages.
warnings.simplefilter("ignore")
warnings.filterwarnings(action="ignore", category=UserWarning, module="prophet")
warnings.filterwarnings(action="ignore", category=UserWarning, module="neuralprophet")

import logging

# Only surface errors from the forecasting libraries' loggers.
# NOTE(review): confirm these logger names match the ones the installed
# prophet / neuralprophet versions actually log under.
logging.getLogger("prophet").setLevel(logging.ERROR)
logging.getLogger("neuralprophet").setLevel(logging.ERROR)
# Switch the cmdstanpy logger off entirely. The Logger flag is `disabled`;
# assigning to `disable` only creates an unused attribute and silences nothing.
logging.getLogger("cmdstanpy").disabled = True

In [None]:
# Read the run configuration from inputs.yml
with open('inputs.yml', 'r') as file:
    inputs = yaml.safe_load(file)

# Pull the individual settings out of the 'params' section
start_dt, split_dt, real_fc_end_dt, stock_sticker = (
    inputs['params'][key]
    for key in ('start_dt', 'split_dt', 'real_fc_end_dt', 'stock_sticker')
)
# The real-forecast start is always "today" rather than inputs['params']['real_fc_start_dt']
real_fc_start_dt = datetime.today().strftime('%Y-%m-%d')

# Echo the effective configuration
print(
    f"Stock sticker: {stock_sticker}",
    f"Start date: {start_dt}",
    f"Split date: {split_dt}",
    f"Real forecast start date: {real_fc_start_dt}",
    f"Real forecast end date: {real_fc_end_dt}",
    sep="\n",
)

In [None]:
# Fetch the S&P 500, NASDAQ and VIX index histories from start_dt up to today's date
today_date = datetime.today().date()

sp500_data, nasdaq_data, vix_data = (
    yf.download(ticker, start=start_dt, end=today_date.isoformat())
    for ticker in ('^GSPC', '^IXIC', '^VIX')
)

In [None]:
# Fetch AAPL, NVDA and MSFT price histories over the same window as the indices
aapl_data, nvda_data, msft_data = (
    yf.download(ticker, start=start_dt, end=today_date.isoformat())
    for ticker in ('AAPL', 'NVDA', 'MSFT')
)

# Preview both ends of one of the downloads as a sanity check
display(aapl_data.head(), aapl_data.tail())

In [None]:
# Load the configured ticker's data (and the accompanying indicator features)
# via the project helper, then preview the first and last rows.
stock_data, stocks_indicator_features = import_stock_data(stock_sticker, start_dt)
display(stock_data.head(), stock_data.tail())

In [None]:
### DATA PREPROCESSING & FEATURE ENGINEERING ###

# Base modelling frame plus the initial list of exogenous variable names
df, exo_vars = preprocess_data(
    stock_data,
    stocks_additional_features=stocks_indicator_features,
    sticker=stock_sticker,
)

# Stock market data (SP500, NASDAQ, VIX): left-join on the date column
df_market = clean_market_data(sp500_data, nasdaq_data, vix_data)
df = pd.merge(df, df_market, on='ds', how='left')
# every market column except the 'ds' join key becomes an exogenous variable
df_market_cols = list(df_market.columns)
df_market_cols.remove('ds')
exo_vars.extend(df_market_cols)

# Tech stocks data (AAPL, NVDA, MSFT): left-join on the date column
df_tech = clean_tech_stocks_data(aapl_data, nvda_data, msft_data)
df = pd.merge(df, df_tech, on='ds', how='left')
# every tech-stock column except the 'ds' join key becomes an exogenous variable
df_tech_cols = list(df_tech.columns)
df_tech_cols.remove('ds')
exo_vars.extend(df_tech_cols)

# Earnings dates: merged into df, but eps_cols is not added to exo_vars
# (might be helpful for training/predictions - right now, not used)
df_earnings, eps_cols = get_earnings_dates(sticker=stock_sticker)
df = pd.merge(df, df_earnings, on='ds', how='left')

# Federal reserve data: the helper returns the updated frame and variable list
df, exo_vars = get_federal_reserve_data(df, exo_vars, start_date=start_dt)

# Daily news sentiment data, read from the gdelt_news_sentiment folder
df_news_sentiment, exo_vars = extract_historical_news_sentiment(
    exo_vars, folder_path="gdelt_news_sentiment"
)
df = pd.merge(df, df_news_sentiment, on='ds', how='left')

# Split the assembled dataset into train and test sets at split_dt
df, df_train, df_test = create_train_test_sets(df, split_date=split_dt, start_date=start_dt)

print("using exogeneous features:", exo_vars)

In [None]:
# Calendar-day span of the test set, both end dates included
n_days = (
    pd.to_datetime(df_test['ds'].max()) - pd.to_datetime(df_test['ds'].min())
).days + 1

# Built-in seasonalities are switched off; two lags of the target are used
m = NeuralProphet(
    n_lags=2,
    n_changepoints=30,
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
)
# Custom monthly & quarterly seasonality, plus US holidays
m = (
    m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     .add_seasonality(name='quarterly', period=91.25, fourier_order=5)
     .add_country_holidays("US")
)

# Register every exogenous variable as a lagged regressor
# (the future-regressor alternative, m.add_future_regressor(c), is not used)
for c in exo_vars:
    m.add_lagged_regressor(c)

# Fit on the training rows only
metrics = m.fit(df_train[['ds', 'y', *exo_vars]])

In [None]:
# Show the last rows of the test set.
df_test.tail()

In [None]:
# Earliest date present in the test set, as a pandas timestamp.
pd.to_datetime(df_test['ds'].min())

In [None]:
def predict_future_exo_vars_with_neuralprophet(df_train, df_test, exo_vars, lag_vars=None, target_vars=('RSI',)):
    """Forecast exogenous columns across the test period, one NeuralProphet model per column.

    Each column named in ``target_vars`` is fitted on ``df_train`` only. The model
    then predicts the training dates plus a future window that reaches the last
    date found in ``df_test``; weekend dates are dropped before predicting.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training rows. Must contain a datetime ``ds`` column and every column
        listed in ``target_vars``.
    df_test : pandas.DataFrame
        Hold-out rows. Only its ``ds`` column is read (to size the forecast
        horizon and to build the date column of the result).
    exo_vars : list
        Not used by this function; kept so existing callers keep working.
    lag_vars : optional
        Not used by this function; kept so existing callers keep working.
    target_vars : str or iterable of str, default ``('RSI',)``
        Column(s) to forecast. The default reproduces the previously
        hard-coded behaviour.

    Returns
    -------
    tuple
        ``(df_exo_vars_forecast, m, forecast)`` where ``df_exo_vars_forecast``
        holds ``ds`` (all train + test dates) and one forecast column per
        target, and ``m`` / ``forecast`` are the model and raw prediction
        frame of the LAST target processed.

    Raises
    ------
    ValueError
        If ``target_vars`` is empty.
    """
    if isinstance(target_vars, str):
        target_vars = [target_vars]
    target_vars = list(target_vars)
    if not target_vars:
        raise ValueError("target_vars must contain at least one column name")

    df = pd.concat([df_train, df_test], axis=0, ignore_index=True)
    df_exo_vars_forecast = df[['ds']].copy(deep=True)

    # The future frame is built by extending the training data, so the horizon
    # is the number of calendar days between the last training date and the
    # last test date. Measuring from the split date instead (as a notebook
    # global) stops short whenever non-trading days separate the two.
    last_train_dt = pd.to_datetime(df_train['ds'].max())
    last_test_dt = pd.to_datetime(df_test['ds'].max())
    n_days_to_forecast = (last_test_dt - last_train_dt).days

    # making raw forecasts for exogenous variables
    for var in target_vars:
        print(var)
        # NOTE(review): no n_lags is passed here, yet n_forecasts is set to the
        # full horizon - confirm how NeuralProphet treats that combination.
        m = NeuralProphet(
            n_forecasts=n_days_to_forecast,
            n_changepoints=20,
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False,
        )
        # Add monthly, quarterly & biannual seasonality, plus US holidays
        m = m.add_seasonality(name='monthly', period=30.5, fourier_order=9)
        m = m.add_seasonality(name='quarterly', period=91.25, fourier_order=9)
        m = m.add_seasonality(name='biannually', period=182.5, fourier_order=9)
        m = m.add_country_holidays("US")

        # The model is fitted on a two-column frame: dates and the series renamed to 'y'
        df_train_var = df_train[['ds', var]].rename(columns={var: 'y'})
        m.fit(df_train_var, freq='D')

        future = m.make_future_dataframe(df_train_var, n_historic_predictions=True, periods=n_days_to_forecast)
        # keep Monday-Friday only
        future = future[future['ds'].dt.dayofweek < 5]
        forecast = m.predict(future)

        # Attach this variable's forecast under its own column name
        df_exo_vars_forecast = df_exo_vars_forecast.merge(
            forecast[['ds', 'yhat1']], on='ds', how='left'
        ).rename(columns={'yhat1': var})
    return df_exo_vars_forecast, m, forecast


# Forecast the exogenous series over the test period. `model` and `forecast`
# are the fitted model and raw prediction frame of the last series processed
# inside the function.
df_exo_vars_forecast, model, forecast = predict_future_exo_vars_with_neuralprophet(df_train, df_test, exo_vars, lag_vars=None)

In [None]:
# Move every forecast date forward by one calendar day.
# NOTE(review): this overwrites df_exo_vars_forecast['ds'] in place, so each
# re-run of the cell shifts the dates by another day. The reason for the
# one-day offset is not visible in this notebook - confirm it is intended.
df_exo_vars_forecast['ds'] = df_exo_vars_forecast['ds'] + timedelta(days=1)

In [None]:
# def unroll_forecasts(df_forecast, n_forecasts=3, freq='D'):
#     all_rows = []
#     for i in range(1, n_forecasts + 1):
#         df_i = df_forecast[['ds', f'yhat{i}']].copy()
#         df_i['ds'] = pd.to_datetime(df_i['ds']) + pd.to_timedelta(i, unit=freq.lower())
#         df_i.rename(columns={f'yhat{i}': 'yhat'}, inplace=True)
#         all_rows.append(df_i)
#     df_out = pd.concat(all_rows).sort_values('ds').reset_index(drop=True)
#     return df_out

# df2 = unroll_forecasts(df2[['ds']+[f'yhat{t}' for t in range(1,92)]], n_forecasts=91)

In [None]:
# Overlay the forecast of each exogenous series (red) on its observed values (blue)
for col in ['RSI']:  # 'SentimentScore' is currently left out
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(df['ds'], df[col], color='blue', label=f'{col}: Actual')
    ax.plot(
        df_exo_vars_forecast['ds'],
        df_exo_vars_forecast[col],
        color='red',
        label=f'{col}: Forecast',
    )
    ax.set_xlabel('Date')
    ax.legend()
    plt.show()

In [None]:
# Forecast RSI from the split date onward, weekdays only.
# split_dt is coerced with pd.to_datetime, as the prediction and plotting cells
# already do, so the comparison does not depend on how the YAML value was parsed.
df_tmp = df_exo_vars_forecast[df_exo_vars_forecast['ds'] >= pd.to_datetime(split_dt)].copy(deep=True)
df_tmp = df_tmp[df_tmp['ds'].dt.dayofweek < 5]
df_tmp = df_tmp.rename(columns={'RSI':'RSI_predict'})

# Observed RSI over the same period, aligned to the forecast dates
df_tmp1 = df[df['ds'] >= pd.to_datetime(split_dt)].copy(deep=True)
df_tmp1 = df_tmp1.rename(columns={'RSI':'RSI_actual'})
df_tmp  = df_tmp[['ds','RSI_predict']].merge(df_tmp1[['ds','RSI_actual']], on='ds', how='left')
# only score dates where both a forecast and an observation exist
df_tmp = df_tmp.dropna(subset=['RSI_predict','RSI_actual'])

actual_test = df_tmp['RSI_actual']
predicted_test = df_tmp['RSI_predict']

mape, rmse, mae = calculate_performance_scores(actual_test, predicted_test)
# The value printed is `mape`, so the label says "mean" rather than "max".
# NOTE(review): assumes calculate_performance_scores returns the mean absolute
# percentage error as a fraction in that position - confirm against the helper.
print('mean absolute percentage error:', np.round(mape*100.0, 2),'%')

In [None]:
# NOTE(review): debugging stop. sys.exit() raises SystemExit, so a top-to-bottom
# run halts here and the cells below only execute when run by hand. Remove once
# the exogenous-variable experiment above is settled.
sys.exit()

In [None]:
# Prediction input: dates, target and all exogenous columns for the full period
future = df[['ds', 'y', *exo_vars]].copy(deep=True)
# Blank the exogenous values from the split date onward. Using .loc keeps this
# a single assignment on `future` itself rather than on a filtered copy.
future.loc[future['ds'].ge(pd.to_datetime(split_dt)), exo_vars] = np.nan
forecast = m.predict(future)

In [None]:
# Inspect the last 10 rows of the prediction input frame.
future.tail(10)

In [None]:
# NOTE(review): second debugging stop. sys.exit() raises SystemExit, so the
# plotting and scoring cells below are skipped on a top-to-bottom run.
sys.exit()

In [None]:
# Split the prediction frame at the split date: df1 = before, df2 = from it onward
df1 = forecast.loc[forecast['ds'] < split_dt]
df2 = forecast.loc[forecast['ds'] >= split_dt]

# Actuals in blue, forecasts in red; solid = train period, dashed = test period
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_train['ds'], df_train['y'], color='blue', linestyle='-',  label='Actual (TrainSet)')
ax.plot(df_test['ds'],  df_test['y'],  color='blue', linestyle='--', label='Actual (TestSet)')
ax.plot(df1['ds'],      df1['yhat1'],  color='red',  linestyle='-',  label='Forecast (TrainSet)')
ax.plot(df2['ds'],      df2['yhat1'],  color='red',  linestyle='--', label='Forecast (TestSet)')
ax.axvline(pd.to_datetime(split_dt), color='k', linestyle='-.', label='Split Date')
# Zoom to the window from one year before the split date to the last date in df
ax.set_xlim(
    left=pd.to_datetime(split_dt) - relativedelta(years=1),
    right=pd.to_datetime(df['ds'].max()),
)
ax.legend()

In [None]:
# Train-period score. Rows with no prediction or no observed value are dropped,
# so the metric helper never receives NaN.
df1 = forecast[forecast['ds'] < pd.to_datetime(split_dt)].dropna(subset=['y', 'yhat1'])
actual_test   = df1['y'].values
predicted_test= df1['yhat1'].values
mape, rmse, mae = calculate_performance_scores(actual_test, predicted_test)
# The value printed is `mape`, so the label says "mean" rather than "max".
# NOTE(review): assumes calculate_performance_scores returns the mean absolute
# percentage error as a fraction in that position - confirm against the helper.
print('train mean absolute percentage error:', np.round(mape*100.0, 2),'%')

# Test-period score, filtered the same way as the train period
df2 = forecast[forecast['ds'] >= pd.to_datetime(split_dt)].dropna(subset=['y', 'yhat1'])
actual_test   = df2['y'].values
predicted_test= df2['yhat1'].values
mape, rmse, mae = calculate_performance_scores(actual_test, predicted_test)
print('test mean absolute percentage error:', np.round(mape*100.0, 2),'%')

In [None]:
# Inspect the last rows of the prediction input frame.
future.tail()