In [None]:
import yfinance as yf
import pandas as pd

# Download NVDA daily data. yfinance documents `end` as exclusive, so the last
# row requested here is the final trading day before March 1, 2025.
nvda_data = yf.download("NVDA", start="2020-01-01", end="2025-03-01", progress=False)

# A failed request (network error, rate limit, unknown ticker) is reported by
# yfinance through logging and an empty frame rather than an exception. Stop
# here so an empty CSV is never written and announced as a successful download.
if nvda_data.empty:
    raise RuntimeError(
        "yfinance returned no rows for NVDA; check the network connection and the date range."
    )

# Some yfinance versions return MultiIndex columns even for a single ticker.
# Keep only the first level (presumably the price field names such as 'Close')
# so later cells can use plain column labels.
if isinstance(nvda_data.columns, pd.MultiIndex):
    nvda_data.columns = nvda_data.columns.get_level_values(0)

# Export the data to a CSV file (written before the index reset, so the
# index is saved as the first CSV column).
csv_filename = "NVDA_daily_prices.csv"
nvda_data.to_csv(csv_filename)
print(f"Data successfully downloaded and saved to '{csv_filename}'.")

# Reset the index so that the Date becomes a regular column; the later cells
# filter and merge on nvda_data['Date'].
nvda_data = nvda_data.reset_index()
print("Data head (first 5 rows):")
print(nvda_data.head())

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Define training and test date ranges (all bounds are inclusive below).
train_end = "2024-12-31"
test_start = "2025-01-01"
# Kept inside the downloaded range: the download cell stops before 2025-03-01,
# so a later bound would only be truncated silently. This matches the
# "Jan-Feb 2025" window used in the plot titles.
test_end = "2025-02-28"

# Create DataFrames for training and test periods. `.copy()` makes the splits
# independent frames so columns can be added without touching nvda_data.
df_train = nvda_data[nvda_data['Date'] <= train_end].copy()
df_test = nvda_data[(nvda_data['Date'] >= test_start) & (nvda_data['Date'] <= test_end)].copy()

# Fail early with a readable message instead of an obscure estimator error
# when the downloaded data does not cover one of the windows.
if df_train.empty or df_test.empty:
    raise ValueError(
        f"Empty split: {len(df_train)} training rows, {len(df_test)} test rows. "
        "Check that the downloaded data covers both date ranges."
    )

print("Training period:", df_train['Date'].min(), "to", df_train['Date'].max())
print("Test period:", df_test['Date'].min(), "to", df_test['Date'].max())

# Create a numeric variable 't' representing calendar days since a baseline
# (here, 2020-01-01). The same baseline is used for both splits so the fitted
# slope extrapolates consistently into the test window.
baseline = pd.to_datetime("2020-01-01")
for split in (df_train, df_test):
    split['t'] = (pd.to_datetime(split['Date']) - baseline).dt.days

# Fit a simple linear regression (price as a straight line in time) using the
# "Close" price as the target.
lr_model = LinearRegression()
lr_model.fit(df_train[['t']], df_train['Close'])

# Generate predictions for the test set.
predictions = lr_model.predict(df_test[['t']])
print("Predictions shape before flattening:", predictions.shape)
# Force the predictions to be 1D so they can be stored as a single column.
predictions = predictions.ravel()
print("Predictions shape after flattening:", predictions.shape)

# Add the predictions to the test DataFrame.
df_test['Excel_Predicted'] = predictions

# Compute the Mean Squared Error (MSE) for the simulated Excel model.
mse_excel = mean_squared_error(df_test['Close'], df_test['Excel_Predicted'])
print("Simulated Excel Model MSE:", mse_excel)

# Debug: Show the column names of df_test to confirm that 'Excel_Predicted' is present.
print("df_test columns after prediction:", df_test.columns.tolist())

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Show the inputs of the residual calculation before anything is added.
print("df_test BEFORE calculating residuals:")
print(df_test[['Date', 'Close', 'Excel_Predicted']].head())

# Extract actual and predicted prices as NumPy arrays and report their shapes.
actual_array, predicted_array = (
    df_test[column].to_numpy() for column in ('Close', 'Excel_Predicted')
)
print("Shape of actual_array:", actual_array.shape)
print("Shape of predicted_array:", predicted_array.shape)

# Residual = actual - predicted, element by element. Any failure is reported
# and the cell continues; the plotting step below checks for the column.
try:
    residuals = actual_array - predicted_array
    print("Residuals computed successfully; shape:", residuals.shape)

    # Store the residuals next to the prices they were derived from.
    df_test['Residual'] = residuals
    print(
        "Added 'Residual' column. Current df_test columns:",
        df_test.columns.tolist(),
        sep="\n",
    )

    # Show a few rows so the new column can be checked by eye.
    print("df_test AFTER calculating residuals:")
    print(df_test[['Date', 'Close', 'Excel_Predicted', 'Residual']].head())
except Exception as err:
    print("Error calculating or adding residuals:", err, sep="\n")

# Bar chart of the residuals over time, drawn only when the column exists.
if 'Residual' not in df_test.columns:
    print("Column 'Residual' not found in df_test. Please inspect the DataFrame columns above.")
else:
    fig, ax = plt.subplots(figsize=(12,6))
    ax.bar(pd.to_datetime(df_test['Date']), df_test['Residual'], color='orange')
    ax.set_xlabel("Date")
    ax.set_ylabel("Residual (Actual - Predicted)")
    ax.set_title("Residual Analysis (Simulated Excel Model)")
    plt.xticks(rotation=45)
    ax.grid(True)
    plt.show()

In [None]:
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import pandas as pd

# --- Prepare the training data for Prophet ---
# Use the same training set (df_train) from earlier, reshaped to the two
# columns Prophet is given here: 'ds' (timestamp) and 'y' (target value).
# Values that cannot be parsed as numbers become NaN and those rows are dropped.
df_prophet_train = (
    df_train[['Date', 'Close']]
    .rename(columns={'Date': 'ds', 'Close': 'y'})
    .assign(
        ds=lambda frame: pd.to_datetime(frame['ds']),
        y=lambda frame: pd.to_numeric(frame['y'], errors='coerce'),
    )
    .dropna()
)

# Debug: Preview the Prophet training data
print("Prophet training data (first 5 rows):")
print(df_prophet_train.head())

# --- Initialize and fit the Prophet model ---
# Daily seasonality describes a cycle *within* a day and needs sub-daily
# observations. With one bar per day it cannot be resolved, so it is switched
# off explicitly; weekly and yearly seasonality keep their defaults.
prophet_model = Prophet(daily_seasonality=False)
prophet_model.fit(df_prophet_train)

# --- Create a Future DataFrame for the Test Period with the Exact Starting Point ---
# Instead of using Prophet's make_future_dataframe, we build it manually so that both models use the same dates.
# Here we use a date_range with frequency 'B' (business days). Business days
# that have no row in df_test are dropped again by the inner merge below.
future_test = pd.DataFrame({
    'ds': pd.date_range(start=test_start, end=test_end, freq='B')
})
print("Future test DataFrame (first 5 rows):")
print(future_test.head())

# --- Forecast with Prophet ---
forecast = prophet_model.predict(future_test)
print("Prophet forecast (first 5 rows):")
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head())

# --- Merge Prophet Forecast with Actual Test Data ---
# Ensure the test DataFrame has a matching 'ds' column.
df_test['ds'] = pd.to_datetime(df_test['Date'])
df_prophet = pd.merge(df_test, forecast[['ds', 'yhat']], on='ds', how='inner')

# An inner merge silently discards test rows without a forecast. If that
# happened, the Prophet MSE would be computed on different dates than the
# linear-model MSE, so the comparison would be meaningless.
if len(df_prophet) != len(df_test):
    raise ValueError(
        f"Prophet forecast matched {len(df_prophet)} of {len(df_test)} test rows; "
        "the forecast dates do not line up with the test data."
    )

# Compute and print the MSE for the Prophet model.
mse_prophet = mean_squared_error(df_prophet['Close'], df_prophet['yhat'])
print("Prophet Model MSE:", mse_prophet)

# Debug: Preview merged DataFrame
print("Merged Prophet forecast with actual test data (first 5 rows):")
print(df_prophet[['ds', 'Close', 'yhat']].head())

In [None]:
import matplotlib.pyplot as plt

# --- Prophet residuals: actual close minus forecast ---
df_prophet['Residual_Prophet'] = df_prophet['Close'].sub(df_prophet['yhat'])
print("Prophet residuals (first 5 rows):")
print(df_prophet[['ds', 'Close', 'yhat', 'Residual_Prophet']].head())

# --- Figure 1: actual price against the Prophet forecast ---
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(df_prophet['ds'], df_prophet['Close'], label='Actual Price', marker='o', linestyle='-')
ax.plot(df_prophet['ds'], df_prophet['yhat'], label='Prophet Forecast', marker='s', linestyle='--')
ax.set_xlabel("Date")
ax.set_ylabel("NVDA Close Price")
ax.set_title("NVDA Actual vs Prophet Forecast (Jan-Feb 2025)")
ax.legend()
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# --- Figure 2: Prophet residuals per date ---
fig, ax = plt.subplots(figsize=(12,6))
ax.bar(df_prophet['ds'], df_prophet['Residual_Prophet'], color='green')
ax.set_xlabel("Date")
ax.set_ylabel("Residual (Actual - Prophet Forecast)")
ax.set_title("Residual Analysis (Prophet Model)")
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# --- Dates where the Prophet error is unusually large ---
# A date is flagged when the absolute residual exceeds twice the standard
# deviation of all residuals.
threshold_prophet = 2 * df_prophet['Residual_Prophet'].std()
is_unusual = df_prophet['Residual_Prophet'].abs() > threshold_prophet
unusual_prophet = df_prophet[is_unusual]
print("Dates with unusually high errors in the Prophet model:")
print(unusual_prophet[['ds', 'Close', 'yhat', 'Residual_Prophet']])

# --- Figure 3: both forecasts against the actual price ---
# The linear ("Excel") forecast is read from df_test, which already carries
# the Excel_Predicted column; the Prophet forecast comes from df_prophet.
test_dates = pd.to_datetime(df_test['Date'])
fig, ax = plt.subplots(figsize=(14,7))
ax.plot(test_dates, df_test['Close'], label='Actual Price', marker='o', linestyle='-')
ax.plot(test_dates, df_test['Excel_Predicted'], label='Excel Forecast', marker='x', linestyle='--')
ax.plot(df_prophet['ds'], df_prophet['yhat'], label='Prophet Forecast', marker='s', linestyle=':')
ax.set_xlabel("Date")
ax.set_ylabel("NVDA Close Price")
ax.set_title("Comparison of Forecasts (Jan-Feb 2025)")
ax.legend()
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# --- Final side-by-side MSE report ---
print("Simulated Excel Model MSE:", mse_excel)
print("Prophet Model MSE:", mse_prophet)