In [None]:
import yfinance as yf
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
import numpy as np

# 1. Data Retrieval: Download NVDA daily price data
# Set your desired start and end dates (adjust as necessary).
# Both dates are meant to be inclusive bounds of the window we want on disk.
start_date = "2025-01-01"
end_date = "2025-02-28"

# yfinance treats `end` as an exclusive bound, so passing end_date directly
# would silently drop the final day. Request one extra calendar day instead.
download_end = (pd.Timestamp(end_date) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
nvda_data = yf.download("NVDA", start=start_date, end=download_end)

# yf.download reports failures by returning an empty frame rather than raising;
# stop here instead of writing a header-only CSV that breaks later cells.
if nvda_data.empty:
    raise ValueError(
        f"No NVDA data returned for {start_date}..{end_date}; "
        "check the ticker, the date range and your network connection."
    )

# Depending on the yfinance version, a single-ticker download may come back with
# two-level (field, ticker) columns. Keep only the field level so the CSV has a
# single header row with plain names such as 'Close'.
if isinstance(nvda_data.columns, pd.MultiIndex):
    nvda_data.columns = nvda_data.columns.get_level_values(0)

# Move the date index into a regular 'Date' column (new frame, no in-place mutation).
nvda_data = nvda_data.reset_index()

# 2. Data Export: Save the data as a CSV file
nvda_data.to_csv("nvda_data.csv", index=False)
print("Data saved to nvda_data.csv")


In [None]:
# Install required packages if needed (uncomment the next line if they are not already installed)
# %pip install yfinance prophet pandas numpy matplotlib scikit-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from google.colab import files

# ------------------------------
# 1. Load the CSV data
# ------------------------------
# Pull in the CSV file that was provided to the Colab session.
df_hw = pd.read_csv("nvda_data_hw.csv")

# Parse 'Date' into real datetimes, then put the rows in chronological order.
df_hw = (
    df_hw
    .assign(Date=pd.to_datetime(df_hw['Date']))
    .sort_values('Date')
)

# Optional sanity check: show the first few rows of what was loaded.
print("Data preview:", df_hw.head(), sep="\n")



In [None]:

# ------------------------------
# 2. Prepare the data for Prophet
# ------------------------------
# Prophet expects the date column to be named 'ds' and the target 'y'.
# We assume 'Close' is the target variable.
df_prophet = df_hw[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})

# ------------------------------
# 3. Fit the Prophet Model
# ------------------------------
# The series has one observation per day, so a within-day (daily) seasonal
# pattern cannot be identified from it. Leave daily seasonality off to avoid
# fitting and plotting a spurious component.
model = Prophet(daily_seasonality=False)
model.fit(df_prophet)

# ------------------------------
# 4. Create Future DataFrame for Forecasting
# ------------------------------
# We want to forecast NVDA's daily price for January and February 2025.
forecast_start_date = pd.to_datetime("2025-01-01")
forecast_end_date = pd.to_datetime("2025-02-28")

# Number of calendar days between the last training date and the target end date.
# Clamp at zero: if the history already reaches the target date there is nothing
# to extend, and a negative period count would make make_future_dataframe fail.
last_date = df_prophet['ds'].max()
forecast_horizon = max((forecast_end_date - last_date).days, 0)

# Create the future DataFrame (includes the training period by default).
future = model.make_future_dataframe(periods=forecast_horizon)

# The future dates are generated at calendar-day frequency, which includes
# weekends. The market is closed then, so the model has no weekend history and
# its weekend predictions are not meaningful. Keep every historical row
# untouched and drop only the extrapolated weekend dates.
# NOTE: exchange holidays are not removed here.
is_history = future['ds'] <= last_date
is_weekday = future['ds'].dt.dayofweek < 5
future = future[is_history | is_weekday].reset_index(drop=True)

forecast = model.predict(future)

# ------------------------------
# 5. Extract Forecast for January and February 2025
# ------------------------------
forecast_jan_feb = forecast[
    (forecast['ds'] >= forecast_start_date) & (forecast['ds'] <= forecast_end_date)
].copy()

# Plot the forecast for the entire period (training + forecast).
# Axis labels are passed to Prophet directly and the title is set on the
# returned figure's axes, so nothing depends on pyplot's "current figure" state.
fig1 = model.plot(forecast, xlabel="Date", ylabel="Price (USD)")
fig1.axes[0].set_title("NVDA Daily Price Forecast")
plt.show()

# Plot forecast components (trend and whichever seasonalities were fitted).
fig2 = model.plot_components(forecast)
plt.show()



In [None]:
# ------------------------------
# 6. Evaluate Model Performance on Historical Data (MSE)
# ------------------------------
# In-sample part of the forecast: rows up to the last training date, keyed by date.
forecast_train = forecast.loc[forecast['ds'] <= last_date].set_index('ds')

# Training data keyed by date, with the aligned in-sample prediction ('yhat')
# and the residual (actual minus predicted) attached as extra columns.
df_train = (
    df_prophet
    .set_index('ds')
    .assign(yhat=forecast_train['yhat'])
    .assign(residual=lambda frame: frame['y'] - frame['yhat'])
)

# Mean Squared Error (MSE) between actual and fitted values.
mse = mean_squared_error(df_train['y'], df_train['yhat'])
print("Mean Squared Error (MSE) on training data:", mse)

# ------------------------------
# 7. Residual Analysis
# ------------------------------
# Residuals over time, with a dashed zero line for reference.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_train.index, df_train['residual'], marker='o', linestyle='-', label="Residuals")
ax.axhline(y=0, color='r', linestyle='--')
ax.set_xlabel("Date")
ax.set_ylabel("Residual (Actual - Predicted)")
ax.set_title("Residuals of Prophet Model on Historical Data")
plt.xticks(rotation=45)
fig.tight_layout()
ax.legend()
plt.show()

# Flag dates whose residual magnitude exceeds a threshold (here, 2 standard deviations).
threshold = 2 * np.std(df_train['residual'])
is_outlier = df_train['residual'].abs() > threshold
anomalies = df_train.loc[is_outlier]
print("Dates with unusually high residuals (>|2*std|):")
print(anomalies[['y', 'yhat', 'residual']])
