In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# (Repeating the steps performed in previous notebooks)
# 1. Load HDFC Bank Data
ticker = "HDFCBANK.NS"
df = yf.download(ticker, start="2015-01-01", end="2024-01-01")

# Fail fast on an empty result: yfinance usually reports a failed download by
# returning an empty frame instead of raising, which would otherwise surface
# several cells later as an unrelated-looking indexing or model-fit error.
if df is None or df.empty:
    raise ValueError(f"No price data returned for {ticker}; check the ticker symbol and network access.")

# Keep only the closing price and drop days without a quote
df = df[['Close']].dropna()

# 2. Train-Test Split (80/20)
# The split is chronological (no shuffling) so the test set is strictly later
# than everything the model is trained on.
split_point = int(len(df) * 0.8)
train_data = df.iloc[:split_point]
test_data = df.iloc[split_point:]

# 3. Apply First Differencing
# Each segment is differenced on its own, so the first row of each becomes NaN
# and is dropped. Consequently test_diff has len(test_data) - 1 entries and the
# change from the last training close to the first test close appears in
# neither series.
train_diff = train_data['Close'].diff().dropna()
test_diff = test_data['Close'].diff().dropna()

# Symmetric Mean Absolute Percentage Error (SMAPE)
def smape_kun(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``100 / n * sum(2 * |y_pred - y_true| / (|y_true| + |y_pred|))``
    where ``n`` is the number of actual values.

    Parameters
    ----------
    y_true : array-like
        Observed values. Lists, NumPy arrays and pandas objects are accepted;
        the input is flattened to one dimension.
    y_pred : array-like
        Predicted values, flattened the same way.

    Returns
    -------
    numpy.float64
        SMAPE between 0 and 200. A term where both the actual and predicted
        value are zero contributes 0 (a perfect prediction) instead of the
        NaN that a literal 0/0 would produce.

    Raises
    ------
    ValueError
        If ``y_true`` is empty.
    """
    # Flatten so a single-column 2-D input cannot silently broadcast against
    # a 1-D input into an n-by-n matrix.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size == 0:
        raise ValueError("smape_kun requires at least one observation")

    abs_error = 2 * np.abs(y_pred - y_true)
    scale = np.abs(y_true) + np.abs(y_pred)

    # Divide only where the denominator is non-zero; masked positions keep the
    # pre-filled 0.0, i.e. "no error" when actual and forecast are both zero.
    ratio = np.divide(abs_error, scale, out=np.zeros_like(scale), where=scale != 0)
    return 100 / y_true.size * np.sum(ratio)

In [None]:
# Ensure we are getting a 1D array of pure floats
train_diff_values = train_diff.values.flatten().astype(float)
test_diff_values = test_diff.values.flatten().astype(float)

# The train and test segments were differenced separately, so the change from
# the last training close to the first test close is in neither series.
# Without it the history ends one step early and every forecast would be
# scored against the change one day later than the one it targets. The first
# test close is the known starting point of the test window, so including this
# change does not leak any value that is being predicted.
last_train_close = np.asarray(train_data['Close'], dtype=float).ravel()[-1]
first_test_close = np.asarray(test_data['Close'], dtype=float).ravel()[0]
boundary_change = first_test_close - last_train_close

# Initialize history as a list of pure floats
history = list(train_diff_values) + [boundary_change]
predictions = []

n_steps = len(test_diff_values)
for t in range(n_steps):
    # Convert history to a clean numpy array of floats
    curr_history = np.asarray(history, dtype=float)

    # We use d=0 because we are feeding already-differenced data.
    # The model is refit from scratch at every step on all changes seen so far.
    model = ARIMA(curr_history, order=(5, 0, 0))
    model_fit = model.fit()

    # Forecast the next day's price change (one step ahead)
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)

    # Observe actual value and update history so the next fit can use it
    obs = test_diff_values[t]
    history.append(obs)

    if t % 100 == 0:
        print(f'Progress: {t}/{n_steps} | Predicted: {yhat:.3f}, Actual: {obs:.3f}')

In [None]:
# Reconstruct Actual Prices (Reversing cumsum)
# test_diff_values starts with the change from the first to the second test
# close, so the cumulative sum has to be anchored at the FIRST TEST close.
# Anchoring at the last training close shifts the whole series by the
# train/test boundary move and misaligns it with test_data.index.
first_test_price = np.asarray(test_data['Close'], dtype=float).ravel()[0]
reverse_actuals = np.r_[first_test_price, test_diff_values].cumsum()

# Each forecast was made with the true history up to the previous day, so the
# matching price forecast is "previous actual close + predicted change".
# Cumulatively summing the predicted changes would instead compound every
# earlier error and evaluate a path the walk-forward loop never produced.
predicted_changes = np.asarray(predictions, dtype=float)
reverse_predictions = np.r_[first_test_price, reverse_actuals[:-1] + predicted_changes]

# Final Metrics
# Element 0 is the shared anchor, not a forecast; leave it out so a guaranteed
# zero error does not flatter the scores.
mse = mean_squared_error(reverse_actuals[1:], reverse_predictions[1:])
smape_error = smape_kun(reverse_actuals[1:], reverse_predictions[1:])

print(f'\n--- Model Evaluation ---')
print(f'Testing Mean Squared Error: {mse:.3f}')
print(f'SMAPE: {smape_error:.3f}%')

In [None]:
# Visualization: training history, actual test prices and the forecast on one axis
fig, ax = plt.subplots(figsize=(14, 7))

# (x values, y values, line styling) for each curve, drawn in this order
price_curves = (
    (train_data.index, train_data['Close'], dict(color='blue', label='Train (History)')),
    (test_data.index, reverse_actuals, dict(color='green', label='Test (Actual)')),
    (test_data.index, reverse_predictions, dict(color='red', linestyle='--', label='ARIMA Forecast')),
)
for x_values, y_values, line_style in price_curves:
    ax.plot(x_values, y_values, **line_style)

ax.set_title('HDFC Bank: Final ARIMA Prediction (Walk-Forward)')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Residual = actual daily change minus the change the model predicted
residuals = np.subtract(test_diff_values, predictions)

# Residuals over time; the first test date has no differenced value, hence index[1:]
fig_resid, ax_resid = plt.subplots(figsize=(14, 5))
ax_resid.plot(test_data.index[1:], residuals, color='purple', alpha=0.7)
ax_resid.axhline(0, color='black', linestyle='--')
ax_resid.set_title('ARIMA Model Residuals (Prediction Errors)')
ax_resid.set_ylabel('Error Amount')
plt.show()

# Histogram of the residuals (ideally close to a normal distribution)
fig_hist, ax_hist = plt.subplots(figsize=(10, 5))
ax_hist.hist(residuals, bins=50, edgecolor='black', color='skyblue')
ax_hist.set_title('Distribution of Residuals')
plt.show()

In [None]:
# Get the very latest prediction from the model.
# The model_fit left over from the walk-forward loop was fitted BEFORE the last
# observed change was appended to `history`, so its forecast targets the final
# day that has already been observed. Refit on the complete history to obtain
# a genuine forecast for the next, unseen day.
final_fit = ARIMA(np.asarray(history, dtype=float), order=(5, 0, 0)).fit()
next_change = float(final_fit.forecast()[0])

# Extract the actual price value (flattened so it is a plain float whether
# df['Close'] is a Series or a single-column frame)
last_actual_price = float(np.asarray(df['Close'], dtype=float).ravel()[-1])
tomorrow_forecast = last_actual_price + next_change

# Take the dates from the data instead of hard-coding them: the download's
# `end` date is exclusive, so the last row is not 2024-01-01.
# Assumes df has a DatetimeIndex. The next date is the next weekday and does
# not account for exchange holidays.
last_actual_date = df.index[-1]
next_trading_date = last_actual_date + pd.offsets.BDay(1)

# Print the results
print(f"--- HDFC Bank Forecasting Results ---")
print(f"Latest Actual Price ('{last_actual_date:%Y-%m-%d}') : ₹{last_actual_price:.2f}")
print(f"Predicted Price ('{next_trading_date:%Y-%m-%d}') : ₹{tomorrow_forecast:.2f}")
print(f"Expected Move: {next_change:+.2f} ({next_change / last_actual_price * 100:+.2f}%)")