In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Download HDFC Bank daily price history (NSE listing) from Yahoo Finance
ticker = "HDFCBANK.NS"
df = yf.download(ticker, start="2015-01-01", end="2024-01-01")

# NOTE(review): yfinance appears to report a failed download by returning an
# empty frame rather than raising -- fail here with a clear message instead of
# letting a later cell crash on empty data.
if df.empty:
    raise ValueError(
        f"No data returned for {ticker}; check the symbol, date range, and network access."
    )

# Keep only the first column level so fields such as 'Close' can be selected
# by plain name (this should be a no-op when the columns are already flat).
df.columns = df.columns.get_level_values(0)

# Cleaning: forward-fill gaps FIRST, then drop whatever is still incomplete
# (e.g. leading rows that have no earlier value to carry forward).
# The previous order (dropna, then ffill) removed every NaN row before the
# fill ran, so the forward fill could never have any effect.
df = df.ffill().dropna()

print(f"Dataset Shape: {df.shape}")
df.head()

In [None]:
# 1-Day Lag Plot using Plotly: every close is paired with the close of the
# previous row, so a tight diagonal cloud indicates strong lag-1 dependence.
data_lag = df.assign(Lag1=df['Close'].shift(1))

# Build the trace separately from the figure so each piece is easy to tweak.
lag_markers = dict(color='royalblue', opacity=0.5)
lag_trace = go.Scatter(
    x=data_lag['Close'],
    y=data_lag['Lag1'],
    mode='markers',
    marker=lag_markers,
)

fig = go.Figure(data=lag_trace)
fig.update_layout(
    title=f'HDFC Bank: 1-Day Lag Plot',
    xaxis_title="Price at Time (t)",
    yaxis_title="Price at Time (t-1)",
    template='plotly_white',
)
fig.show()

In [None]:
# 80/20 Chronological Split: the first 80% of rows train the model and the
# remaining 20% are held out. Rows are sliced by position, never shuffled.
split_point = int(len(df) * 0.8)
train_data, test_data = df.iloc[:split_point], df.iloc[split_point:]

# Draw both segments on one figure so the split boundary is visible.
plt.figure(figsize=(12, 6))
for segment, segment_label, segment_color in (
    (train_data, 'Training Data (80%)', 'blue'),
    (test_data, 'Testing Data (20%)', 'orange'),
):
    plt.plot(segment.index, segment['Close'], label=segment_label, color=segment_color)
plt.title("HDFC Bank Price Split")
plt.legend()
plt.show()

In [None]:
def check_stationarity(series, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the verdict.

    Parameters
    ----------
    series : pandas.Series
        Values to test. Missing values are dropped before the test is run.
    alpha : float, default 0.05
        Significance level. The series is reported as 'Stationary' when the
        test's p-value is less than or equal to ``alpha``.

    Returns
    -------
    None
        The statistic, p-value and verdict are printed, not returned, so the
        call produces no extra cell output.
    """
    # adfuller returns a tuple whose first two entries are the test statistic
    # and the p-value; the remaining entries are not used here.
    adf_stat, p_value = adfuller(series.dropna())[:2]

    print(f'ADF Statistic: {adf_stat:.4f}')
    print(f'p-value: {p_value:.4f}')
    # The ADF null hypothesis is a unit root (non-stationarity), so a small
    # p-value is evidence in favour of stationarity.
    print('Stationary' if p_value <= alpha else 'Non-Stationary')

# Baseline test on the undifferenced training closes (test rows are excluded).
print("Checking Stationarity of Raw HDFC Close Prices:")
raw_train_close = train_data['Close']
check_stationarity(raw_train_close)

In [None]:
# Apply First Differencing: the change from one row to the next. The first row
# has no predecessor, so its NaN is dropped.
train_close = train_data['Close']
train_diff = train_close.diff().dropna()

# Visualize the Difference, with a dashed zero line as the reference level.
plt.figure(figsize=(12, 5))
plt.plot(train_diff, color='green')
plt.axhline(0, color='black', linestyle='--')
plt.title("Stationary Data: HDFC Daily Price Changes (First Difference)")
plt.show()

In [None]:
# Repeat the ADF test on the first-differenced training series.
print("Checking Stationarity of Differenced Data:")
check_stationarity(series=train_diff)

In [None]:
# Side-by-side correlograms of the differenced training series.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
ax1, ax2 = axes

# Left panel: ACF, used to identify the Moving Average (q) order
plot_acf(train_diff, lags=40, ax=ax1)

# Right panel: PACF, used to identify the Auto-Regressive (p) order
plot_pacf(train_diff, lags=40, ax=ax2)

plt.show()