# Time Series Exploratory Data Analysis (EDA) Template

Reusable notebook for investigating time series datasets:
- Trends, seasonality, decomposition
- Categorical effects (month, weekday, holiday)
- Statistical significance & effect sizes
- Diagnostics (stationarity, autocorrelation, normality)
- Presentation-ready visualizations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from scipy.stats import ttest_ind, shapiro
from statsmodels.stats.diagnostic import acorr_ljungbox

# Global plot styling: white grid background, "talk"-sized fonts, "deep" palette.
sns.set_theme(context="talk", palette="deep", style="whitegrid")

# Load the dataset; the "date" column is parsed to datetimes at read time.
df = pd.read_csv(
    "../dummy_timeseries.csv",
    parse_dates=["date"],
)

# Structural overview first, then a preview of the first rows
# (the trailing expression is what the notebook cell displays).
df.info()
df.head()


## 1. Basic Checks

In [None]:
# Summary statistics for every column, numeric or not.
summary = df.describe(include="all")
print(summary)

# Time span covered by the data.
dates = df["date"]
print("Date range:", dates.min(), "to", dates.max())

# Number of missing entries per column.
missing_counts = df.isna().sum()
print("Missing values:\n", missing_counts)

## 2. Trend & Rolling Mean

In [None]:
# Raw series with a 30-observation rolling mean overlaid.
#
# Rows are put in chronological order first: read_csv keeps file order, and
# both the line plot and the row-based rolling window silently assume the
# data is sorted by date. sort_values returns a copy, so `df` is untouched.
ordered = df.sort_values("date")

# Row-based window: this equals a 30-day mean only when there is exactly one
# row per calendar day. The first 29 positions are NaN (incomplete window).
rolling_mean = ordered["value"].rolling(30).mean()

plt.figure(figsize=(14, 6))
plt.plot(ordered["date"], ordered["value"], alpha=0.4, label="Daily values")
plt.plot(ordered["date"], rolling_mean, label="30-day rolling mean", linewidth=3)
plt.title("Time Series with Rolling Mean")
plt.legend()
plt.show()

## 3. Seasonal Exploration

In [None]:
# Calendar orderings shared by every plot in this cell.
# freq="MS" (month start) is used instead of the "M" alias, which is
# deprecated since pandas 2.2; twelve periods yield January..December.
month_order = list(pd.date_range("2000-01-01", periods=12, freq="MS").month_name())
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# By month
plt.figure(figsize=(12, 5))
sns.boxplot(x="month_name", y="value", data=df, order=month_order)
plt.xticks(rotation=45)
plt.title("Distribution of Values by Month")
plt.show()

# By weekday
plt.figure(figsize=(10, 5))
sns.barplot(x="weekday_name", y="value", data=df, order=weekday_order)
plt.title("Average Value by Weekday")
plt.show()

# Holiday vs non-holiday
plt.figure(figsize=(6, 5))
sns.boxplot(x="holiday", y="value", data=df)
plt.title("Holiday vs Non-Holiday Values")
plt.show()

# Heatmap: average value by weekday vs month
pivot = df.pivot_table(index="weekday_name", columns="month_name", values="value", aggfunc="mean")

# pivot_table sorts its labels alphabetically (April, August, ... / Friday,
# Monday, ...). Re-order them by calendar so the heatmap matches the plots
# above. Labels not found in the calendar lists are kept, appended at the end,
# so no data is dropped if the columns use a different spelling.
row_order = [day for day in weekday_order if day in pivot.index]
row_order += [day for day in pivot.index if day not in weekday_order]
col_order = [month for month in month_order if month in pivot.columns]
col_order += [month for month in pivot.columns if month not in month_order]
pivot = pivot.reindex(index=row_order, columns=col_order)

plt.figure(figsize=(12, 6))
sns.heatmap(pivot, annot=True, fmt=".1f", cmap="coolwarm")
plt.title("Average Value by Weekday & Month")
plt.show()


## 4. Statistical Tests & Effect Sizes

In [None]:
# One-way ANOVA of value by month.
# Eta squared = SS_effect / (SS_effect + SS_residual). The effect term is the
# first row of the ANOVA table; .iloc[0] selects it by position explicitly,
# because integer keys on a string-labelled Series (`series[0]`) are a
# deprecated positional fallback in pandas 2.x.
model_month = ols("value ~ C(month_name)", data=df).fit()
anova_month = sm.stats.anova_lm(model_month, typ=2)
eta_sq_month = anova_month["sum_sq"].iloc[0] / anova_month["sum_sq"].sum()

# One-way ANOVA of value by weekday (same effect-size computation).
model_weekday = ols("value ~ C(weekday_name)", data=df).fit()
anova_weekday = sm.stats.anova_lm(model_weekday, typ=2)
eta_sq_weekday = anova_weekday["sum_sq"].iloc[0] / anova_weekday["sum_sq"].sum()

# Welch's t-test (equal_var=False) comparing holiday and non-holiday values.
holiday_vals = df.loc[df["holiday"] == 1, "value"]
nonholiday_vals = df.loc[df["holiday"] == 0, "value"]
t_stat, p_val = ttest_ind(holiday_vals, nonholiday_vals, equal_var=False)

# Cohen's d: mean difference divided by the root of the average of the two
# sample variances (the groups are not weighted by their sizes).
cohen_d = (holiday_vals.mean() - nonholiday_vals.mean()) / np.sqrt(
    (holiday_vals.std() ** 2 + nonholiday_vals.std() ** 2) / 2
)

print("ANOVA by Month:\n", anova_month)
print(f"η² (effect size): {eta_sq_month:.3f}\n")

print("ANOVA by Weekday:\n", anova_weekday)
print(f"η² (effect size): {eta_sq_weekday:.3f}\n")

print(f"T-test Holidays vs Non-Holidays: t={t_stat:.3f}, p={p_val:.3e}")
print(f"Cohen's d: {cohen_d:.3f}")


## 5. Time Series Decomposition

In [None]:
# Regular daily series indexed by date. asfreq("D") inserts NaN for every
# calendar day that has no row in the data. `df_ts` is reused by the
# stationarity cell, so it is left as-is (NaNs included).
df_ts = df.set_index("date")["value"].asfreq("D")

# seasonal_decompose raises ValueError when the input contains missing values,
# so gaps are bridged by time-weighted linear interpolation for the
# decomposition input only; limit_direction="both" also fills NaNs at the ends.
decomp_input = df_ts.interpolate(method="time", limit_direction="both")

# Additive model with a yearly cycle. NOTE: statsmodels requires at least two
# complete cycles (2 * 365 observations) and raises ValueError otherwise.
decomp = seasonal_decompose(decomp_input, model="additive", period=365)
decomp.plot()
plt.show()


## 6. Autocorrelation

In [None]:
# Date-indexed value series, built once and shared by both plots and the test.
value_by_date = df.set_index("date")["value"]
max_lag = 60

# Side-by-side correlograms: ACF on the left, PACF on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
acf_ax, pacf_ax = axes
sm.graphics.tsa.plot_acf(value_by_date, lags=max_lag, ax=acf_ax)
sm.graphics.tsa.plot_pacf(value_by_date, lags=max_lag, ax=pacf_ax)
plt.show()

# Ljung-Box test at lag 20, returned as a DataFrame.
lb_test = acorr_ljungbox(value_by_date, lags=[20], return_df=True)
print(lb_test)

## 7. Distributional Checks and Normality

In [None]:
# Histogram of the raw values (40 bins) with a KDE curve on top.
plt.figure(figsize=(10, 5))
sns.histplot(df["value"], bins=40, kde=True)
plt.title("Distribution of Values")
plt.show()

# Shapiro-Wilk normality test on the residual component of an additive
# decomposition with a 365-observation period; NaN residuals are dropped first.
value_by_date = df.set_index("date")["value"]
decomposition = seasonal_decompose(value_by_date, model="additive", period=365)
resid = decomposition.resid.dropna()
shapiro_stat, shapiro_p = shapiro(resid)
print(f"Shapiro-Wilk Test on Residuals: W={shapiro_stat:.3f}, p={shapiro_p:.3e}")


## 8. Stationarity Test (ADF)

In [None]:
# Augmented Dickey-Fuller test on the daily series with missing days removed.
adf_result = adfuller(df_ts.dropna())

# Positions read from the result tuple: 0 = test statistic, 1 = p-value,
# 4 = mapping of significance level -> critical value.
adf_stat = adf_result[0]
adf_pvalue = adf_result[1]
critical_values = adf_result[4]

print("ADF Statistic:", adf_stat)
print("p-value:", adf_pvalue)
for level, threshold in critical_values.items():
    print(f"Critical Value {level}: {threshold:.3f}")

## 9. Feature Engineering & Outliers

In [None]:
# Weekend indicator: 1 for Saturday/Sunday rows, 0 otherwise (adds a column to df).
weekend_days = ["Saturday", "Sunday"]
df["is_weekend"] = df["weekday_name"].isin(weekend_days).astype(int)

# IQR rule: flag values more than 1.5 * IQR outside the quartiles.
Q1, Q3 = df["value"].quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
below_fence = df["value"] < lower_fence
above_fence = df["value"] > upper_fence
outliers = df[below_fence | above_fence]
print(f"Detected {len(outliers)} potential outliers")

# Pairwise correlations between the two flags and the value.
corr_matrix = df[["holiday", "is_weekend", "value"]].corr()
plt.figure(figsize=(6, 5))
sns.heatmap(corr_matrix, annot=True, cmap="vlag", center=0)
plt.title("Correlation Heatmap")
plt.show()