# Time Series Analysis

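This notebook demonstrates common time series analysis techniques on a synthetic daily series: decomposition, stationarity testing, ARIMA forecasting, automated feature extraction, baseline forecasting, and rolling z-score anomaly detection.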

**Libraries:**
- [statsmodels](https://www.statsmodels.org/) - Statistical models
- [pmdarima](https://alkaline-ml.com/pmdarima/) - Auto-ARIMA
- [tsfresh](https://tsfresh.readthedocs.io/) - Feature extraction
- [sktime](https://www.sktime.net/) - Time series ML

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm
from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

%matplotlib inline

## Generate Sample Time Series Data

In [None]:
# Seed NumPy's global RNG so the synthetic noise below is reproducible.
np.random.seed(42)

# Two years of daily observations starting on 2023-01-01.
n_periods = 2 * 365
dates = pd.date_range(start="2023-01-01", periods=n_periods, freq="D")

# Additive components, all built on one shared day counter.
day_index = np.arange(n_periods)
trend = np.linspace(100, 150, n_periods)             # linear ramp from 100 to 150
seasonal = 20 * np.sin(2 * np.pi * day_index / 365)  # 365-day cycle, amplitude 20
weekly = 5 * np.sin(2 * np.pi * day_index / 7)       # 7-day cycle, amplitude 5
noise = 5 * np.random.randn(n_periods)               # standard normal draws scaled by 5

# Sum the components into a single date-indexed series.
ts = pd.Series(data=trend + seasonal + weekly + noise, index=dates, name="value")

print(f"Time series shape: {ts.shape}")
print(f"Date range: {ts.index.min()} to {ts.index.max()}")
ts.head(10)

In [None]:
# Draw the full daily series on a single wide axis.
fig, ax = plt.subplots(figsize=(14, 5))
ts.plot(ax=ax)
ax.set(
    title="Sample Time Series (Trend + Seasonality + Noise)",
    xlabel="Date",
    ylabel="Value",
)
plt.show()

## Time Series Decomposition

Decompose the series into trend, seasonal, and residual components.

In [None]:
# Additive decomposition using a 365-observation seasonal period.
decomposition = seasonal_decompose(ts, model="additive", period=365)

# One stacked panel per component, top to bottom.
fig, axes = plt.subplots(4, 1, figsize=(14, 12))
panels = (
    ("Observed", decomposition.observed),
    ("Trend", decomposition.trend),
    ("Seasonal", decomposition.seasonal),
    ("Residual", decomposition.resid),
)
for panel_ax, (panel_title, component) in zip(axes, panels):
    component.plot(ax=panel_ax, title=panel_title)
plt.tight_layout()
plt.show()

## Statistical Tests for Stationarity

### Augmented Dickey-Fuller Test

In [None]:
# Run the Augmented Dickey-Fuller test on the non-missing observations.
result = adfuller(ts.dropna())

# Name the fields of the result tuple that are reported below.
adf_stat, p_value, lags_used = result[0], result[1], result[2]
critical_values = result[4]

print("Augmented Dickey-Fuller Test:")
print(f"  Test Statistic: {adf_stat:.4f}")
print(f"  p-value: {p_value:.4f}")
print(f"  Lags Used: {lags_used}")
print("  Critical Values:")
for level, threshold in critical_values.items():
    print(f"    {level}: {threshold:.4f}")

# Rejecting the null at the 5% level is reported as "stationary".
if p_value < 0.05:
    conclusion = "stationary (reject null)"
else:
    conclusion = "non-stationary (fail to reject)"
print(f"\n  -> Series is {conclusion}")

### ACF and PACF Plots

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

# Correlations up to lag 40 for both diagnostics.
acf_values = acf(ts, nlags=40)
pacf_values = pacf(ts, nlags=40)

# Reference lines are drawn at +/- 1.96 / sqrt(n) on both panels.
conf_bound = 1.96 / np.sqrt(len(ts))

panels = (
    (axes[0], acf_values, "Autocorrelation Function (ACF)"),
    (axes[1], pacf_values, "Partial Autocorrelation Function (PACF)"),
)
for panel_ax, corr_values, panel_title in panels:
    panel_ax.bar(range(len(corr_values)), corr_values)
    panel_ax.axhline(y=0, linestyle="-", color="black")
    for bound in (conf_bound, -conf_bound):
        panel_ax.axhline(y=bound, linestyle="--", color="gray")
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Lag")

plt.tight_layout()
plt.show()

## ARIMA Modeling

In [None]:
# Aggregate to month-end means; the shorter series keeps model fitting fast.
monthly_ts = ts.resample("ME").mean()

# Chronological split: the first 80% of months train, the remainder test.
train_size = int(len(monthly_ts) * 0.8)
train = monthly_ts.iloc[:train_size]
test = monthly_ts.iloc[train_size:]

print(f"Training: {len(train)} months")
print(f"Testing: {len(test)} months")

In [None]:
# Fit an ARIMA(1, 1, 1) model on the training months.
model = ARIMA(train, order=(1, 1, 1))
fitted = model.fit()

print("ARIMA(1,1,1) Model:")
print(f"  AIC: {fitted.aic:.2f}")
print(f"  BIC: {fitted.bic:.2f}")

# Forecast one step per held-out month and score it with MAPE, in percent.
forecast = fitted.forecast(steps=len(test))
relative_error = (test - forecast) / test
mape = relative_error.abs().mean() * 100
print(f"  Forecast MAPE: {mape:.2f}%")

In [None]:
# Overlay training data, held-out data, and the ARIMA forecast on one axis.
fig, ax = plt.subplots(figsize=(12, 5))
curves = (
    (train, {"label": "Training Data"}),
    (test, {"label": "Test Data"}),
    (forecast, {"label": "Forecast", "linestyle": "--"}),
)
for curve, line_kwargs in curves:
    curve.plot(ax=ax, **line_kwargs)
ax.set_title("ARIMA Forecast")
ax.legend()
plt.show()

## Auto-ARIMA with pmdarima

In [None]:
# Search settings: non-seasonal, stepwise, p and q starting at 0 and capped at 3.
search_options = {
    "start_p": 0,
    "start_q": 0,
    "max_p": 3,
    "max_q": 3,
    "seasonal": False,
    "trace": True,
    "error_action": "ignore",
    "suppress_warnings": True,
    "stepwise": True,
}
auto_model = pm.auto_arima(train, **search_options)

# Report the selected order and its AIC.
best_order = auto_model.order
best_aic = auto_model.aic()
print(f"\nBest model: ARIMA{best_order}")
print(f"AIC: {best_aic:.2f}")

## tsfresh - Feature Extraction

In [None]:
# Prepare data for tsfresh: a long-format frame with one row per (series id, time step).
# Each series is a noisy copy of the first `n_points` observations of `ts`.
n_series, n_points = 5, 100
base_values = ts.to_numpy()[:n_points]
n_steps = len(base_values)  # may be shorter than n_points if `ts` is short

# Draw all the noise in one call, shaped (series, time), instead of one scalar
# draw per row; named so it does not overwrite the `noise` component array.
tsfresh_noise = np.random.randn(n_series, n_steps) * 10

# Build the columns as arrays in series-major order; explicit int64 keeps the
# id/time dtypes the same on every platform.
tsfresh_df = pd.DataFrame(
    {
        "id": np.repeat(np.arange(n_series, dtype="int64"), n_steps),
        "time": np.tile(np.arange(n_steps, dtype="int64"), n_series),
        "value": (base_values + tsfresh_noise).ravel(),
    }
)
print(f"tsfresh input shape: {tsfresh_df.shape}")

In [None]:
# Column roles and feature set passed to tsfresh; n_jobs=1 keeps extraction single-process.
extraction_options = dict(
    column_id="id",
    column_sort="time",
    column_value="value",
    default_fc_parameters=MinimalFCParameters(),
    n_jobs=1,
)
features = extract_features(tsfresh_df, **extraction_options)

# Rows are series, columns are extracted features.
n_rows, n_cols = features.shape
print(f"Extracted {n_cols} features for {n_rows} time series")
features.head()

## sktime Forecasting

In [None]:
# Work on a copy of the monthly series, re-indexed by monthly periods for sktime.
y = monthly_ts.copy()
y.index = pd.PeriodIndex(y.index, freq="M")

# Hold out the final 20% of observations, preserving time order.
y_train, y_test = temporal_train_test_split(y, test_size=0.2)

# Baseline forecaster using the "last" strategy.
forecaster = NaiveForecaster(strategy="last").fit(y_train)

# Forecast horizon: steps 1..len(y_test) ahead of the end of training.
horizon = list(range(1, len(y_test) + 1))
y_pred = forecaster.predict(fh=horizon)

# The score is scaled by 100 for display as a percentage.
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f"Naive Forecaster MAPE: {mape * 100:.2f}%")

## Anomaly Detection

In [None]:
# Rolling statistics over a trailing 30-observation window.
window = 30
rolling_view = ts.rolling(window=window)
rolling_mean = rolling_view.mean()
rolling_std = rolling_view.std()

# Z-score of each point relative to its own rolling window.
z_scores = (ts - rolling_mean) / rolling_std

# Flag points more than 3 rolling standard deviations from the rolling mean.
# NaN z-scores compare as False, so they are never flagged.
is_anomaly = z_scores.abs() > 3
anomalies = ts[is_anomaly]

print(f"Detected {len(anomalies)} anomalies (|z-score| > 3)")

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
series_ax, z_ax = axes

# Top panel: raw series, rolling mean with a +/- 2 std band, and the flagged points.
band_lower = rolling_mean - 2 * rolling_std
band_upper = rolling_mean + 2 * rolling_std
series_ax.plot(ts.index, ts.values, alpha=0.5, label="Original")
# The legend label is built from `window` so it cannot drift from the rolling
# window actually used to compute `rolling_mean`.
series_ax.plot(rolling_mean.index, rolling_mean.values, label=f"{window}-day MA")
series_ax.fill_between(rolling_mean.index, band_lower, band_upper,
                       alpha=0.2, label="2 Std Dev")
series_ax.scatter(anomalies.index, anomalies.values, color="red", s=50, label="Anomalies")
series_ax.set_title("Time Series with Anomalies")
series_ax.legend()

# Bottom panel: z-scores with the +/- 3 detection thresholds.
z_ax.plot(z_scores.index, z_scores.values)
z_ax.axhline(y=3, color="red", linestyle="--", label="Threshold")
z_ax.axhline(y=-3, color="red", linestyle="--")
z_ax.set_title("Z-Scores")
z_ax.legend()

plt.tight_layout()
plt.show()

---

## Summary

In this notebook, we covered:

1. **Time Series Decomposition**: Trend, seasonal, and residual components
2. **Statistical Tests**: ADF test, ACF, PACF
3. **ARIMA Modeling**: Manual and auto-ARIMA
4. **Feature Extraction**: tsfresh for automated feature generation
5. **Forecasting**: sktime naive forecaster
6. **Anomaly Detection**: Z-score based detection