# Getting Started with fastcpd: Part 1 - Generating Data

This tutorial shows how to generate synthetic data for testing change point detection algorithms.

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from fastcpd.datasets import (
    make_mean_change,
    make_variance_change,
    make_regression_change,
    make_glm_change,
    make_arma_change,
    make_garch_change
)

%matplotlib inline

## 1. Mean Changes (Simplest Case)

Data where the mean shifts at change points.

In [None]:
# Mean-change example: 500 samples, 3 change points, noise_std=1.0, seed=42.
data_dict = make_mean_change(n_samples=500, n_changepoints=3, noise_std=1.0, seed=42)

# Entries of the returned dictionary that are used below.
series = data_dict['data']
true_cps = data_dict['changepoints']

print("Data shape:", series.shape)
print("True change points:", true_cps)
print("SNR (dB):", f"{data_dict['metadata']['snr_db']:.2f}")

# Plot the series and mark every true change point with a dashed red line.
plt.figure(figsize=(12, 4))
plt.plot(series, linewidth=0.8)
for cp in true_cps:
    # Only the line at the first change point carries a legend label;
    # all other lines get an empty label.
    if cp == true_cps[0]:
        cp_label = 'True CP'
    else:
        cp_label = ''
    plt.axvline(cp, color='red', linestyle='--', alpha=0.7, label=cp_label)
plt.title('Mean Change Data')
plt.xlabel('Time')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

## 2. Variance Changes

Data where variance/volatility changes at change points.

In [None]:
# Variance-change example: 500 samples, 3 change points, seed=42.
data_dict = make_variance_change(n_samples=500, n_changepoints=3, seed=42)

change_points = data_dict['changepoints']
print("Change points:", change_points)

# Render each variance ratio with two decimals before printing the list.
ratio_strings = [format(ratio, ".2f") for ratio in data_dict['metadata']['variance_ratios']]
print("Variance ratios:", ratio_strings)

# Plot the series with a dashed red line at every change point.
plt.figure(figsize=(12, 4))
plt.plot(data_dict['data'], linewidth=0.8)
for change_point in change_points:
    plt.axvline(change_point, color='red', linestyle='--', alpha=0.7)
plt.title('Variance Change Data')
plt.xlabel('Time')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.show()

## 3. Regression (Linear Model)

Data with covariates where regression coefficients change.

In [None]:
# Regression example: 500 samples, 2 change points, 3 features,
# noise_std=0.5, seed=42.
data_dict = make_regression_change(
    n_samples=500, n_changepoints=2,
    n_features=3, noise_std=0.5,
    seed=42,
)

change_points = data_dict['changepoints']

print("Data shape (y + X):", data_dict['data'].shape)
print("Change points:", change_points)

# One line per segment, with each coefficient shown to two decimals.
print("\nCoefficients per segment:")
for segment_no, segment_coefs in enumerate(data_dict['true_coefs'], start=1):
    formatted_coefs = [f'{value:.2f}' for value in segment_coefs]
    print(f"  Segment {segment_no}: {formatted_coefs}")

# Plot the response variable with a dashed red line at every change point.
plt.figure(figsize=(12, 4))
plt.plot(data_dict['y'], linewidth=0.8)
for change_point in change_points:
    plt.axvline(change_point, color='red', linestyle='--', alpha=0.7)
plt.title('Regression with Coefficient Changes')
plt.xlabel('Time')
plt.ylabel('Response (y)')
plt.grid(True, alpha=0.3)
plt.show()

## 4. GLM: Binomial (Logistic Regression)

Binary (0/1) response data where the GLM coefficients change at the change points.

In [None]:
# Binomial GLM example: 500 samples, 2 change points, 3 features, seed=42.
data_dict = make_glm_change(
    n_samples=500, n_changepoints=2,
    n_features=3, family='binomial',
    seed=42,
)

response = data_dict['y']
change_points = data_dict['changepoints']

print("Data shape:", data_dict['data'].shape)
# Distinct values that occur in the response.
print("Response values:", np.unique(response))
print("Change points:", change_points)

# Plot the response as small markers (no connecting line), with a dashed
# red line at every change point.
plt.figure(figsize=(12, 4))
plt.plot(response, 'o', markersize=2, alpha=0.6)
for change_point in change_points:
    plt.axvline(change_point, color='red', linestyle='--', alpha=0.7)
plt.title('Binomial GLM Data')
plt.xlabel('Time')
plt.ylabel('Binary Response')
plt.grid(True, alpha=0.3)
plt.show()

## 5. GLM: Poisson (Count Data)

Count data with changing intensity.

In [None]:
# Poisson GLM example: 500 samples, 2 change points, 3 features, seed=42.
data_dict = make_glm_change(
    n_samples=500,
    n_changepoints=2,
    n_features=3,
    family='poisson',
    seed=42
)

# Format the bounds through str() rather than printing a list of NumPy
# scalars: under NumPy >= 2 the list repr shows e.g. `np.int64(0)` instead
# of `0`. The printed text is otherwise unchanged.
response_min = data_dict['y'].min()
response_max = data_dict['y'].max()
print("Response range:", f"[{response_min}, {response_max}]")
print("Change points:", data_dict['changepoints'])

# Plot the response with a dashed red line at every change point.
plt.figure(figsize=(12, 4))
plt.plot(data_dict['y'], linewidth=0.8)
for cp in data_dict['changepoints']:
    plt.axvline(cp, color='red', linestyle='--', alpha=0.7)
plt.xlabel('Time')
plt.ylabel('Count')
plt.title('Poisson GLM Data')
plt.grid(True, alpha=0.3)
plt.show()

## 6. ARMA Time Series

Autoregressive moving average models with parameter changes.

In [None]:
# One (p, q) order per segment: ARMA(1,1), AR(2), MA(2).
segment_orders = [(1,1), (2,0), (0,2)]

# ARMA example: 500 samples, 2 change points, seed=42.
data_dict = make_arma_change(
    n_samples=500, n_changepoints=2,
    orders=segment_orders, seed=42,
)

change_points = data_dict['changepoints']
print("Change points:", change_points)
print("ARMA orders per segment:", data_dict['metadata']['orders'])

# Plot the series with a dashed red line at every change point.
plt.figure(figsize=(12, 4))
plt.plot(data_dict['data'], linewidth=0.8)
for change_point in change_points:
    plt.axvline(change_point, color='red', linestyle='--', alpha=0.7)
plt.title('ARMA Time Series with Parameter Changes')
plt.xlabel('Time')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.show()

## 7. GARCH (Volatility Changes)

Financial time series with changing volatility regimes.

In [None]:
# GARCH example: 500 samples, 2 change points, one named volatility regime
# per segment, seed=42.
data_dict = make_garch_change(
    n_samples=500, n_changepoints=2,
    volatility_regimes=['low', 'high', 'medium'],
    seed=42,
)

change_points = data_dict['changepoints']
print("Change points:", change_points)
print("Volatility regimes:", data_dict['metadata']['volatility_regimes'])

# Two stacked panels sharing the x-axis: returns on top, volatility below.
fig, (returns_ax, vol_ax) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)
returns_ax.plot(data_dict['data'], linewidth=0.6)
vol_ax.plot(data_dict['volatility'], color='orange', linewidth=0.8)

# Dashed red line at every change point on both panels. On the lower
# panel, only the line at the first change point carries a legend label.
cp_line_style = dict(color='red', linestyle='--', alpha=0.7)
for cp in change_points:
    returns_ax.axvline(cp, **cp_line_style)
    if cp == change_points[0]:
        cp_label = 'Change Point'
    else:
        cp_label = ''
    vol_ax.axvline(cp, label=cp_label, **cp_line_style)

returns_ax.set_title('GARCH: Returns and Volatility')
returns_ax.set_ylabel('Returns')
returns_ax.grid(True, alpha=0.3)

vol_ax.set_xlabel('Time')
vol_ax.set_ylabel('Volatility')
vol_ax.grid(True, alpha=0.3)
vol_ax.legend()

plt.tight_layout()
plt.show()

## Summary

You can generate data for 7 different change point scenarios:

1. **Mean changes** - `make_mean_change()`
2. **Variance changes** - `make_variance_change()`
3. **Regression** - `make_regression_change()`
4. **Binomial GLM** - `make_glm_change(family='binomial')`
5. **Poisson GLM** - `make_glm_change(family='poisson')`
6. **ARMA** - `make_arma_change()`
7. **GARCH** - `make_garch_change()`

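Each function returns a dictionary containing:
- `data`: The generated time series
- `changepoints`: True change point locations
- `metadata`: Additional information (SNR, parameters, etc.)

Some generators also include extra keys used in this tutorial, such as `y` and `true_coefs` (regression), `y` (GLM), and `volatility` (GARCH).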

---

**Next**: Part 2 - Detecting Change Points