# Brent Oil Price Multi Change Point Modeling (Raw Prices) - Colab Version

Setups and Imports

In [1]:
import pymc as pm
import pytensor.tensor as pt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import arviz as az

from google.colab import files
uploaded = files.upload()

# Load Brent oil price data
df = pd.read_csv("brent_oil_log_returns.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values("Date").reset_index(drop=True)
prices = df['Price'].values
dates = df['Date'].values
n = len(prices)

Saving brent_oil_log_returns.csv to brent_oil_log_returns.csv


Model: Multi Change Point in Raw Price Mean

In [2]:
# Set number of change points
K = 3

# Define the model
with pm.Model() as model:
    # Sample unordered change points
    tau_raw = pm.DiscreteUniform("tau_raw", lower=0, upper=n, shape=K)
    tau = pm.Deterministic("tau", pt.sort(tau_raw))  # Use PyTensor's sort

    # Segment means
    mu = pm.Normal("mu", mu=np.mean(prices), sigma=np.std(prices), shape=K+1)

    # Shared standard deviation
    sigma = pm.HalfNormal("sigma", sigma=10)

    # Build piecewise mean array
    mu_t = pt.zeros(n)
    idx_start = 0

    for i in range(K):
        idx_end = tau[i]
        mu_t = pt.set_subtensor(mu_t[idx_start:idx_end], mu[i])
        idx_start = idx_end

    mu_t = pt.set_subtensor(mu_t[idx_start:], mu[K])  # Final segment

    # Likelihood
    obs = pm.Normal("obs", mu=mu_t, sigma=sigma, observed=prices)

    # Sampling
    trace = pm.sample(5000, tune=2000, target_accept=0.95, random_seed=42)

Output()

ERROR:pymc.stats.convergence:The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [3]:
# Summary
summary = az.summary(trace, var_names=["tau", "mu", "sigma"])
print(summary)

# Extract means and taus
mean_tau = trace.posterior["tau"].mean(dim=("chain", "draw")).values.astype(int).flatten()
change_dates = [dates[i] for i in mean_tau]
mu_vals = trace.posterior["mu"].mean(dim=("chain", "draw")).values.flatten()

# Print info
print("\n Detected Change Points:")
for i, d in enumerate(change_dates):
    print(f"  Change {i+1}: {d}")

            mean      sd    hdi_3%   hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
tau[0]  3784.625  31.893  3743.000  3853.000      0.965    0.569    1176.0   
tau[1]  4376.050  13.382  4359.000  4409.000      1.179    0.345      70.0   
tau[2]  4787.673  47.322  4638.000  4813.000     19.077   26.673      15.0   
mu[0]     19.301   0.276    18.788    19.813      0.003    0.003    9878.0   
mu[1]     29.578   0.812    28.076    31.118      0.016    0.009    2637.0   
mu[2]     52.459   1.714    47.997    55.079      0.585    0.709      18.0   
mu[3]     77.346   0.362    76.606    78.005      0.091    0.074      21.0   
sigma     17.111   0.137    16.863    17.380      0.009    0.001     225.0   

        ess_tail  r_hat  
tau[0]    1980.0   1.00  
tau[1]      65.0   1.03  
tau[2]      10.0   1.09  
mu[0]     7609.0   1.00  
mu[1]     4103.0   1.00  
mu[2]       11.0   1.07  
mu[3]       14.0   1.06  
sigma     2527.0   1.01  

 Detected Change Points:
  Change 1: 2002-04-17T00:00:00.00000

Build and save change point table

In [7]:
entries = []
for i, idx in enumerate(mean_tau):
    mean_before = float(mu_vals[i])
    mean_after = float(mu_vals[i+1]) if i+1 < len(mu_vals) else None
    delta = mean_after - mean_before if mean_after is not None else None
    pct_change = (delta / mean_before * 100) if (mean_before != 0 and delta is not None) else None

    entries.append({
        "change_number": i + 1,
        "change_index": int(idx),
        "change_date": pd.to_datetime(change_dates[i]).strftime("%Y-%m-%d"),
        "mean_before": mean_before,
        "mean_after": mean_after,
        "delta": delta,
        "pct_change": pct_change
    })

cp_df_multi = pd.DataFrame(entries)

# Save to CSV
out_csv = "multi_change_points.csv"
cp_df_multi.to_csv(out_csv, index=False)

print(f"\nSaved {len(entries)} change points to {out_csv}")
print(cp_df_multi)



Saved 3 change points to multi_change_points.csv
   change_number  change_index change_date  mean_before  mean_after  \
0              1          3784  2002-04-17    19.300539   29.578002   
1              2          4376  2004-08-05    29.578002   52.458963   
2              3          4787  2006-03-09    52.458963   77.345927   

       delta  pct_change  
0  10.277463   53.249614  
1  22.880961   77.358033  
2  24.886964   47.440824  
