In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import PoissonRegressor
from glmext.glm import NegativeBinomialRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_poisson_deviance


def generate_nb_timeseries(
    n=500,
    seed=42,
    k=2,          # NB dispersion parameter (k > 0 => overdispersion)
):
    rng = np.random.default_rng(seed)
    t = np.arange(n)

    # Covariates
    trend = 0.01 * t
    seasonality = 0.5 * np.sin(2 * np.pi * t / 30)
    promo = rng.binomial(1, 0.1, size=n)

    X = np.column_stack([trend, seasonality, promo])

    beta = np.array([0.8, 1.2, 0.6])
    intercept = 1.0

    # Mean (log-link)
    eta = intercept + X @ beta
    mu = np.exp(eta)

    # Negative Binomial via Gammaâ€“Poisson mixture
    # Var(Y) = mu + k * mu^2
    gamma_shape = 1.0 / k
    gamma_scale = k * mu

    lambda_t = rng.gamma(shape=gamma_shape, scale=gamma_scale)
    y = rng.poisson(lambda_t)

    df = pd.DataFrame({
        "y": y,
        "trend": trend,
        "seasonality": seasonality,
        "promo": promo,
    })

    return df


# ---- Generate data ----
df = generate_nb_timeseries(n=600)
X = df[["trend", "seasonality", "promo"]].values
y = df["y"].values

# ---- Train / test split (time-series aware) ----
split = int(0.8 * len(df))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# ---- Fit Poisson ----
pois = PoissonRegressor(
    alpha=0.0,
    max_iter=300,
    tol=1e-7,
)
pois.fit(X_train, y_train)

# ---- Fit Negative Binomial ----
nb = NegativeBinomialRegressor(
    alpha=0.0,
    k=0.7,
    max_iter=300,
    tol=1e-7,
)
nb.fit(X_train, y_train)

# ---- Predictions ----
y_pred_pois = pois.predict(X_test)
y_pred_nb = nb.predict(X_test)

# ---- Evaluation ----
print("Poisson deviance:",
      mean_poisson_deviance(y_test, y_pred_pois))

print("NB deviance:",
      mean_poisson_deviance(y_test, y_pred_nb))

print("\nMean predictions:")
print("Poisson:", y_pred_pois.mean())
print("NB:", y_pred_nb.mean())

print("\nVariance of predictions:")
print("Poisson:", y_pred_pois.var())
print("NB:", y_pred_nb.var())


Poisson deviance: 302.8179091777786
NB deviance: 294.39630267787834

Mean predictions:
Poisson: 248.6572288253079
NB: 213.86252139869572

Variance of predictions:
Poisson: 22398.3729876202
NB: 12757.29919435145


In [2]:
# ---- sklearn API compliance ----
assert hasattr(nb, "coef_")
assert hasattr(nb, "intercept_")

# Score method (usually log-likelihood or deviance-based)
print("NB score:", nb.score(X_test, y_test))

# Refit safety
nb.fit(X_train, y_train)
nb.fit(X_train, y_train)

# Works with pipelines
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("nb", NegativeBinomialRegressor(k=0.7))
])

pipe.fit(X_train, y_train)
pipe.predict(X_test)


NB score: -0.00032362743809311034


array([ 59.76543567,  64.26179085,  68.89067273,  73.4233785 ,
        77.59734167,  81.13908206,  83.79510529,  85.36558999,
        85.73376954,  84.8840065 ,  82.90399425,  79.97059997,
        76.3231034 ,  72.23043469,  67.95959359,  63.75088005,
        59.80281065,  56.26679009,  53.24959974,  50.82086052,
        49.02266463,  47.87915739,  70.68613513,  47.60905076,
        48.50133779,  50.08899046,  52.37517961,  55.35259736,
        58.99455682,  63.24403122,  68.00209287,  73.11811954,
        78.38493725,  83.54232422,  88.29152796,  92.32137827,
        95.34344506,  97.1303683 ,  97.54928904,  96.58241471,
        94.32952431,  90.99186018,  86.84167876,  82.18497318,
        77.32554014,  72.53679685, 101.46297032,  95.46366787,
        60.58826789,  57.82480858,  55.7787918 ,  54.47769051,
        53.93772675,  54.17035876,  55.18561757,  56.99207481,
        59.59333833,  62.98109307,  67.12497428,  71.96009594,
       115.37408118,  83.19499557,  89.18766714, 141.74