In [1]:
import numpy as np
import pandas as pd
import patsy
import statsmodels.api as sm
from statsmodels.genmod.bayes_mixed_glm import BinomialBayesMixedGLM
import warnings

# Suppress specific UserWarning
warnings.filterwarnings(
    "ignore",
    message="Laplace fitting did not converge"
)

# Simulate hierarchical data
np.random.seed(42)
n_depts, n_per_dept = 10, 30
dept_labels = ['D{}'.format(i + 1) for i in range(n_depts)]
dept_ids = np.repeat(dept_labels, n_per_dept)
true_intercepts = np.random.normal(50, 5, size=n_depts)
dept_map = dict(zip(dept_labels, true_intercepts))

# Generate continuous 'score'
score = np.array([dept_map[d] for d in dept_ids]) + \
    np.random.normal(0, 2, size=n_depts * n_per_dept)

# Generate binary 'hired' via logistic link on score
logit_p = 1 / (1 + np.exp(-(score - score.mean()) / 5))
hired = np.random.binomial(1, logit_p, size=n_depts * n_per_dept)

# Assemble DataFrame
df = pd.DataFrame({
    'dept': dept_ids,
    'score': score,
    'hired': hired
})
df['dept'] = df['dept'].astype('category')

# Build design matrices for fixed and random effects
exog = patsy.dmatrix(
    '1 + score',
    df,
    return_type='dataframe'
)
exog_vc = patsy.dmatrix(
    '0 + C(dept)',
    df,
    return_type='dataframe'
)
ident = np.zeros(exog_vc.shape[1], dtype=int)

# Fit the BinomialBayesMixedGLM model
model = BinomialBayesMixedGLM(
    endog=df['hired'].values,
    exog=exog,
    exog_vc=exog_vc,
    ident=ident
)

result = model.fit_map(
    method='bfgs',
    minim_opts={
        'maxiter': 3000,
        'gtol': 1e-6
    }
)

# Print summary of fixed and random effects
print(result.summary())

               Binomial Mixed GLM Results
          Type Post. Mean Post. SD   SD  SD (LB) SD (UB)
--------------------------------------------------------
Intercept    M    -6.3008   1.2524                      
score        M     0.1206   0.0240                      
VC_1         V   -10.0000   1.0000 0.000   0.000   0.000
Parameter types are mean structure (M) and variance
structure (V)
Variance parameters are modeled as log standard
deviations


In [2]:
%load_ext rpy2.ipython

In [3]:
%%R
# Install and load library
if (!requireNamespace("lme4", quietly = TRUE)) {
  invisible(
    suppressWarnings(
      suppressMessages(
        install.packages(
          "lme4",
          repos = "https://cloud.r-project.org",
          dependencies = TRUE,
          quiet = TRUE
        )
      )
    )
  )
}

suppressPackageStartupMessages(library(lme4))

# Simulate hierarchical data
set.seed(42)
n_depts <- 10
n_per_dept <- 30
dept <- factor(rep(paste0("D", 1:n_depts), each = n_per_dept))
true_intercepts <- rnorm(n_depts, mean = 50, sd = 5)

# Generate continuous 'score'
score <- true_intercepts[as.integer(dept)] +
         rnorm(n_depts * n_per_dept, mean = 0, sd = 2)

# Generate binary 'hired' via logistic link on score
logit_p <- plogis((score - mean(score)) / 5)
hired <- rbinom(n_depts * n_per_dept, size = 1, prob = logit_p)

# Assemble DataFrame
df <- data.frame(dept, score, hired)

# Fit mixed-effects logistic regression
m_logit <- glmer(
  hired ~ score + (1 | dept),
  family = binomial(link = "logit"),
  data = df
)

# Summarize results
summary(m_logit)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: hired ~ score + (1 | dept)
   Data: df

      AIC       BIC    logLik -2*log(L)  df.resid 
    356.9     368.0    -175.4     350.9       297 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.5461 -0.7650  0.2591  0.8130  2.5659 

Random effects:
 Groups Name        Variance Std.Dev.
 dept   (Intercept) 0.02505  0.1583  
Number of obs: 300, groups:  dept, 10

Fixed effects:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -12.74850    1.90052  -6.708 1.97e-11 ***
score         0.24304    0.03623   6.708 1.97e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
      (Intr)
score -0.997
