In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

default = pd.read_csv("../../data/Default.csv")
default

Unnamed: 0,default,student,balance,income
0,No,No,729.526495,44361.625074
1,No,Yes,817.180407,12106.134700
2,No,No,1073.549164,31767.138947
3,No,No,529.250605,35704.493935
4,No,No,785.655883,38463.495879
...,...,...,...,...
9995,No,No,711.555020,52992.378914
9996,No,No,757.962918,19660.721768
9997,No,No,845.411989,58636.156984
9998,No,No,1569.009053,36669.112365


In [2]:
import statsmodels.api as sm
from ISLP.models import ModelSpec as MS 

X = MS(["income", "balance"]).fit_transform(default)
y = default["default"] == "Yes"

results = sm.GLM(y, X, family=sm.families.Binomial()).fit()
results.summary()

0,1,2,3
Dep. Variable:,default,No. Observations:,10000.0
Model:,GLM,Df Residuals:,9997.0
Model Family:,Binomial,Df Model:,2.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-789.48
Date:,"Fri, 25 Jul 2025",Deviance:,1579.0
Time:,21:18:40,Pearson chi2:,6950.0
No. Iterations:,9,Pseudo R-squ. (CS):,0.1256
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-11.5405,0.435,-26.544,0.000,-12.393,-10.688
income,2.081e-05,4.99e-06,4.174,0.000,1.1e-05,3.06e-05
balance,0.0056,0.000,24.835,0.000,0.005,0.006


In [15]:
def boot_fn(data: pd.DataFrame, idx):
    df = data.iloc[idx]
    x = MS(["income", "balance"]).fit_transform(df)
    y = df["default"] == "Yes"

    results = sm.GLM(y, x, family=sm.families.Binomial()).fit()
    return results.params

In [21]:
B = 1000
n = default.shape[0]
rng = np.random.default_rng(0)

params_list = []
for _ in range(B):
    idx = rng.choice(default.index, size=n, replace=True)
    params_list.append(boot_fn(default, idx))

params_df = pd.DataFrame(params_list)

print("Bootstrap parameter names:", params_df.columns.tolist())

se = params_df.std(ddof=0)

print("Standard Errors (bootstrap):")
for param in se.index:
    print(f"{param}: {se[param]:.6f}")


Bootstrap parameter names: ['intercept', 'income', 'balance']
Standard Errors (bootstrap):
intercept: 0.435692
income: 0.000005
balance: 0.000230


The estimated standard errors obtained using the **sm.GLM()** function and those computed via the bootstrap method are generally close but not identical. The **sm.GLM()** function provides standard errors based on theoretical assumptions of the model, such as correct model specification, independence, and constant variance of errors. In contrast, the bootstrap method does not rely on these assumptions and instead estimates standard errors empirically by repeatedly resampling the data and refitting the model. As a result, the bootstrap approach can often provide more robust or realistic estimates, especially in cases where model assumptions may not fully hold. In this case, the bootstrap standard errors are slightly smaller, which could reflect sampling variability or mild violations of model assumptions not captured by the GLM-derived errors.