# Test error estimate (with bootstrap methods)

## Imports

In [71]:
import pandas as pd
import numpy as np

from ISLP import load_data
from ISLP.models import summarize

import statsmodels.api as sm


## Data

In [58]:
# Load the 'Default' dataset through ISLP's loader; the columns used later in
# this notebook are 'default', 'balance' and 'income'.
data = load_data('Default')

# Bare expression so the notebook displays the DataFrame.
data

Unnamed: 0,default,student,balance,income
0,No,No,729.526495,44361.625074
1,No,Yes,817.180407,12106.134700
2,No,No,1073.549164,31767.138947
3,No,No,529.250605,35704.493935
4,No,No,785.655883,38463.495879
...,...,...,...,...
9995,No,No,711.555020,52992.378914
9996,No,No,757.962918,19660.721768
9997,No,No,845.411989,58636.156984
9998,No,No,1569.009053,36669.112365


## Split data into predictors and response, and fit the `sm.GLM()` model

In [59]:
# Predictors as a plain two-column array (income, balance); no constant
# column is added, so the fit below has no intercept term.
X = data[['income', 'balance']].values
# Encode the response as 0/1: 1 where default == 'Yes', 0 otherwise.
Y = np.array([1 if d=='Yes' else 0 for d in data['default']])

# NOTE(review): no `family` argument is passed to sm.GLM, so statsmodels uses
# its default family -- confirm that a logistic (Binomial) fit was not intended.
model = sm.GLM(Y, X)
results = model.fit()

# Show the coefficient table (coef, std err, z, p-value) of the fitted model.
summarize(results)

Unnamed: 0,coef,std err,z,P>|z|
x1,-1e-06,7.47e-08,-15.991,0.0
x2,9.7e-05,2.79e-06,34.771,0.0


In [60]:
def estimate_params(data):
    """Fit ``sm.GLM`` of the 0/1 default indicator on income and balance.

    Parameters
    ----------
    data : DataFrame-like
        Must provide the columns ``'income'``, ``'balance'`` and ``'default'``.

    Returns
    -------
    The ``params`` attribute of the fitted results: one coefficient per
    predictor, in the order (income, balance).

    Notes
    -----
    NOTE(review): no constant column is added and no ``family`` is passed to
    ``sm.GLM``, so this is an intercept-free fit with the statsmodels default
    family -- confirm that a logistic (Binomial) model was not intended.
    """
    predictors = data[['income', 'balance']].values

    # 1 marks a row whose 'default' value is 'Yes'; every other value maps to 0.
    response = np.array(
        [int(label == 'Yes') for label in data['default']]
    )

    fitted = sm.GLM(response, predictors).fit()
    return fitted.params

## `boot_fn()` function

In [69]:
def boot_fn(func, data, p, B=1000, seed=42):
    """Estimate bootstrap standard errors of the statistics returned by ``func``.

    For each of ``B`` replicates, ``n = data.shape[0]`` index labels are drawn
    with replacement from ``data.index`` and ``func`` is evaluated on
    ``data.loc[idx]``. The standard error of each component is the standard
    deviation of that component across the replicates (divisor ``B``, i.e.
    ``ddof=0``).

    Parameters
    ----------
    func : callable
        Called as ``func(resampled_data)``; must return ``p`` numeric values.
    data : pandas.DataFrame
        Data to resample. Rows are selected by label through ``data.loc``, so
        this assumes the index labels are unique.
    p : int
        Number of values returned by ``func``.
    B : int, default 1000
        Number of bootstrap replicates; must be at least 1.
    seed : int, default 42
        Seed for ``np.random.default_rng`` so results are reproducible.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(p,)`` with the bootstrap standard errors.

    Raises
    ------
    ValueError
        If ``B`` is smaller than 1.
    """
    if B < 1:
        raise ValueError("B must be a positive integer")

    rng = np.random.default_rng(seed)
    n = data.shape[0]

    # Keep every replicate instead of running sums of x and x**2: the
    # one-pass form sqrt(E[x^2] - E[x]^2) cancels catastrophically (and can
    # even go negative -> NaN) when the statistic is large relative to its
    # spread. The two-pass standard deviation below does not.
    replicates = np.empty((B, p))
    for b in range(B):
        idx = rng.choice(data.index, size=n, replace=True)
        replicates[b] = func(data.loc[idx])

    return replicates.std(axis=0)

In [70]:
# p=2: estimate_params fits two predictors (income, balance) with no intercept,
# so it returns two coefficients. Defaults B=1000 and seed=42 are used.
boot_fn(estimate_params, data, 2)

array([8.36894592e-08, 5.30044257e-06])