# Bias-Variance Tradeoff

We will investigate the bias-variance tradeoff using a simulated example. 

Recall that 
- Bias: systematic error from patterns in the mapping ($X \rightarrow y$) that an underparameterized model fails to capture
- Variance: unreliable estimates of the true parameters caused by overparameterization when only a finite sample is available

We will generate data from the following model
$$\mathbb{E}[y \mid x] \;=\; \sin (\pi x ) \;+\; 0.5\,\sin (2\pi x ).$$

Todo: 
- Run the following cell to invoke the interactive plot.  
- Change the type of basis functions and the number of basis functions.  
- Explain your observations. 

In [1]:
import numpy as np, matplotlib.pyplot as plt, seaborn as sns
from ipywidgets import Button, Output, VBox
from IPython.display import display
from matplotlib.patches import Rectangle


In [2]:
sns.set_theme()
from scipy.interpolate import BSpline
from ipywidgets import Dropdown, IntSlider, Output, VBox, HBox
from IPython.display import display

# ------------------------------------------------------------------
# Simulation setup: noise variance, number of design points, the
# evenly spaced design on [0, 4], and the noiseless regression
# function evaluated on that design.
sigma2 = 1.0                      # Var[ε]
n = 100
x_grid = np.linspace(0, 4, num=n)
f_true = 0.5 * np.sin(2 * np.pi * x_grid) + np.sin(np.pi * x_grid)

# ------------------------------------------------------------------
def design_matrix(x, K, basis):
    """Build the regression design matrix for the chosen basis family.

    No intercept column is added for any basis.

    Parameters
    ----------
    x : array_like
        One-dimensional sample locations.
    K : int
        Basis-size control.

        * ``"Polynomial"``: K columns ``x**1 ... x**K``.
        * ``"Trigonometric"``: 2K columns, ``sin(j*pi*x/2)`` followed by
          ``cos(j*pi*x/2)`` for ``j = 1..K``.
        * ``"B-spline ..."``: K columns when ``K >= degree + 1``;
          otherwise ``degree + 1`` columns, the fewest a clamped knot
          vector can carry.
    basis : str
        ``"Polynomial"``, ``"Trigonometric"``, or a string starting with
        ``"B-spline"``. A B-spline name containing ``"linear"`` selects
        degree 1; any other B-spline name selects degree 3.

    Returns
    -------
    numpy.ndarray
        Array with one row per entry of ``x`` and one column per basis
        function.

    Raises
    ------
    ValueError
        If ``basis`` is not one of the recognised names.
    """
    x = np.asarray(x)
    if basis == "Polynomial":
        cols = [x**k for k in range(1, K + 1)]
    elif basis == "Trigonometric":
        cols = [np.sin(j * np.pi * x / 2) for j in range(1, K + 1)]
        cols += [np.cos(j * np.pi * x / 2) for j in range(1, K + 1)]
    elif basis.startswith("B-spline"):
        deg = 1 if "linear" in basis else 3
        # Use the data range rather than x[0]/x[-1] so unsorted input
        # still yields a non-decreasing knot vector.
        lo, hi = x.min(), x.max()
        # K basis functions need K - deg - 1 interior knots. Clamp at zero:
        # a negative count used to reach np.linspace and raise for small K
        # (e.g. cubic with K = 1).
        n_interior = max(K - deg - 1, 0)
        interior = np.linspace(lo, hi, n_interior + 2)[1:-1]
        # Clamped knot vector: each boundary repeated deg + 1 times.
        t = np.r_[[lo] * (deg + 1), interior, [hi] * (deg + 1)]
        n_basis = len(t) - deg - 1
        # Evaluating a spline whose coefficient vector is the i-th unit
        # vector gives the i-th basis function.
        unit = np.eye(n_basis)
        cols = [BSpline(t, unit[i], deg)(x) for i in range(n_basis)]
    else:
        raise ValueError("Unknown basis")
    return np.column_stack(cols)

def risk_components(K, basis):
    """Return squared bias, variance and total risk of the least-squares fit.

    The fit regresses on ``design_matrix(x_grid, K, basis)`` over the fixed
    design ``x_grid``. With hat matrix ``H``:

    * squared bias is the mean over design points of ``((I - H) f_true)**2``;
    * variance is ``sigma2 * trace(H @ H.T) / n``.

    Parameters
    ----------
    K : int
        Basis-size control passed to ``design_matrix``.
    basis : str
        Basis family name passed to ``design_matrix``.

    Returns
    -------
    tuple
        ``(bias2, variance, bias2 + variance)``.
    """
    X = design_matrix(x_grid, K, basis)
    # Build the hat matrix from the pseudo-inverse of X itself. Going
    # through pinv(X.T @ X) squares the condition number, which makes
    # high-degree bases numerically singular and silently drops directions.
    H = X @ np.linalg.pinv(X)
    residual = f_true - H @ f_true
    bias2 = np.mean(residual ** 2)
    # trace(H @ H.T) is the sum of squared entries of H; this avoids an
    # extra n-by-n matrix product.
    variance = sigma2 * np.sum(H ** 2) / n
    return bias2, variance, bias2 + variance

# ------------------------------------------------------------------
# Interactive controls: the output area that holds the figure, the
# slider choosing the basis size, and the basis-family selector.
out = Output()
K_slider = IntSlider(
    description="K basis:",
    min=1,
    max=20,
    step=1,
    value=3,
    continuous_update=False,
)
basis_dd = Dropdown(
    description="Basis:",
    options=[
        "Polynomial",
        "Trigonometric",
        "B-spline (linear)",
        "B-spline (cubic)",
    ],
    value="Polynomial",
)

def redraw(*_):
    """Re-render the figure for the current widget selections.

    Positional arguments are accepted and ignored, so the function can be
    called directly or registered as a widget observer.

    The top panel shows ``f_true`` and its least-squares projection onto
    the selected basis. The bottom panel shows squared bias, variance and
    total risk for every K from 1 to the slider maximum, with the selected
    K marked in red. The figure is drawn into the ``out`` widget.
    """
    basis = basis_dd.value
    K_sel = K_slider.value
    Ks = np.arange(1, K_slider.max + 1)
    bias, var, tot = zip(*(risk_components(k, basis) for k in Ks))
    # fitted curve for selected K
    X_sel = design_matrix(x_grid, K_sel, basis)
    # Project with the pseudo-inverse of X_sel itself. Going through
    # pinv(X_sel.T @ X_sel) squares the condition number and degrades the
    # fit for large K. The parentheses avoid forming an n-by-n matrix.
    y_hat = X_sel @ (np.linalg.pinv(X_sel) @ f_true)

    with out:
        out.clear_output(wait=True)
        fig, axes = plt.subplots(2, 1, figsize=(7, 7),
                                 height_ratios=[2, 1.4])

        # top: true vs fitted
        axes[0].plot(x_grid, f_true, "k--", label="true f(x)")
        axes[0].plot(x_grid, y_hat, label=f"fit (K={K_sel})")
        axes[0].set(xlabel="x", ylabel="y",
                    title=f"{basis} basis  •  K = {K_sel}")
        axes[0].legend(); axes[0].grid(alpha=.3)

        # bottom: bias², variance, total
        axes[1].plot(Ks, bias, label="bias²")
        axes[1].plot(Ks, var,  label="variance")
        axes[1].plot(Ks, tot,  color="black", label="total risk")
        # Ks starts at 1, so K_sel - 1 indexes the selected K.
        axes[1].scatter(K_sel, tot[K_sel-1], color="red", zorder=5)
        axes[1].set(xlabel="K", ylabel="Risk"); axes[1].grid(alpha=.3)
        axes[1].legend()

        plt.tight_layout()
        display(fig)      # render inside Output widget
        plt.close(fig)    # prevent duplicate outside widget

# Re-render whenever either control changes its value.
for _control in (basis_dd, K_slider):
    _control.observe(redraw, names="value")

# Controls sit in a row above the plot area; draw once up front so the
# figure is visible before any interaction.
display(VBox([HBox([basis_dd, K_slider]), out]))
redraw()   # initial draw


VBox(children=(HBox(children=(Dropdown(description='Basis:', options=('Polynomial', 'Trigonometric', 'B-spline…