# Ethical Reminder (from the Lab Alliance Compact)

- Data belongs to truth, not expectations; document steps transparently.
- Perform **your own analysis**; credit all sources and collaborators properly.
- Communicate respectfully; ask for help early; uphold psychological safety.

> By proceeding, you acknowledge the Compact and agree to act accordingly.

# Gaussian peak with linear background

**Learning goals**

- Demonstrate bias when background is neglected.
- Fit the wrong model (Gaussian + constant offset, no slope term) vs. the correct model (Gaussian + linear background).
- Compare parameter estimates, residuals, and AIC.
- Highlight that this is a systematic modeling issue.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

# Seeded generator (seed 7) so the simulated noise is identical on every run.
rng = np.random.default_rng(7)

def gauss(x, A, x0, sigma, C):
    """Gaussian peak sitting on a constant offset.

    Evaluates ``A * exp(-(x - x0)**2 / (2 * sigma**2)) + C``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which to evaluate the model.
    A : float
        Peak amplitude.
    x0 : float
        Peak centre.
    sigma : float
        Peak width (standard deviation of the Gaussian).
    C : float
        Constant offset added to the peak.

    Returns
    -------
    float or numpy.ndarray
        Model value(s), same shape as ``x``.
    """
    squared_offset = (x - x0)**2
    peak = A * np.exp(-squared_offset / (2 * sigma**2))
    return peak + C

def gauss_linbg(x, A, x0, sigma, m, c):
    """Gaussian peak on top of a straight-line background.

    Evaluates ``A * exp(-(x - x0)**2 / (2 * sigma**2)) + (m * x + c)``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which to evaluate the model.
    A : float
        Peak amplitude.
    x0 : float
        Peak centre.
    sigma : float
        Peak width (standard deviation of the Gaussian).
    m : float
        Background slope.
    c : float
        Background intercept.

    Returns
    -------
    float or numpy.ndarray
        Model value(s), same shape as ``x``.
    """
    squared_offset = (x - x0)**2
    peak = A * np.exp(-squared_offset / (2 * sigma**2))
    background = m * x + c
    return peak + background

# Ground truth (with background!)
A_true = 5.0       # peak amplitude
x0_true = 0.5      # peak centre
sigma_true = 0.8   # peak width
m_true = 0.15      # background slope
c_true = 0.3       # background intercept

N = 120
sigma_y = 0.15  # homoscedastic noise for simplicity

# Evaluate the noiseless curve on a uniform grid, then add Gaussian noise.
x = np.linspace(-3, 4, num=N)
y_true = gauss_linbg(x, A_true, x0_true, sigma_true, m_true, c_true)
y = y_true + rng.normal(loc=0, scale=sigma_y, size=N)

# Overlay the noisy samples on the noiseless generating curve.
plt.figure(figsize=(6, 4))
plt.plot(x, y, 'o', ms=3, label='data')
plt.plot(x, y_true, '-', label='true (gauss + linear bg)')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.title('Data with linear background')
plt.show()

## Wrong model: Gaussian-only
The "wrong" model below is a Gaussian plus a constant offset `C`, with no slope term. Ignoring the linear trend forces the Gaussian parameters (and the offset) to absorb the slope, so we expect biased estimates.

In [None]:
def gauss_only(x, A, x0, sigma, C):
    """Misspecified model: Gaussian peak plus a constant offset, no slope.

    Evaluates ``A * exp(-(x - x0)**2 / (2 * sigma**2)) + C``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which to evaluate the model.
    A, x0, sigma : float
        Peak amplitude, centre and width.
    C : float
        Constant offset.

    Returns
    -------
    float or numpy.ndarray
        Model value(s), same shape as ``x``.
    """
    squared_offset = (x - x0)**2
    peak = A * np.exp(-squared_offset / (2 * sigma**2))
    return peak + C


# Starting guess for the fit, ordered as [A, x0, sigma, C].
p0_wrong = [4.0, 0.0, 1.0, 0.0]

# Least-squares fit of the offset-Gaussian model. The reported uncertainties
# are the square roots of the diagonal of the returned covariance matrix.
popt_w, pcov_w = curve_fit(gauss_only, x, y, p0=p0_wrong)
perr_w = np.sqrt(np.diag(pcov_w))

print("WRONG model (no linear background):")
labels_w = ('A', 'x0', 'sigma', 'C')
for name, val, err in zip(labels_w, popt_w, perr_w):
    print(f"{name:6s} = {val:8.3f} ± {err:6.3f}")

# Best-fit curve and residuals (data minus fit), reused by the later plots.
y_fit_w = gauss_only(x, *popt_w)
res_w = y - y_fit_w

import math
def aic(y_obs, y_hat, k, sigma):
    """Akaike information criterion for a least-squares fit (relative scale).

    Computes ``n*log(RSS/n) + 2*k``, where ``RSS`` is the residual sum of
    squares and ``n`` the number of observations. This is the Gaussian-error
    AIC with the noise variance estimated from the fit (``RSS/n``), up to an
    additive constant that is the same for every model fitted to the same
    data. Only differences between models fitted to the same ``y_obs`` are
    meaningful.

    Parameters
    ----------
    y_obs : array-like
        Observed values.
    y_hat : array-like
        Model predictions at the same points as ``y_obs``.
    k : int
        Number of fitted model parameters.
    sigma : float
        Accepted so existing call sites keep working, but NOT used: this
        form of the criterion does not depend on a known noise level.

    Returns
    -------
    float
        The relative AIC value (lower is better).
    """
    # Coerce so plain Python sequences work as well as NumPy arrays.
    y_obs = np.asarray(y_obs)
    y_hat = np.asarray(y_hat)
    rss = np.sum((y_obs - y_hat)**2)
    n = len(y_obs)
    return n*np.log(rss/n) + 2*k

# k=4 fitted parameters for the wrong model: A, x0, sigma, C.
AIC_w = aic(y_obs=y, y_hat=y_fit_w, k=4, sigma=sigma_y)
AIC_w  # bare expression: echoed as the cell output when run in a notebook

In [None]:
# Top panel: data with the misspecified fit. Bottom panel: its residuals.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 6), sharex=True)

ax1.plot(x, y, 'o', ms=3, label='data')
ax1.plot(x, y_fit_w, '-', label='wrong fit (gauss+const)')
ax1.set_ylabel('y')
ax1.legend()
ax1.set_title('Wrong model fit')

ax2.axhline(0, color='k', lw=1)  # zero line to make trends in the residuals visible
ax2.plot(x, res_w, 'o', ms=3)
ax2.set_xlabel('x')
ax2.set_ylabel('residuals')
ax2.set_title('Residuals (systematic trend?)')

plt.tight_layout()
plt.show()

## Correct model: Gaussian + linear background
Including the background should reduce residual structure and recover unbiased parameters.

In [None]:
# Starting guess for the fit, ordered as [A, x0, sigma, m, c].
p0_right = [4.0, 0.0, 1.0, 0.1, 0.0]

# Least-squares fit of the full model. The reported uncertainties are the
# square roots of the diagonal of the returned covariance matrix.
popt_r, pcov_r = curve_fit(gauss_linbg, x, y, p0=p0_right)
perr_r = np.sqrt(np.diag(pcov_r))

print("RIGHT model (gauss + linear background):")
labels_r = ('A', 'x0', 'sigma', 'm', 'c')
for name, val, err in zip(labels_r, popt_r, perr_r):
    print(f"{name:6s} = {val:8.3f} ± {err:6.3f}")

# Best-fit curve and residuals (data minus fit), reused by the comparison plot.
y_fit_r = gauss_linbg(x, *popt_r)
res_r = y - y_fit_r

# k=5 fitted parameters for the right model: A, x0, sigma, m, c.
AIC_r = aic(y, y_fit_r, k=5, sigma=sigma_y)
print(f"AIC wrong: {AIC_w:.2f}   AIC right: {AIC_r:.2f}   (lower is better)")

In [None]:
# 2x2 comparison grid. Top row: data with each fitted curve. Bottom row: the
# corresponding residuals. Left column: wrong model; right column: right model.
fig, axs = plt.subplots(2, 2, figsize=(10, 7), sharex='col')

axs[0, 0].plot(x, y, 'o', ms=3, label='data')
axs[0, 0].plot(x, y_fit_w, '-', label='wrong')
axs[0, 0].set_title('Wrong fit')

axs[0, 1].plot(x, y, 'o', ms=3, label='data')
axs[0, 1].plot(x, y_fit_r, '-', label='right')
axs[0, 1].set_title('Right fit')

axs[1, 0].axhline(0, color='k', lw=1)
axs[1, 0].plot(x, res_w, 'o', ms=3)
axs[1, 0].set_title('Residuals (wrong)')

axs[1, 1].axhline(0, color='k', lw=1)
axs[1, 1].plot(x, res_r, 'o', ms=3)
axs[1, 1].set_title('Residuals (right)')

for ax in axs[1, :]:
    ax.set_xlabel('x')
    ax.set_ylabel('residuals')
for ax in axs[0, :]:
    ax.set_ylabel('y')
    # The curves carry labels, so draw the legend; without this call the
    # labels are never shown.
    ax.legend()

plt.tight_layout()
plt.show()

### Interpretation & Takeaways

- **Bias from model misspecification**: Ignoring a linear background forces the Gaussian parameters to absorb the slope → biased A, x0, σ estimates.
- **Residual trends**: The wrong model leaves systematic trends in residuals; the right model residuals are closer to structureless noise.
- **Model selection**: Compare fits with AIC (or BIC). A lower AIC indicates a better trade-off between fit quality and model complexity.
- **Systematic vs. statistical**: This is a **systematic** modeling problem. More data won’t remove bias if the model is wrong.
- **Experiment design**: Use off-peak regions to estimate background; include background terms in the model when justified.

## Exercises
1. Increase the true background slope (m_true) to 0.3; refit both models. How do parameter biases change?
2. Make noise heteroscedastic (σ increasing with x). Does that alter residual patterns or parameter uncertainties?
3. Add a quadratic background term; test if the linear model remains adequate (compare AIC).
4. Try a residual bootstrap for the **correct** model and compare the CI widths to the covariance-based errors.