In [None]:
# from: https://lost-stats.github.io/Model_Estimation/Research_Design/instrumental_variables.html

from linearmodels import IV2SLS
import pandas as pd
import numpy as np

# Load the CigarettesSW panel; the first CSV column is used as the row index.
df = pd.read_csv(
    'https://vincentarelbundock.github.io/Rdatasets/csv/AER/CigarettesSW.csv',
    index_col=0,
)

# Goal: estimate how the (log) cigarette price affects the (log) number of
# packs smoked. Price is treated as endogenous and instrumented with a
# tax-based variable; per-capita income enters as an exogenous control.

# Deflate every monetary quantity by the consumer price index.
df['rprice'] = df['price'].div(df['cpi'])
# Income is first put on a per-capita basis, then deflated.
df['rincome'] = df['income'].div(df['population']).div(df['cpi'])
# Instrument: the gap between the `taxs` and `tax` columns, deflated by CPI.
df['tdiff'] = df['taxs'].sub(df['tax']).div(df['cpi'])

# linearmodels formula layout: 'y ~ exog + [endog ~ instruments]'.
# The explicit '1' requests an intercept.
formula = 'np.log(packs) ~ 1 + np.log(rincome) + [np.log(rprice) ~ tdiff]'

# Build the two-stage least squares model from the formula and the frame.
mod = IV2SLS.from_formula(formula, data=df)

# Estimate with the library's default settings.
res = mod.fit()

# Last expression of the cell, so the notebook renders the summary tables.
res.summary

0,1,2,3
Dep. Variable:,np.log(packs),R-squared:,0.5478
Estimator:,IV-2SLS,Adj. R-squared:,0.5380
No. Observations:,96,F-statistic:,78.788
Date:,"Fri, Feb 13 2026",P-value (F-stat),0.0000
Time:,17:48:09,Distribution:,chi2(2)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,9.6904,0.6037,16.051,0.0000,8.5071,10.874
np.log(rincome),0.2483,0.1779,1.3954,0.1629,-0.1005,0.5971
np.log(rprice),-1.2145,0.1985,-6.1194,0.0000,-1.6034,-0.8255
