#### Exercise 14 - Solution - Validate Regression Discontinuity

In [15]:
import pandas as pd
import statsmodels.formula.api as smf
from scipy.stats import ttest_ind


In [19]:

# Load the employee sales data used throughout this exercise.
df = pd.read_csv("../data/rd_employee_sales_data.csv")

# Covariate check on the full sample: compare the share of part-time
# workers between trained (Training == 1) and untrained (Training == 0) employees.
print("Part-Time Share by Training Status:")
print(df.groupby("Training")["Part_Time"].mean())

# Split the Part_Time column by training status for the two-sample test.
is_trained = df["Training"].eq(1)
is_untrained = df["Training"].eq(0)
trained = df.loc[is_trained, "Part_Time"]
not_trained = df.loc[is_untrained, "Part_Time"]

# Two-sample t-test of the Part_Time means. ttest_ind is called with its
# defaults, i.e. the pooled-variance (equal_var=True) Student's t-test.
t_stat, p_value = ttest_ind(trained, not_trained)
print("\nT-test for Part-Time mean by Training status:")
print(f"T-statistic: {t_stat}, P-value: {p_value}")

Part-Time Share by Training Status:
Training
0    0.28202
1    0.00000
Name: Part_Time, dtype: float64

T-test for Part-Time mean by Training status:
T-statistic: -8.584750131477158, P-value: 3.450607472043139e-17


In [21]:
# Restrict the sample to workers whose running variable (Year1_Sales) lies
# within 10 units of the cutoff, endpoints included.
cutoff = 40
subset = df.loc[df["Year1_Sales"].between(cutoff - 10, cutoff + 10)].copy()

# Repeat the part-time share comparison on this local sample only.
print("Part-Time Share by Training Status: Workers Near Cutoff:")
print(subset.groupby("Training")["Part_Time"].mean())


# No t-test is run here: the two group means are exactly equal, so a test
# could never reject the null hypothesis of no difference.

Part-Time Share by Training Status: Workers Near Cutoff:
Training
0    0.0
1    0.0
Name: Part_Time, dtype: float64


In [14]:
# Placebo test: estimate the RD model at a fake cutoff located entirely
# below the true one.
placebo_cutoff = 30  # fake cutoff used only for this placebo check

# Keep workers strictly below the true cutoff and overwrite Training with a
# placebo indicator: 1 when Year1_Sales is at or above the fake cutoff, else 0.
placebo_df = (
    df[df["Year1_Sales"] < cutoff]
    .copy()
    .assign(Training=lambda d: (d["Year1_Sales"] >= placebo_cutoff).astype(int))
)

# Analysis window: +/- 10 around the placebo cutoff, endpoints included.
in_placebo_window = placebo_df["Year1_Sales"].between(
    placebo_cutoff - 10, placebo_cutoff + 10
)
placebo_subset = placebo_df.loc[in_placebo_window].copy()

# Fit the same linear specification at the placebo cutoff; the coefficient
# on Training is the estimated jump at the fake threshold.
placebo_model = smf.ols(
    "Year2_Sales ~ Training + Year1_Sales",
    data=placebo_subset,
).fit()
print("\nPlacebo RD Model Summary:")
print(placebo_model.summary())


Placebo RD Model Summary:
                            OLS Regression Results                            
Dep. Variable:            Year2_Sales   R-squared:                       0.365
Model:                            OLS   Adj. R-squared:                  0.363
Method:                 Least Squares   F-statistic:                     189.7
Date:                Wed, 02 Jul 2025   Prob (F-statistic):           8.02e-66
Time:                        13:24:06   Log-Likelihood:                -2196.9
No. Observations:                 664   AIC:                             4400.
Df Residuals:                     661   BIC:                             4413.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       2.5456 