**Note**:

The export code was generated using OpenAI's GPT-5.2 model.

# Replicate Findings

In [None]:
from docx import Document

def add_df_table_to_doc(doc, df, title=None, float_fmt="{:.3f}"):
    """Append a DataFrame to a Word document as a styled table.

    Args:
        doc: Document-like object providing ``add_paragraph`` and
            ``add_table`` (a python-docx ``Document`` in this notebook).
        df: DataFrame to render. Its column labels become the header row;
            the index is not written.
        title: Optional caption added as a paragraph above the table.
        float_fmt: ``str.format`` template applied to values in
            floating-point columns only.

    Missing values in float and integer columns are rendered as empty
    cells. Integer columns (e.g. observation counts) are written without
    decimals, and every other column is written with ``str``.
    """
    if title:
        doc.add_paragraph(title)

    # Render each column to text up front: by position, so duplicate column
    # labels are handled, and by dtype, so counts are not printed as floats.
    text_columns = []
    for position, dtype in enumerate(df.dtypes):
        values = df.iloc[:, position]
        if pd.api.types.is_float_dtype(dtype):
            texts = ["" if pd.isna(v) else float_fmt.format(v) for v in values]
        elif pd.api.types.is_integer_dtype(dtype):
            texts = ["" if pd.isna(v) else str(v) for v in values]
        else:
            texts = [str(v) for v in values]
        text_columns.append(texts)

    table = doc.add_table(rows=1, cols=len(df.columns))
    table.style = "Light Shading Accent 1"
    for cell, label in zip(table.rows[0].cells, df.columns):
        cell.text = str(label)

    # Transpose the per-column text back into rows, one table row each.
    for row_texts in zip(*text_columns):
        cells = table.add_row().cells
        for cell, text in zip(cells, row_texts):
            cell.text = text

def model_to_coef_df(model, model_name="(1)"):
    """Return a tidy coefficient table for a fitted statsmodels result.

    Args:
        model: Result object whose ``summary2().tables[1]`` is the
            coefficient DataFrame (estimates, standard errors, test
            statistic, p-value and 95% confidence bounds).
        model_name: Label written into the ``Model`` column of every row.

    Returns:
        DataFrame with columns ``Variable``, ``Model``, ``Coef``, ``SE``,
        the test statistic (``z`` when present, otherwise ``t``), ``p``,
        ``CI_low`` and ``CI_high``.
    """
    coef = model.summary2().tables[1].copy()
    coef = coef.reset_index().rename(columns={"index": "Variable"})
    coef.insert(1, "Model", model_name)
    # The statistic is labelled either z / P>|z| or t / P>|t| depending on
    # how the model was fitted; map both p-value spellings to "p".
    coef = coef.rename(columns={
        "Coef.": "Coef",
        "Std.Err.": "SE",
        "P>|z|": "p",
        "P>|t|": "p",
        "[0.025": "CI_low",
        "0.975]": "CI_high",
    })
    stat_col = "z" if "z" in coef.columns else "t"
    return coef[
        ["Variable", "Model", "Coef", "SE", stat_col, "p", "CI_low", "CI_high"]
    ]

def add_model_stats(doc, model, model_name="(1)"):
    """Write a one-row fit-statistics table for ``model`` into ``doc``.

    The row holds the model label, the observation count, R-squared,
    adjusted R-squared and the model's ``cov_type`` attribute (an empty
    string when the attribute is absent). The table is rendered with the
    ``{:.4f}`` number format under the caption "Model statistics".
    """
    n_obs = int(model.nobs)
    r_squared = float(model.rsquared)
    adj_r_squared = float(model.rsquared_adj)
    covariance = getattr(model, "cov_type", "")

    summary_row = {
        "Model": model_name,
        "N": n_obs,
        "R2": r_squared,
        "Adj_R2": adj_r_squared,
        "Covariance": covariance,
    }
    add_df_table_to_doc(
        doc,
        pd.DataFrame([summary_row]),
        title="Model statistics",
        float_fmt="{:.4f}",
    )

In [None]:
# === EXPORT ALL TABLES TO WORD (OUT FOLDER) ===
# Builds one Word document with the OLS, first-stage, reduced-form and 2SLS
# tables from the fitted models ols1, ols2, fs, rf and iv, then saves it.
from pathlib import Path

# Output file path (relative to src/). Create the folder up front so that
# doc.save() does not fail when ../out does not exist yet.
out_path = Path("../out/regression_tables.docx")
out_path.parent.mkdir(parents=True, exist_ok=True)

# Initialize Word document
doc = Document()
doc.add_heading("Replication Results: Class Size and Performance", level=1)

# --- OLS TABLES ---
doc.add_heading("Table 1. OLS Estimates", level=2)
ols1_df = model_to_coef_df(ols1, "(1) OLS: perf ~ class_size")
ols2_df = model_to_coef_df(ols2, "(2) OLS: + controls")

# Stack both specifications into one long coefficient table.
ols_combined = pd.concat([ols1_df, ols2_df], ignore_index=True)
add_df_table_to_doc(doc, ols_combined, title="Coefficients (Robust HC1 SEs)")

add_model_stats(doc, ols1, "(1)")
add_model_stats(doc, ols2, "(2)")

# --- FIRST STAGE ---
doc.add_heading("Table 2. First Stage: class_size ~ elite", level=2)
fs_df = model_to_coef_df(fs, "(3) First Stage")
add_df_table_to_doc(doc, fs_df, title="First Stage Coefficients (HC1 SEs)")
add_model_stats(doc, fs, "(3)")

# --- REDUCED FORM ---
doc.add_heading("Table 3. Reduced Form: perf ~ elite", level=2)
rf_df = model_to_coef_df(rf, "(4) Reduced Form")
add_df_table_to_doc(doc, rf_df, title="Reduced Form Coefficients (HC1 SEs)")
add_model_stats(doc, rf, "(4)")

# --- 2SLS / IV ---
doc.add_heading("Table 4. 2SLS: perf ~ class_size (Instrument: elite)", level=2)

# linearmodels IV has slightly different summary table: tables[1] exposes
# its rows through .data, with the header in the first row.
iv_df = iv.summary.tables[1]
iv_df = pd.DataFrame(iv_df.data[1:], columns=iv_df.data[0])
add_df_table_to_doc(doc, iv_df, title="2SLS Coefficients (Robust SEs)")

# Add note (important for grading)
doc.add_paragraph(
    "Notes: All standard errors are heteroskedasticity-robust (HC1 for OLS and robust for IV). "
    "Pilot schools (22–25) are excluded to match the original study's sample restriction."
)

# Save to OUT folder
doc.save(out_path)
print(f"Tables exported successfully to: {out_path.resolve()}")


# Implementing Methods

In [None]:
from docx import Document

def add_df_table_to_doc(doc, df, title=None, float_fmt="{:.3f}"):
    """Append a DataFrame to a Word document as a styled table.

    Args:
        doc: Document-like object providing ``add_paragraph`` and
            ``add_table`` (a python-docx ``Document`` in this notebook).
        df: DataFrame to render. Its column labels become the header row;
            the index is not written.
        title: Optional caption added as a paragraph above the table.
        float_fmt: ``str.format`` template applied to values in
            floating-point columns only.

    Missing values in float and integer columns are rendered as empty
    cells. Integer columns (e.g. observation counts) are written without
    decimals, and every other column is written with ``str``.
    """
    if title:
        doc.add_paragraph(title)

    # Render each column to text up front: by position, so duplicate column
    # labels are handled, and by dtype, so counts are not printed as floats.
    text_columns = []
    for position, dtype in enumerate(df.dtypes):
        values = df.iloc[:, position]
        if pd.api.types.is_float_dtype(dtype):
            texts = ["" if pd.isna(v) else float_fmt.format(v) for v in values]
        elif pd.api.types.is_integer_dtype(dtype):
            texts = ["" if pd.isna(v) else str(v) for v in values]
        else:
            texts = [str(v) for v in values]
        text_columns.append(texts)

    table = doc.add_table(rows=1, cols=len(df.columns))
    table.style = "Light Shading Accent 1"
    for cell, label in zip(table.rows[0].cells, df.columns):
        cell.text = str(label)

    # Transpose the per-column text back into rows, one table row each.
    for row_texts in zip(*text_columns):
        cells = table.add_row().cells
        for cell, text in zip(cells, row_texts):
            cell.text = text

def model_to_coef_df(model, model_name="(1)"):
    """Return a tidy coefficient table for a fitted statsmodels result.

    Args:
        model: Result object whose ``summary2().tables[1]`` is the
            coefficient DataFrame (estimates, standard errors, test
            statistic, p-value and 95% confidence bounds).
        model_name: Label written into the ``Model`` column of every row.

    Returns:
        DataFrame with columns ``Variable``, ``Model``, ``Coef``, ``SE``,
        the test statistic (``z`` when present, otherwise ``t``), ``p``,
        ``CI_low`` and ``CI_high``.
    """
    coef = model.summary2().tables[1].copy()
    coef = coef.reset_index().rename(columns={"index": "Variable"})
    coef.insert(1, "Model", model_name)
    # The statistic is labelled either z / P>|z| or t / P>|t| depending on
    # how the model was fitted; map both p-value spellings to "p".
    coef = coef.rename(columns={
        "Coef.": "Coef",
        "Std.Err.": "SE",
        "P>|z|": "p",
        "P>|t|": "p",
        "[0.025": "CI_low",
        "0.975]": "CI_high",
    })
    stat_col = "z" if "z" in coef.columns else "t"
    return coef[
        ["Variable", "Model", "Coef", "SE", stat_col, "p", "CI_low", "CI_high"]
    ]

def sm_to_coef_df(model, model_name=""):
    """Turn a statsmodels result's coefficient table into a tidy DataFrame.

    Reads ``model.summary2().tables[1]``, moves the index into a
    ``Variable`` column, optionally adds a ``Model`` label column, and
    shortens the statsmodels column names. Only the recognised columns are
    returned, in a fixed order; anything else in the table is dropped.

    Args:
        model: Result object exposing ``summary2()``.
        model_name: Label for the ``Model`` column; when empty the column
            is omitted.

    Returns:
        DataFrame with ``Variable``, then ``Model`` (if requested), then
        whichever of ``Coef``, ``SE``, ``t``, ``z``, ``p``, ``CI_low``,
        ``CI_high`` exist.
    """
    coef_table = model.summary2().tables[1].copy().reset_index()
    coef_table = coef_table.rename(columns={"index": "Variable"})
    if model_name:
        coef_table.insert(1, "Model", model_name)

    # Both p-value spellings collapse to "p"; labels absent from the table
    # are simply ignored by rename.
    short_names = {
        "Coef.": "Coef",
        "Std.Err.": "SE",
        "P>|t|": "p",
        "P>|z|": "p",
        "[0.025": "CI_low",
        "0.975]": "CI_high",
    }
    coef_table = coef_table.rename(columns=short_names)

    optional = ("Model", "Coef", "SE", "t", "z", "p", "CI_low", "CI_high")
    selected = ["Variable"] + [
        name for name in optional if name in coef_table.columns
    ]
    return coef_table[selected]

def lm_to_coef_df(lm_res, model_name=""):
    """Convert a linearmodels (PanelOLS / IV2SLS) summary table to a DataFrame.

    Reads ``lm_res.summary.tables[1].data``, treating the first row as the
    header and the remaining rows as the body.

    Args:
        lm_res: Result object exposing ``summary.tables[1].data`` as a list
            of rows.
        model_name: Label inserted as a leading ``Model`` column; when empty
            the column is omitted.

    Returns:
        DataFrame of the table cells as given by the summary. If the first
        header cell is blank it is labelled ``Variable``, so the exported
        table has no empty column heading and matches ``sm_to_coef_df``.
    """
    rows = lm_res.summary.tables[1].data
    header = list(rows[0])  # copy so the summary's own data is untouched
    # NOTE(review): the parameter-name column appears to come with an empty
    # header in linearmodels summaries -- confirm against a real result.
    if header and not str(header[0]).strip():
        header[0] = "Variable"
    df_tbl = pd.DataFrame(rows[1:], columns=header)
    if model_name:
        df_tbl.insert(0, "Model", model_name)
    return df_tbl

def add_simple_stats(doc, title, stats_dict):
    """Add ``title`` as a paragraph, then ``stats_dict`` as a one-row table.

    The dict's keys become the column headers and its values the single
    row; the table is rendered with the ``{:.4f}`` number format.
    """
    doc.add_paragraph(title)
    one_row = pd.DataFrame([stats_dict])
    # The caption was already written above, so the table itself gets none.
    add_df_table_to_doc(doc, one_row, title=None, float_fmt="{:.4f}")


def add_model_stats(doc, model, model_name="(1)"):
    """Append a "Model statistics" table describing ``model`` to ``doc``.

    The single row lists the model label, ``nobs`` as an integer,
    ``rsquared`` and ``rsquared_adj`` as floats, and ``cov_type`` (an empty
    string if the model has no such attribute). Numbers are rendered with
    the ``{:.4f}`` format.
    """
    record = {"Model": model_name}
    record["N"] = int(model.nobs)
    record["R2"] = float(model.rsquared)
    record["Adj_R2"] = float(model.rsquared_adj)
    record["Covariance"] = getattr(model, "cov_type", "")

    stats_frame = pd.DataFrame([record])
    add_df_table_to_doc(
        doc, stats_frame, title="Model statistics", float_fmt="{:.4f}"
    )

In [None]:
# === EXPORT TABLES (Implementing Methods) TO WORD ===
# Builds one Word document with a table per estimation method, using the
# fitted models and result objects defined earlier in the notebook.
from pathlib import Path

out_path = Path("../out/implementing_methods_tables.docx")
# Create the output folder up front so doc.save() does not fail when
# ../out does not exist yet.
out_path.parent.mkdir(parents=True, exist_ok=True)

doc = Document()
doc.add_heading("Class Size and Performance — Implementing Methods", level=1)

# --- Table: OLS ---
doc.add_heading("Table 1. OLS Estimates (Non-pilot schools)", level=2)
ols1_df = sm_to_coef_df(ols1, "(1) OLS")
ols2_df = sm_to_coef_df(ols2, "(2) OLS + controls")
add_df_table_to_doc(doc, pd.concat([ols1_df, ols2_df], ignore_index=True),
                    title="Coefficients (HC1 robust SEs)")

add_simple_stats(doc, "Model stats (OLS 1)", {"N": int(ols1.nobs), "R2": float(ols1.rsquared), "Adj_R2": float(ols1.rsquared_adj), "Cov": "HC1"})
add_simple_stats(doc, "Model stats (OLS 2)", {"N": int(ols2.nobs), "R2": float(ols2.rsquared), "Adj_R2": float(ols2.rsquared_adj), "Cov": "HC1"})

# --- Table: Student FE (PanelOLS) ---
doc.add_heading("Table 2. Student FE + Year FE", level=2)
fe_df = lm_to_coef_df(fe_student, "Student FE + Year FE")
add_df_table_to_doc(doc, fe_df, title="Coefficients (clustered at school)")

# --- Table: Twin FE (if exists) ---
# The heading is always written; the body depends on whether twin_fe was
# defined earlier in the notebook session.
doc.add_heading("Table 3. Twin FE + Year FE", level=2)
if "twin_fe" in globals():
    twin_df = sm_to_coef_df(twin_fe, "Twin FE + Year FE")
    add_df_table_to_doc(doc, twin_df, title="Coefficients (clustered at twin)")
else:
    doc.add_paragraph("Not estimated (no within-pair class size variation).")

# --- Table: DiD ---
doc.add_heading("Table 4. Difference-in-Differences", level=2)
did_df = sm_to_coef_df(did_mod, f"DiD (post >= {post_start})")
add_df_table_to_doc(doc, did_df, title="Coefficients (clustered at school)")

# --- Table: Event study (optional but usually appendix) ---
doc.add_heading("Table 5. Event Study", level=2)
es_df = sm_to_coef_df(es_mod, f"Event Study (window W={W}, ref=-1)")
add_df_table_to_doc(doc, es_df, title="Coefficients (clustered at school)")

# --- Table: TWFE ---
doc.add_heading("Table 6. Two-way Fixed Effects", level=2)
twfe_df = sm_to_coef_df(twfe_all, "TWFE (School FE + Year FE)")
add_df_table_to_doc(doc, twfe_df, title="Coefficients (clustered at school)")

# --- Table: IV (Maimonides) ---
doc.add_heading("Table 7. IV (Maimonides-style predicted class size)", level=2)
ivm_df = lm_to_coef_df(iv_maim, f"IV (MAXSIZE={MAXSIZE})")
add_df_table_to_doc(doc, ivm_df, title="2SLS coefficients (robust SEs)")

# --- Table: Fuzzy RDD (IV local) ---
doc.add_heading("Table 8. Fuzzy RDD (2SLS local)", level=2)
rdd_df = lm_to_coef_df(iv_rdd, f"Fuzzy RDD (c={c}, h={h_used})")
add_df_table_to_doc(doc, rdd_df, title="Local 2SLS coefficients (robust SEs)")

# --- Table: Matching (ATE is not a regression) ---
# No coefficient table exists here, so a one-row summary is built by hand.
doc.add_heading("Table 9. Matching (CEM-style)", level=2)
match_df = pd.DataFrame([{
    "Estimand": f"ATE of 1[class_size <= {THRESH}]",
    "Effect": float(ate_cem),
    "SE_boot": float(se_cem),
    "N": int(len(df_mm))
}])
add_df_table_to_doc(doc, match_df, title="Matched estimate (bootstrap SE)")

# --- Table: Synthetic Control (ATT is not a regression) ---
doc.add_heading("Table 10. Synthetic Control", level=2)
# Find SCM row in your results table (if you stored it): rows of res_df
# whose "method" text contains "SCM"; missing method values never match.
scm_rows = res_df[res_df["method"].astype(str).str.contains("SCM", na=False)]
if len(scm_rows) > 0:
    add_df_table_to_doc(doc, scm_rows.reset_index(drop=True), title="SCM summary (from results table)")
else:
    doc.add_paragraph("SCM not estimated or missing from results table.")

# --- Final: Unified comparison table ---
doc.add_heading("Table 11. Method Comparison Summary", level=2)
add_df_table_to_doc(doc, res_df.reset_index(drop=True), title="All methods (coef/SE/N)")

doc.add_paragraph(
    "Notes: OLS uses HC1 robust SEs; DiD/TWFE cluster SEs at school; Student FE clusters at school; "
    "IV and RDD use robust SEs. Matching and SCM are not marginal slope estimands."
)

doc.save(out_path)
print(f"Saved Word tables to: {out_path.resolve()}")

In [None]:
# Placebo DiD table: start from a copy of the comparison dataframe printed
# earlier (compare_df), give the columns paper-friendly labels, and round
# the estimates to three decimals for academic presentation.
paper_labels = {
    "placebo_start": "Assumed treatment start",
    "coef_did": "DiD coefficient",
    "se": "Std. Error",
    "pval": "p-value",
    "N": "Observations",
}
three_decimals = {
    label: 3 for label in ("DiD coefficient", "Std. Error", "p-value")
}
placebo_table = (
    compare_df.copy()
    .rename(columns=paper_labels)
    .round(three_decimals)
)

# Append to the doc object that is already open in this session.
doc.add_heading("Placebo DiD Test", level=2)
add_df_table_to_doc(
    doc,
    placebo_table,
    title="True vs Placebo DiD Estimates (clustered SEs)",
)

# Save to the same file as the other Implementing Methods tables.
doc.save("../out/implementing_methods_tables.docx")