<a href="https://colab.research.google.com/github/yostinagirgis/ECO-726-Replication-Project----Results-Extension/blob/main/ECO_726_Replication_Project_(Extension).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [121]:
!pip install linearmodels
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS
from statsmodels.formula.api import ols, logit
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm
from itertools import product



In [122]:
import urllib.request

data_url = 'https://github.com/yostinagirgis/ECO-726-Replication-Project----Results-Extension/raw/main/FAid_Final.dta'
output_file = 'FAid_Final.dta'

print("Downloading data file from GitHub...")
urllib.request.urlretrieve(data_url, output_file)
print("✓ Data file ready!")

import pandas as pd
df = pd.read_stata(output_file)
print(f"✓ Loaded {len(df)} observations")

Downloading data file from GitHub...
✓ Data file ready!
✓ Loaded 7239 observations


###############################
DATA PREPARATION (ALL TABLES)
###############################

In [123]:
# Set panel structure
# Check available columns first
print("Available columns in DataFrame:", df.columns.tolist())

# If 'year' is both an index level AND a column, drop the column to avoid conflict
# when reset_index tries to add 'year' from the index as a column.
if 'year' in df.index.names and 'year' in df.columns:
    print("Detected 'year' as both an index level and a column. Dropping column 'year' before reset_index.")
    df = df.drop(columns=['year'])
    # After dropping, 'year' should only be an index level if it was.

# If 'year' is still an index level, reset it to make it a column.
if 'year' in df.index.names:
    df = df.reset_index()

# At this point, the DataFrame should already have 'risocode' and 'year' columns
# from the initial data load, ready to be set as the MultiIndex.

# Now 'risocode' and the existing 'year' can be used to set the index
df = df.sort_values(['risocode', 'year'])
df = df.set_index(['risocode', 'year'])

# We ensure the MultiIndex is unique after setting it, as pandas allows duplicates in MultiIndex
# Many operations (like reindexing internally) require a unique index.
df = df.loc[~df.index.duplicated(keep='first')]

Available columns in DataFrame: ['risocode', 'recipient_country', 'wb_region', 'year', 'obs', 'wheat_aid', 'fadum', 'fadum_avg', 'US_wheat_production', 'any_war', 'intra_state', 'inter_state', 'intra_state_onset', 'intra_state_offset', 'peace_dur', 'intra_state_dur', 'intensity', 'all_Precip_jan', 'all_Precip_feb', 'all_Precip_mar', 'all_Precip_apr', 'all_Precip_may', 'all_Precip_jun', 'all_Precip_jul', 'all_Precip_aug', 'all_Precip_sep', 'all_Precip_oct', 'all_Precip_nov', 'all_Precip_dec', 'all_Temp_jan', 'all_Temp_feb', 'all_Temp_mar', 'all_Temp_apr', 'all_Temp_may', 'all_Temp_jun', 'all_Temp_jul', 'all_Temp_aug', 'all_Temp_sep', 'all_Temp_oct', 'all_Temp_nov', 'all_Temp_dec', 'all_Precip_jan_faavg', 'all_Precip_feb_faavg', 'all_Precip_mar_faavg', 'all_Precip_apr_faavg', 'all_Precip_may_faavg', 'all_Precip_jun_faavg', 'all_Precip_jul_faavg', 'all_Precip_aug_faavg', 'all_Precip_sep_faavg', 'all_Precip_oct_faavg', 'all_Precip_nov_faavg', 'all_Precip_dec_faavg', 'all_Temp_jan_faavg', '

In [124]:
# Restrict sample period
df = df[(df.index.get_level_values('year') >= 1971) &
        (df.index.get_level_values('year') <= 2006)]

In [125]:
# Convert to thousands
df['wheat_aid'] = df['wheat_aid'] / 1000
df['US_wheat_production'] = df['US_wheat_production'] / 1000
df['recipient_cereals_prod'] = df['recipient_cereals_prod'] / 1000
df['recipient_wheat_prod'] = df['recipient_wheat_prod'] / 1000

In [126]:
# Create lagged variables (within groups)
df['l_US_wheat_production'] = df.groupby(level='risocode')['US_wheat_production'].shift(1)
df['l_intra_state'] = df.groupby(level='risocode')['intra_state'].shift(1)

In [127]:
# Create instrument
df['instrument'] = df['l_US_wheat_production'] * df['fadum_avg']

In [128]:
# Create control variables
df['USA_ln_income'] = np.log(df['USA_rgdpch'])
df['ln_rgdpch_avg'] = df.groupby(level='risocode')['ln_rgdpch'].transform('mean')

df['oil_fadum_avg'] = df['oil_price_2011_USD'] * df['fadum_avg']
df['US_income_fadum_avg'] = df['USA_ln_income'] * df['fadum_avg']
df['US_democ_pres_fadum_avg'] = df['US_president_democ'] * df['fadum_avg']

In [129]:
# Create precipitation and temperature interactions
precip_cols = [f'all_Precip_{month}' for month in ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                                                     'jul', 'aug', 'sep', 'oct', 'nov', 'dec']]
temp_cols = [f'all_Temp_{month}' for month in ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                                                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']]

for col in precip_cols + temp_cols:
    df[f'{col}_faavg'] = df[col] * df['fadum_avg']

In [130]:
# Create year dummies
year_dummies = pd.get_dummies(df.index.get_level_values('year'), prefix='year', drop_first=True)
year_dummies.index = df.index # Assign the MultiIndex from df to year_dummies
df = pd.concat([df, year_dummies], axis=1)

In [131]:
# Create interactions with year dummies
years = range(1972, 2007)  # 1971 is omitted as reference, interactions go up to 2006 (y35)
interaction_vars = ['ln_rgdpch_avg', 'real_usmilaid_avg', 'real_us_nonfoodaid_ecaid_avg',
                   'recipient_pc_cereals_prod_avg', 'cereal_pc_import_quantity_avg']

new_cols = []
for var, year in product(interaction_vars, years):
    var_prefix = {'ln_rgdpch_avg': 'gdp',
                 'real_usmilaid_avg': 'usmil',
                 'real_us_nonfoodaid_ecaid_avg': 'usec',
                 'recipient_pc_cereals_prod_avg': 'rcereal',
                 'cereal_pc_import_quantity_avg': 'rimport'}[var]

    year_idx = year - 1971
    col_name = f'{var_prefix}_y{year_idx}'
    new_cols.append(pd.Series(df[var] * df[f'year_{year}.0'], index=df.index, name=col_name))

df = pd.concat([df] + new_cols, axis=1)

In [132]:
# Create country and region dummies
country_dummies = pd.get_dummies(df.index.get_level_values('risocode'), prefix='country', drop_first=True)
country_dummies.index = df.index # Assign the MultiIndex from df to country_dummies

region_dummies = pd.get_dummies(df['wb_region'], prefix='region', drop_first=True)
region_dummies.index = df.index # Explicitly assign the MultiIndex from df to region_dummies

df = pd.concat([df, country_dummies, region_dummies], axis=1)

In [133]:
# Create region × year interactions
new_interaction_cols = []

# Remove duplicate columns from df to ensure correct Series extraction
df = df.loc[:, ~df.columns.duplicated()]

region_columns_in_df = [col for col in df.columns if col.startswith('region_')]
year_columns_in_df = [col for col in df.columns if col.startswith('year_')]

expected_index_names = df.index.names

for region_col in region_columns_in_df:
    for year_col in year_columns_in_df:
        s_region = df[region_col]
        s_year = df[year_col]

        # Unconditionally set index names to ensure consistency for multiplication
        s_region.index.names = expected_index_names
        s_year.index.names = expected_index_names

        # Perform the multiplication.
        interaction_series = s_region * s_year
        # Assign a proper name for the new Series, which will become the column name
        interaction_series.name = f'{region_col}_{year_col}'
        new_interaction_cols.append(interaction_series)

# Link together all new interaction columns to df at once
df = pd.concat([df] + new_interaction_cols, axis=1)

In [134]:
# Define control groups
US_controls = ['oil_fadum_avg', 'US_income_fadum_avg', 'US_democ_pres_fadum_avg']

weather_controls = (precip_cols + temp_cols +
                   [f'{col}_faavg' for col in precip_cols + temp_cols])

country_chars_controls = ([f'gdp_y{i}' for i in range(2, 37)] +
                          [f'usmil_y{i}' for i in range(2, 37)] +
                          [f'usec_y{i}' for i in range(2, 37)])

cereals_controls = ([f'rcereal_y{i}' for i in range(2, 37)] +
                   [f'rimport_y{i}' for i in range(2, 37)])

baseline_controls = US_controls + weather_controls + country_chars_controls + cereals_controls

In [135]:
# Create in-sample indicator using most restrictive specification
# (This ensures consistent sample across all specifications)
# Filter baseline_controls to only include columns present in df
valid_baseline_controls = [col for col in baseline_controls if col in df.columns]
df_temp = df.dropna(subset=['intra_state', 'wheat_aid', 'instrument'] + valid_baseline_controls)

# Create the 'in_sample' Series
in_sample_series = df.index.isin(df_temp.index)

# Put together the new Series as a DataFrame to avoid fragmentation
df = pd.concat([df, pd.DataFrame(in_sample_series, index=df.index, columns=['in_sample'])], axis=1)

###############################
TABLE 1: SUMMARY STATISTICS
###############################

In [136]:
summary_vars = ['any_war', 'intra_state', 'inter_state', 'wheat_aid',
               'fadum_avg', 'instrument', 'recipient_cereals_prod', 'recipient_wheat_prod']

# Create a processed DataFrame with a guaranteed unique index for this cell's operations.
df_processed = df.copy()

# Robustly ensure the MultiIndex is unique by resetting, dropping duplicates, then re-setting index.
# This handles cases where .loc[~.index.duplicated()] might not fully prevent reindex issues.
df_processed = df_processed.reset_index().drop_duplicates(subset=['risocode', 'year'], keep='first').set_index(['risocode', 'year'])

# Filter the processed DataFrame based on the 'in_sample' column using .loc for robustness
df_filtered_in_sample = df_processed.loc[df_processed['in_sample']].copy()

# This check should be redundant if df_processed was truly unique and 'in_sample' is aligned,
# but we'll kept as a safeguard.
if df_filtered_in_sample.index.duplicated().any():
    df_filtered_in_sample = df_filtered_in_sample.loc[~df_filtered_in_sample.index.duplicated(keep='first')]

# Use this unique-indexed filtered DataFrame for summary statistics
table1 = df_filtered_in_sample[summary_vars].describe().T[['count', 'mean', 'std']]
print("\n=== TABLE 1: Summary Statistics ===")
print(table1)
table1.to_csv("Table1_summary.csv")


=== TABLE 1: Summary Statistics ===
                         count          mean           std
any_war                 3993.0      0.217881      0.412865
intra_state             3993.0      0.177060      0.381765
inter_state             3993.0      0.026546      0.160769
wheat_aid               3993.0     27.347148    116.527472
fadum_avg               3993.0      0.373410      0.312379
instrument              3993.0  22232.367515  19055.791389
recipient_cereals_prod  3553.0  10476.098822  42464.346912
recipient_wheat_prod    2284.0   4244.452310  14642.101871


###############################
TABLE 2: BASELINE OLS & IV
###############################

In [137]:
# Prepare fixed effects
fe_vars = list(country_dummies.columns) + [col for col in df.columns if col.startswith('region_') and 'year' in col]

In [138]:
### PANEL A: OLS ESTIMATES ###
print("\n" + "="*70)
print("TABLE 2 PANEL A: OLS Estimates")
print("="*70)

# Helper function to run OLS with clustered standard errors
def run_ols(df_subset, y_var, x_vars, cluster_var):
    """Run OLS with clustered standard errors"""
    X = sm.add_constant(df_subset[x_vars])
    y = df_subset[y_var]
    model = OLS(y, X).fit(cov_type='cluster',
                         cov_kwds={'groups': df_subset.index.get_level_values(cluster_var)})
    return model


TABLE 2 PANEL A: OLS Estimates


In [139]:
# Column 1: No controls except fixed effects
print("\nColumn 1: Country FE + Region×Year FE only")
df_sample = df[df['in_sample']].copy()
x_vars_1 = ['wheat_aid'] + fe_vars
# Explicitly convert feature columns to float to avoid object dtype issues with statsmodels
X1 = df_sample[x_vars_1].astype(float)
y1 = df_sample['any_war']

# Correct the clustering group from 'obs' to 'risocode'
model_a1 = OLS(y1, sm.add_constant(X1)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a1.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a1.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a1.nobs)}")


Column 1: Country FE + Region×Year FE only
  Wheat aid coef: -0.000073
  Std error:      (0.000182)
  N = 3993


In [140]:
# Column 2: Add US-level controls
print("\nColumn 2: + US controls (oil, income, president)")
x_vars_2 = ['wheat_aid'] + US_controls + fe_vars
df_sample = df[df['in_sample']].copy() # Ensure df_sample is defined within this context

# Explicitly convert feature columns to float and correct clustering
X2 = df_sample[x_vars_2].astype(float)
y2 = df_sample['any_war']

model_a2 = OLS(y2, sm.add_constant(X2)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a2.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a2.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a2.nobs)}")


Column 2: + US controls (oil, income, president)
  Wheat aid coef: -0.000087
  Std error:      (0.000182)
  N = 3993


In [141]:
# Column 3: Add weather controls
print("\nColumn 3: + Weather controls (precipitation & temperature)")
x_vars_3 = ['wheat_aid'] + US_controls + weather_controls + fe_vars
df_sample = df[df['in_sample']].copy() # Ensure df_sample is defined within this context

# Explicitly convert feature columns to float and correct clustering
X3 = df_sample[x_vars_3].astype(float)
y3 = df_sample['any_war']

model_a3 = OLS(y3, sm.add_constant(X3)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a3.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a3.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a3.nobs)}")


Column 3: + Weather controls (precipitation & temperature)
  Wheat aid coef: -0.000068
  Std error:      (0.000174)
  N = 3993


In [142]:
# Column 4: Add country characteristics × year FE
print("\nColumn 4: + Country characteristics (GDP, other aid) × Year FE")

# Filter country_chars_controls to only include columns present in df
valid_country_chars_controls = [col for col in country_chars_controls if col in df_sample.columns]
x_vars_4 = ['wheat_aid'] + US_controls + weather_controls + valid_country_chars_controls + fe_vars

# Explicitly convert feature columns to float and correct clustering
X4 = df_sample[x_vars_4].astype(float)
y4 = df_sample['any_war']

model_a4 = OLS(y4, sm.add_constant(X4)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a4.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a4.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a4.nobs)}")


Column 4: + Country characteristics (GDP, other aid) × Year FE
  Wheat aid coef: -0.000060
  Std error:      (0.000175)
  N = 3993


In [143]:
# Column 5: Add cereal controls (full baseline specification) - any war
print("\nColumn 5: + Cereal production & imports × Year FE (FULL CONTROLS) - Any War")

df_sample = df[df['in_sample']].copy()

# Filter baseline_controls to only include columns present in df_sample
valid_baseline_controls_for_col5 = [col for col in baseline_controls if col in df_sample.columns]
x_vars_5 = ['wheat_aid'] + valid_baseline_controls_for_col5 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X4 = df_sample[x_vars_5].astype(float)
y4 = df_sample['any_war']

# Correct the cluster_var to 'risocode'
model_a5 = OLS(y4, sm.add_constant(X4)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a5.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a5.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a5.nobs)}")


Column 5: + Cereal production & imports × Year FE (FULL CONTROLS) - Any War
  Wheat aid coef: -0.000106
  Std error:      (0.000175)
  N = 3993


In [144]:
# Column 6: Full controls - intra-state conflict
print("\nColumn 6: Full controls - Intra-state conflict")
df_sample = df[df['in_sample']].copy()

# Filter baseline_controls to only include columns present in df_sample
valid_baseline_controls_for_col6 = [col for col in baseline_controls if col in df_sample.columns]
x_vars_6 = ['wheat_aid'] + valid_baseline_controls_for_col6 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X6 = df_sample[x_vars_6].astype(float)
y6 = df_sample['intra_state']

# Correct the cluster_var to 'risocode'
model_a6 = OLS(y6, sm.add_constant(X6)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a6.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a6.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a6.nobs)}")


Column 6: Full controls - Intra-state conflict
  Wheat aid coef: -0.000049
  Std error:      (0.000178)
  N = 3993


In [145]:
# Column 7: Full controls - inter-state conflict
print("\nColumn 7: Full controls - Inter-state conflict")
df_sample = df[df['in_sample']].copy()

# Filter baseline_controls to only include columns present in df_sample
valid_baseline_controls_for_col7 = [col for col in baseline_controls if col in df_sample.columns]
x_vars_7 = ['wheat_aid'] + valid_baseline_controls_for_col7 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X7 = df_sample[x_vars_7].astype(float)
y7 = df_sample['inter_state']

# Correct the cluster_var to 'risocode'
model_a7 = OLS(y7, sm.add_constant(X7)).fit(cov_type='cluster', cov_kwds={'groups': df_sample.index.get_level_values('risocode')})
print(f"  Wheat aid coef: {model_a7.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_a7.bse['wheat_aid']:.6f})")
print(f"  N = {int(model_a7.nobs)}")


Column 7: Full controls - Inter-state conflict
  Wheat aid coef: -0.000130
  Std error:      (0.000047)
  N = 3993


In [146]:
# Summary table for Panel A
print("\n" + "-"*70)
print("PANEL A SUMMARY:")
print("-"*70)
panel_a_results = pd.DataFrame({
    'Column': [1, 2, 3, 4, 5, 6, 7],
    'Dependent Var': ['Any War']*5 + ['Intra-state', 'Inter-state'],
    'Coefficient': [
        model_a1.params['wheat_aid'],
        model_a2.params['wheat_aid'],
        model_a3.params['wheat_aid'],
        model_a4.params['wheat_aid'],
        model_a5.params['wheat_aid'],
        model_a6.params['wheat_aid'],
        model_a7.params['wheat_aid']
    ],
    'Std Error': [
        model_a1.bse['wheat_aid'],
        model_a2.bse['wheat_aid'],
        model_a3.bse['wheat_aid'],
        model_a4.bse['wheat_aid'],
        model_a5.bse['wheat_aid'],
        model_a6.bse['wheat_aid'],
        model_a7.bse['wheat_aid']
    ],
    'N': [
        int(model_a1.nobs),
        int(model_a2.nobs),
        int(model_a3.nobs),
        int(model_a4.nobs),
        int(model_a5.nobs),
        int(model_a6.nobs),
        int(model_a7.nobs)
    ]
})
print(panel_a_results.to_string(index=False))
panel_a_results.to_csv("Table2_PanelA_results.csv", index=False)


----------------------------------------------------------------------
PANEL A SUMMARY:
----------------------------------------------------------------------
 Column Dependent Var  Coefficient  Std Error    N
      1       Any War    -0.000073   0.000182 3993
      2       Any War    -0.000087   0.000182 3993
      3       Any War    -0.000068   0.000174 3993
      4       Any War    -0.000060   0.000175 3993
      5       Any War    -0.000106   0.000175 3993
      6   Intra-state    -0.000049   0.000178 3993
      7   Inter-state    -0.000130   0.000047 3993


In [147]:
### PANEL B: REDUCED FORM ###
print("\n" + "="*70)
print("TABLE 2 PANEL B: Reduced Form")
print("="*70)

# Scale outcomes by 1000 to make coefficients easier to read
df_rf = df.copy()
for var in ['any_war', 'intra_state', 'inter_state']:
    df_rf[var] = df_rf[var] * 1000

df_rf_sample = df_rf[df_rf['in_sample']].copy()

# Column 1: No controls
print("\nColumn 1: Country FE + Region×Year FE only")
x_vars_rf1 = ['instrument'] + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf1 = df_rf_sample[x_vars_rf1].astype(float)
y_rf1 = df_rf_sample['any_war']

# Correct cluster_var to 'risocode'
model_b1 = OLS(y_rf1, sm.add_constant(X_rf1)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b1.params['instrument']:.6f}")
print(f"  Std error:       ({model_b1.bse['instrument']:.6f})")
print(f"  N = {int(model_b1.nobs)}")


TABLE 2 PANEL B: Reduced Form

Column 1: Country FE + Region×Year FE only
  Instrument coef: 0.008064
  Std error:       (0.002357)
  N = 3993


In [148]:
# Column 2: Add US controls
print("\nColumn 2: + US controls")
x_vars_rf2 = ['instrument'] + US_controls + fe_vars
df_rf_sample = df_rf_sample.copy() # Ensure df_rf_sample is defined and fresh for this context

# Explicitly convert feature columns to float and correct clustering
X_rf2 = df_rf_sample[x_vars_rf2].astype(float)
y_rf2 = df_rf_sample['any_war']

# Correct cluster_var to 'risocode'
model_b2 = OLS(y_rf2, sm.add_constant(X_rf2)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b2.params['instrument']:.6f}")
print(f"  Std error:       ({model_b2.bse['instrument']:.6f})")
print(f"  N = {int(model_b2.nobs)}")


Column 2: + US controls
  Instrument coef: 0.009788
  Std error:       (0.002468)
  N = 3993


In [149]:
# Column 3: Add weather controls
print("\nColumn 3: + Weather controls")
x_vars_rf3 = ['instrument'] + US_controls + weather_controls + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf3 = df_rf_sample[x_vars_rf3].astype(float)
y_rf3 = df_rf_sample['any_war']

# Correct cluster_var to 'risocode'
model_b3 = OLS(y_rf3, sm.add_constant(X_rf3)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b3.params['instrument']:.6f}")
print(f"  Std error:       ({model_b3.bse['instrument']:.6f})")
print(f"  N = {int(model_b3.nobs)}")


Column 3: + Weather controls
  Instrument coef: 0.010058
  Std error:       (0.002489)
  N = 3993


In [150]:
# Column 4: Add country characteristics
print("\nColumn 4: + Country characteristics × Year FE")

# Filter country_chars_controls to only include columns present in df_rf_sample
valid_country_chars_controls_rf4 = [col for col in country_chars_controls if col in df_rf_sample.columns]
x_vars_rf4 = ['instrument'] + US_controls + weather_controls + valid_country_chars_controls_rf4 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf4 = df_rf_sample[x_vars_rf4].astype(float)
y_rf4 = df_rf_sample['any_war']

# Correct the cluster_var to 'risocode'
model_b4 = OLS(y_rf4, sm.add_constant(X_rf4)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b4.params['instrument']:.6f}")
print(f"  Std error:       ({model_b4.bse['instrument']:.6f})")
print(f"  N = {int(model_b4.nobs)}")


Column 4: + Country characteristics × Year FE
  Instrument coef: 0.012316
  Std error:       (0.002654)
  N = 3993


In [151]:
# Column 5: + Cereal controls (FULL) - Any War
print("\nColumn 5: + Cereal controls (FULL) - Any War")

df_rf_sample = df_rf_sample.copy()

# Filter baseline_controls to only include columns present in df_rf_sample
valid_baseline_controls_for_col5 = [col for col in baseline_controls if col in df_rf_sample.columns]
x_vars_rf5 = ['instrument'] + valid_baseline_controls_for_col5 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf5 = df_rf_sample[x_vars_rf5].astype(float)
y_rf5 = df_rf_sample['any_war']

# Correct the cluster_var to 'risocode'
model_b5 = OLS(y_rf5, sm.add_constant(X_rf5)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b5.params['instrument']:.6f}")
print(f"  Std error:       ({model_b5.bse['instrument']:.6f})")
print(f"  N = {int(model_b5.nobs)}")


Column 5: + Cereal controls (FULL) - Any War
  Instrument coef: 0.011705
  Std error:       (0.002917)
  N = 3993


In [152]:
# Column 6: Full controls - intra-state
print("\nColumn 6: Full controls - Intra-state conflict")
df_rf_sample = df_rf[df_rf['in_sample']].copy()

# Filter baseline_controls to only include columns present in df_rf_sample
valid_baseline_controls_for_col6 = [col for col in baseline_controls if col in df_rf_sample.columns]
x_vars_rf6 = ['instrument'] + valid_baseline_controls_for_col6 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf6 = df_rf_sample[x_vars_rf6].astype(float)
y_rf6 = df_rf_sample['intra_state']

# Correct the cluster_var to 'risocode'
model_b6 = OLS(y_rf6, sm.add_constant(X_rf6)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b6.params['instrument']:.6f}")
print(f"  Std error:       ({model_b6.bse['instrument']:.6f})")
print(f"  N = {int(model_b6.nobs)}")


Column 6: Full controls - Intra-state conflict
  Instrument coef: 0.010347
  Std error:       (0.002850)
  N = 3993


In [153]:
# Column 7: Full controls - inter-state
print("\nColumn 7: Full controls - Inter-state conflict")
df_rf_sample = df_rf[df_rf['in_sample']].copy()

# Filter baseline_controls to only include columns present in df_rf_sample
valid_baseline_controls_for_col7 = [col for col in baseline_controls if col in df_rf_sample.columns]
x_vars_rf7 = ['instrument'] + valid_baseline_controls_for_col7 + fe_vars

# Explicitly convert feature columns to float and correct clustering
X_rf7 = df_rf_sample[x_vars_rf7].astype(float)
y_rf7 = df_rf_sample['inter_state']

# Correct the cluster_var to 'risocode'
model_b7 = OLS(y_rf7, sm.add_constant(X_rf7)).fit(cov_type='cluster', cov_kwds={'groups': df_rf_sample.index.get_level_values('risocode')})
print(f"  Instrument coef: {model_b7.params['instrument']:.6f}")
print(f"  Std error:       ({model_b7.bse['instrument']:.6f})")
print(f"  N = {int(model_b7.nobs)}")


Column 7: Full controls - Inter-state conflict
  Instrument coef: -0.001346
  Std error:       (0.001027)
  N = 3993


In [154]:
# Summary table for Panel B
print("\n" + "-"*70)
print("PANEL B SUMMARY:")
print("-"*70)
panel_b_results = pd.DataFrame({
    'Column': [1, 2, 3, 4, 5, 6, 7],
    'Dependent Var': ['Any War']*5 + ['Intra-state', 'Inter-state'],
    'Coefficient': [
        model_b1.params['instrument'],
        model_b2.params['instrument'],
        model_b3.params['instrument'],
        model_b4.params['instrument'],
        model_b5.params['instrument'],
        model_b6.params['instrument'],
        model_b7.params['instrument']
    ],
    'Std Error': [
        model_b1.bse['instrument'],
        model_b2.bse['instrument'],
        model_b3.bse['instrument'],
        model_b4.bse['instrument'],
        model_b5.bse['instrument'],
        model_b6.bse['instrument'],
        model_b7.bse['instrument']
    ],
    'N': [
        int(model_b1.nobs),
        int(model_b2.nobs),
        int(model_b3.nobs),
        int(model_b4.nobs),
        int(model_b5.nobs),
        int(model_b6.nobs),
        int(model_b7.nobs)
    ]
})
print(panel_b_results.to_string(index=False))
panel_b_results.to_csv("Table2_PanelB_results.csv", index=False)

print("\n" + "="*70)
print("NOTE: Outcomes in Panel B scaled by 1000 (conflict variables multiplied by 1000)")
print("="*70)


----------------------------------------------------------------------
PANEL B SUMMARY:
----------------------------------------------------------------------
 Column Dependent Var  Coefficient  Std Error    N
      1       Any War     0.008064   0.002357 3993
      2       Any War     0.009788   0.002468 3993
      3       Any War     0.010058   0.002489 3993
      4       Any War     0.012316   0.002654 3993
      5       Any War     0.011705   0.002917 3993
      6   Intra-state     0.010347   0.002850 3993
      7   Inter-state    -0.001346   0.001027 3993

NOTE: Outcomes in Panel B scaled by 1000 (conflict variables multiplied by 1000)


In [155]:
from linearmodels.iv import IV2SLS

### PANEL C: IV (2SLS) ###
print("\n" + "="*70)
print("TABLE 2 PANEL C: Second Stage IV Estimates")
print("="*70)

# Prepare data
df_iv = df[df['in_sample']].copy()

# Helper function for IV regression with absorbed fixed effects
def run_iv2sls_with_fe(df_subset, y_var, endog_var, instrument_var, exog_controls,
                       entity_effects=True, time_effects=False, other_effects=None):
    """
    Run IV2SLS with absorbed fixed effects

    Parameters:
    - entity_effects: absorb country fixed effects
    - time_effects: absorb year fixed effects
    - other_effects: additional categorical variable for FE (e.g., region-year interactions)
    """
    from linearmodels.iv import IV2SLS

    # Prepare variables
    dependent = df_subset[y_var]
    endog = df_subset[[endog_var]]
    instruments = df_subset[[instrument_var]]

    # Add constant and exogenous controls
    if len(exog_controls) > 0:
        exog = df_subset[exog_controls]
    else:
        exog = None

    # Run IV2SLS with entity effects
    # entity_effects=True absorbs country (obs) fixed effects
    # We'll handle region-year interactions separately
    model = IV2SLS(dependent=dependent,
                   exog=exog,
                   endog=endog,
                   instruments=instruments,
                   entity_effects=entity_effects,
                   time_effects=time_effects).fit(cov_type='clustered',
                                                   clusters=df_subset.index.get_level_values('obs'),
                                                   debiased=True)
    return model


TABLE 2 PANEL C: Second Stage IV Estimates


In [156]:
# Column 1: No controls except fixed effects
print("\nColumn 1: Country FE only (simplified)")

df_iv = df[df['in_sample']].copy()

dependent = df_iv['any_war'].astype(float) # ensure dependent is float

# For Column 1 (no controls except FE), we use country fixed effects as exog
# to avoid multicollinearity from overly complex fixed effects like region-year interactions.
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)

# Identify and remove columns from exog_fe_raw that have zero variance (are constant)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog: {constant_fe_cols}")
    exog = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog = exog_fe_raw

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c1 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c1.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c1.std_errors['wheat_aid']:.6f})")
# Access First-stage F-stat using the identified column name 'f.stat' indexed by 'wheat_aid'
print(f"  First-stage F-stat: {model_c1.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c1.nobs)}")


Column 1: Country FE only (simplified)
Dropping constant fixed effect columns from exog: ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.010233
  Std error:      (0.003765)
  First-stage F-stat: 20.95
  N = 3993


In [157]:
# Column 2: Add US controls
print("\nColumn 2: + US controls")
df_iv = df[df['in_sample']].copy()

dependent = df_iv['any_war'].astype(float)

# Exogenous variables: US_controls + cleaned country fixed effects
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog: {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Combine US_controls with cleaned country fixed effects
exog = pd.concat([df_iv[US_controls].astype(float), exog_fe_cleaned], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c2 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c2.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c2.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c2.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c2.nobs)}")


Column 2: + US controls
Dropping constant fixed effect columns from exog: ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.004055
  Std error:      (0.001377)


  std_errors = sqrt(diag(self.cov))


  First-stage F-stat: 10.69
  N = 3993


In [158]:
# Column 3: Add weather controls
print("\nColumn 3: + Weather controls")
df_iv = df[df['in_sample']].copy()

dependent = df_iv['any_war'].astype(float)

# Exogenous variables: US_controls + weather_controls + cleaned country fixed effects
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog: {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Combine US_controls, weather_controls with cleaned country fixed effects
exog = pd.concat([df_iv[US_controls].astype(float),
                  df_iv[weather_controls].astype(float),
                  exog_fe_cleaned], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c3 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c3.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c3.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c3.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c3.nobs)}")


Column 3: + Weather controls
Dropping constant fixed effect columns from exog: ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.004322
  Std error:      (0.001365)
  First-stage F-stat: 11.54
  N = 3993


In [159]:
# Column 4: Add country characteristics × Year FE
print("\nColumn 4: + Country characteristics (GDP, other aid) × Year FE")

df_iv = df[df['in_sample']].copy()

dependent = df_iv['any_war'].astype(float)

# Exogenous fixed effects: simplified to country fixed effects to avoid multicollinearity
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog (country FE): {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Filter country_chars_controls to only include columns present in df_iv
valid_country_chars_controls = [col for col in country_chars_controls if col in df_iv.columns]

# Combine all exogenous variables
exog = pd.concat([
    df_iv[US_controls].astype(float),
    df_iv[weather_controls].astype(float),
    df_iv[valid_country_chars_controls].astype(float),
    exog_fe_cleaned
], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c4 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c4.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c4.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c4.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c4.nobs)}")


Column 4: + Country characteristics (GDP, other aid) × Year FE
Dropping constant fixed effect columns from exog (country FE): ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.004809
  Std error:      (0.001447)
  First-stage F-stat: 11.52
  N = 3993


In [160]:
# Column 5: Full controls - any war
print("\nColumn 5: + Cereal controls (FULL) - Any War")

df_iv = df[df['in_sample']].copy()

dependent = df_iv['any_war'].astype(float)

# Exogenous fixed effects: simplified to country fixed effects to avoid multicollinearity
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog (country FE): {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Filter baseline_controls to only include columns present in df_iv
valid_baseline_controls_for_col5 = [col for col in baseline_controls if col in df_iv.columns]

# Combine all exogenous variables
exog = pd.concat([
    df_iv[valid_baseline_controls_for_col5].astype(float),
    exog_fe_cleaned
], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c5 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c5.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c5.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c5.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c5.nobs)}")


Column 5: + Cereal controls (FULL) - Any War
Dropping constant fixed effect columns from exog (country FE): ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.004279
  Std error:      (0.001341)
  First-stage F-stat: 11.54
  N = 3993


In [161]:
# Column 6: Full controls - intra-state
print("\nColumn 6: Full controls - Intra-state conflict")
df_iv = df[df['in_sample']].copy()

dependent = df_iv['intra_state'].astype(float)

# Exogenous fixed effects: simplified to country fixed effects to avoid multicollinearity
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog (country FE): {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Filter baseline_controls to only include columns present in df_iv
valid_baseline_controls_for_col6 = [col for col in baseline_controls if col in df_iv.columns]

# Combine all exogenous variables
exog = pd.concat([
    df_iv[valid_baseline_controls_for_col6].astype(float),
    exog_fe_cleaned
], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c6 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c6.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c6.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c6.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c6.nobs)}")


Column 6: Full controls - Intra-state conflict
Dropping constant fixed effect columns from exog (country FE): ['country_GNQ', 'country_TWN']
  Wheat aid coef: 0.003654
  Std error:      (0.001141)
  First-stage F-stat: 11.54
  N = 3993


In [162]:
# Column 7: Full controls - inter-state
print("\nColumn 7: Full controls - Inter-state conflict")
df_iv = df[df['in_sample']].copy()

dependent = df_iv['inter_state'].astype(float)

# Exogenous fixed effects: simplified to country fixed effects to avoid multicollinearity
exog_fe_raw = df_iv[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant fixed effect columns from exog (country FE): {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Filter baseline_controls to only include columns present in df_iv
valid_baseline_controls_for_col7 = [col for col in baseline_controls if col in df_iv.columns]

# Combine all exogenous variables
exog = pd.concat([
    df_iv[valid_baseline_controls_for_col7].astype(float),
    exog_fe_cleaned
], axis=1)

endog = df_iv[['wheat_aid']].astype(float)
instruments = df_iv[['instrument']].astype(float)

model_c7 = IV2SLS(dependent, exog, endog, instruments).fit(cov_type='clustered',
                                                             clusters=df_iv.index.get_level_values('risocode'))
print(f"  Wheat aid coef: {model_c7.params['wheat_aid']:.6f}")
print(f"  Std error:      ({model_c7.std_errors['wheat_aid']:.6f})")
print(f"  First-stage F-stat: {model_c7.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")
print(f"  N = {int(model_c7.nobs)}")


Column 7: Full controls - Inter-state conflict
Dropping constant fixed effect columns from exog (country FE): ['country_GNQ', 'country_TWN']
  Wheat aid coef: -0.000603
  Std error:      (0.000402)
  First-stage F-stat: 11.54
  N = 3993


In [163]:
# Summary table for Panel C
print("\n" + "-"*70)
print("PANEL C SUMMARY:")
print("-"*70)
panel_c_results = pd.DataFrame({
    'Column': [1, 2, 3, 4, 5, 6, 7],
    'Dependent Var': ['Any War']*5 + ['Intra-state', 'Inter-state'],
    'Coefficient': [
        model_c1.params['wheat_aid'],
        model_c2.params['wheat_aid'],
        model_c3.params['wheat_aid'],
        model_c4.params['wheat_aid'],
        model_c5.params['wheat_aid'],
        model_c6.params['wheat_aid'],
        model_c7.params['wheat_aid']
    ],
    'Std Error': [
        model_c1.std_errors['wheat_aid'],
        model_c2.std_errors['wheat_aid'],
        model_c3.std_errors['wheat_aid'],
        model_c4.std_errors['wheat_aid'],
        model_c5.std_errors['wheat_aid'],
        model_c6.std_errors['wheat_aid'],
        model_c7.std_errors['wheat_aid']
    ],
    'First-stage F': [
        model_c1.first_stage.diagnostics['f.stat'].values[0],
        model_c2.first_stage.diagnostics['f.stat'].values[0],
        model_c3.first_stage.diagnostics['f.stat'].values[0],
        model_c4.first_stage.diagnostics['f.stat'].values[0],
        model_c5.first_stage.diagnostics['f.stat'].values[0],
        model_c6.first_stage.diagnostics['f.stat'].values[0],
        model_c7.first_stage.diagnostics['f.stat'].values[0]
    ],
    'N': [
        int(model_c1.nobs),
        int(model_c2.nobs),
        int(model_c3.nobs),
        int(model_c4.nobs),
        int(model_c5.nobs),
        int(model_c6.nobs),
        int(model_c7.nobs)
    ]
})
print(panel_c_results.to_string(index=False))
panel_c_results.to_csv("Table2_PanelC_results.csv", index=False)

print("\n" + "="*70)
print("NOTE: Python uses Country FE + Year FE instead of Country FE + Region×Year FE")
print("This is a limitation of linearmodels with high-dimensional interactions")
print("Results will be similar but not identical to Stata")
print("="*70)


----------------------------------------------------------------------
PANEL C SUMMARY:
----------------------------------------------------------------------
 Column Dependent Var  Coefficient  Std Error  First-stage F    N
      1       Any War     0.010233   0.003765      20.950196 3993
      2       Any War     0.004055   0.001377      10.690591 3993
      3       Any War     0.004322   0.001365      11.540682 3993
      4       Any War     0.004809   0.001447      11.518431 3993
      5       Any War     0.004279   0.001341      11.543277 3993
      6   Intra-state     0.003654   0.001141      11.543277 3993
      7   Inter-state    -0.000603   0.000402      11.543277 3993

NOTE: Python uses Country FE + Year FE instead of Country FE + Region×Year FE
This is a limitation of linearmodels with high-dimensional interactions
Results will be similar but not identical to Stata


###############################
TABLE 7: ONSET & DURATION
###############################

In [164]:
print("\n=== TABLE 7: Onset and Duration ===")

### COLUMN 1: Collier-Hoeffler ###
df_col1 = df[(df['in_sample']) & (df['l_intra_state'] != 1)].copy()
dependent_onset = df_col1['intra_state_onset'].astype(float)

# Filter baseline_controls to only include columns present in df_col1
valid_baseline_controls_for_col1 = [col for col in baseline_controls if col in df_col1.columns]

# Exogenous fixed effects: simplified to country fixed effects for IV models to avoid multicollinearity
exog_fe_raw = df_col1[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant country fixed effect columns from exog: {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Combine baseline controls and cleaned country fixed effects
exog_onset_combined = pd.concat([df_col1[valid_baseline_controls_for_col1].astype(float), exog_fe_cleaned], axis=1)

# Add a constant term to the exogenous regressors
exog_onset = sm.add_constant(exog_onset_combined, prepend=False)

endog_onset = df_col1[['wheat_aid']].astype(float)
instruments_onset = df_col1[['instrument']].astype(float)

iv_onset1 = IV2SLS(dependent_onset, exog_onset, endog_onset, instruments_onset).fit(
    cov_type='clustered', clusters=df_col1.index.get_level_values('risocode')) # Correct clustering var

print(f"\nColumn 1 (Collier-Hoeffler): {iv_onset1.params['wheat_aid']:.5f} ({iv_onset1.std_errors['wheat_aid']:.5f})")
print(f"First-stage F-stat: {iv_onset1.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")


=== TABLE 7: Onset and Duration ===
Dropping constant country fixed effect columns from exog: ['country_COL', 'country_GNQ', 'country_PHL', 'country_TWN']

Column 1 (Collier-Hoeffler): 0.00171 (0.00094)
First-stage F-stat: 6.47


In [165]:
### COLUMN 2: Fearon-Laitin ###
df_col2 = df[df['in_sample']].copy()
dependent_onset2 = df_col2['intra_state_onset'].astype(float)

# Filter baseline_controls to only include columns present in df_col2
valid_baseline_controls_for_col2 = [col for col in baseline_controls if col in df_col2.columns]

# Exogenous fixed effects: simplified to country fixed effects for IV models to avoid multicollinearity
exog_fe_raw = df_col2[list(country_dummies.columns)].astype(float)
constant_fe_cols = exog_fe_raw.columns[exog_fe_raw.std() == 0].tolist()
if constant_fe_cols:
    print(f"Dropping constant country fixed effect columns from exog: {constant_fe_cols}")
    exog_fe_cleaned = exog_fe_raw.drop(columns=constant_fe_cols)
else:
    exog_fe_cleaned = exog_fe_raw

# Combine lagged intra_state, baseline controls, and cleaned country fixed effects
exog_onset2_combined = pd.concat([
    df_col2[['l_intra_state']].astype(float),
    df_col2[valid_baseline_controls_for_col2].astype(float),
    exog_fe_cleaned
], axis=1)

# Add a constant term to the exogenous regressors
exog_onset2 = sm.add_constant(exog_onset2_combined, prepend=False)

endog_onset2 = df_col2[['wheat_aid']].astype(float)
instruments_onset2 = df_col2[['instrument']].astype(float)

# Initialize the IV regression model without fitting to get the notnull mask (rows where regression data is complete)
model_instance = IV2SLS(dependent_onset2, exog_onset2, endog_onset2, instruments_onset2)

# Filter clusters using the notnull mask from the model instance
filtered_clusters = df_col2.index.get_level_values('risocode')[model_instance.notnull]

# Now fit the model with the filtered clusters
iv_onset2 = model_instance.fit(cov_type='clustered', clusters=filtered_clusters)

print(f"\nColumn 2 (Fearon-Laitin): {iv_onset2.params['wheat_aid']:.5f} ({iv_onset2.std_errors['wheat_aid']:.5f})")
print(f"First-stage F-stat: {iv_onset2.first_stage.diagnostics.loc['wheat_aid', 'f.stat']:.2f}")

Dropping constant country fixed effect columns from exog: ['country_GNQ', 'country_TWN']


Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(



Column 2 (Fearon-Laitin): 0.00128 (0.00062)
First-stage F-stat: 10.30


In [166]:
### COLUMNS 3-5: Duration Models for Onset ###
# Control function approach for logit

dur_controls = ['real_usmilaid_avg', 'real_us_nonfoodaid_ecaid_avg',
               'recipient_pc_cereals_prod_avg', 'cereal_pc_import_quantity_avg', 'ln_rgdpch_avg']

In [167]:
### Column 3: Duration polynomial only ###
print("\nColumn 3: Duration polynomial only")

# Only keep countries at peace last year
df_onset3 = df[(df['in_sample']) &
                            (df['l_intra_state'] != 1)].copy()

# Drop missing onset observations for dependent variable in logit
df_onset3 = df_onset3[df_onset3['intra_state_onset'].notna()].copy()

# Create polynomial terms for peace duration
df_onset3['peace_dur2'] = df_onset3['peace_dur'] ** 2
df_onset3['peace_dur3'] = df_onset3['peace_dur'] ** 3

# Step 1: First stage regression (generate control function residual)
first_stage_vars3 = ['instrument', 'peace_dur', 'peace_dur2', 'peace_dur3']

# Drop NaNs (not a number/missing data) from all variables used in the first stage OLS
df_onset3_cleaned = df_onset3.dropna(subset=first_stage_vars3 + ['wheat_aid']).copy()

X_fs3 = sm.add_constant(df_onset3_cleaned[first_stage_vars3])
y_fs3 = df_onset3_cleaned['wheat_aid']

fs_model3 = OLS(y_fs3, X_fs3).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_onset3_cleaned.index.get_level_values('risocode')})

# Test instrument strength
f_stat3 = fs_model3.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat3:.2f}")

# Generate residuals
df_onset3_cleaned['aid_resid'] = fs_model3.resid

# Step 2: Logit with control function residual
logit_vars3 = ['wheat_aid', 'aid_resid', 'peace_dur', 'peace_dur2', 'peace_dur3']
X_logit3 = sm.add_constant(df_onset3_cleaned[logit_vars3])
y_logit3 = df_onset3_cleaned['intra_state_onset']

# Use statsmodels.api.Logit instead of statsmodels.formula.api.logit
logit_model3 = sm.Logit(y_logit3, X_logit3).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_onset3_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Calculate marginal effects at means
margeff3 = logit_model3.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff3.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff3.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_onset3_cleaned['intra_state_onset'].mean():.3f}")
print(f"  N = {len(df_onset3_cleaned)}")


Column 3: Duration polynomial only
  First-stage F-statistic: 26.46
  Wheat aid marginal effect: 0.0000404
  Std error:                 (0.0002562)
  Mean of dependent var:      0.064
  N = 1430


In [168]:
### Column 4: Add time-invariant country controls ###
print("\nColumn 4: + Time-invariant country controls")

df_onset4 = df[(df['in_sample']) &
                            (df['l_intra_state'] != 1)].copy()
df_onset4 = df_onset4[df_onset4['intra_state_onset'].notna()].copy()

# Create polynomial terms
df_onset4['peace_dur2'] = df_onset4['peace_dur'] ** 2
df_onset4['peace_dur3'] = df_onset4['peace_dur'] ** 3

# Step 1: First stage with additional controls
# Filter dur_controls to only include columns present in df_onset4
valid_dur_controls = [col for col in dur_controls if col in df_onset4.columns]
first_stage_vars4 = ['instrument', 'peace_dur', 'peace_dur2', 'peace_dur3'] + valid_dur_controls

# Drop NaNs from all variables used in the first stage OLS
df_onset4_cleaned = df_onset4.dropna(subset=first_stage_vars4 + ['wheat_aid']).copy()

X_fs4 = sm.add_constant(df_onset4_cleaned[first_stage_vars4])
y_fs4 = df_onset4_cleaned['wheat_aid']

fs_model4 = OLS(y_fs4, X_fs4).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_onset4_cleaned.index.get_level_values('risocode')})

# Test instrument strength
f_stat4 = fs_model4.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat4:.2f}")

# Generate residuals
df_onset4_cleaned['aid_resid'] = fs_model4.resid

# Step 2: Logit with control function and additional controls
logit_vars4 = ['wheat_aid', 'aid_resid', 'peace_dur', 'peace_dur2', 'peace_dur3'] + valid_dur_controls
X_logit4 = sm.add_constant(df_onset4_cleaned[logit_vars4])
y_logit4 = df_onset4_cleaned['intra_state_onset']

# Use statsmodels.api.Logit instead of statsmodels.formula.api.logit
logit_model4 = sm.Logit(y_logit4, X_logit4).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_onset4_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Marginal effects
margeff4 = logit_model4.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff4.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff4.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_onset4_cleaned['intra_state_onset'].mean():.3f}")
print(f"  N = {len(df_onset4_cleaned)}")


Column 4: + Time-invariant country controls
  First-stage F-statistic: 23.36
  Wheat aid marginal effect: -0.0000610
  Std error:                 (0.0002432)
  Mean of dependent var:      0.064
  N = 1430


In [169]:
### Column 5: Add region fixed effects ###
print("\nColumn 5: + Region fixed effects")

df_onset5 = df[(df['in_sample']) &
                            (df['l_intra_state'] != 1)].copy()
df_onset5 = df_onset5[df_onset5['intra_state_onset'].notna()].copy()

# Create polynomial terms
df_onset5['peace_dur2'] = df_onset5['peace_dur'] ** 2
df_onset5['peace_dur3'] = df_onset5['peace_dur'] ** 3

# Get region dummy columns - now explicitly just the base region dummies, not interactions
# We assume `region_dummies` from previous cell contains these base dummies
base_region_dummy_cols = [col for col in region_dummies.columns if col.startswith('region_') and '_year_' not in col]
region_cols_for_logit = [col for col in base_region_dummy_cols if col in df_onset5.columns]

# Step 1: First stage with controls and region FE
# Filter dur_controls to only include columns present in df_onset5
valid_dur_controls = [col for col in dur_controls if col in df_onset5.columns]
first_stage_vars5 = ['instrument', 'peace_dur', 'peace_dur2', 'peace_dur3'] + valid_dur_controls + region_cols_for_logit

# Drop NaNs from all variables used in the first stage OLS
df_onset5_cleaned = df_onset5.dropna(subset=first_stage_vars5 + ['wheat_aid']).copy()

# Explicitly convert to float before adding constant for statsmodels OLS
X_fs5 = sm.add_constant(df_onset5_cleaned[first_stage_vars5].astype(float))
y_fs5 = df_onset5_cleaned['wheat_aid']

fs_model5 = OLS(y_fs5, X_fs5).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_onset5_cleaned.index.get_level_values('risocode')})

f_stat5 = fs_model5.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat5:.2f}")

df_onset5_cleaned['aid_resid'] = fs_model5.resid

# Step 2: Logit with control function, controls, and simplified region FE
logit_vars5 = ['wheat_aid', 'aid_resid', 'peace_dur', 'peace_dur2', 'peace_dur3'] + valid_dur_controls + region_cols_for_logit

# Filter out constant columns from the logit_vars5 subset before adding constant
X_logit5_raw = df_onset5_cleaned[logit_vars5].astype(float)
constant_logit_cols = X_logit5_raw.columns[X_logit5_raw.std() == 0].tolist()
if constant_logit_cols:
    print(f"Dropping constant logit regressor columns: {constant_logit_cols}")
    X_logit5_cleaned = X_logit5_raw.drop(columns=constant_logit_cols)
else:
    X_logit5_cleaned = X_logit5_raw

# Add a constant term here for Logit. statsmodels will handle collinearity by dropping one regressor if needed.
X_logit5 = sm.add_constant(X_logit5_cleaned, prepend=False)
y_logit5 = df_onset5_cleaned['intra_state_onset']

# Use statsmodels.api.Logit instead of statsmodels.formula.api.logit
logit_model5 = sm.Logit(y_logit5, X_logit5).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_onset5_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Marginal effects
margeff5 = logit_model5.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff5.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff5.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_onset5_cleaned['intra_state_onset'].mean():.3f}")
print(f"  N = {len(df_onset5_cleaned)}")


Column 5: + Region fixed effects
  First-stage F-statistic: 20.38
  Wheat aid marginal effect: -0.0000371
  Std error:                 (0.0003077)
  Mean of dependent var:      0.064
  N = 1430


In [170]:
#######################################################################
### Columns 6-8: Duration Models for Offset (War → Peace)
#######################################################################
# Similar control function approach but for countries at war

print("\n" + "-"*70)
print("OFFSET MODELS (War → Peace)")
print("-"*70)


----------------------------------------------------------------------
OFFSET MODELS (War → Peace)
----------------------------------------------------------------------


In [171]:
### Column 6: Duration polynomial only ###
print("\nColumn 6: Duration polynomial only")

# Only keep countries at war last year
df_offset6 = df[(df['in_sample']) &
                             (df['l_intra_state'] != 0)].copy()
df_offset6 = df_offset6[df_offset6['intra_state_offset'].notna()].copy()

# Create polynomial terms for conflict duration
df_offset6['intra_state_dur2'] = df_offset6['intra_state_dur'] ** 2
df_offset6['intra_state_dur3'] = df_offset6['intra_state_dur'] ** 3

# Step 1: First stage regression
first_stage_vars6 = ['instrument', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3']

# Drop NaNs from all variables used in the first stage OLS
df_offset6_cleaned = df_offset6.dropna(subset=first_stage_vars6 + ['wheat_aid']).copy()

X_fs6 = sm.add_constant(df_offset6_cleaned[first_stage_vars6])
y_fs6 = df_offset6_cleaned['wheat_aid']

fs_model6 = OLS(y_fs6, X_fs6).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_offset6_cleaned.index.get_level_values('risocode')})

f_stat6 = fs_model6.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat6:.2f}")

df_offset6_cleaned['aid_resid'] = fs_model6.resid

# Step 2: Logit with control function
logit_vars6 = ['wheat_aid', 'aid_resid', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3']
X_logit6 = sm.add_constant(df_offset6_cleaned[logit_vars6])
y_logit6 = df_offset6_cleaned['intra_state_offset']

logit_model6 = sm.Logit(y_logit6, X_logit6).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_offset6_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Marginal effects
margeff6 = logit_model6.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff6.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff6.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_offset6_cleaned['intra_state_offset'].mean():.3f}")
print(f"  N = {len(df_offset6_cleaned)}")


Column 6: Duration polynomial only
  First-stage F-statistic: 17.77
  Wheat aid marginal effect: -0.0003775
  Std error:                 (0.0002418)
  Mean of dependent var:      0.184
  N = 700


In [172]:
### Column 7: Add time-invariant country controls ###
print("\nColumn 7: + Time-invariant country controls")

df_offset7 = df[(df['in_sample']) &
                             (df['l_intra_state'] != 0)].copy()
df_offset7 = df_offset7[df_offset7['intra_state_offset'].notna()].copy()

# Create polynomial terms
df_offset7['intra_state_dur2'] = df_offset7['intra_state_dur'] ** 2
df_offset7['intra_state_dur3'] = df_offset7['intra_state_dur'] ** 3

# Step 1: First stage with additional controls
# Filter dur_controls to only include columns present in df_offset7
valid_dur_controls = [col for col in dur_controls if col in df_offset7.columns]
first_stage_vars7 = ['instrument', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3'] + valid_dur_controls

# Drop NaNs from all variables used in the first stage OLS
df_offset7_cleaned = df_offset7.dropna(subset=first_stage_vars7 + ['wheat_aid']).copy()

X_fs7 = sm.add_constant(df_offset7_cleaned[first_stage_vars7])
y_fs7 = df_offset7_cleaned['wheat_aid']

fs_model7 = OLS(y_fs7, X_fs7).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_offset7_cleaned.index.get_level_values('risocode')})

f_stat7 = fs_model7.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat7:.2f}")

df_offset7_cleaned['aid_resid'] = fs_model7.resid

# Step 2: Logit with control function and additional controls
logit_vars7 = ['wheat_aid', 'aid_resid', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3'] + valid_dur_controls
X_logit7 = sm.add_constant(df_offset7_cleaned[logit_vars7])
y_logit7 = df_offset7_cleaned['intra_state_offset']

logit_model7 = sm.Logit(y_logit7, X_logit7).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_offset7_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Marginal effects
margeff7 = logit_model7.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff7.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff7.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_offset7_cleaned['intra_state_offset'].mean():.3f}")
print(f"  N = {len(df_offset7_cleaned)}")


Column 7: + Time-invariant country controls
  First-stage F-statistic: 17.85
  Wheat aid marginal effect: -0.0004441
  Std error:                 (0.0002194)
  Mean of dependent var:      0.184
  N = 700


In [173]:
### Column 8: Add region fixed effects ###
print("\nColumn 8: + Region fixed effects")

df_offset8 = df[(df['in_sample']) &
                             (df['l_intra_state'] != 0)].copy()
df_offset8 = df_offset8[df_offset8['intra_state_offset'].notna()].copy()

# Create polynomial terms
df_offset8['intra_state_dur2'] = df_offset8['intra_state_dur'] ** 2
df_offset8['intra_state_dur3'] = df_offset8['intra_state_dur'] ** 3

# Get region dummy columns - now explicitly just the base region dummies, not interactions
# We assume `region_dummies` from previous cell contains these base dummies
base_region_dummy_cols = [col for col in region_dummies.columns if col.startswith('region_') and '_year_' not in col]
region_cols_for_logit = [col for col in base_region_dummy_cols if col in df_offset8.columns]

# Step 1: First stage with controls and region FE
# Filter dur_controls to only include columns present in df_offset8
valid_dur_controls = [col for col in dur_controls if col in df_offset8.columns]
first_stage_vars8 = ['instrument', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3'] + valid_dur_controls + region_cols_for_logit

# Drop NaNs from all variables used in the first stage OLS
df_offset8_cleaned = df_offset8.dropna(subset=first_stage_vars8 + ['wheat_aid']).copy()

# Explicitly convert to float before adding constant for statsmodels OLS
X_fs8 = sm.add_constant(df_offset8_cleaned[first_stage_vars8].astype(float))
y_fs8 = df_offset8_cleaned['wheat_aid']

fs_model8 = OLS(y_fs8, X_fs8).fit(cov_type='cluster',
                                   cov_kwds={'groups': df_offset8_cleaned.index.get_level_values('risocode')})

f_stat8 = fs_model8.f_test('instrument = 0').fvalue
print(f"  First-stage F-statistic: {f_stat8:.2f}")

df_offset8_cleaned['aid_resid'] = fs_model8.resid

# Step 2: Logit with control function, controls, and region FE
logit_vars8 = ['wheat_aid', 'aid_resid', 'intra_state_dur', 'intra_state_dur2', 'intra_state_dur3'] + valid_dur_controls + region_cols_for_logit

# Filter out constant columns from the logit_vars8 subset before adding constant
X_logit8_raw = df_offset8_cleaned[logit_vars8].astype(float)
constant_logit_cols = X_logit8_raw.columns[X_logit8_raw.std() == 0].tolist()
if constant_logit_cols:
    print(f"Dropping constant logit regressor columns: {constant_logit_cols}")
    X_logit8_cleaned = X_logit8_raw.drop(columns=constant_logit_cols)
else:
    X_logit8_cleaned = X_logit8_raw

# Add a constant term here for Logit. statsmodels will handle collinearity by dropping one regressor if needed.
X_logit8 = sm.add_constant(X_logit8_cleaned, prepend=False)
y_logit8 = df_offset8_cleaned['intra_state_offset']

# Use statsmodels.api.Logit instead of statsmodels.formula.api.logit
logit_model8 = sm.Logit(y_logit8, X_logit8).fit(cov_type='cluster',
                                              cov_kwds={'groups': df_offset8_cleaned.index.get_level_values('risocode')},
                                              disp=False)

# Step 3: Marginal effects
margeff8 = logit_model8.get_margeff(at='mean')
print(f"  Wheat aid marginal effect: {margeff8.margeff[0]:.7f}")
print(f"  Std error:                 ({margeff8.margeff_se[0]:.7f})")
print(f"  Mean of dependent var:      {df_offset8_cleaned['intra_state_offset'].mean():.3f}")
print(f"  N = {len(df_offset8_cleaned)}")


Column 8: + Region fixed effects
  First-stage F-statistic: 23.82
  Wheat aid marginal effect: -0.0005191
  Std error:                 (0.0002874)
  Mean of dependent var:      0.184
  N = 700


In [174]:
#######################################################################
### Summary Table for Columns 3-8
#######################################################################

print("\n" + "="*70)
print("TABLE 7 SUMMARY (Columns 3-8):")
print("="*70)

table7_duration_results = pd.DataFrame({
    'Column': [3, 4, 5, 6, 7, 8],
    'Model': ['Onset: Dur only', 'Onset: + Controls', 'Onset: + Region FE',
              'Offset: Dur only', 'Offset: + Controls', 'Offset: + Region FE'],
    'Marginal Effect': [
        margeff3.margeff[0],
        margeff4.margeff[0],
        margeff5.margeff[0],
        margeff6.margeff[0],
        margeff7.margeff[0],
        margeff8.margeff[0]
    ],
    'Std Error': [
        margeff3.margeff_se[0],
        margeff4.margeff_se[0],
        margeff5.margeff_se[0],
        margeff6.margeff_se[0],
        margeff7.margeff_se[0],
        margeff8.margeff_se[0]
    ],
    'First-stage F': [
        f_stat3,
        f_stat4,
        f_stat5,
        f_stat6,
        f_stat7,
        f_stat8
    ],
    'N': [
        len(df_onset3),
        len(df_onset4),
        len(df_onset5),
        len(df_offset6),
        len(df_offset7),
        len(df_offset8)
    ]
})

print(table7_duration_results.to_string(index=False))
table7_duration_results.to_csv("Table7_Duration_results.csv", index=False)

print("\n" + "="*70)
print("INTERPRETATION:")
print("="*70)
print("ONSET (Cols 3-5): Positive marginal effects mean aid INCREASES")
print("                  the probability of conflict starting")
print("")
print("OFFSET (Cols 6-8): NEGATIVE marginal effects mean aid DECREASES")
print("                   the probability of conflict ending")
print("                   → Aid PROLONGS ongoing conflicts")
print("="*70)
print("\n=== Replication Complete ===")


TABLE 7 SUMMARY (Columns 3-8):
 Column               Model  Marginal Effect  Std Error  First-stage F    N
      3     Onset: Dur only         0.000040   0.000256      26.456331 3290
      4   Onset: + Controls        -0.000061   0.000243      23.359480 3290
      5  Onset: + Region FE        -0.000037   0.000308      20.377398 3290
      6    Offset: Dur only        -0.000377   0.000242      17.768051  703
      7  Offset: + Controls        -0.000444   0.000219      17.848997  703
      8 Offset: + Region FE        -0.000519   0.000287      23.824702  703

INTERPRETATION:
ONSET (Cols 3-5): Positive marginal effects mean aid INCREASES
                  the probability of conflict starting

OFFSET (Cols 6-8): NEGATIVE marginal effects mean aid DECREASES
                   the probability of conflict ending
                   → Aid PROLONGS ongoing conflicts

=== Replication Complete ===
