In [698]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.formula.api import ols
import shapefile
import geopandas as geo
import warnings

# Customize
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

# Approach

Summary of approach:

1. Data processing:

   - Import data from AEA RCT Registry
   - Filter data for Kenya
   - Text analysis to extract county information
   - Manual entry for missing county information
   - Match generic regions to counties
   - Create project-level and county-level datasets
   - Create datasets for restricted time period
   - Define coverage variable
   - Find total projects per county
   - Import data on outcome variables and merge with trials data
   - Repeat analysis for provinces

   
2. Descriptive analysis:
   - Summary statistics
   

3. Regression analysis:

   - Differences-in-differences model:
       - Education

# Data Processing 

## Import Data

- Data from [AEA RCT Registry](https://www.socialscienceregistry.org/trials/search): downloaded all 8327 projects reigstered on the site

In [699]:
# Import data on all trials from AEA
all_trials = pd.read_csv('all_trials.csv')
all_trials.head(5)

Unnamed: 0,Title,Url,Last update date,Published at,First registered on,RCT_ID,DOI Number,Primary Investigator,Status,Start date,End date,Keywords,Country names,Other Primary Investigators,Jel code,Secondary IDs,Abstract,External Links,Sponsors,Partners,Intervention start date,Intervention end date,Intervention,Primary outcome end points,Primary outcome explanation,Secondary outcome end points,Secondary outcome explanation,Experimental design,Experimental design details,Randomization method,Randomization unit,Sample size number clusters,Sample size number observations,Sample size number arms,Minimum effect size,IRB,Analysis Plan Documents,Intervention completion date,Data collection completion,Data collection completion date,Number of clusters,Attrition correlated,Total number of observations,Treatment arms,Public data,Public data url,Program files,Program files url,Post trial documents csv,Relevant papers for csv
0,Voter Pessimism and Electoral Accountability: ...,http://www.socialscienceregistry.org/trials/5,"May 02, 2017",2017-05-02 16:17:17 -0400,2013-05-21,AEARCTR-0000005,10.1257/rct.5-8.0,Kelly Zhang kwzhang@mit.edu,completed,2013-01-26,2014-05-31,"[""electoral"", ""public goods"", ""corruption"", ""a...",Kenya (National),,,,"The chapter combines novel survey, audit, and ...",,,,2013-03-02,2013-03-07,The intervention was embedded within a series ...,"Information uptake, political support, beliefs...",See pre-analysis plan.,,,Treatment was randomly assigned at the individ...,,Randomization done using Stata.,Individual,,2500 individuals/1600 individuals,,"MDE=5% change in perceived leakage, sd=28.8667...",Name: Stanford IRB\nApproval_number: 26335\nAp...,"November 08, 2013; February 18, 2014",,,,,,,,,,,,,
1,Community Based Strategies to Reduce Maternal ...,http://www.socialscienceregistry.org/trials/6,"October 25, 2023",2023-10-25 15:31:05 -0400,2013-05-26,AEARCTR-0000006,10.1257/rct.6-3.0,Vandana Sharma vsharma@povertyactionlab.org,completed,2012-02-01,2016-10-31,"[""health"", ""pregnancy"", ""maternal mortality"", ...",Nigeria (Africa),Martina Nyqvist (Martina.Bjorkman.Nyqvist@hhs....,,ClinicalTrials.gov ID: NCT01487707,The objective of this cluster randomized contr...,,,,2013-06-01,2015-06-01,The study will be evaluating 3 primarily deman...,"Maternal Mortality Ratio (MMR), Neonatal Morta...",\r\n,,,The study is a clustered randomized controlled...,,Randomization was done in an office outside of...,clusters of villages with an average populatio...,96 clusters of villages,7200 women,24 clusters per study arm,,Name: Massachusetts Institute of Technology\nA...,Private,2015-12-31,True,2016-10-31,96 clusters,False,6494 women,24 clusters in each arm (3 treatment arms and ...,False,,False,,,Abstract: The slow pace of improvement in serv...
2,An Evaluation of Continuous Comprehensive Eval...,http://www.socialscienceregistry.org/trials/8,"June 15, 2013",2013-06-15 09:00:16 -0400,2013-06-15,AEARCTR-0000008,10.1257/rct.8-1.0,Esther Duflo eduflo@mit.edu,on_going,2011-05-19,2014-12-31,"[""education"", ""remedial education""]",Private,Shobhini Mukerji (shobhini.mukerji@ifmr.ac.in)...,,,Under the Right to Education Act passed in 200...,,,,2011-11-30,2013-03-30,There are two interventions:\r\n1) training te...,test scores (written and oral),,,,This is a three group design:\r\n-control (98 ...,,Randomization in office computer (Stata),School campus (which could include multiple sc...,500 primary and upper primary school campuses,22900,"98 school campuses control, 93 school campuses...",,Name: Massachusetts Institute of Technology\nA...,,,,,,,,,,,,,,
3,Enhancing Local Public Service Delivery: Exper...,http://www.socialscienceregistry.org/trials/9,"October 04, 2013",2013-10-04 16:31:22 -0400,2013-10-04,AEARCTR-0000009,10.1257/rct.9-1.0,Esther Duflo eduflo@mit.edu,on_going,2012-01-01,2014-06-01,"[""governance"", ""employment"", ""welfare""]",Private,"Abhijit Banerjee (banerjee@mit.edu) MIT, J-PAL...",,,This project evaluates an innovative policy in...,,,,2012-10-01,2013-04-01,"In the new fund flow, the state pool of MNREG...",expenditures on NREGA; household level employm...,,,,Panchayats in randomly selected blocks will ei...,,Randomization done in office by a computer,Blocks,12 districts,196 blocks,"69 treatment blocks,127 control blocks",,Name: Massachusetts Institute of Technology\nA...,,,,,,,,,,,,,,
4,Free DFS - Intervention to fight anemia and im...,http://www.socialscienceregistry.org/trials/10,"May 15, 2013",2013-05-15 10:14:16 -0400,2013-05-15,AEARCTR-0000010,10.1257/rct.10-1.0,Sharon Barnhardt sharonbarnhardt@gmail.com,on_going,2011-04-04,2014-06-01,"[""health"", ""Nutrition"", ""Anemia""]",Private,Abhijit V. Banerjee (banerjee@mit.edu) MIT; Es...,,,Iron deficiency anemia (IDA) has been linked t...,,,,2013-05-11,2014-06-01,We deliver free Double Fortified Salt manufact...,"Hemoglobin level, physical health, cognition (...","Hemoglobin level measured by hemocue, anemia, ...",,,We randomly selected 62 villages participating...,,Computer generated randomization done in offic...,Households,930 households,6045 individuals,"2821 individuals free DFS, 3224 individuals n...",,Name: IFMR (Chennai)\nApproval_number: IRB0000...,,,,,,,,,,,,,,


In [700]:
all_trials.shape

(8327, 50)

## Prepare Kenya Dataset

- Filter for trials specifically for Kenya
- Search for 'Kenya' in  `Country names`, `Abstract` or `Title`
    - Problem with searching in the abstract: lot of studies mention previous research done in Kenya, the study itself is not taking place in Kenya

In [701]:
kenya_trials = all_trials[all_trials['Country names'].str.contains('Kenya', na=False) | all_trials['Abstract'].str.contains('Kenya') | all_trials['Title'].str.contains('Kenya')]
kenya_trials.head()

Unnamed: 0,Title,Url,Last update date,Published at,First registered on,RCT_ID,DOI Number,Primary Investigator,Status,Start date,End date,Keywords,Country names,Other Primary Investigators,Jel code,Secondary IDs,Abstract,External Links,Sponsors,Partners,Intervention start date,Intervention end date,Intervention,Primary outcome end points,Primary outcome explanation,Secondary outcome end points,Secondary outcome explanation,Experimental design,Experimental design details,Randomization method,Randomization unit,Sample size number clusters,Sample size number observations,Sample size number arms,Minimum effect size,IRB,Analysis Plan Documents,Intervention completion date,Data collection completion,Data collection completion date,Number of clusters,Attrition correlated,Total number of observations,Treatment arms,Public data,Public data url,Program files,Program files url,Post trial documents csv,Relevant papers for csv
0,Voter Pessimism and Electoral Accountability: ...,http://www.socialscienceregistry.org/trials/5,"May 02, 2017",2017-05-02 16:17:17 -0400,2013-05-21,AEARCTR-0000005,10.1257/rct.5-8.0,Kelly Zhang kwzhang@mit.edu,completed,2013-01-26,2014-05-31,"[""electoral"", ""public goods"", ""corruption"", ""a...",Kenya (National),,,,"The chapter combines novel survey, audit, and ...",,,,2013-03-02,2013-03-07,The intervention was embedded within a series ...,"Information uptake, political support, beliefs...",See pre-analysis plan.,,,Treatment was randomly assigned at the individ...,,Randomization done using Stata.,Individual,,2500 individuals/1600 individuals,,"MDE=5% change in perceived leakage, sd=28.8667...",Name: Stanford IRB\nApproval_number: 26335\nAp...,"November 08, 2013; February 18, 2014",,,,,,,,,,,,,
8,Understanding Ethnic Cooperation: Evidence fro...,http://www.socialscienceregistry.org/trials/16,"May 28, 2013",2013-05-28 19:50:19 -0400,2013-05-28,AEARCTR-0000016,10.1257/rct.16-1.0,Edward Miguel emiguel@econ.berkeley.edu,completed,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...","Kenya (); Tanzania, United Republic of ()",Kelly Zhang () Stanford University; Bertil Tun...,,,Ethnic polarization is often linked to underde...,,,,2012-07-16,2013-02-14,Different subject priming treatments in the la...,Choices in multiple lab experimental games. Se...,See the attached pre-analysis plan for details.,,,See the attached pre-analysis plan for details.,See the attached pre-analysis plan for details.,Computer random number generator. See the atta...,Individual,2098 individuals (across Kenya and Tanzania),2098 individuals (across Kenya and Tanzania),See the attached pre-analysis plan for details.,,"Name: Innovations for Poverty Action, Kenya IR...",,2013-02-14,True,2013-02-14,2098 individuals (across Kenya and Tanzania),False,2098 individuals (across Kenya and Tanzania),See pre-analysis plan for details.,False,,,,,
9,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/17,"December 08, 2020",2020-12-08 06:15:10 -0500,2014-12-04,AEARCTR-0000017,10.1257/rct.17-1.1,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) Prin...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villags,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 80%","Name: University of Zurich, Department of Econ...","June 28, 2013; December 04, 2014",,True,,,,,,,,,,,
10,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/19,"July 06, 2016",2016-07-06 15:21:00 -0400,2013-06-28,AEARCTR-0000019,10.1257/rct.19-5.0,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) McKi...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villages,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 89%",Name: Innovations for Poverty Action Kenya (IP...,"June 28, 2013; November 21, 2015; March 15, 20...",,,,,,,,,,,,,
19,Electing to Vote: Strengthening the Credibilit...,http://www.socialscienceregistry.org/trials/30,"April 20, 2014",2014-04-20 20:45:05 -0400,2014-04-20,AEARCTR-0000030,10.1257/rct.30-1.0,Tavneet Suri tavneet@mit.edu,on_going,2013-02-19,2014-12-01,"[""electoral"", ""governance"", ""post-conflict"", ""...",Private,Vincent Pons (vpons@mit.edu) MIT; Benjamin Mar...,,,This study analyzes the impact of different se...,,,,2013-02-28,2013-03-04,This study analyzes the impact of different se...,"voter turnout, vote shares, political attitude...",The outcomes come from two sources. The admini...,,,"12,160 polling stations (out of 24,560 polling...",,Polling stations were stratified by county. Th...,Polling station level,"The experiment was conducted with 6,086 pollin...",Approximately 5 million people,A cluster was a polling station. The number of...,1.5 percentage points in voter turnout,"Name: Innovations for Poverty Action, USA (for...",,,,,,,,,,,,,,


In [702]:
kenya_trials.shape

# 250 trials related to Kenya

(250, 50)

- <font color='green'> AEA website shows that there are 279 trials related to Kenya - which trials are left out? How to find them? </font>

In [703]:
# Look at country names
kenya_trials['Country names']

0                                        Kenya (National)
8               Kenya (); Tanzania, United Republic of ()
9                                         Kenya (Rarieda)
10                                        Kenya (Rarieda)
19                                                Private
44                                                Private
67                  Kenya (Meru & Tharaka-Nithi Counties)
100                                               Private
125                                       Kenya (Nairobi)
157              Kenya (Kakamega, Kisii, Embu and Kitui )
160                 Tanzania, United Republic of (Africa)
175                                               Private
193                                  Kenya (Busia, Siaya)
237                                               Private
243                                               Private
255                                               Private
270                                       Kenya (Western)
283           

In [704]:
# Regular expressions to extract name of county + add to the dataframe
pat_1 = r'Kenya\s*\(([\w\s\-\&\,]*)\)'
counties = kenya_trials['Country names'].str.findall(pat_1).fillna('')
counties_col = []
for i in range(len(counties)):
    if len(counties.iloc[i]) > 0:
        counties_col.append(counties.iloc[i][0])
    else:
        counties_col.append('')
kenya_trials['County'] = counties_col

In [705]:
kenya_trials['County']

0                                                National
8                                                        
9                                                 Rarieda
10                                                Rarieda
19                                                       
44                                                       
67                          Meru & Tharaka-Nithi Counties
100                                                      
125                                               Nairobi
157                      Kakamega, Kisii, Embu and Kitui 
160                                                      
175                                                      
193                                          Busia, Siaya
237                                                      
243                                                      
255                                                      
270                                               Western
283           

In [706]:
# Export dataframe - fill in the missing counties manually
kenya_trials.to_csv('kenya_trials.csv')

- Need to compare existing spreadsheet and match to find new projects
- Sort both RCT_IDs and insert the new projects at points of mismatch

In [707]:
# Importing missing counties data with new manually entered information
missing_counties_data = pd.read_csv('missing_counties_data.csv')
missing_counties_data.shape

(241, 2)

In [708]:
# Merge Kenya trials data with new counties data
kenya_trials = kenya_trials.merge(missing_counties_data, on='RCT_ID', how='left')
kenya_trials

Unnamed: 0,Title,Url,Last update date,Published at,First registered on,RCT_ID,DOI Number,Primary Investigator,Status,Start date,End date,Keywords,Country names,Other Primary Investigators,Jel code,Secondary IDs,Abstract,External Links,Sponsors,Partners,Intervention start date,Intervention end date,Intervention,Primary outcome end points,Primary outcome explanation,Secondary outcome end points,Secondary outcome explanation,Experimental design,Experimental design details,Randomization method,Randomization unit,Sample size number clusters,Sample size number observations,Sample size number arms,Minimum effect size,IRB,Analysis Plan Documents,Intervention completion date,Data collection completion,Data collection completion date,Number of clusters,Attrition correlated,Total number of observations,Treatment arms,Public data,Public data url,Program files,Program files url,Post trial documents csv,Relevant papers for csv,County_x,County_y
0,Voter Pessimism and Electoral Accountability: ...,http://www.socialscienceregistry.org/trials/5,"May 02, 2017",2017-05-02 16:17:17 -0400,2013-05-21,AEARCTR-0000005,10.1257/rct.5-8.0,Kelly Zhang kwzhang@mit.edu,completed,2013-01-26,2014-05-31,"[""electoral"", ""public goods"", ""corruption"", ""a...",Kenya (National),,,,"The chapter combines novel survey, audit, and ...",,,,2013-03-02,2013-03-07,The intervention was embedded within a series ...,"Information uptake, political support, beliefs...",See pre-analysis plan.,,,Treatment was randomly assigned at the individ...,,Randomization done using Stata.,Individual,,2500 individuals/1600 individuals,,"MDE=5% change in perceived leakage, sd=28.8667...",Name: Stanford IRB\nApproval_number: 26335\nAp...,"November 08, 2013; February 18, 2014",,,,,,,,,,,,,,National,National
1,Understanding Ethnic Cooperation: Evidence fro...,http://www.socialscienceregistry.org/trials/16,"May 28, 2013",2013-05-28 19:50:19 -0400,2013-05-28,AEARCTR-0000016,10.1257/rct.16-1.0,Edward Miguel emiguel@econ.berkeley.edu,completed,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...","Kenya (); Tanzania, United Republic of ()",Kelly Zhang () Stanford University; Bertil Tun...,,,Ethnic polarization is often linked to underde...,,,,2012-07-16,2013-02-14,Different subject priming treatments in the la...,Choices in multiple lab experimental games. Se...,See the attached pre-analysis plan for details.,,,See the attached pre-analysis plan for details.,See the attached pre-analysis plan for details.,Computer random number generator. See the atta...,Individual,2098 individuals (across Kenya and Tanzania),2098 individuals (across Kenya and Tanzania),See the attached pre-analysis plan for details.,,"Name: Innovations for Poverty Action, Kenya IR...",,2013-02-14,True,2013-02-14,2098 individuals (across Kenya and Tanzania),False,2098 individuals (across Kenya and Tanzania),See pre-analysis plan for details.,False,,,,,,,Nairobi
2,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/17,"December 08, 2020",2020-12-08 06:15:10 -0500,2014-12-04,AEARCTR-0000017,10.1257/rct.17-1.1,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) Prin...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villags,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 80%","Name: University of Zurich, Department of Econ...","June 28, 2013; December 04, 2014",,True,,,,,,,,,,,,Rarieda,Rarieda
3,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/19,"July 06, 2016",2016-07-06 15:21:00 -0400,2013-06-28,AEARCTR-0000019,10.1257/rct.19-5.0,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) McKi...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villages,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 89%",Name: Innovations for Poverty Action Kenya (IP...,"June 28, 2013; November 21, 2015; March 15, 20...",,,,,,,,,,,,,,Rarieda,Rarieda
4,Electing to Vote: Strengthening the Credibilit...,http://www.socialscienceregistry.org/trials/30,"April 20, 2014",2014-04-20 20:45:05 -0400,2014-04-20,AEARCTR-0000030,10.1257/rct.30-1.0,Tavneet Suri tavneet@mit.edu,on_going,2013-02-19,2014-12-01,"[""electoral"", ""governance"", ""post-conflict"", ""...",Private,Vincent Pons (vpons@mit.edu) MIT; Benjamin Mar...,,,This study analyzes the impact of different se...,,,,2013-02-28,2013-03-04,This study analyzes the impact of different se...,"voter turnout, vote shares, political attitude...",The outcomes come from two sources. The admini...,,,"12,160 polling stations (out of 24,560 polling...",,Polling stations were stratified by county. Th...,Polling station level,"The experiment was conducted with 6,086 pollin...",Approximately 5 million people,A cluster was a polling station. The number of...,1.5 percentage points in voter turnout,"Name: Innovations for Poverty Action, USA (for...",,,,,,,,,,,,,,,,
5,Understanding smallholder arbitrage in Kenyan ...,http://www.socialscienceregistry.org/trials/67,"September 06, 2013",2013-09-06 19:20:48 -0400,2013-09-06,AEARCTR-0000067,10.1257/rct.67-1.0,Marshall Burke marshall.burke@berkeley.edu,on_going,2012-08-01,2014-08-15,"[""agriculture""]",Private,,,,Rural grain markets throughout much of the dev...,,,,2012-10-01,2013-08-15,,"maize inventories, maize prices paid and recei...",,,,Farmers were divided into two main treatment g...,See pre-analysis plan.,Randomization was done in office by a computer,farmer groups,"240 groups, of 6-8 farmers each",1589 total farmers,"80 groups in C, 77 in T1, 75 in T2",,Name: UC Berkeley Committee for Protection of ...,Private,,,,,,,,,,,,,,,Bungoma
6,Mitigating Aflatoxin Exposure to Improve Child...,http://www.socialscienceregistry.org/trials/105,"October 20, 2017",2017-10-20 10:05:36 -0400,2013-11-06,AEARCTR-0000105,10.1257/rct.105-4.0,Kelly Jones kmjones@gmail.com,completed,2013-02-01,2017-03-31,"[""agriculture"", ""health"", ""Aflatoxin"", ""maize""...",Kenya (Meru & Tharaka-Nithi Counties),Jef Leroy (j.leroy@cgiar.org) IFPRI; Vivian Ho...,,,Whether the observed association between aflat...,,,,2013-07-01,2016-10-31,See protocol attached,Incidence of household maize stores with aflat...,See protocol attached,,,We propose a three arm randomized control tria...,,Based on random numbers assigned by computer,Village,71 villages,1852 households,Swapping treatment: 28 villages\r\nPost-harves...,See protocol attached,Name: International Food Policy Research Insti...,"November 23, 2016",,,,,,,,,,,,,,Meru & Tharaka-Nithi Counties,"Meru, Tharaka-Nithi"
7,THE IMPACT OF VCT AND CONDOM DISTRIBUTION AS H...,http://www.socialscienceregistry.org/trials/170,"December 17, 2013",2013-12-17 22:42:20 -0500,2013-12-17,AEARCTR-0000170,10.1257/rct.170-1.0,Esther Duflo eduflo@mit.edu,on_going,2009-02-01,2014-07-01,"[""health"", ""HIV/AIDS"", ""HSV-2"", ""condom"", ""VCT""]",Private,Pascaline Dupas (pdupas@stanford.edu) Stanford...,,,The vast majority of new HIV infections occur ...,,,,2009-02-01,2010-07-31,The study will evaluate two HIV/AIDS preventio...,Herpes Simplex Virus Type 2 (HSV-2) prevalence...,,,,"The study is an unblinded, individually random...",,Randomization done in office by computer,individual,,"10,306 respondents","approximately 2,550 per study arm",,Name: UCLA\nApproval_number: G08-09-085-01\nAp...,,,,,,,,,,,,,,,,
8,"The effect of acute stress on productivity, ti...",http://www.socialscienceregistry.org/trials/218,"January 14, 2014",2014-01-14 23:50:26 -0500,2014-01-14,AEARCTR-0000218,10.1257/rct.218-1.0,Chaning Jang cajang@hawaii.edu,completed,2013-05-28,2013-08-07,"[""welfare"", ""Behavior"", ""Stress""]",Kenya (Nairobi),Linda Kleppin (lindakleppin@gmail.com) Univers...,"D90, D03, D01",,This document outlines the analysis plan for t...,,,,2013-05-28,2013-08-07,Please see the analysis plan.,"Time Preference, Risk Preference, Productivity...",Please see the analysis plan for full details.,,,We use the cold pressor task and randomly assi...,,Done in office via physical randomization devi...,Individual,669 individuals,669 individuals,Roughly half of the respondents were assigned ...,,Name: MIT Committee on the use of humans as ex...,Private,,,,,,,,,,,,,,Nairobi,Nairobi
9,Unpacking the determinants of entrepreneurship...,http://www.socialscienceregistry.org/trials/287,"October 11, 2023",2023-10-11 16:57:19 -0400,2014-02-21,AEARCTR-0000287,10.1257/rct.287-4.0,David McKenzie dmckenzie@worldbank.org,completed,2013-06-01,2018-06-01,"[""labor"", ""business training"", ""microenterpris...","Kenya (Kakamega, Kisii, Embu and Kitui )",,O12,,This impact evaluation aims to measure the cau...,,,,2013-06-25,2013-11-29,The training provided is the ILO’s Gender and ...,"profitability, sales, and business survivorship",Business is still operating at the time of the...,,,Individuals were assigned to treatment and con...,"First, Markets were assigned to treatment (hav...",Randomization done in office by a computer,"Random selection at the market level, and then...",157 markets,"3,538 individuals","Markets: 93 treated, 64 control\r\nIndividuals...",See pre-analysis plan,Name: Innovations for Poverty Action \nApprova...,Private,2013-11-30,True,2017-08-31,"157 Markets (clusters), 3537 Firms",False,3537 firms,"93 treated markets, with 2160 firms; 64 contro...",True,https://www.openicpsr.org/openicpsr/project/11...,True,https://www.openicpsr.org/openicpsr/project/11...,,Abstract: A common concern with efforts to dir...,"Kakamega, Kisii, Embu and Kitui","Kakamega, Kisii, Embu, Kitui"


In [709]:
# Replace NaN values in new counties column for text analysis
kenya_trials['County_y'] = kenya_trials['County_y'].fillna('')

In [710]:
# Create new counties column based on the existing 2
final_counties = []
for i in range(len(kenya_trials)):
    x = kenya_trials['County_x'].iloc[i]
    y = kenya_trials['County_y'].iloc[i]
    if x == '':
        final_counties.append(y)
    elif y == '':
        final_counties.append(x)
    else:
        final_counties.append(y)

# Create new column with final county values
kenya_trials['County'] = final_counties
kenya_trials.head()

Unnamed: 0,Title,Url,Last update date,Published at,First registered on,RCT_ID,DOI Number,Primary Investigator,Status,Start date,End date,Keywords,Country names,Other Primary Investigators,Jel code,Secondary IDs,Abstract,External Links,Sponsors,Partners,Intervention start date,Intervention end date,Intervention,Primary outcome end points,Primary outcome explanation,Secondary outcome end points,Secondary outcome explanation,Experimental design,Experimental design details,Randomization method,Randomization unit,Sample size number clusters,Sample size number observations,Sample size number arms,Minimum effect size,IRB,Analysis Plan Documents,Intervention completion date,Data collection completion,Data collection completion date,Number of clusters,Attrition correlated,Total number of observations,Treatment arms,Public data,Public data url,Program files,Program files url,Post trial documents csv,Relevant papers for csv,County_x,County_y,County
0,Voter Pessimism and Electoral Accountability: ...,http://www.socialscienceregistry.org/trials/5,"May 02, 2017",2017-05-02 16:17:17 -0400,2013-05-21,AEARCTR-0000005,10.1257/rct.5-8.0,Kelly Zhang kwzhang@mit.edu,completed,2013-01-26,2014-05-31,"[""electoral"", ""public goods"", ""corruption"", ""a...",Kenya (National),,,,"The chapter combines novel survey, audit, and ...",,,,2013-03-02,2013-03-07,The intervention was embedded within a series ...,"Information uptake, political support, beliefs...",See pre-analysis plan.,,,Treatment was randomly assigned at the individ...,,Randomization done using Stata.,Individual,,2500 individuals/1600 individuals,,"MDE=5% change in perceived leakage, sd=28.8667...",Name: Stanford IRB\nApproval_number: 26335\nAp...,"November 08, 2013; February 18, 2014",,,,,,,,,,,,,,National,National,National
1,Understanding Ethnic Cooperation: Evidence fro...,http://www.socialscienceregistry.org/trials/16,"May 28, 2013",2013-05-28 19:50:19 -0400,2013-05-28,AEARCTR-0000016,10.1257/rct.16-1.0,Edward Miguel emiguel@econ.berkeley.edu,completed,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...","Kenya (); Tanzania, United Republic of ()",Kelly Zhang () Stanford University; Bertil Tun...,,,Ethnic polarization is often linked to underde...,,,,2012-07-16,2013-02-14,Different subject priming treatments in the la...,Choices in multiple lab experimental games. Se...,See the attached pre-analysis plan for details.,,,See the attached pre-analysis plan for details.,See the attached pre-analysis plan for details.,Computer random number generator. See the atta...,Individual,2098 individuals (across Kenya and Tanzania),2098 individuals (across Kenya and Tanzania),See the attached pre-analysis plan for details.,,"Name: Innovations for Poverty Action, Kenya IR...",,2013-02-14,True,2013-02-14,2098 individuals (across Kenya and Tanzania),False,2098 individuals (across Kenya and Tanzania),See pre-analysis plan for details.,False,,,,,,,Nairobi,Nairobi
2,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/17,"December 08, 2020",2020-12-08 06:15:10 -0500,2014-12-04,AEARCTR-0000017,10.1257/rct.17-1.1,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) Prin...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villags,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 80%","Name: University of Zurich, Department of Econ...","June 28, 2013; December 04, 2014",,True,,,,,,,,,,,,Rarieda,Rarieda,Rarieda
3,Welfare Effects of Unconditional Cash Transfers,http://www.socialscienceregistry.org/trials/19,"July 06, 2016",2016-07-06 15:21:00 -0400,2013-06-28,AEARCTR-0000019,10.1257/rct.19-5.0,Johannes Haushofer haushofer@gmail.com,completed,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Kenya (Rarieda),Jeremy Shapiro (jeremypshapiro@gmail.com) McKi...,"C93, D13, I15, I25, O12",,This randomized controlled trial (RCT) evaluat...,,,,2011-06-01,2013-01-31,The UCT Program implemented by GiveDirectly In...,"Assets, expenditure, psychological well-being ...",See pre-analysis plan,,,Evaluation questions \r\n\r\nOur main question...,,Computerized randomization in office.,household (treatment vs. spillover households)...,100 villages,1500 households,"500 households treatment, 500 households spill...","0.2 SD, 89%",Name: Innovations for Poverty Action Kenya (IP...,"June 28, 2013; November 21, 2015; March 15, 20...",,,,,,,,,,,,,,Rarieda,Rarieda,Rarieda
4,Electing to Vote: Strengthening the Credibilit...,http://www.socialscienceregistry.org/trials/30,"April 20, 2014",2014-04-20 20:45:05 -0400,2014-04-20,AEARCTR-0000030,10.1257/rct.30-1.0,Tavneet Suri tavneet@mit.edu,on_going,2013-02-19,2014-12-01,"[""electoral"", ""governance"", ""post-conflict"", ""...",Private,Vincent Pons (vpons@mit.edu) MIT; Benjamin Mar...,,,This study analyzes the impact of different se...,,,,2013-02-28,2013-03-04,This study analyzes the impact of different se...,"voter turnout, vote shares, political attitude...",The outcomes come from two sources. The admini...,,,"12,160 polling stations (out of 24,560 polling...",,Polling stations were stratified by county. Th...,Polling station level,"The experiment was conducted with 6,086 pollin...",Approximately 5 million people,A cluster was a polling station. The number of...,1.5 percentage points in voter turnout,"Name: Innovations for Poverty Action, USA (for...",,,,,,,,,,,,,,,,,


In [711]:
# Number of trials with no county information
sum(kenya_trials['County'] == '')

85

In [712]:
kenya_trials.columns

Index(['Title', 'Url', 'Last update date', 'Published at',
       'First registered on', 'RCT_ID', 'DOI Number', 'Primary Investigator',
       'Status', 'Start date', 'End date', 'Keywords', 'Country names',
       'Other Primary Investigators', 'Jel code', 'Secondary IDs', 'Abstract',
       'External Links', 'Sponsors', 'Partners', 'Intervention start date',
       'Intervention end date', 'Intervention', 'Primary outcome end points',
       'Primary outcome explanation', 'Secondary outcome end points',
       'Secondary outcome explanation', 'Experimental design',
       'Experimental design details', 'Randomization method',
       'Randomization unit', 'Sample size number clusters',
       'Sample size number observations', 'Sample size number arms',
       'Minimum effect size', 'IRB', 'Analysis Plan Documents',
       'Intervention completion date', 'Data collection completion',
       'Data collection completion date', 'Number of clusters',
       'Attrition correlated', 'Total

In [713]:
# Remove entries with no county information and select relevant columns
kenya_trials_final = kenya_trials[kenya_trials['County'] != ''][['RCT_ID', 'Start date', 'End date', 'Keywords', 'County']]
kenya_trials_final.shape

(165, 5)

In [714]:
kenya_trials_final = kenya_trials_final[(kenya_trials_final['County'] != 'National') & (kenya_trials_final['County'] != 'Africa') & (kenya_trials_final['County'] != 'District of Columbia') & (kenya_trials_final['County'] != 'sub-Saharan Africa')]
kenya_trials_final

Unnamed: 0,RCT_ID,Start date,End date,Keywords,County
1,AEARCTR-0000016,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...",Nairobi
2,AEARCTR-0000017,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Rarieda
3,AEARCTR-0000019,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",Rarieda
5,AEARCTR-0000067,2012-08-01,2014-08-15,"[""agriculture""]",Bungoma
6,AEARCTR-0000105,2013-02-01,2017-03-31,"[""agriculture"", ""health"", ""Aflatoxin"", ""maize""...","Meru, Tharaka-Nithi"
8,AEARCTR-0000218,2013-05-28,2013-08-07,"[""welfare"", ""Behavior"", ""Stress""]",Nairobi
9,AEARCTR-0000287,2013-06-01,2018-06-01,"[""labor"", ""business training"", ""microenterpris...","Kakamega, Kisii, Embu, Kitui"
11,AEARCTR-0000323,2014-01-27,2014-10-26,"[""finance"", ""welfare""]",Kisumu
12,AEARCTR-0000350,2014-02-07,2020-08-31,"[""environment_and_energy"", ""Electrification"", ...","Busia, Siaya"
14,AEARCTR-0000443,2013-12-10,2016-06-30,"[""agriculture"", ""Technology adoption""]","Western, Nyanza, Central"


- Some of the locations entered are cities, which must be replaced with the counties they are in as this is a county-level analysis

In [715]:
# Replace some words
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Rarieda', 'Siaya')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Butere', 'Kakamega')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Mumias', 'Kakamega')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Samia', 'Busia')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Nambale', 'Busia')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Ndaragwa', 'Nyandarua')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Transnzoia', 'Trans Nzoia')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Dandora', 'Nairobi')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Makeuni', 'Makueni')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Tharaka Nithi', 'Tharaka-Nithi')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Kibera', 'Nairobi')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Mount Kenya Region', 'Eastern, Central')
kenya_trials_final['County'] = kenya_trials_final['County'].str.replace('Eastern and Western Kenya', 'Eastern, Western')

In [716]:
# Get list of county names and their provinces
counties = pd.read_csv('counties.csv')
counties

Unnamed: 0,County,Province
0,Mombasa,Coast
1,Kwale,Coast
2,Kilifi,Coast
3,Tana River,Coast
4,Lamu,Coast
5,Taita-Taveta,Coast
6,Garissa,Northern
7,Wajir,Northern
8,Mandera,Northern
9,Marsabit,Eastern


In [717]:
# Changing general region names to specific ones
regions = np.array(counties['Province'].value_counts().index)

for i in kenya_trials_final.index:
    temp_list = []
    county = kenya_trials_final['County'].loc[i].split(',')
    for j in county:
        if j.strip() in regions:
            temp_list.extend(list(counties[counties['Province'] == j.strip()]['County']))
        else:
            temp_list.append(j)
    kenya_trials_final.at[i, 'County'] = temp_list

In [718]:
kenya_trials_final

Unnamed: 0,RCT_ID,Start date,End date,Keywords,County
1,AEARCTR-0000016,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...",[Nairobi]
2,AEARCTR-0000017,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",[Siaya]
3,AEARCTR-0000019,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",[Siaya]
5,AEARCTR-0000067,2012-08-01,2014-08-15,"[""agriculture""]",[Bungoma]
6,AEARCTR-0000105,2013-02-01,2017-03-31,"[""agriculture"", ""health"", ""Aflatoxin"", ""maize""...","[Meru, Tharaka-Nithi]"
8,AEARCTR-0000218,2013-05-28,2013-08-07,"[""welfare"", ""Behavior"", ""Stress""]",[Nairobi]
9,AEARCTR-0000287,2013-06-01,2018-06-01,"[""labor"", ""business training"", ""microenterpris...","[Kakamega, Kisii, Embu, Kitui]"
11,AEARCTR-0000323,2014-01-27,2014-10-26,"[""finance"", ""welfare""]",[Kisumu]
12,AEARCTR-0000350,2014-02-07,2020-08-31,"[""environment_and_energy"", ""Electrification"", ...","[Busia, Siaya]"
14,AEARCTR-0000443,2013-12-10,2016-06-30,"[""agriculture"", ""Technology adoption""]","[Kakamega, Vihiga, Bungoma, Busia, Siaya, Kisu..."


In [719]:
kenya_trials_final.shape

(154, 5)

In [720]:
# Add province information
provinces = []
for i in kenya_trials_final.index:
    temp_list = []
    county = kenya_trials_final['County'].loc[i]
    for j in county:
        val = counties[counties['County'] == j.strip()]['Province'].iloc[0]
        if val not in temp_list:
            temp_list.append(val)
    provinces.append(temp_list)
provinces

[['Nairobi'],
 ['Nyanza'],
 ['Nyanza'],
 ['Western'],
 ['Eastern'],
 ['Nairobi'],
 ['Western', 'Nyanza', 'Eastern'],
 ['Nyanza'],
 ['Western', 'Nyanza'],
 ['Western', 'Nyanza', 'Central'],
 ['Nairobi'],
 ['Western'],
 ['Nyanza'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nyanza'],
 ['Nyanza'],
 ['Nairobi'],
 ['Central'],
 ['Western'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Nairobi'],
 ['Western'],
 ['Nairobi'],
 ['Western'],
 ['Northern', 'Nairobi'],
 ['Western'],
 ['Nyanza'],
 ['Nairobi'],
 ['Nyanza'],
 ['Western'],
 ['Western'],
 ['Western'],
 ['Nairobi'],
 ['Western'],
 ['Central'],
 ['Western'],
 ['Western'],
 ['Eastern'],
 ['Western'],
 ['Nairobi'],
 ['Nairobi'],
 ['Western'],
 ['Nairobi'],
 ['Northern'],
 ['Western'],
 ['Western'],
 ['Western'],
 ['Western'],
 ['Western'],
 ['Western'],
 ['Central'],
 ['Western'],
 ['Rift Valley', 'Eastern'],
 ['Western', 'Nyanza'],
 ['Nairobi'],
 ['Nairobi'],
 ['Eastern'],
 ['Easte

In [721]:
kenya_trials_final['Province'] = provinces

In [722]:
kenya_trials_final

Unnamed: 0,RCT_ID,Start date,End date,Keywords,County,Province
1,AEARCTR-0000016,2012-07-16,2013-02-14,"[""electoral"", ""governance"", ""post-conflict"", ""...",[Nairobi],[Nairobi]
2,AEARCTR-0000017,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",[Siaya],[Nyanza]
3,AEARCTR-0000019,2011-05-01,2013-02-28,"[""agriculture"", ""education"", ""finance"", ""healt...",[Siaya],[Nyanza]
5,AEARCTR-0000067,2012-08-01,2014-08-15,"[""agriculture""]",[Bungoma],[Western]
6,AEARCTR-0000105,2013-02-01,2017-03-31,"[""agriculture"", ""health"", ""Aflatoxin"", ""maize""...","[Meru, Tharaka-Nithi]",[Eastern]
8,AEARCTR-0000218,2013-05-28,2013-08-07,"[""welfare"", ""Behavior"", ""Stress""]",[Nairobi],[Nairobi]
9,AEARCTR-0000287,2013-06-01,2018-06-01,"[""labor"", ""business training"", ""microenterpris...","[Kakamega, Kisii, Embu, Kitui]","[Western, Nyanza, Eastern]"
11,AEARCTR-0000323,2014-01-27,2014-10-26,"[""finance"", ""welfare""]",[Kisumu],[Nyanza]
12,AEARCTR-0000350,2014-02-07,2020-08-31,"[""environment_and_energy"", ""Electrification"", ...","[Busia, Siaya]","[Western, Nyanza]"
14,AEARCTR-0000443,2013-12-10,2016-06-30,"[""agriculture"", ""Technology adoption""]","[Kakamega, Vihiga, Bungoma, Busia, Siaya, Kisu...","[Western, Nyanza, Central]"


In [723]:
# Create dataframe of dummy variables per project
df_by_project = pd.DataFrame(columns=counties['County'])
for i in range(len(kenya_trials_final['County'])):
    temp_list = []
    for j in counties['County']:
        if (j in kenya_trials_final['County'].iloc[i]):
            temp_list.append(1)
        else:
            temp_list.append(0)
    df_by_project.loc[kenya_trials_final['RCT_ID'].iloc[i]] = temp_list

In [724]:
df_by_project = df_by_project.rename_axis(None, axis=1)
df_by_project.head()

Unnamed: 0,Mombasa,Kwale,Kilifi,Tana River,Lamu,Taita-Taveta,Garissa,Wajir,Mandera,Marsabit,Isiolo,Meru,Tharaka-Nithi,Embu,Kitui,Machakos,Makueni,Nyandarua,Nyeri,Kirinyaga,Murang'a,Kiambu,Turkana,West Pokot,Samburu,Trans Nzoia,Uasin Gishu,Elgeyo-Marakwet,Nandi,Baringo,Laikipia,Nakuru,Narok,Kajiado,Kericho,Bomet,Kakamega,Vihiga,Bungoma,Busia,Siaya,Kisumu,Homa Bay,Migori,Kisii,Nyamira,Nairobi
AEARCTR-0000016,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
AEARCTR-0000017,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
AEARCTR-0000019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
AEARCTR-0000067,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
AEARCTR-0000105,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [725]:
# Transpose project-level dataframe to get county-level dataframe
df_by_county = df_by_project.T
df_by_county

Unnamed: 0,AEARCTR-0000016,AEARCTR-0000017,AEARCTR-0000019,AEARCTR-0000067,AEARCTR-0000105,AEARCTR-0000218,AEARCTR-0000287,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000443,AEARCTR-0000459,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000647,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000705,AEARCTR-0000740,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000934,AEARCTR-0000946,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001002,AEARCTR-0001008,AEARCTR-0001076,AEARCTR-0001167,AEARCTR-0001183,AEARCTR-0001187,AEARCTR-0001197,AEARCTR-0001293,AEARCTR-0001304,AEARCTR-0001358,AEARCTR-0001373,AEARCTR-0001432,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001592,AEARCTR-0001643,AEARCTR-0001706,AEARCTR-0001748,AEARCTR-0001750,AEARCTR-0001812,AEARCTR-0001824,AEARCTR-0001848,AEARCTR-0001893,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003177,AEARCTR-0003224,AEARCTR-0003310,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005969,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009200,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011089,AEARCTR-0011126,AEARCTR-0011135,AEARCTR-0011184,AEARCTR-0011205,AEARCTR-0011253,AEARCTR-0011473,AEARCTR-0011741,AEARCTR-0011769,AEARCTR-0011880,AEARCTR-0011937,AEARCTR-0012044,AEARCTR-0012126,AEARCTR-0012636,AEARCTR-0012796
Mombasa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Kwale,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Kilifi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Tana River,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Lamu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Taita-Taveta,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Garissa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
Wajir,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
Mandera,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
Marsabit,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0


In [726]:
# Reset index and rename column for both dataframes
df_by_project = df_by_project.reset_index().rename({'index': 'RCT_ID'}, axis=1)
df_by_county = df_by_county.reset_index().rename({'index': 'County'}, axis=1)

In [727]:
# Convert dates to Pandas datetime format
kenya_trials_final[['Start date', 'End date']] = kenya_trials[['Start date', 'End date']].apply(pd.to_datetime)
df_by_project = df_by_project.merge(kenya_trials_final[['RCT_ID', 'Start date', 'End date']], how='left', on='RCT_ID')
df_by_project

Unnamed: 0,RCT_ID,Mombasa,Kwale,Kilifi,Tana River,Lamu,Taita-Taveta,Garissa,Wajir,Mandera,Marsabit,Isiolo,Meru,Tharaka-Nithi,Embu,Kitui,Machakos,Makueni,Nyandarua,Nyeri,Kirinyaga,Murang'a,Kiambu,Turkana,West Pokot,Samburu,Trans Nzoia,Uasin Gishu,Elgeyo-Marakwet,Nandi,Baringo,Laikipia,Nakuru,Narok,Kajiado,Kericho,Bomet,Kakamega,Vihiga,Bungoma,Busia,Siaya,Kisumu,Homa Bay,Migori,Kisii,Nyamira,Nairobi,Start date,End date
0,AEARCTR-0000016,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2012-07-16,2013-02-14
1,AEARCTR-0000017,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2011-05-01,2013-02-28
2,AEARCTR-0000019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2011-05-01,2013-02-28
3,AEARCTR-0000067,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2012-08-01,2014-08-15
4,AEARCTR-0000105,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2013-02-01,2017-03-31
5,AEARCTR-0000218,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2013-05-28,2013-08-07
6,AEARCTR-0000287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2013-06-01,2018-06-01
7,AEARCTR-0000323,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2014-01-27,2014-10-26
8,AEARCTR-0000350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2014-02-07,2020-08-31
9,AEARCTR-0000443,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,2013-12-10,2016-06-30


In [728]:
df_by_project.shape

(154, 50)

In [729]:
min(df_by_project['Start date'])

Timestamp('2000-06-01 00:00:00')

In [730]:
max(df_by_project['End date'])

Timestamp('2027-12-31 00:00:00')

- Earliest start date is in 2000 and latest end date is in 2027
- Time period: 2014 to 2022 (because of restrictions on availability of data on development outcomes)
- Define coverage variable: for what portion of the 8-year period did the project run?

In [731]:
# Restrict projects dataset to 2014 to 2022 time period
restricted_project = df_by_project[(df_by_project['Start date'].dt.year >= 2014) & (df_by_project['Start date'].dt.year < 2023)]
restricted_project.head()

Unnamed: 0,RCT_ID,Mombasa,Kwale,Kilifi,Tana River,Lamu,Taita-Taveta,Garissa,Wajir,Mandera,Marsabit,Isiolo,Meru,Tharaka-Nithi,Embu,Kitui,Machakos,Makueni,Nyandarua,Nyeri,Kirinyaga,Murang'a,Kiambu,Turkana,West Pokot,Samburu,Trans Nzoia,Uasin Gishu,Elgeyo-Marakwet,Nandi,Baringo,Laikipia,Nakuru,Narok,Kajiado,Kericho,Bomet,Kakamega,Vihiga,Bungoma,Busia,Siaya,Kisumu,Homa Bay,Migori,Kisii,Nyamira,Nairobi,Start date,End date
7,AEARCTR-0000323,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2014-01-27,2014-10-26
8,AEARCTR-0000350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2014-02-07,2020-08-31
11,AEARCTR-0000486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2014-08-25,2015-08-25
12,AEARCTR-0000541,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2014-11-10,2016-04-30
13,AEARCTR-0000605,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2015-01-28,2015-08-15


- More analysis
- The control group should be counties that **never** received the treatment (as that would bias the estimates)
- The treatment group should be counties where research started after 2014

In [732]:
# Find number of projects
a = pd.melt(df_by_project, id_vars=['RCT_ID', 'Start date', 'End date'], var_name='County', value_name='Number Projects')
a_gr = a.groupby('County').agg({'Number Projects': np.sum})
a_gr

Unnamed: 0_level_0,Number Projects
County,Unnamed: 1_level_1
Baringo,1
Bomet,2
Bungoma,28
Busia,38
Elgeyo-Marakwet,1
Embu,2
Garissa,4
Homa Bay,6
Isiolo,3
Kajiado,1


In [733]:
sum(a_gr['Number Projects'])

308

In [734]:
# Find earliest start date and latest end date 
min_start = []
max_end = []
for county in a_gr.reset_index()['County']:
    subset = a[a['County'] == county]
    if sum(subset['Number Projects']) == 0:
        min_start.append(0)
        max_end.append(0)
    else:
        val_start = min(subset[subset['Number Projects'] > 0]['Start date'].dt.year)
        val_end = max(subset[subset['Number Projects'] > 0]['End date'].dt.year)
        min_start.append(val_start)
        max_end.append(val_end)
        
a_gr['min_start'] = min_start
a_gr['max_end'] = max_end

In [735]:
a_gr

Unnamed: 0_level_0,Number Projects,min_start,max_end
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Baringo,1,2009,2015
Bomet,2,2009,2023
Bungoma,28,2007,2025
Busia,38,2000,2025
Elgeyo-Marakwet,1,2009,2015
Embu,2,2023,2024
Garissa,4,2009,2022
Homa Bay,6,2013,2024
Isiolo,3,2023,2024
Kajiado,1,2009,2015


In [736]:
# Filter to restrict time period
a_gr_filtered = a_gr[(a_gr['min_start'] == 0) | (a_gr['min_start'] >= 2014)]
a_gr_filtered

Unnamed: 0_level_0,Number Projects,min_start,max_end
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Embu,2,2023,2024
Isiolo,3,2023,2024
Kilifi,0,0,0
Kitui,3,2016,2024
Kwale,1,2020,2020
Lamu,0,0,0
Machakos,4,2022,2024
Makueni,4,2016,2024
Marsabit,2,2023,2024
Mombasa,1,2021,2021


In [737]:
a_gr_filtered.shape

(13, 3)

- Only 13 counties remain: not that much better than province-level analysis

In [738]:
sum(a_gr_filtered['Number Projects'])

24

In [739]:
# Define coverage variable: for what portion of the 7 years did the study run?
restricted_project['Coverage'] = (restricted_project['End date'].dt.year - 2015)/7
restricted_project['Coverage']

7     -0.142857
8      0.714286
11     0.000000
12     0.142857
13     0.000000
14    -0.142857
16     0.142857
17     0.142857
18     0.142857
21     0.000000
22     0.142857
23     0.000000
24     0.000000
25     0.428571
26     0.285714
27    -0.142857
28     0.000000
29     0.142857
32     0.571429
33     0.142857
34     1.285714
36     0.142857
40     0.428571
42     0.428571
45     0.428571
47     0.000000
48     0.285714
49     0.714286
50     0.285714
51     0.142857
52     0.142857
54     0.285714
60     0.000000
62     0.285714
63     0.285714
64     0.428571
65     0.285714
66     0.000000
67     0.285714
68     0.285714
69     0.571429
70     0.142857
71     0.285714
72     0.285714
73     0.285714
74     0.285714
75     0.428571
76     1.142857
77     0.571429
78     0.714286
79     0.285714
80     0.857143
81     1.000000
82     0.428571
83     0.428571
84     0.571429
85     0.571429
87     0.571429
89     0.571429
90     0.428571
91     0.571429
92     1.285714
93     0

In [740]:
# Create dataset at the project-county level
df_county_project = pd.melt(restricted_project, id_vars=['RCT_ID', 'Start date', 'End date', 'Coverage'], var_name='County', value_name='Number Projects')
df_county_project

Unnamed: 0,RCT_ID,Start date,End date,Coverage,County,Number Projects
0,AEARCTR-0000323,2014-01-27,2014-10-26,-0.142857,Mombasa,0
1,AEARCTR-0000350,2014-02-07,2020-08-31,0.714286,Mombasa,0
2,AEARCTR-0000486,2014-08-25,2015-08-25,0.0,Mombasa,0
3,AEARCTR-0000541,2014-11-10,2016-04-30,0.142857,Mombasa,0
4,AEARCTR-0000605,2015-01-28,2015-08-15,0.0,Mombasa,0
5,AEARCTR-0000627,2014-11-01,2014-12-31,-0.142857,Mombasa,0
6,AEARCTR-0000662,2015-05-04,2016-09-15,0.142857,Mombasa,0
7,AEARCTR-0000669,2015-04-14,2016-09-15,0.142857,Mombasa,0
8,AEARCTR-0000676,2015-04-08,2016-12-31,0.142857,Mombasa,0
9,AEARCTR-0000766,2015-04-25,2015-06-15,0.0,Mombasa,0


In [741]:
df_county_project.shape

(5029, 6)

In [742]:
# Find total number of projects for each county
grouped_data = df_county_project.groupby('County').agg({'Number Projects': np.sum})
grouped_data

Unnamed: 0_level_0,Number Projects
County,Unnamed: 1_level_1
Baringo,0
Bomet,0
Bungoma,18
Busia,21
Elgeyo-Marakwet,0
Embu,0
Garissa,2
Homa Bay,5
Isiolo,0
Kajiado,0


In [743]:
# Compute coverage by county
county_cov = []
for county in grouped_data.reset_index()['County']:
    subset = df_county_project[df_county_project['County'] == county]
    if sum(subset['Number Projects']) == 0:
        county_cov.append(0)
    else:
        cov = np.mean(subset[subset['Number Projects'] > 0]['Coverage'])
        county_cov.append(cov)

In [744]:
# Transpose project-level dataframe to get county-level dataframe
restricted_county = restricted_project.iloc[:, :48].set_index('RCT_ID').T.reset_index().rename_axis(None, axis=1).rename({'index': 'County'}, axis=1)
restricted_county.sort_values(by='County', inplace=True)
restricted_county['Coverage'] = county_cov
restricted_county

Unnamed: 0,County,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001008,AEARCTR-0001187,AEARCTR-0001293,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001643,AEARCTR-0001848,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003224,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011880,Coverage
29,Baringo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
35,Bomet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
38,Bungoma,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.642857
39,Busia,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0.659864
27,Elgeyo-Marakwet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
13,Embu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
6,Garissa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429
42,Homa Bay,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429
10,Isiolo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
33,Kajiado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0


In [745]:
restricted_county.shape

(47, 109)

In [746]:
# Merge with county-level data
restricted_county = restricted_county.merge(grouped_data.reset_index(), on='County', how='left')
restricted_county

Unnamed: 0,County,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001008,AEARCTR-0001187,AEARCTR-0001293,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001643,AEARCTR-0001848,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003224,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011880,Coverage,Number Projects
0,Baringo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
1,Bomet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
2,Bungoma,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.642857,18
3,Busia,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0.659864,21
4,Elgeyo-Marakwet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
5,Embu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
6,Garissa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429,2
7,Homa Bay,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429,5
8,Isiolo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0
9,Kajiado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0


## Data on Outcome Variables

- Spatial data from DHS in 2014 and 2022
- These are the only years for which I found find county-level data
- Kenyan counties were established in 2010 so cannot find county-level data before then

In [747]:
# Define function for reading Shapefiles
def read_shapefile(sf_shape):
    """
    Read a shapefile into a Pandas dataframe with a 'coords' 
    column holding the geometry information. This uses the pyshp
    package
    """

    fields = [x[0] for x in sf_shape.fields][1:]
    records = [y[:] for y in sf_shape.records()]
    #records = sf_shape.records()
    shps = [s.points for s in sf_shape.shapes()]
    df = pd.DataFrame(columns=fields, data=records)
    df = df.assign(coords=shps)
    return df

In [748]:
# Read 2014 data
sh_educ_2014 = shapefile.Reader('sdr_subnational_data_dhs_2014_lvl_2.shp')
df_educ_2014 = read_shapefile(sh_educ_2014).rename({'DHSREGEN': 'County'}, axis=1)
df_educ_2014.head()

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,County,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,EDGARPWFEM,EDGARPMMAL,EDGARPBBTH,EDGARPBGPI,EDGARSWFEM,EDGARSMMAL,EDGARSBBTH,EDGARSBGPI,EDEDUCWNED,EDEDUCWSPR,EDEDUCWCPR,EDEDUCWSSC,EDEDUCWCSC,EDEDUCWHGH,EDEDUCWDKM,EDEDUCWTOT,EDEDUCWPRI,EDEDUCWSEH,EDEDUCWMYR,EDEDUCWNUM,EDEDUCWUNW,EDEDUCMNED,EDEDUCMSPR,EDEDUCMCPR,EDEDUCMSSC,EDEDUCMCSC,EDEDUCMHGH,EDEDUCMDKM,EDEDUCMTOT,EDEDUCMPRI,coords
0,KE,KE,KE,DHS,2014.0,Kenya,,,Baringo,,,451.0,KEDHS2014416058,yes,yes,2.0,shcounty,58.0,baringo,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9.3,30.0,24.5,15.0,12.1,9.0,9999.0,100.0,54.5,36.1,7.4,335.0,598.0,5.9,33.3,16.7,15.3,20.1,8.7,9999.0,100.0,50.0,"[(35.78413003125479, 1.6524838122778078), (35...."
1,KE,KE,KE,DHS,2014.0,Kenya,,,Bomet,,,451.0,KEDHS2014416064,yes,yes,2.0,shcounty,64.0,bomet,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.4,39.8,25.7,14.3,11.5,8.2,9999.0,100.0,65.5,34.0,7.3,687.0,708.0,0.0,33.1,27.2,14.4,18.1,7.2,9999.0,100.0,60.3,"[(35.547965492284504, -0.55796921962326), (35...."
2,KE,KE,KE,DHS,2014.0,Kenya,,,Bungoma,,,451.0,KEDHS2014416073,yes,yes,2.0,shcounty,73.0,bungoma,hv024,7.0,western,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.9,41.2,19.8,20.7,10.3,7.2,9999.0,100.0,61.0,38.1,7.3,1203.0,805.0,0.4,45.2,16.9,17.9,11.5,8.1,9999.0,100.0,62.1,"[(34.556667480780504, 1.0988888396579455), (34..."
3,KE,KE,KE,DHS,2014.0,Kenya,,,Busia,,,451.0,KEDHS2014416074,yes,yes,2.0,shcounty,74.0,busia,hv024,7.0,western,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,6.6,51.6,16.1,14.3,6.0,5.2,9999.0,100.0,67.7,25.6,6.5,546.0,676.0,1.1,45.7,16.9,18.1,12.4,5.8,9999.0,100.0,62.6,"[(34.38465022643885, 0.32892968427290725), (34..."
4,KE,KE,KE,DHS,2014.0,Kenya,,,Elgeyo Marakwet,,,451.0,KEDHS2014416056,yes,yes,2.0,shcounty,56.0,elgeyo marakwet,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.2,27.7,28.6,14.4,15.0,13.0,9999.0,100.0,56.3,42.5,7.7,250.0,630.0,0.2,24.4,26.6,13.3,21.4,14.1,9999.0,100.0,51.0,"[(35.69746802677497, 1.303448182993975), (35.6..."


In [749]:
# Read 2022 data
sh_educ_2022 = shapefile.Reader('sdr_subnational_data_dhs_2022.shp')
df_educ_2022 = read_shapefile(sh_educ_2022).rename({'DHSREGEN': 'County'}, axis=1)
df_educ_2022.head()

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,County,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,EDGARPWFEM,EDGARPMMAL,EDGARPBBTH,EDGARPBGPI,EDGARSWFEM,EDGARSMMAL,EDGARSBBTH,EDGARSBGPI,EDEDUCWNED,EDEDUCWSPR,EDEDUCWCPR,EDEDUCWSSC,EDEDUCWCSC,EDEDUCWHGH,EDEDUCWDKM,EDEDUCWTOT,EDEDUCWPRI,EDEDUCWSEH,EDEDUCWMYR,EDEDUCWNUM,EDEDUCWUNW,EDEDUCMNED,EDEDUCMSPR,EDEDUCMCPR,EDEDUCMSSC,EDEDUCMCSC,EDEDUCMHGH,EDEDUCMDKM,EDEDUCMTOT,EDEDUCMPRI,coords
0,KE,KE,KE,DHS,2022.0,Kenya,,,Baringo,,,566.0,KEDHS2022416058,yes,no,1.0,hv024,30.0,Baringo,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,113.3,100.4,106.2,1.13,66.6,84.5,75.8,0.79,5.9,19.7,20.3,17.1,21.2,15.8,9999.0,100.0,40.0,54.1,7.7,378.0,687.0,3.1,20.3,20.1,18.6,21.0,16.8,9999.0,100.0,40.4,"[(35.78413003125479, 1.6524838122778078), (35...."
1,KE,KE,KE,DHS,2022.0,Kenya,,,Bomet,,,566.0,KEDHS2022416064,yes,no,1.0,hv024,36.0,Bomet,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,116.8,125.3,121.2,0.93,87.7,90.7,89.2,0.97,0.3,25.4,23.0,24.0,15.5,11.8,9999.0,100.0,48.4,51.3,7.5,650.0,778.0,0.4,22.8,22.1,22.6,16.4,15.7,9999.0,100.0,44.9,"[(35.547965492284504, -0.55796921962326), (35...."
2,KE,KE,KE,DHS,2022.0,Kenya,,,Bungoma,,,566.0,KEDHS2022416073,yes,no,1.0,hv024,39.0,Bungoma,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,114.5,116.1,115.3,0.99,78.5,81.6,80.0,0.96,0.8,23.7,17.8,26.4,15.2,16.0,9999.0,100.0,41.5,57.6,7.6,1138.0,841.0,0.3,26.0,15.7,24.5,15.5,18.0,9999.0,100.0,41.7,"[(34.556667480780504, 1.0988888396579455), (34..."
3,KE,KE,KE,DHS,2022.0,Kenya,,,Busia,,,566.0,KEDHS2022416074,yes,no,1.0,hv024,40.0,Busia,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,111.5,124.0,117.9,0.9,89.3,74.9,81.1,1.19,3.3,32.6,18.5,24.7,12.4,8.5,9999.0,100.0,51.1,45.6,7.2,622.0,768.0,0.7,35.9,16.1,22.2,12.2,12.9,9999.0,100.0,52.0,"[(34.38465022643885, 0.32892968427290725), (34..."
4,KE,KE,KE,DHS,2022.0,Kenya,,,Elgeyo-Marakwet,,,566.0,KEDHS2022416056,yes,no,1.0,hv024,28.0,Elgeyo-Marakwet,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,108.8,112.4,110.7,0.97,98.5,90.8,93.8,1.08,0.0,16.2,21.8,17.6,25.0,19.4,9999.0,100.0,38.0,62.0,7.9,228.0,591.0,0.0,15.5,20.8,26.3,20.7,16.7,9999.0,100.0,36.3,"[(35.69746802677497, 1.303448182993975), (35.6..."


In [750]:
df_educ_2022 = df_educ_2022.merge(counties, on='County', how='left')
mmm = df_educ_2022.groupby('Province').first().reset_index()

In [751]:
mmm

Unnamed: 0,Province,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,County,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,EDGARPWFEM,EDGARPMMAL,EDGARPBBTH,EDGARPBGPI,EDGARSWFEM,EDGARSMMAL,EDGARSBBTH,EDGARSBGPI,EDEDUCWNED,EDEDUCWSPR,EDEDUCWCPR,EDEDUCWSSC,EDEDUCWCSC,EDEDUCWHGH,EDEDUCWDKM,EDEDUCWTOT,EDEDUCWPRI,EDEDUCWSEH,EDEDUCWMYR,EDEDUCWNUM,EDEDUCWUNW,EDEDUCMNED,EDEDUCMSPR,EDEDUCMCPR,EDEDUCMSSC,EDEDUCMCSC,EDEDUCMHGH,EDEDUCMDKM,EDEDUCMTOT,EDEDUCMPRI,coords
0,Central,KE,KE,KE,DHS,2022.0,Kenya,,,Kiambu,,,566.0,KEDHS2022416045,yes,no,1.0,hv024,22.0,Kiambu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,104.2,106.1,105.1,0.98,106.2,92.4,98.7,1.15,0.5,8.8,14.9,18.9,25.6,31.4,9999.0,100.0,23.7,75.8,9.3,2094.0,668.0,1.1,11.3,15.3,21.7,24.3,26.2,9999.0,100.0,26.6,"[(37.30601356167972, -1.0812960331804788), (37..."
1,Coast,KE,KE,KE,DHS,2022.0,Kenya,,,Kilifi,,,566.0,KEDHS2022416013,yes,no,1.0,hv024,3.0,Kilifi,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,121.5,122.0,121.8,1.0,46.6,69.0,58.3,0.68,13.3,35.5,20.2,12.3,11.0,7.6,9999.0,100.0,55.8,30.9,6.6,928.0,742.0,0.8,27.4,27.7,15.0,16.1,13.0,9999.0,100.0,55.1,"[(39.81000132006619, -3.632083380178983), (39...."
2,Eastern,KE,KE,KE,DHS,2022.0,Kenya,,,Embu,,,566.0,KEDHS2022416035,yes,no,1.0,hv024,14.0,Embu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,105.1,112.5,108.8,0.93,127.3,85.8,101.7,1.48,1.2,13.2,23.8,21.8,21.9,18.1,9999.0,100.0,37.1,61.8,7.9,358.0,584.0,0.0,18.8,19.9,19.4,22.3,19.5,9999.0,100.0,38.8,"[(37.65222732422325, -0.8470060993128072), (37..."
3,Nairobi,KE,KE,KE,DHS,2022.0,Kenya,,,Nairobi,,,566.0,KEDHS2022416001,yes,no,1.0,hv024,47.0,Nairobi,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,102.7,100.1,101.5,1.03,112.3,98.7,105.5,1.14,1.3,6.6,14.7,13.8,28.6,35.0,9999.0,100.0,21.3,77.4,9.5,4235.0,944.0,1.1,4.5,11.0,12.0,30.9,40.4,9999.0,100.0,15.6,"[(36.666664178944814, -1.3154033319373184), (3..."
4,Northern,KE,KE,KE,DHS,2022.0,Kenya,,,Garissa,,,566.0,KEDHS2022416021,yes,no,1.0,hv024,7.0,Garissa,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,66.0,62.6,64.1,1.05,34.7,51.8,42.4,0.67,52.3,14.7,5.5,14.1,7.3,6.1,9999.0,100.0,20.1,27.6,9999.0,290.0,641.0,16.3,17.5,12.5,16.6,20.6,16.5,9999.0,100.0,30.0,"[(39.1714903612816, 0.7193667518077405), (39.1..."
5,Nyanza,KE,KE,KE,DHS,2022.0,Kenya,,,Homa Bay,,,566.0,KEDHS2022416083,yes,no,1.0,hv024,43.0,Homa Bay,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,100.7,111.5,106.0,0.9,94.1,76.9,84.7,1.22,0.8,23.5,25.0,26.5,13.0,11.2,9999.0,100.0,48.5,50.7,7.4,662.0,712.0,1.2,19.9,17.5,28.9,15.5,17.1,9999.0,100.0,37.4,"[(34.03615570271745, -0.7477368079109397), (34..."
6,Rift Valley,KE,KE,KE,DHS,2022.0,Kenya,,,Baringo,,,566.0,KEDHS2022416058,yes,no,1.0,hv024,30.0,Baringo,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,113.3,100.4,106.2,1.13,66.6,84.5,75.8,0.79,5.9,19.7,20.3,17.1,21.2,15.8,9999.0,100.0,40.0,54.1,7.7,378.0,687.0,3.1,20.3,20.1,18.6,21.0,16.8,9999.0,100.0,40.4,"[(35.78413003125479, 1.6524838122778078), (35...."
7,Western,KE,KE,KE,DHS,2022.0,Kenya,,,Bungoma,,,566.0,KEDHS2022416073,yes,no,1.0,hv024,39.0,Bungoma,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,114.5,116.1,115.3,0.99,78.5,81.6,80.0,0.96,0.8,23.7,17.8,26.4,15.2,16.0,9999.0,100.0,41.5,57.6,7.6,1138.0,841.0,0.3,26.0,15.7,24.5,15.5,18.0,9999.0,100.0,41.7,"[(34.556667480780504, 1.0988888396579455), (34..."


In [752]:
# Add column to indicate treatment status
restricted_county['treated_num'] = [0 if restricted_county['Number Projects'].iloc[i] == 0 else 1 for i in range(len(restricted_county))]
restricted_county['treated_cov'] = [0 if restricted_county['Coverage'].iloc[i] == 0 else 1 for i in range(len(restricted_county))]

In [753]:
# Discretize treatment variables

# Discretize number of projects into 3 categories: low, medium, high
num_low, num_med, num_high = [], [], []
for i in restricted_county['Number Projects']:
    if i == 0:
        num_low.append(1)
        num_med.append(0)
        num_high.append(0)
    elif (i > 0 and i < 20):
        num_low.append(0)
        num_med.append(1)
        num_high.append(0)
    elif (i >= 20):
        num_low.append(0)
        num_med.append(0)
        num_high.append(1)
        
restricted_county['num_low'] = num_low
restricted_county['num_med'] = num_med
restricted_county['num_high'] = num_high

In [754]:
# Discretize coverage into 3 categories: low, medium, high
cov_low, cov_med, cov_high = [], [], []
for i in restricted_county['Coverage']:
    if i == 0:
        cov_low.append(1)
        cov_med.append(0)
        cov_high.append(0)
    elif (i > 0 and i < .50):
        cov_low.append(0)
        cov_med.append(1)
        cov_high.append(0)
    elif (i >= .50):
        cov_low.append(0)
        cov_med.append(0)
        cov_high.append(1)
        
restricted_county['cov_low'] = cov_low
restricted_county['cov_med'] = cov_med
restricted_county['cov_high'] = cov_high

In [755]:
# Merge data on trials with data on outcome variables
restricted_county_outcome = restricted_county.merge(df_educ_2014, on='County', how='left').merge(mmm, on='County', how='left')
restricted_county_outcome

Unnamed: 0,County,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001008,AEARCTR-0001187,AEARCTR-0001293,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001643,AEARCTR-0001848,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003224,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011880,Coverage,Number Projects,treated_num,treated_cov,num_low,num_med,num_high,cov_low,cov_med,cov_high,ISO_x,FIPS_x,DHSCC_x,SVYTYPE_x,SVYYEAR_x,CNTRYNAMEE_x,CNTRYNAMEF_x,CNTRYNAMES_x,DHSREGFR_x,DHSREGSP_x,SVYID_x,REG_ID_x,Svy_Map_x,MULTLEVEL_x,LEVELRNK_x,REGVAR_x,REGCODE_x,REGNAME_x,OTHREGVAR_x,OTHREGCO_x,OTHREGNA_x,LEVELCO_x,LEVELNA_x,REPALLIND_x,REGNOTES_x,SVYNOTES_x,EDGARPWFEM_x,EDGARPMMAL_x,EDGARPBBTH_x,EDGARPBGPI_x,EDGARSWFEM_x,EDGARSMMAL_x,EDGARSBBTH_x,EDGARSBGPI_x,EDEDUCWNED_x,EDEDUCWSPR_x,EDEDUCWCPR_x,EDEDUCWSSC_x,EDEDUCWCSC_x,EDEDUCWHGH_x,EDEDUCWDKM_x,EDEDUCWTOT_x,EDEDUCWPRI_x,EDEDUCWSEH_x,EDEDUCWMYR_x,EDEDUCWNUM_x,EDEDUCWUNW_x,EDEDUCMNED_x,EDEDUCMSPR_x,EDEDUCMCPR_x,EDEDUCMSSC_x,EDEDUCMCSC_x,EDEDUCMHGH_x,EDEDUCMDKM_x,EDEDUCMTOT_x,EDEDUCMPRI_x,coords_x,Province,ISO_y,FIPS_y,DHSCC_y,SVYTYPE_y,SVYYEAR_y,CNTRYNAMEE_y,CNTRYNAMEF_y,CNTRYNAMES_y,DHSREGFR_y,DHSREGSP_y,SVYID_y,REG_ID_y,Svy_Map_y,MULTLEVEL_y,LEVELRNK_y,REGVAR_y,REGCODE_y,REGNAME_y,OTHREGVAR_y,OTHREGCO_y,OTHREGNA_y,LEVELCO_y,LEVELNA_y,REPALLIND_y,REGNOTES_y,SVYNOTES_y,EDGARPWFEM_y,EDGARPMMAL_y,EDGARPBBTH_y,EDGARPBGPI_y,EDGARSWFEM_y,EDGARSMMAL_y,EDGARSBBTH_y,EDGARSBGPI_y,EDEDUCWNED_y,EDEDUCWSPR_y,EDEDUCWCPR_y,EDEDUCWSSC_y,EDEDUCWCSC_y,EDEDUCWHGH_y,EDEDUCWDKM_y,EDEDUCWTOT_y,EDEDUCWPRI_y,EDEDUCWSEH_y,EDEDUCWMYR_y,EDEDUCWNUM_y,EDEDUCWUNW_y,EDEDUCMNED_y,EDEDUCMSPR_y,EDEDUCMCPR_y,EDEDUCMSSC_y,EDEDUCMCSC_y,EDEDUCMHGH_y,EDEDUCMDKM_y,EDEDUCMTOT_y,EDEDUCMPRI_y,coords_y
0,Baringo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416058,yes,yes,2.0,shcounty,58.0,baringo,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9.3,30.0,24.5,15.0,12.1,9.0,9999.0,100.0,54.5,36.1,7.4,335.0,598.0,5.9,33.3,16.7,15.3,20.1,8.7,9999.0,100.0,50.0,"[(35.78413003125479, 1.6524838122778078), (35....",Rift Valley,KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416058,yes,no,1.0,hv024,30.0,Baringo,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,113.3,100.4,106.2,1.13,66.6,84.5,75.8,0.79,5.9,19.7,20.3,17.1,21.2,15.8,9999.0,100.0,40.0,54.1,7.7,378.0,687.0,3.1,20.3,20.1,18.6,21.0,16.8,9999.0,100.0,40.4,"[(35.78413003125479, 1.6524838122778078), (35...."
1,Bomet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416064,yes,yes,2.0,shcounty,64.0,bomet,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.4,39.8,25.7,14.3,11.5,8.2,9999.0,100.0,65.5,34.0,7.3,687.0,708.0,0.0,33.1,27.2,14.4,18.1,7.2,9999.0,100.0,60.3,"[(35.547965492284504, -0.55796921962326), (35....",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Bungoma,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.642857,18,1,1,0,1,0,0,0,1,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416073,yes,yes,2.0,shcounty,73.0,bungoma,hv024,7.0,western,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.9,41.2,19.8,20.7,10.3,7.2,9999.0,100.0,61.0,38.1,7.3,1203.0,805.0,0.4,45.2,16.9,17.9,11.5,8.1,9999.0,100.0,62.1,"[(34.556667480780504, 1.0988888396579455), (34...",Western,KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416073,yes,no,1.0,hv024,39.0,Bungoma,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,114.5,116.1,115.3,0.99,78.5,81.6,80.0,0.96,0.8,23.7,17.8,26.4,15.2,16.0,9999.0,100.0,41.5,57.6,7.6,1138.0,841.0,0.3,26.0,15.7,24.5,15.5,18.0,9999.0,100.0,41.7,"[(34.556667480780504, 1.0988888396579455), (34..."
3,Busia,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0.659864,21,1,1,0,0,1,0,0,1,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416074,yes,yes,2.0,shcounty,74.0,busia,hv024,7.0,western,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,6.6,51.6,16.1,14.3,6.0,5.2,9999.0,100.0,67.7,25.6,6.5,546.0,676.0,1.1,45.7,16.9,18.1,12.4,5.8,9999.0,100.0,62.6,"[(34.38465022643885, 0.32892968427290725), (34...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Elgeyo-Marakwet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,Embu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416035,yes,yes,2.0,shcounty,35.0,embu,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.3,28.6,29.2,16.6,13.7,10.6,9999.0,100.0,57.8,40.9,7.6,459.0,645.0,1.0,33.6,27.4,16.6,12.2,9.3,9999.0,100.0,61.0,"[(37.65222732422325, -0.8470060993128072), (37...",Eastern,KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416035,yes,no,1.0,hv024,14.0,Embu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,105.1,112.5,108.8,0.93,127.3,85.8,101.7,1.48,1.2,13.2,23.8,21.8,21.9,18.1,9999.0,100.0,37.1,61.8,7.9,358.0,584.0,0.0,18.8,19.9,19.4,22.3,19.5,9999.0,100.0,38.8,"[(37.65222732422325, -0.8470060993128072), (37..."
6,Garissa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429,2,1,1,0,1,0,0,0,1,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416021,yes,yes,2.0,shcounty,21.0,garissa,hv024,2.0,north eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,72.7,9.6,5.7,4.4,3.6,4.0,9999.0,100.0,15.4,12.0,9999.0,261.0,609.0,33.2,16.2,17.7,12.6,12.1,8.2,9999.0,100.0,33.9,"[(39.23293899431013, 0.8335424833336447), (39....",Northern,KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416021,yes,no,1.0,hv024,7.0,Garissa,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,66.0,62.6,64.1,1.05,34.7,51.8,42.4,0.67,52.3,14.7,5.5,14.1,7.3,6.1,9999.0,100.0,20.1,27.6,9999.0,290.0,641.0,16.3,17.5,12.5,16.6,20.6,16.5,9999.0,100.0,30.0,"[(39.1714903612816, 0.7193667518077405), (39.1..."
7,Homa Bay,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.571429,5,1,1,0,1,0,0,0,1,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416083,yes,yes,2.0,shcounty,83.0,homa bay,hv024,8.0,nyanza,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.1,39.7,28.1,18.2,8.9,4.0,9999.0,100.0,67.9,31.1,7.3,798.0,716.0,0.0,35.0,27.1,18.2,12.0,7.8,9999.0,100.0,62.1,"[(34.03615570271745, -0.7477368079109397), (34...",Nyanza,KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416083,yes,no,1.0,hv024,43.0,Homa Bay,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,100.7,111.5,106.0,0.9,94.1,76.9,84.7,1.22,0.8,23.5,25.0,26.5,13.0,11.2,9999.0,100.0,48.5,50.7,7.4,662.0,712.0,1.2,19.9,17.5,28.9,15.5,17.1,9999.0,100.0,37.4,"[(34.03615570271745, -0.7477368079109397), (34..."
8,Isiolo,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416032,yes,yes,2.0,shcounty,32.0,isiolo,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,39.7,22.5,18.1,6.0,8.7,5.0,9999.0,100.0,40.6,19.7,5.0,104.0,606.0,11.3,29.7,21.1,12.5,21.9,3.5,9999.0,100.0,50.8,"[(36.927832509683924, 0.7265079527228977), (36...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,Kajiado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,1,0,0,1,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416062,yes,yes,2.0,shcounty,62.0,kajiado,hv024,5.0,rift valley,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,18.0,12.8,17.9,14.1,17.0,20.2,9999.0,100.0,30.7,51.3,8.0,670.0,642.0,4.8,17.2,15.9,16.1,17.3,28.7,9999.0,100.0,33.1,"[(36.690441119912236, -2.500000020496202), (36...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [756]:
a_gr_outcome = a_gr_filtered.merge(df_educ_2014, on='County', how='left').merge(df_educ_2022, on='County', how='left')
a_gr_outcome

Unnamed: 0,County,Number Projects,min_start,max_end,ISO_x,FIPS_x,DHSCC_x,SVYTYPE_x,SVYYEAR_x,CNTRYNAMEE_x,CNTRYNAMEF_x,CNTRYNAMES_x,DHSREGFR_x,DHSREGSP_x,SVYID_x,REG_ID_x,Svy_Map_x,MULTLEVEL_x,LEVELRNK_x,REGVAR_x,REGCODE_x,REGNAME_x,OTHREGVAR_x,OTHREGCO_x,OTHREGNA_x,LEVELCO_x,LEVELNA_x,REPALLIND_x,REGNOTES_x,SVYNOTES_x,EDGARPWFEM_x,EDGARPMMAL_x,EDGARPBBTH_x,EDGARPBGPI_x,EDGARSWFEM_x,EDGARSMMAL_x,EDGARSBBTH_x,EDGARSBGPI_x,EDEDUCWNED_x,EDEDUCWSPR_x,EDEDUCWCPR_x,EDEDUCWSSC_x,EDEDUCWCSC_x,EDEDUCWHGH_x,EDEDUCWDKM_x,EDEDUCWTOT_x,EDEDUCWPRI_x,EDEDUCWSEH_x,EDEDUCWMYR_x,EDEDUCWNUM_x,EDEDUCWUNW_x,EDEDUCMNED_x,EDEDUCMSPR_x,EDEDUCMCPR_x,EDEDUCMSSC_x,EDEDUCMCSC_x,EDEDUCMHGH_x,EDEDUCMDKM_x,EDEDUCMTOT_x,EDEDUCMPRI_x,coords_x,ISO_y,FIPS_y,DHSCC_y,SVYTYPE_y,SVYYEAR_y,CNTRYNAMEE_y,CNTRYNAMEF_y,CNTRYNAMES_y,DHSREGFR_y,DHSREGSP_y,SVYID_y,REG_ID_y,Svy_Map_y,MULTLEVEL_y,LEVELRNK_y,REGVAR_y,REGCODE_y,REGNAME_y,OTHREGVAR_y,OTHREGCO_y,OTHREGNA_y,LEVELCO_y,LEVELNA_y,REPALLIND_y,REGNOTES_y,SVYNOTES_y,EDGARPWFEM_y,EDGARPMMAL_y,EDGARPBBTH_y,EDGARPBGPI_y,EDGARSWFEM_y,EDGARSMMAL_y,EDGARSBBTH_y,EDGARSBGPI_y,EDEDUCWNED_y,EDEDUCWSPR_y,EDEDUCWCPR_y,EDEDUCWSSC_y,EDEDUCWCSC_y,EDEDUCWHGH_y,EDEDUCWDKM_y,EDEDUCWTOT_y,EDEDUCWPRI_y,EDEDUCWSEH_y,EDEDUCWMYR_y,EDEDUCWNUM_y,EDEDUCWUNW_y,EDEDUCMNED_y,EDEDUCMSPR_y,EDEDUCMCPR_y,EDEDUCMSSC_y,EDEDUCMCSC_y,EDEDUCMHGH_y,EDEDUCMDKM_y,EDEDUCMTOT_y,EDEDUCMPRI_y,coords_y,Province
0,Embu,2,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416035,yes,yes,2.0,shcounty,35.0,embu,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.3,28.6,29.2,16.6,13.7,10.6,9999.0,100.0,57.8,40.9,7.6,459.0,645.0,1.0,33.6,27.4,16.6,12.2,9.3,9999.0,100.0,61.0,"[(37.65222732422325, -0.8470060993128072), (37...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416035,yes,no,1.0,hv024,14.0,Embu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,105.1,112.5,108.8,0.93,127.3,85.8,101.7,1.48,1.2,13.2,23.8,21.8,21.9,18.1,9999.0,100.0,37.1,61.8,7.9,358.0,584.0,0.0,18.8,19.9,19.4,22.3,19.5,9999.0,100.0,38.8,"[(37.65222732422325, -0.8470060993128072), (37...",Eastern
1,Isiolo,3,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416032,yes,yes,2.0,shcounty,32.0,isiolo,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,39.7,22.5,18.1,6.0,8.7,5.0,9999.0,100.0,40.6,19.7,5.0,104.0,606.0,11.3,29.7,21.1,12.5,21.9,3.5,9999.0,100.0,50.8,"[(36.927832509683924, 0.7265079527228977), (36...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416032,yes,no,1.0,hv024,11.0,Isiolo,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,93.6,92.1,92.9,1.02,81.6,70.5,75.9,1.16,25.3,14.3,18.5,12.2,15.5,14.3,9999.0,100.0,32.8,41.9,7.3,137.0,623.0,9.2,13.1,20.1,13.7,19.9,24.0,9999.0,100.0,33.2,"[(39.09465200983874, 1.8961535354557668), (39....",Eastern
2,Kilifi,0,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416013,yes,yes,2.0,shcounty,13.0,kilifi,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,20.4,34.1,19.3,10.5,11.4,4.4,9999.0,100.0,53.3,26.3,6.4,1043.0,824.0,2.6,34.4,24.9,15.9,13.5,8.7,9999.0,100.0,59.3,"[(39.81000132006619, -3.632083380178983), (39....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416013,yes,no,1.0,hv024,3.0,Kilifi,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,121.5,122.0,121.8,1.0,46.6,69.0,58.3,0.68,13.3,35.5,20.2,12.3,11.0,7.6,9999.0,100.0,55.8,30.9,6.6,928.0,742.0,0.8,27.4,27.7,15.0,16.1,13.0,9999.0,100.0,55.1,"[(39.81000132006619, -3.632083380178983), (39....",Coast
3,Kitui,3,2016,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416036,yes,yes,2.0,shcounty,36.0,kitui,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,3.9,35.4,34.4,12.9,8.7,4.6,9999.0,100.0,69.9,26.2,7.3,759.0,747.0,2.7,44.8,23.3,15.5,9.2,4.4,9999.0,100.0,68.2,"[(38.79114234915062, -2.3640895236817414), (38...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416036,yes,no,1.0,hv024,15.0,Kitui,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,117.3,118.3,117.8,0.99,93.7,72.8,82.4,1.29,1.5,16.8,29.4,22.7,14.2,15.4,9999.0,100.0,46.2,52.3,7.7,735.0,671.0,1.9,19.9,23.3,23.1,15.4,16.3,9999.0,100.0,43.2,"[(38.79114234915062, -2.3640895236817414), (38...",Eastern
4,Kwale,1,2020,2020,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416012,yes,yes,2.0,shcounty,12.0,kwale,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,21.7,35.0,23.5,6.9,8.5,4.4,9999.0,100.0,58.5,19.8,6.1,619.0,671.0,7.8,38.7,19.6,11.2,14.1,8.7,9999.0,100.0,58.2,"[(39.37152852042874, -4.657111200201257), (39....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416012,yes,no,1.0,hv024,2.0,Kwale,,9999.0,,Admin1,Counties,no,The border between Mombasa and Kwale slightly ...,Survey is representative at one level: 47 regi...,104.5,109.6,106.9,0.95,43.4,48.6,46.0,0.89,18.1,27.0,23.7,11.9,12.3,6.8,9999.0,100.0,50.8,31.1,7.0,498.0,711.0,4.4,33.9,22.4,14.2,18.4,6.6,9999.0,100.0,56.4,"[(39.37152852042874, -4.657111200201257), (39....",Coast
5,Lamu,0,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416015,yes,yes,2.0,shcounty,15.0,lamu,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,17.0,39.1,21.0,10.4,6.2,6.3,9999.0,100.0,60.1,22.9,6.4,89.0,600.0,5.8,41.2,18.4,11.2,9.6,13.7,9999.0,100.0,59.6,"[(40.84083179986709, -2.4303888900705033), (40...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416015,yes,no,1.0,hv024,5.0,Lamu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,106.7,107.4,107.1,0.99,58.7,62.4,60.4,0.94,12.5,31.7,20.4,18.6,9.4,7.4,9999.0,100.0,52.1,35.4,6.8,101.0,675.0,5.7,28.0,21.9,17.9,14.0,12.4,9999.0,100.0,50.0,"[(40.84083179986709, -2.4303888900705033), (40...",Coast
6,Machakos,4,2022,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416037,yes,yes,2.0,shcounty,37.0,machakos,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.2,15.9,36.9,20.1,16.5,10.4,9999.0,100.0,52.8,47.0,7.8,873.0,718.0,0.3,21.4,31.6,12.5,24.3,10.0,9999.0,100.0,53.0,"[(37.84432601321015, -0.8136776624792788), (37...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416037,yes,no,1.0,hv024,16.0,Machakos,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,103.9,115.4,109.7,0.9,115.9,101.8,107.4,1.14,0.4,9.2,18.0,27.5,20.8,24.1,9999.0,100.0,27.2,72.4,8.0,992.0,699.0,0.3,13.0,21.0,24.4,20.4,20.9,9999.0,100.0,34.1,"[(37.30863783309604, -1.7062805202227196), (37...",Eastern
7,Makueni,4,2016,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416038,yes,yes,2.0,shcounty,38.0,makueni,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.9,21.6,34.6,24.0,10.4,8.5,9999.0,100.0,56.2,42.9,7.7,680.0,746.0,0.3,24.4,28.5,20.0,17.1,9.7,9999.0,100.0,52.9,"[(38.10272293004476, -2.283506456842474), (38....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416038,yes,no,1.0,hv024,17.0,Makueni,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,116.1,114.4,115.2,1.01,90.1,97.2,93.5,0.93,0.0,13.9,27.9,23.0,19.7,15.5,9999.0,100.0,41.8,58.2,7.7,683.0,720.0,0.4,25.2,18.7,26.5,17.1,12.0,9999.0,100.0,43.9,"[(37.4905662165782, -1.5200041825581252), (37....",Eastern
8,Marsabit,2,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416031,yes,yes,2.0,shcounty,31.0,marsabit,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,61.9,16.0,10.3,5.0,4.3,2.4,9999.0,100.0,26.4,11.8,9999.0,115.0,575.0,35.7,18.6,10.0,7.7,15.4,12.6,9999.0,100.0,28.6,"[(36.502491833804925, 2.9184625932504673), (36...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416031,yes,no,1.0,hv024,10.0,Marsabit,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,78.6,88.7,83.4,0.89,71.2,62.9,66.5,1.13,61.9,11.6,11.0,5.9,6.8,2.9,9999.0,100.0,22.5,15.6,9999.0,129.0,535.0,27.9,12.1,17.3,17.2,18.3,7.1,9999.0,100.0,29.5,"[(36.502491833804925, 2.9184625932504673), (36...",Eastern
9,Mombasa,1,2021,2021,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416011,yes,yes,2.0,shcounty,11.0,mombasa,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,5.8,18.8,26.9,15.0,23.6,9.9,9999.0,100.0,45.8,48.5,7.9,912.0,598.0,2.5,10.0,31.3,16.8,25.9,13.4,9999.0,100.0,41.4,"[(39.74962407842554, -3.9590054913246604), (39...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416011,yes,no,1.0,hv024,1.0,Mombasa,,9999.0,,Admin1,Counties,no,The border between Mombasa and Kwale slightly ...,Survey is representative at one level: 47 regi...,107.1,115.2,111.0,0.93,75.8,82.7,79.2,0.92,6.8,14.7,22.3,13.5,22.0,20.7,9999.0,100.0,37.0,56.2,7.9,947.0,749.0,3.1,11.1,23.7,12.6,27.9,21.6,9999.0,100.0,34.8,"[(39.64102919982059, -4.063305419958169), (39....",Coast


In [757]:
a_gr_outcome['treated'] = [0 if a_gr_filtered['Number Projects'].iloc[i] == 0 else 1 for i in range(len(a_gr_filtered))]
a_gr_outcome

Unnamed: 0,County,Number Projects,min_start,max_end,ISO_x,FIPS_x,DHSCC_x,SVYTYPE_x,SVYYEAR_x,CNTRYNAMEE_x,CNTRYNAMEF_x,CNTRYNAMES_x,DHSREGFR_x,DHSREGSP_x,SVYID_x,REG_ID_x,Svy_Map_x,MULTLEVEL_x,LEVELRNK_x,REGVAR_x,REGCODE_x,REGNAME_x,OTHREGVAR_x,OTHREGCO_x,OTHREGNA_x,LEVELCO_x,LEVELNA_x,REPALLIND_x,REGNOTES_x,SVYNOTES_x,EDGARPWFEM_x,EDGARPMMAL_x,EDGARPBBTH_x,EDGARPBGPI_x,EDGARSWFEM_x,EDGARSMMAL_x,EDGARSBBTH_x,EDGARSBGPI_x,EDEDUCWNED_x,EDEDUCWSPR_x,EDEDUCWCPR_x,EDEDUCWSSC_x,EDEDUCWCSC_x,EDEDUCWHGH_x,EDEDUCWDKM_x,EDEDUCWTOT_x,EDEDUCWPRI_x,EDEDUCWSEH_x,EDEDUCWMYR_x,EDEDUCWNUM_x,EDEDUCWUNW_x,EDEDUCMNED_x,EDEDUCMSPR_x,EDEDUCMCPR_x,EDEDUCMSSC_x,EDEDUCMCSC_x,EDEDUCMHGH_x,EDEDUCMDKM_x,EDEDUCMTOT_x,EDEDUCMPRI_x,coords_x,ISO_y,FIPS_y,DHSCC_y,SVYTYPE_y,SVYYEAR_y,CNTRYNAMEE_y,CNTRYNAMEF_y,CNTRYNAMES_y,DHSREGFR_y,DHSREGSP_y,SVYID_y,REG_ID_y,Svy_Map_y,MULTLEVEL_y,LEVELRNK_y,REGVAR_y,REGCODE_y,REGNAME_y,OTHREGVAR_y,OTHREGCO_y,OTHREGNA_y,LEVELCO_y,LEVELNA_y,REPALLIND_y,REGNOTES_y,SVYNOTES_y,EDGARPWFEM_y,EDGARPMMAL_y,EDGARPBBTH_y,EDGARPBGPI_y,EDGARSWFEM_y,EDGARSMMAL_y,EDGARSBBTH_y,EDGARSBGPI_y,EDEDUCWNED_y,EDEDUCWSPR_y,EDEDUCWCPR_y,EDEDUCWSSC_y,EDEDUCWCSC_y,EDEDUCWHGH_y,EDEDUCWDKM_y,EDEDUCWTOT_y,EDEDUCWPRI_y,EDEDUCWSEH_y,EDEDUCWMYR_y,EDEDUCWNUM_y,EDEDUCWUNW_y,EDEDUCMNED_y,EDEDUCMSPR_y,EDEDUCMCPR_y,EDEDUCMSSC_y,EDEDUCMCSC_y,EDEDUCMHGH_y,EDEDUCMDKM_y,EDEDUCMTOT_y,EDEDUCMPRI_y,coords_y,Province,treated
0,Embu,2,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416035,yes,yes,2.0,shcounty,35.0,embu,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.3,28.6,29.2,16.6,13.7,10.6,9999.0,100.0,57.8,40.9,7.6,459.0,645.0,1.0,33.6,27.4,16.6,12.2,9.3,9999.0,100.0,61.0,"[(37.65222732422325, -0.8470060993128072), (37...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416035,yes,no,1.0,hv024,14.0,Embu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,105.1,112.5,108.8,0.93,127.3,85.8,101.7,1.48,1.2,13.2,23.8,21.8,21.9,18.1,9999.0,100.0,37.1,61.8,7.9,358.0,584.0,0.0,18.8,19.9,19.4,22.3,19.5,9999.0,100.0,38.8,"[(37.65222732422325, -0.8470060993128072), (37...",Eastern,1
1,Isiolo,3,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416032,yes,yes,2.0,shcounty,32.0,isiolo,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,39.7,22.5,18.1,6.0,8.7,5.0,9999.0,100.0,40.6,19.7,5.0,104.0,606.0,11.3,29.7,21.1,12.5,21.9,3.5,9999.0,100.0,50.8,"[(36.927832509683924, 0.7265079527228977), (36...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416032,yes,no,1.0,hv024,11.0,Isiolo,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,93.6,92.1,92.9,1.02,81.6,70.5,75.9,1.16,25.3,14.3,18.5,12.2,15.5,14.3,9999.0,100.0,32.8,41.9,7.3,137.0,623.0,9.2,13.1,20.1,13.7,19.9,24.0,9999.0,100.0,33.2,"[(39.09465200983874, 1.8961535354557668), (39....",Eastern,1
2,Kilifi,0,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416013,yes,yes,2.0,shcounty,13.0,kilifi,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,20.4,34.1,19.3,10.5,11.4,4.4,9999.0,100.0,53.3,26.3,6.4,1043.0,824.0,2.6,34.4,24.9,15.9,13.5,8.7,9999.0,100.0,59.3,"[(39.81000132006619, -3.632083380178983), (39....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416013,yes,no,1.0,hv024,3.0,Kilifi,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,121.5,122.0,121.8,1.0,46.6,69.0,58.3,0.68,13.3,35.5,20.2,12.3,11.0,7.6,9999.0,100.0,55.8,30.9,6.6,928.0,742.0,0.8,27.4,27.7,15.0,16.1,13.0,9999.0,100.0,55.1,"[(39.81000132006619, -3.632083380178983), (39....",Coast,0
3,Kitui,3,2016,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416036,yes,yes,2.0,shcounty,36.0,kitui,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,3.9,35.4,34.4,12.9,8.7,4.6,9999.0,100.0,69.9,26.2,7.3,759.0,747.0,2.7,44.8,23.3,15.5,9.2,4.4,9999.0,100.0,68.2,"[(38.79114234915062, -2.3640895236817414), (38...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416036,yes,no,1.0,hv024,15.0,Kitui,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,117.3,118.3,117.8,0.99,93.7,72.8,82.4,1.29,1.5,16.8,29.4,22.7,14.2,15.4,9999.0,100.0,46.2,52.3,7.7,735.0,671.0,1.9,19.9,23.3,23.1,15.4,16.3,9999.0,100.0,43.2,"[(38.79114234915062, -2.3640895236817414), (38...",Eastern,1
4,Kwale,1,2020,2020,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416012,yes,yes,2.0,shcounty,12.0,kwale,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,21.7,35.0,23.5,6.9,8.5,4.4,9999.0,100.0,58.5,19.8,6.1,619.0,671.0,7.8,38.7,19.6,11.2,14.1,8.7,9999.0,100.0,58.2,"[(39.37152852042874, -4.657111200201257), (39....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416012,yes,no,1.0,hv024,2.0,Kwale,,9999.0,,Admin1,Counties,no,The border between Mombasa and Kwale slightly ...,Survey is representative at one level: 47 regi...,104.5,109.6,106.9,0.95,43.4,48.6,46.0,0.89,18.1,27.0,23.7,11.9,12.3,6.8,9999.0,100.0,50.8,31.1,7.0,498.0,711.0,4.4,33.9,22.4,14.2,18.4,6.6,9999.0,100.0,56.4,"[(39.37152852042874, -4.657111200201257), (39....",Coast,1
5,Lamu,0,0,0,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416015,yes,yes,2.0,shcounty,15.0,lamu,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,17.0,39.1,21.0,10.4,6.2,6.3,9999.0,100.0,60.1,22.9,6.4,89.0,600.0,5.8,41.2,18.4,11.2,9.6,13.7,9999.0,100.0,59.6,"[(40.84083179986709, -2.4303888900705033), (40...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416015,yes,no,1.0,hv024,5.0,Lamu,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,106.7,107.4,107.1,0.99,58.7,62.4,60.4,0.94,12.5,31.7,20.4,18.6,9.4,7.4,9999.0,100.0,52.1,35.4,6.8,101.0,675.0,5.7,28.0,21.9,17.9,14.0,12.4,9999.0,100.0,50.0,"[(40.84083179986709, -2.4303888900705033), (40...",Coast,0
6,Machakos,4,2022,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416037,yes,yes,2.0,shcounty,37.0,machakos,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.2,15.9,36.9,20.1,16.5,10.4,9999.0,100.0,52.8,47.0,7.8,873.0,718.0,0.3,21.4,31.6,12.5,24.3,10.0,9999.0,100.0,53.0,"[(37.84432601321015, -0.8136776624792788), (37...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416037,yes,no,1.0,hv024,16.0,Machakos,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,103.9,115.4,109.7,0.9,115.9,101.8,107.4,1.14,0.4,9.2,18.0,27.5,20.8,24.1,9999.0,100.0,27.2,72.4,8.0,992.0,699.0,0.3,13.0,21.0,24.4,20.4,20.9,9999.0,100.0,34.1,"[(37.30863783309604, -1.7062805202227196), (37...",Eastern,1
7,Makueni,4,2016,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416038,yes,yes,2.0,shcounty,38.0,makueni,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,0.9,21.6,34.6,24.0,10.4,8.5,9999.0,100.0,56.2,42.9,7.7,680.0,746.0,0.3,24.4,28.5,20.0,17.1,9.7,9999.0,100.0,52.9,"[(38.10272293004476, -2.283506456842474), (38....",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416038,yes,no,1.0,hv024,17.0,Makueni,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,116.1,114.4,115.2,1.01,90.1,97.2,93.5,0.93,0.0,13.9,27.9,23.0,19.7,15.5,9999.0,100.0,41.8,58.2,7.7,683.0,720.0,0.4,25.2,18.7,26.5,17.1,12.0,9999.0,100.0,43.9,"[(37.4905662165782, -1.5200041825581252), (37....",Eastern,1
8,Marsabit,2,2023,2024,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416031,yes,yes,2.0,shcounty,31.0,marsabit,hv024,3.0,eastern,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,61.9,16.0,10.3,5.0,4.3,2.4,9999.0,100.0,26.4,11.8,9999.0,115.0,575.0,35.7,18.6,10.0,7.7,15.4,12.6,9999.0,100.0,28.6,"[(36.502491833804925, 2.9184625932504673), (36...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416031,yes,no,1.0,hv024,10.0,Marsabit,,9999.0,,Admin1,Counties,no,,Survey is representative at one level: 47 regi...,78.6,88.7,83.4,0.89,71.2,62.9,66.5,1.13,61.9,11.6,11.0,5.9,6.8,2.9,9999.0,100.0,22.5,15.6,9999.0,129.0,535.0,27.9,12.1,17.3,17.2,18.3,7.1,9999.0,100.0,29.5,"[(36.502491833804925, 2.9184625932504673), (36...",Eastern,1
9,Mombasa,1,2021,2021,KE,KE,KE,DHS,2014.0,Kenya,,,,,451.0,KEDHS2014416011,yes,yes,2.0,shcounty,11.0,mombasa,hv024,1.0,coast,Admin1,Counties,no,,Survey is representative at two levels: 47 reg...,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,5.8,18.8,26.9,15.0,23.6,9.9,9999.0,100.0,45.8,48.5,7.9,912.0,598.0,2.5,10.0,31.3,16.8,25.9,13.4,9999.0,100.0,41.4,"[(39.74962407842554, -3.9590054913246604), (39...",KE,KE,KE,DHS,2022.0,Kenya,,,,,566.0,KEDHS2022416011,yes,no,1.0,hv024,1.0,Mombasa,,9999.0,,Admin1,Counties,no,The border between Mombasa and Kwale slightly ...,Survey is representative at one level: 47 regi...,107.1,115.2,111.0,0.93,75.8,82.7,79.2,0.92,6.8,14.7,22.3,13.5,22.0,20.7,9999.0,100.0,37.0,56.2,7.9,947.0,749.0,3.1,11.1,23.7,12.6,27.9,21.6,9999.0,100.0,34.8,"[(39.64102919982059, -4.063305419958169), (39....",Coast,1


# Descriptive Analysis

## Summary Statistics

## Data Visualization

# Econometric Analysis

## Differences-in-Differences Regression

In [758]:
# Define function for creating the regression table
def create_reg_table(var, new_var, x1, x2=None):
    # Pre-treatment data
    if (x2 != None):
        cols = [var + '_x', x1, x2]
    else:
        cols = [var + '_x', x1]
    df_before = restricted_county_outcome[cols]
    df_before['t'] = 0
    df_before.rename({var + '_x': new_var}, axis=1, inplace=True)

    # Post-treatment data
    if (x2 != None):
        cols = [var + '_y', x1, x2]
    else:
        cols = [var + '_y', x1]
    df_after = restricted_county_outcome[cols]
    df_after['t'] = 1
    df_after.rename({var + '_y': new_var}, axis=1, inplace=True)

    # Merged data
    df_reg = pd.concat([df_before, df_after])
    df_reg.dropna(inplace=True)
    
    return df_reg

### 1. Education

#### 1.1. Percentage of men with some primary education

In [759]:
# Create regression tables for each of the 2 independent variables
var1 = 'EDEDUCWSPR'
df_reg_educ_num_1 = create_reg_table(var1, 'educ', 'treated_num')
df_reg_educ_cov_1 = create_reg_table(var1, 'educ', 'treated_cov')

# Run regression
mod_educ_num_1 = ols('educ ~ treated_num + t + treated_num*t', data=df_reg_educ_num_1).fit(cov_type='HC1')
mod_educ_cov_1 = ols('educ ~ treated_cov + t + treated_cov*t', data=df_reg_educ_cov_1).fit(cov_type='HC1')

In [760]:
print(mod_educ_num_1.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.104
Model:                            OLS   Adj. R-squared:                  0.048
Method:                 Least Squares   F-statistic:                     3.751
Date:                Thu, 25 Apr 2024   Prob (F-statistic):             0.0168
Time:                        22:10:15   Log-Likelihood:                -197.88
No. Observations:                  52   AIC:                             403.8
Df Residuals:                      48   BIC:                             411.6
Df Model:                           3                                         
Covariance Type:                  HC1                                         
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        28.7588      2.275     12.642

In [761]:
m1 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var1, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
print(m1.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.124
Model:                            OLS   Adj. R-squared:                  0.028
Method:                 Least Squares   F-statistic:                     34.62
Date:                Thu, 25 Apr 2024   Prob (F-statistic):           1.64e-14
Time:                        22:10:15   Log-Likelihood:                -197.29
No. Observations:                  52   AIC:                             406.6
Df Residuals:                      46   BIC:                             418.3
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     28.7588      2.324     12.376      0.0

In [762]:
m2 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var1, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')
print(m2.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.166
Model:                            OLS   Adj. R-squared:                  0.095
Method:                 Least Squares   F-statistic:                     3.801
Date:                Thu, 25 Apr 2024   Prob (F-statistic):            0.00930
Time:                        22:10:15   Log-Likelihood:                -195.99
No. Observations:                  52   AIC:                             402.0
Df Residuals:                      47   BIC:                             411.7
Df Model:                           4                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     28.3333      2.214     12.796      0.0

In [763]:
# Define function for creating the regression table
def create_reg_table(var, new_var, x1, x2=None):
    # Pre-treatment data
    if (x2 != None):
        cols = [var + '_x', x1, x2]
    else:
        cols = [var + '_x', x1]
    df_before = a_gr_outcome[cols]
    df_before['t'] = 0
    df_before.rename({var + '_x': new_var}, axis=1, inplace=True)

    # Post-treatment data
    if (x2 != None):
        cols = [var + '_y', x1, x2]
    else:
        cols = [var + '_y', x1]
    df_after = a_gr_outcome[cols]
    df_after['t'] = 1
    df_after.rename({var + '_y': new_var}, axis=1, inplace=True)

    # Merged data
    df_reg = pd.concat([df_before, df_after])
    df_reg.dropna(inplace=True)
    
    return df_reg

In [764]:
var1 = 'EDEDUCMNED'
a = create_reg_table(var1, 'educ', 'treated')
print(ols('educ ~ treated + t + treated*t', data=a).fit(cov_type='HC1').summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.047
Model:                            OLS   Adj. R-squared:                 -0.096
Method:                 Least Squares   F-statistic:                    0.3038
Date:                Thu, 25 Apr 2024   Prob (F-statistic):              0.822
Time:                        22:10:15   Log-Likelihood:                -88.742
No. Observations:                  24   AIC:                             185.5
Df Residuals:                      20   BIC:                             190.2
Df Model:                           3                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      8.8667      4.255      2.084      0.0

---

# Extra: Province-Level Analysis

In [765]:
# Create dataframe of dummy variables per project
prov = counties['Province'].value_counts().index
df_by_project = pd.DataFrame(columns=prov)
for i in range(len(kenya_trials_final['Province'])):
    temp_list = []
    for j in prov:
        if (j in kenya_trials_final['Province'].iloc[i]):
            temp_list.append(1)
        else:
            temp_list.append(0)
    df_by_project.loc[kenya_trials_final['RCT_ID'].iloc[i]] = temp_list

In [766]:
df_by_project = df_by_project.rename_axis(None, axis=1)
df_by_project.head()

Unnamed: 0,Rift Valley,Eastern,Coast,Nyanza,Central,Western,Northern,Nairobi
AEARCTR-0000016,0,0,0,0,0,0,0,1
AEARCTR-0000017,0,0,0,1,0,0,0,0
AEARCTR-0000019,0,0,0,1,0,0,0,0
AEARCTR-0000067,0,0,0,0,0,1,0,0
AEARCTR-0000105,0,1,0,0,0,0,0,0


In [767]:
# Transpose project-level dataframe to get county-level dataframe
df_by_prov = df_by_project.T
df_by_prov.head()

Unnamed: 0,AEARCTR-0000016,AEARCTR-0000017,AEARCTR-0000019,AEARCTR-0000067,AEARCTR-0000105,AEARCTR-0000218,AEARCTR-0000287,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000443,AEARCTR-0000459,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000647,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000705,AEARCTR-0000740,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000934,AEARCTR-0000946,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001002,AEARCTR-0001008,AEARCTR-0001076,AEARCTR-0001167,AEARCTR-0001183,AEARCTR-0001187,AEARCTR-0001197,AEARCTR-0001293,AEARCTR-0001304,AEARCTR-0001358,AEARCTR-0001373,AEARCTR-0001432,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001592,AEARCTR-0001643,AEARCTR-0001706,AEARCTR-0001748,AEARCTR-0001750,AEARCTR-0001812,AEARCTR-0001824,AEARCTR-0001848,AEARCTR-0001893,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003177,AEARCTR-0003224,AEARCTR-0003310,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005969,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009200,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011089,AEARCTR-0011126,AEARCTR-0011135,AEARCTR-0011184,AEARCTR-0011205,AEARCTR-0011253,AEARCTR-0011473,AEARCTR-0011741,AEARCTR-0011769,AEARCTR-0011880,AEARCTR-0011937,AEARCTR-0012044,AEARCTR-0012126,AEARCTR-0012636,AEARCTR-0012796
Rift Valley,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0
Eastern,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0
Coast,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Nyanza,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Central,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0


In [768]:
# Reset index and rename column for both dataframes
df_by_project = df_by_project.reset_index().rename({'index': 'RCT_ID'}, axis=1)
df_by_prov = df_by_prov.reset_index().rename({'index': 'Province'}, axis=1)

In [769]:
# Convert dates to Pandas datetime format
kenya_trials_final[['Start date', 'End date']] = kenya_trials[['Start date', 'End date']].apply(pd.to_datetime)
df_by_project = df_by_project.merge(kenya_trials_final[['RCT_ID', 'Start date', 'End date']], how='left', on='RCT_ID')
df_by_project.head()

Unnamed: 0,RCT_ID,Rift Valley,Eastern,Coast,Nyanza,Central,Western,Northern,Nairobi,Start date,End date
0,AEARCTR-0000016,0,0,0,0,0,0,0,1,2012-07-16,2013-02-14
1,AEARCTR-0000017,0,0,0,1,0,0,0,0,2011-05-01,2013-02-28
2,AEARCTR-0000019,0,0,0,1,0,0,0,0,2011-05-01,2013-02-28
3,AEARCTR-0000067,0,0,0,0,0,1,0,0,2012-08-01,2014-08-15
4,AEARCTR-0000105,0,1,0,0,0,0,0,0,2013-02-01,2017-03-31


In [770]:
df_by_project.shape

# 154 total projects after all the data cleaning (projects removed mainly because of lack of location information)

(154, 11)

In [771]:
min(df_by_project['Start date'])

Timestamp('2000-06-01 00:00:00')

In [772]:
max(df_by_project['End date'])

Timestamp('2027-12-31 00:00:00')

- Earliest start date is in 2000 and latest end date is in 2027
- Don't have to restrict time period as we have all the required data at the province-level
- The treatment group should consist of provinces that only started after 2000 (i.e. all projects) and the control group should consist of provinces where no projects were implemented
- Latest end date should be 2022 to measure treatment effect
- Define coverage variable: for what portion of the 8-year period did the project run?
- <font color='green'> Would it make more sense to restrict to projects that ended by 2022? Completion of projects affects treatment effect? (reasonable) </font>

In [773]:
# Restrict start date to before 2023
restricted_project = df_by_project[(df_by_project['Start date'].dt.year > 2003) & (df_by_project['Start date'].dt.year < 2023)]
restricted_project.head()

Unnamed: 0,RCT_ID,Rift Valley,Eastern,Coast,Nyanza,Central,Western,Northern,Nairobi,Start date,End date
0,AEARCTR-0000016,0,0,0,0,0,0,0,1,2012-07-16,2013-02-14
1,AEARCTR-0000017,0,0,0,1,0,0,0,0,2011-05-01,2013-02-28
2,AEARCTR-0000019,0,0,0,1,0,0,0,0,2011-05-01,2013-02-28
3,AEARCTR-0000067,0,0,0,0,0,1,0,0,2012-08-01,2014-08-15
4,AEARCTR-0000105,0,1,0,0,0,0,0,0,2013-02-01,2017-03-31


In [774]:
restricted_project.shape

# Removed 14 projects

(136, 11)

In [775]:
# Define coverage variable: for what portion of the years did the study run?
cov = []

for i in range(len(restricted_project)):
    row = restricted_project.iloc[i]
    if row['End date'].year < 2023:
        val = (row['End date'].year - row['Start date'].year)/20
    else:
        val = (2023 - row['Start date'].year)/23 # as we are only considering coverage during our time period of interest: 2000 to 2022
    cov.append(val)

restricted_project['Coverage'] = cov
min(restricted_project['Coverage']), max(restricted_project['Coverage'])

(0.0, 0.65)

In [836]:
restricted_project[restricted_project['Coverage'] == 0.65]

Unnamed: 0,RCT_ID,Rift Valley,Eastern,Coast,Nyanza,Central,Western,Northern,Nairobi,Start date,End date,Coverage
142,AEARCTR-0011184,0,0,0,0,0,0,1,0,2009-10-01,2022-02-28,0.65


- Longest project: https://www.socialscienceregistry.org/trials/11184

In [777]:
# Create dataset at the project-county level
df_prov_project = pd.melt(restricted_project, id_vars=['RCT_ID', 'Start date', 'End date', 'Coverage'], var_name='Province', value_name='Number Projects')
df_prov_project

Unnamed: 0,RCT_ID,Start date,End date,Coverage,Province,Number Projects
0,AEARCTR-0000016,2012-07-16,2013-02-14,0.05,Rift Valley,0
1,AEARCTR-0000017,2011-05-01,2013-02-28,0.1,Rift Valley,0
2,AEARCTR-0000019,2011-05-01,2013-02-28,0.1,Rift Valley,0
3,AEARCTR-0000067,2012-08-01,2014-08-15,0.1,Rift Valley,0
4,AEARCTR-0000105,2013-02-01,2017-03-31,0.2,Rift Valley,0
5,AEARCTR-0000218,2013-05-28,2013-08-07,0.0,Rift Valley,0
6,AEARCTR-0000287,2013-06-01,2018-06-01,0.25,Rift Valley,0
7,AEARCTR-0000323,2014-01-27,2014-10-26,0.0,Rift Valley,0
8,AEARCTR-0000350,2014-02-07,2020-08-31,0.3,Rift Valley,0
9,AEARCTR-0000443,2013-12-10,2016-06-30,0.15,Rift Valley,0


In [778]:
df_prov_project.shape #140 * 8 --> makes sense

(1088, 6)

In [779]:
# Find total number of projects for each county
grouped_data = df_prov_project.groupby('Province').agg({'Number Projects': sum})
grouped_data

Unnamed: 0_level_0,Number Projects
Province,Unnamed: 1_level_1
Central,10
Coast,2
Eastern,14
Nairobi,52
Northern,5
Nyanza,21
Rift Valley,4
Western,44


In [780]:
# Compute coverage by province
province_cov = []
for prov in grouped_data.reset_index()['Province']:
    subset = df_prov_project[df_prov_project['Province'] == prov]
    if sum(subset['Number Projects']) == 0:
        province_cov.append(0)
    else:
        cov = np.mean(subset[subset['Number Projects'] > 0]['Coverage'])
        province_cov.append(cov)

In [781]:
# Transpose project-level dataframe to get county-level dataframe
restricted_prov = restricted_project.iloc[:, :9].set_index('RCT_ID').T.reset_index().rename_axis(None, axis=1).rename({'index': 'Province'}, axis=1)
restricted_prov.sort_values(by='Province', inplace=True)
restricted_prov['Coverage'] = province_cov
restricted_prov

Unnamed: 0,Province,AEARCTR-0000016,AEARCTR-0000017,AEARCTR-0000019,AEARCTR-0000067,AEARCTR-0000105,AEARCTR-0000218,AEARCTR-0000287,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000443,AEARCTR-0000459,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000647,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000705,AEARCTR-0000740,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000934,AEARCTR-0000946,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001002,AEARCTR-0001008,AEARCTR-0001076,AEARCTR-0001167,AEARCTR-0001183,AEARCTR-0001187,AEARCTR-0001197,AEARCTR-0001293,AEARCTR-0001304,AEARCTR-0001358,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001592,AEARCTR-0001643,AEARCTR-0001706,AEARCTR-0001824,AEARCTR-0001848,AEARCTR-0001893,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003177,AEARCTR-0003224,AEARCTR-0003310,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005969,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011184,AEARCTR-0011880,Coverage
4,Central,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.144348
2,Coast,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
1,Eastern,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0.076242
7,Nairobi,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0.058236
6,Northern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.25
3,Nyanza,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0.097516
0,Rift Valley,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0875
5,Western,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0.121937


In [782]:
restricted_prov.shape

(8, 138)

In [783]:
# Merge with county-level data
restricted_prov = restricted_prov.merge(grouped_data.reset_index(), on='Province', how='left')
restricted_prov

Unnamed: 0,Province,AEARCTR-0000016,AEARCTR-0000017,AEARCTR-0000019,AEARCTR-0000067,AEARCTR-0000105,AEARCTR-0000218,AEARCTR-0000287,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000443,AEARCTR-0000459,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000647,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000705,AEARCTR-0000740,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000934,AEARCTR-0000946,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001002,AEARCTR-0001008,AEARCTR-0001076,AEARCTR-0001167,AEARCTR-0001183,AEARCTR-0001187,AEARCTR-0001197,AEARCTR-0001293,AEARCTR-0001304,AEARCTR-0001358,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001592,AEARCTR-0001643,AEARCTR-0001706,AEARCTR-0001824,AEARCTR-0001848,AEARCTR-0001893,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003177,AEARCTR-0003224,AEARCTR-0003310,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005969,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011184,AEARCTR-0011880,Coverage,Number Projects
0,Central,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.144348,10
1,Coast,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,2
2,Eastern,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0.076242,14
3,Nairobi,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0.058236,52
4,Northern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.25,5
5,Nyanza,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0.097516,21
6,Rift Valley,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0875,4
7,Western,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0.121937,44


## Data on Outcome Variables

- Spatial data from DHS in 1998 and 2022

In [784]:
# Variables for regression

# Education
var1e = 'EDEDUCMCPR' # percentage of men with completed primary education *
var2e = 'EDEDUCMCSC' # percentage of men with completed secondary education *
var3e = 'EDEDUCMSPR' # percentage of men with some primary education
var4e = 'EDEDUCMSSC' # percentage of men with some secondary education
var5e = 'EDEDUCWCPR' # percentage of women with completed primary education *
var6e = 'EDEDUCWCSC' # percentage of women with completed secondary education *
var7e = 'EDEDUCWSPR' # percentage of women with some primary education
var8e = 'EDEDUCWSSC' # percentage of women with some secondary education

#Health
var1h = 'CHVACCCBAS' # percentage of children (12-23 months) who received all 8 basic vaccinations *
var2h = 'CMECMRCCMR' # child mortality rate *
var3h = 'CMECMRCIMR' # infant mortality rate
var4h = 'CMECMRCNNR' # neonatal mortality rate
var5h = 'CMECMRCPNR' # postneonatal mortality rate
var6h = 'CMECMRCU5M' # under-5 mortality rate
var7h = 'CNNUTSCHA2' # children stunted *
var8h = 'CNNUTSCWA2' # children underweight *
var9h = 'CNNUTSCWH2' # children wasted

In [785]:
# Define function for reading Shapefiles
def read_shapefile(sf_shape):
    """
    Read a shapefile into a Pandas dataframe with a 'coords' 
    column holding the geometry information. This uses the pyshp
    package
    """

    fields = [x[0] for x in sf_shape.fields][1:]
    records = [y[:] for y in sf_shape.records()]
    #records = sf_shape.records()
    shps = [s.points for s in sf_shape.shapes()]
    df = pd.DataFrame(columns=fields, data=records)
    df = df.assign(coords=shps)
    return df

In [786]:
# Read 1998 educ data
sh_educ_1998 = shapefile.Reader('sdr_subnational_data_dhs_1998.shp')
df_educ_1998 = read_shapefile(sh_educ_1998).rename({'DHSREGEN': 'Province', 
                                         var1e: var1e+'_1998',
                                         var2e: var2e+'_1998',
                                         var3e: var3e+'_1998',
                                         var4e: var4e+'_1998',
                                         var5e: var5e+'_1998',
                                         var6e: var6e+'_1998',
                                         var7e: var7e+'_1998',
                                         var8e: var8e+'_1998'}, axis=1)
df_educ_1998

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,Province,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,EDGARPWFEM,EDGARPMMAL,EDGARPBBTH,EDGARSWFEM,EDGARSMMAL,EDGARSBBTH,EDGARSBGPI,EDEDUCWNED,EDEDUCWSPR_1998,EDEDUCWCPR_1998,EDEDUCWSSC_1998,EDEDUCWCSC_1998,EDEDUCWHGH,EDEDUCWDKM,EDEDUCWTOT,EDEDUCWPRI,EDEDUCWSEH,EDEDUCWMYR,EDEDUCWNUM,EDEDUCWUNW,EDEDUCMNED,EDEDUCMSPR_1998,EDEDUCMCPR_1998,EDEDUCMSSC_1998,EDEDUCMCSC_1998,EDEDUCMHGH,EDEDUCMDKM,EDEDUCMTOT,EDEDUCMPRI,EDEDUCMSEH,coords
0,KE,KE,KE,DHS,1998.0,Kenya,,,Nairobi Area,,,115.0,KEDHS1998416001,yes,no,1.0,hv024,1.0,nairobi,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,1.2,21.0,23.4,10.7,34.1,9.5,9999.0,100.0,44.4,54.4,9.1,770.0,419.0,1.2,13.0,19.3,13.0,42.9,10.6,9999.0,100.0,32.3,66.5,"[(36.924489975361666, -1.1947898874492466), (3..."
1,KE,KE,KE,DHS,1998.0,Kenya,,,Central,,,115.0,KEDHS1998416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,4.7,29.1,32.0,12.6,20.5,1.1,9999.0,100.0,61.1,34.2,7.1,834.0,787.0,1.1,22.3,34.5,15.3,26.1,0.7,9999.0,100.0,56.8,42.2,"[(36.49808883575042, 0.12191009497144023), (36..."
2,KE,KE,KE,DHS,1998.0,Kenya,,,Coast,,,115.0,KEDHS1998416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,30.6,26.5,21.8,7.8,12.2,1.1,9999.0,100.0,48.3,21.1,5.8,605.0,1226.0,6.7,27.2,27.7,9.9,25.3,3.2,9999.0,100.0,54.9,38.4,"[(39.37152862564932, -4.657112122005799), (39...."
3,KE,KE,KE,DHS,1998.0,Kenya,,,Eastern,,,115.0,KEDHS1998416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,10.0,39.3,25.7,9.9,13.8,1.3,9999.0,100.0,64.9,25.1,6.5,1386.0,1186.0,2.4,40.8,26.1,9.9,19.4,1.5,9999.0,100.0,66.8,30.8,"[(36.24938774059518, 4.455442427656521), (36.2..."
4,KE,KE,KE,DHS,1998.0,Kenya,,,Nyanza,,,115.0,KEDHS1998416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,10.7,47.2,16.6,13.2,11.4,0.9,9999.0,100.0,63.8,25.4,6.2,1690.0,1390.0,2.2,37.3,21.1,16.3,19.2,3.8,9999.0,100.0,58.5,39.3,"[(34.294889449630375, 0.31460952802649444), (3..."
5,KE,KE,KE,DHS,1998.0,Kenya,,,Rift Valley,,,115.0,KEDHS1998416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,14.5,39.2,21.7,9.3,13.4,1.9,9999.0,100.0,60.9,24.6,6.4,1696.0,1977.0,5.8,30.6,25.7,11.8,20.9,5.3,9999.0,100.0,56.3,37.9,"[(35.421245574729994, 4.988193511865063), (35...."
6,KE,KE,KE,DHS,1998.0,Kenya,,,Western,,,115.0,KEDHS1998416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,12.2,35.7,21.6,15.3,12.2,3.1,9999.0,100.0,57.2,30.5,6.7,899.0,896.0,5.3,33.1,18.7,15.8,22.5,4.5,9999.0,100.0,51.8,42.8,"[(34.62709236137175, 1.1038837441481633), (34...."


In [787]:
# Read 1998 health data
sh_health_1998 = shapefile.Reader('sdr_subnational_data_dhs_1998 1.shp')
df_health_1998 = read_shapefile(sh_health_1998).rename({'DHSREGEN': 'Province',
                                                             var1h: var1h+'_1998',
                                                             var2h: var2h+'_1998',
                                                             var3h: var3h+'_1998',
                                                             var4h: var4h+'_1998',
                                                             var5h: var5h+'_1998',
                                                             var6h: var6h+'_1998',
                                                             var7h: var7h+'_1998',
                                                             var8h: var8h+'_1998',
                                                             var9h: var9h+'_1998'}, axis=1)
df_health_1998

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,Province,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,CMECMRCNNR_1998,CMECMRCPNR_1998,CMECMRCIMR_1998,CMECMRCCMR_1998,CMECMRCU5M_1998,CHVACCCBAS_1998,CNNUTSCHA2_1998,CNNUTSCWH2_1998,CNNUTSCWA2_1998,MLNETCCITN,HACPHTWT1R,HACPHTMT1R,HAHIVPWHIV,HAHIVPMHIV,coords
0,KE,KE,KE,DHS,1998.0,Kenya,,,Nairobi Area,,,115.0,KEDHS1998416001,yes,no,1.0,hv024,1.0,nairobi,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",19.0,22.0,41.0,26.0,66.0,50.0,28.7,8.4,7.7,9999.0,9999.0,9999.0,9999.0,9999.0,"[(36.924489975361666, -1.1947898874492466), (3..."
1,KE,KE,KE,DHS,1998.0,Kenya,,,Central,,,115.0,KEDHS1998416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",18.0,10.0,27.0,6.0,33.0,70.8,33.4,6.1,11.6,9999.0,9999.0,9999.0,9999.0,9999.0,"[(36.49808883575042, 0.12191009497144023), (36..."
2,KE,KE,KE,DHS,1998.0,Kenya,,,Coast,,,115.0,KEDHS1998416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",28.0,42.0,70.0,28.0,96.0,67.7,42.9,5.5,21.9,9999.0,9999.0,9999.0,9999.0,9999.0,"[(39.37152862564932, -4.657112122005799), (39...."
3,KE,KE,KE,DHS,1998.0,Kenya,,,Eastern,,,115.0,KEDHS1998416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",23.0,31.0,53.0,26.0,78.0,68.6,43.3,5.3,22.7,9999.0,9999.0,9999.0,9999.0,9999.0,"[(36.24938774059518, 4.455442427656521), (36.2..."
4,KE,KE,KE,DHS,1998.0,Kenya,,,Nyanza,,,115.0,KEDHS1998416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",38.0,97.0,135.0,73.0,199.0,44.4,35.8,8.4,18.7,9999.0,9999.0,9999.0,9999.0,9999.0,"[(34.294889449630375, 0.31460952802649444), (3..."
5,KE,KE,KE,DHS,1998.0,Kenya,,,Rift Valley,,,115.0,KEDHS1998416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",28.0,22.0,50.0,18.0,68.0,66.9,36.6,8.1,19.3,9999.0,9999.0,9999.0,9999.0,9999.0,"[(35.421245574729994, 4.988193511865063), (35...."
6,KE,KE,KE,DHS,1998.0,Kenya,,,Western,,,115.0,KEDHS1998416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",20.0,44.0,64.0,63.0,122.0,51.2,40.0,4.8,15.5,9999.0,9999.0,9999.0,9999.0,9999.0,"[(34.62709236137175, 1.1038837441481633), (34...."


In [788]:
# Read 2003 data
sh_2003 = shapefile.Reader('sdr_subnational_data_dhs_2003.shp')
df_2003 = read_shapefile(sh_2003).rename({'DHSREGEN': 'Province', 
                                             var1e: var1e+'_2003',
                                             var2e: var2e+'_2003',
                                             var3e: var3e+'_2003',
                                             var4e: var4e+'_2003',
                                             var5e: var5e+'_2003',
                                             var6e: var6e+'_2003',
                                             var7e: var7e+'_2003',
                                             var8e: var8e+'_2003',
                                             var1h: var1h+'_2003',
                                             var2h: var2h+'_2003',
                                             var3h: var3h+'_2003',
                                             var4h: var4h+'_2003',
                                             var5h: var5h+'_2003',
                                             var6h: var6h+'_2003',
                                             var7h: var7h+'_2003',
                                             var8h: var8h+'_2003',
                                             var9h: var9h+'_2003'}, axis=1)
df_2003

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,Province,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,CMECMRCNNR_2003,CMECMRCPNR_2003,CMECMRCIMR_2003,CMECMRCCMR_2003,CMECMRCU5M_2003,CHVACCCBAS_2003,CNNUTSCHA2_2003,CNNUTSCWH2_2003,CNNUTSCWA2_2003,EDEDUCWSPR_2003,EDEDUCWCPR_2003,EDEDUCWSSC_2003,EDEDUCWCSC_2003,EDEDUCMSPR_2003,EDEDUCMCPR_2003,EDEDUCMSSC_2003,EDEDUCMCSC_2003,coords
0,KE,KE,KE,DHS,2003.0,Kenya,,,Nairobi Area,,,216.0,KEDHS2003416001,yes,no,1.0,hv024,1.0,nairobi,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,32.0,35.0,67.0,30.0,95.0,54.9,23.5,4.4,5.4,12.5,25.6,11.7,26.1,10.9,16.8,13.4,26.3,"[(36.924489975361666, -1.1947898874492466), (3..."
1,KE,KE,KE,DHS,2003.0,Kenya,,,Central,,,216.0,KEDHS2003416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,27.0,17.0,44.0,10.0,54.0,70.8,31.1,4.5,11.3,22.3,33.6,15.2,18.3,24.6,32.9,10.5,19.0,"[(36.49808883575042, 0.12191009497144023), (36..."
2,KE,KE,KE,DHS,2003.0,Kenya,,,Coast,,,216.0,KEDHS2003416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,45.0,33.0,78.0,41.0,116.0,62.1,41.2,5.7,19.0,28.6,21.9,5.8,9.9,29.9,30.5,9.4,17.6,"[(39.37152862564932, -4.657112122005799), (39...."
3,KE,KE,KE,DHS,2003.0,Kenya,,,Eastern,,,216.0,KEDHS2003416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,32.0,24.0,56.0,29.0,84.0,59.3,39.2,4.7,17.3,37.8,31.6,7.8,10.4,47.4,19.4,10.7,12.9,"[(36.24938774059518, 4.455442427656521), (36.2..."
4,KE,KE,KE,DHS,2003.0,Kenya,,,Nyanza,,,216.0,KEDHS2003416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,27.0,106.0,133.0,84.0,206.0,32.7,36.0,3.0,11.8,44.9,21.7,15.6,7.2,43.6,20.7,15.2,13.3,"[(34.294889449630375, 0.31460952802649444), (3..."
5,KE,KE,KE,DHS,2003.0,Kenya,,,Rift Valley,,,216.0,KEDHS2003416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,37.0,25.0,61.0,17.0,77.0,51.5,37.0,7.8,18.8,33.6,23.9,9.4,11.3,35.1,22.7,7.9,15.7,"[(35.421245574729994, 4.988193511865063), (35...."
6,KE,KE,KE,DHS,2003.0,Kenya,,,Western,,,216.0,KEDHS2003416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,25.0,54.0,80.0,70.0,144.0,45.4,35.4,5.8,15.9,47.6,19.3,13.8,7.7,44.2,20.5,15.3,11.3,"[(34.62709236137175, 1.1038837441481633), (34...."
7,KE,KE,KE,DHS,2003.0,Kenya,,,North-Eastern,,,216.0,KEDHS2003416008,yes,no,1.0,hv024,8.0,north eastern,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,50.0,41.0,91.0,79.0,163.0,7.5,27.9,25.8,29.6,4.0,1.8,0.1,0.3,11.9,8.8,3.1,7.2,"[(40.815277099931336, 4.251943587622918), (40...."


In [789]:
# Read 2022 educ data
sh_educ_2022 = shapefile.Reader('sdr_subnational_data_dhs_2022.shp')
df_educ_2022 = read_shapefile(sh_educ_2022).rename({'DHSREGEN': 'County'}, axis=1)

# Take average across counties to find province-level data for 2022
df_educ_2022 = df_educ_2022.merge(counties, on='County', how='left')
df_educ_2022_grp = df_educ_2022.groupby('Province')[[var1e, var2e, var3e, var4e, var5e, var6e, var7e, var8e]].agg(np.mean).reset_index().rename({'DHSREGEN': 'Province', 
                                                                                                                                                     var1e: var1e+'_2022',
                                                                                                                                                     var2e: var2e+'_2022',
                                                                                                                                                     var3e: var3e+'_2022',
                                                                                                                                                     var4e: var4e+'_2022',
                                                                                                                                                     var5e: var5e+'_2022',
                                                                                                                                                     var6e: var6e+'_2022',
                                                                                                                                                     var7e: var7e+'_2022',
                                                                                                                                                     var8e: var8e+'_2022'}, axis=1)
df_educ_2022_grp

Unnamed: 0,Province,EDEDUCMCPR_2022,EDEDUCMCSC_2022,EDEDUCMSPR_2022,EDEDUCMSSC_2022,EDEDUCWCPR_2022,EDEDUCWCSC_2022,EDEDUCWSPR_2022,EDEDUCWSSC_2022
0,Central,20.54,25.14,14.08,20.62,23.28,24.94,10.1,20.72
1,Coast,22.56,17.92,25.06,14.1,20.4,12.9,26.72,12.26
2,Eastern,19.1625,17.675,19.95,21.8375,21.3125,16.175,16.5,19.15
3,Nairobi,11.0,30.9,4.5,12.0,14.7,28.6,6.6,13.8
4,Northern,9.7,16.4,19.3,17.7,5.3,7.566667,12.266667,10.333333
5,Nyanza,20.066667,18.166667,22.3,22.816667,23.333333,16.95,22.3,25.566667
6,Rift Valley,15.171429,19.535714,19.421429,19.971429,16.757143,17.514286,18.607143,17.378571
7,Western,18.225,14.85,28.525,24.225,19.95,15.575,25.35,25.35


In [790]:
# Read 2022 health data
sh_health_2022 = shapefile.Reader('sdr_subnational_data_dhs_2022 1.shp')
df_health_2022 = read_shapefile(sh_health_2022).rename({'DHSREGEN': 'County'}, axis=1)

# Take average across counties to find province-level data for 2022
df_health_2022 = df_health_2022.merge(counties, on='County', how='left')
df_health_2022_grp = df_health_2022.groupby('Province')[[var1h, var2h, var3h, var4h, var5h, var6h, var7h, var8h, var9h]].agg(np.mean).reset_index().rename({'DHSREGEN': 'Province',
                                                                                                                                                                 var1h: var1h+'_2022',
                                                                                                                                                                 var2h: var2h+'_2022',
                                                                                                                                                                 var3h: var3h+'_2022',
                                                                                                                                                                 var4h: var4h+'_2022',
                                                                                                                                                                 var5h: var5h+'_2022',
                                                                                                                                                                 var6h: var6h+'_2022',
                                                                                                                                                                 var7h: var7h+'_2022',
                                                                                                                                                                 var8h: var8h+'_2022',
                                                                                                                                                                 var9h: var9h+'_2022'}, axis=1)
df_health_2022_grp

Unnamed: 0,Province,CHVACCCBAS_2022,CMECMRCCMR_2022,CMECMRCIMR_2022,CMECMRCNNR_2022,CMECMRCPNR_2022,CMECMRCU5M_2022,CNNUTSCHA2_2022,CNNUTSCWA2_2022,CNNUTSCWH2_2022
0,Central,88.7,5.4,43.4,29.6,13.6,48.0,13.38,5.12,2.34
1,Coast,79.92,7.6,35.8,24.8,10.6,43.0,22.08,14.46,6.56
2,Eastern,81.0375,6.125,26.125,18.375,7.5,32.25,19.9625,12.2375,6.8
3,Nairobi,77.2,4.0,40.0,20.0,20.0,44.0,11.1,5.3,2.5
4,Northern,33.533333,6.333333,34.666667,27.0,7.666667,40.666667,13.966667,17.833333,18.466667
5,Nyanza,83.466667,15.5,39.166667,27.0,12.5,54.0,14.233333,5.466667,2.166667
6,Rift Valley,76.142857,7.571429,33.071429,21.428571,11.642857,40.214286,20.678571,14.6,7.121429
7,Western,89.175,17.75,33.75,19.75,14.0,51.0,15.525,7.875,2.25


In [791]:
# Read 2022 health data
sh_1993 = shapefile.Reader('sdr_subnational_data_dhs_1993.shp')
df_1993 = read_shapefile(sh_1993).rename({'DHSREGEN': 'Province'}, axis=1)
df_1993

Unnamed: 0,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,Province,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,CMECMTCNNR,CMECMTCPNR,CMECMTCIMR,CMECMTCCMR,CMECMTCU5M,CHVACCCBAS,CNNUTSCHA2,CNNUTSCWH2,CNNUTSCWA2,EDEDATWSPR,EDEDATWCPR,EDEDATWSSC,EDEDATWCSC,EDEDATMSPR,EDEDATMCPR,EDEDATMSSC,EDEDATMCSC,coords
0,KE,KE,KE,DHS,1993.0,Kenya,,,Nairobi Area,,,56.0,KEDHS1993416001,yes,no,1.0,hv024,1.0,nairobi,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,86.7,32.2,1.7,8.5,22.1,24.1,35.0,2.2,23.4,21.3,41.3,3.3,"[(36.924489975361666, -1.1947898874492466), (3..."
1,KE,KE,KE,DHS,1993.0,Kenya,,,Central,,,56.0,KEDHS1993416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,91.2,37.5,4.9,15.0,45.1,19.7,13.3,0.5,47.0,23.3,16.2,0.7,"[(36.49808883575042, 0.12191009497144023), (36..."
2,KE,KE,KE,DHS,1993.0,Kenya,,,Coast,,,56.0,KEDHS1993416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,80.9,48.1,11.9,29.7,34.1,13.6,8.6,0.2,40.6,19.4,15.1,0.7,"[(39.37152862564932, -4.657112122005799), (39...."
3,KE,KE,KE,DHS,1993.0,Kenya,,,Eastern,,,56.0,KEDHS1993416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,84.5,47.6,7.8,23.6,47.0,15.3,9.7,0.1,52.4,17.1,11.4,0.6,"[(36.24938774059518, 4.455442427656521), (36.2..."
4,KE,KE,KE,DHS,1993.0,Kenya,,,Nyanza,,,56.0,KEDHS1993416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,69.7,39.4,5.9,17.3,49.2,13.0,7.9,0.4,52.2,17.8,11.7,1.1,"[(34.294889449630375, 0.31460952802649444), (3..."
5,KE,KE,KE,DHS,1993.0,Kenya,,,Rift Valley,,,56.0,KEDHS1993416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,75.7,34.9,8.4,19.8,47.0,13.3,9.3,0.4,46.5,16.1,13.7,1.4,"[(35.421245574729994, 4.988193511865063), (35...."
6,KE,KE,KE,DHS,1993.0,Kenya,,,Western,,,56.0,KEDHS1993416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,yes,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,68.0,36.7,3.9,12.6,47.7,14.5,11.9,0.4,48.5,15.6,17.3,1.4,"[(34.62709236137175, 1.1038837441481633), (34...."


In [792]:
# Add column to indicate treatment status
restricted_prov['treated_num'] = [0 if restricted_prov['Number Projects'].iloc[i] == 0 else 1 for i in range(len(restricted_prov))]
restricted_prov['treated_cov'] = [0 if restricted_prov['Coverage'].iloc[i] == 0 else 1 for i in range(len(restricted_prov))]

In [793]:
# Discretize treatment variables

# Discretize number of projects into 3 categories: low, medium, high
num_low, num_med, num_high = [], [], []
for i in restricted_prov['Number Projects']:
    if i <= 5:
        num_low.append(1)
        num_med.append(0)
        num_high.append(0)
    elif (i > 5 and i < 20):
        num_low.append(0)
        num_med.append(1)
        num_high.append(0)
    elif (i >= 20):
        num_low.append(0)
        num_med.append(0)
        num_high.append(1)
        
restricted_prov['num_low'] = num_low
restricted_prov['num_med'] = num_med
restricted_prov['num_high'] = num_high

In [796]:
# Discretize treatment variables

# Discretize number of projects into 3 categories: low, medium, high
control, treat = [], []
for i in restricted_prov['Number Projects']:
    if i <= 5:
        control.append(1)
        treat.append(0)
    elif i > 5:
        control.append(0)
        treat.append(1)
        
restricted_prov['control'] = control
restricted_prov['treat'] = treat

In [797]:
# Discretize coverage into 3 categories: low, medium, high
cov_low, cov_med, cov_high = [], [], []
for i in restricted_prov['Coverage']:
    if i <= 0.05:
        cov_low.append(1)
        cov_med.append(0)
        cov_high.append(0)
    elif (i > 0.05 and i < 0.1):
        cov_low.append(0)
        cov_med.append(1)
        cov_high.append(0)
    else:
        cov_low.append(0)
        cov_med.append(0)
        cov_high.append(1)
        
restricted_prov['cov_low'] = cov_low
restricted_prov['cov_med'] = cov_med
restricted_prov['cov_high'] = cov_high

In [857]:
df_2003.sort_values(by='Province',inplace=True)
df_2003['Province'] = counties['Province'].value_counts().index.sort_values()

In [858]:
# Merge data on trials with data on outcome variables
restricted_prov_outcome = restricted_prov.merge(df_educ_1998, on='Province', how='left').merge(df_educ_2022_grp, on='Province', how='left').merge(df_health_1998, on='Province', how='left').merge(df_health_2022_grp, on='Province', how='left').merge(df_2003, on='Province', how='left')
restricted_prov_outcome

Unnamed: 0,Province,AEARCTR-0000016,AEARCTR-0000017,AEARCTR-0000019,AEARCTR-0000067,AEARCTR-0000105,AEARCTR-0000218,AEARCTR-0000287,AEARCTR-0000323,AEARCTR-0000350,AEARCTR-0000443,AEARCTR-0000459,AEARCTR-0000486,AEARCTR-0000541,AEARCTR-0000605,AEARCTR-0000627,AEARCTR-0000647,AEARCTR-0000662,AEARCTR-0000669,AEARCTR-0000676,AEARCTR-0000705,AEARCTR-0000740,AEARCTR-0000766,AEARCTR-0000788,AEARCTR-0000790,AEARCTR-0000791,AEARCTR-0000843,AEARCTR-0000844,AEARCTR-0000893,AEARCTR-0000907,AEARCTR-0000912,AEARCTR-0000934,AEARCTR-0000946,AEARCTR-0000962,AEARCTR-0000991,AEARCTR-0000996,AEARCTR-0001002,AEARCTR-0001008,AEARCTR-0001076,AEARCTR-0001167,AEARCTR-0001183,AEARCTR-0001187,AEARCTR-0001197,AEARCTR-0001293,AEARCTR-0001304,AEARCTR-0001358,AEARCTR-0001373,AEARCTR-0001457,AEARCTR-0001482,AEARCTR-0001484,AEARCTR-0001500,AEARCTR-0001515,AEARCTR-0001574,AEARCTR-0001592,AEARCTR-0001643,AEARCTR-0001706,AEARCTR-0001824,AEARCTR-0001848,AEARCTR-0001893,AEARCTR-0001902,AEARCTR-0001997,AEARCTR-0002015,AEARCTR-0002019,AEARCTR-0002026,AEARCTR-0002063,AEARCTR-0002071,AEARCTR-0002133,AEARCTR-0002188,AEARCTR-0002313,AEARCTR-0002370,AEARCTR-0002375,AEARCTR-0002401,AEARCTR-0002448,AEARCTR-0002484,AEARCTR-0002579,AEARCTR-0002692,AEARCTR-0002741,AEARCTR-0002850,AEARCTR-0002913,AEARCTR-0002923,AEARCTR-0002948,AEARCTR-0003101,AEARCTR-0003138,AEARCTR-0003177,AEARCTR-0003224,AEARCTR-0003310,AEARCTR-0003556,AEARCTR-0003679,AEARCTR-0003937,AEARCTR-0004110,AEARCTR-0004498,AEARCTR-0004649,AEARCTR-0004818,AEARCTR-0005113,AEARCTR-0005170,AEARCTR-0005510,AEARCTR-0005517,AEARCTR-0005564,AEARCTR-0005621,AEARCTR-0005704,AEARCTR-0005822,AEARCTR-0005845,AEARCTR-0005941,AEARCTR-0005969,AEARCTR-0005971,AEARCTR-0006001,AEARCTR-0006089,AEARCTR-0006126,AEARCTR-0006267,AEARCTR-0006413,AEARCTR-0006675,AEARCTR-0006717,AEARCTR-0007067,AEARCTR-0007168,AEARCTR-0007424,AEARCTR-0007435,AEARCTR-0007738,AEARCTR-0007744,AEARCTR-0008292,AEARCTR-0009075,AEARCTR-0009102,AEARCTR-0009163,AEARCTR-0009315,AEARCTR-0009348,AEARCTR-0009634,AEARCTR-0009644,AEARCTR-0009676,AEARCTR-0009743,AEARCTR-0009911,AEARCTR-0009930,AEARCTR-0010051,AEARCTR-0010101,AEARCTR-0010113,AEARCTR-0010426,AEARCTR-0010651,AEARCTR-0011184,AEARCTR-0011880,Coverage,Number Projects,treated_num,treated_cov,num_low,num_med,num_high,control,treat,cov_low,cov_med,cov_high,ISO_x,FIPS_x,DHSCC_x,SVYTYPE_x,SVYYEAR_x,CNTRYNAMEE_x,CNTRYNAMEF_x,CNTRYNAMES_x,DHSREGFR_x,DHSREGSP_x,SVYID_x,REG_ID_x,Svy_Map_x,MULTLEVEL_x,LEVELRNK_x,REGVAR_x,REGCODE_x,REGNAME_x,OTHREGVAR_x,OTHREGCO_x,OTHREGNA_x,LEVELCO_x,LEVELNA_x,REPALLIND_x,REGNOTES_x,SVYNOTES_x,EDGARPWFEM,EDGARPMMAL,EDGARPBBTH,EDGARSWFEM,EDGARSMMAL,EDGARSBBTH,EDGARSBGPI,EDEDUCWNED,EDEDUCWSPR_1998,EDEDUCWCPR_1998,EDEDUCWSSC_1998,EDEDUCWCSC_1998,EDEDUCWHGH,EDEDUCWDKM,EDEDUCWTOT,EDEDUCWPRI,EDEDUCWSEH,EDEDUCWMYR,EDEDUCWNUM,EDEDUCWUNW,EDEDUCMNED,EDEDUCMSPR_1998,EDEDUCMCPR_1998,EDEDUCMSSC_1998,EDEDUCMCSC_1998,EDEDUCMHGH,EDEDUCMDKM,EDEDUCMTOT,EDEDUCMPRI,EDEDUCMSEH,coords_x,EDEDUCMCPR_2022,EDEDUCMCSC_2022,EDEDUCMSPR_2022,EDEDUCMSSC_2022,EDEDUCWCPR_2022,EDEDUCWCSC_2022,EDEDUCWSPR_2022,EDEDUCWSSC_2022,ISO_y,FIPS_y,DHSCC_y,SVYTYPE_y,SVYYEAR_y,CNTRYNAMEE_y,CNTRYNAMEF_y,CNTRYNAMES_y,DHSREGFR_y,DHSREGSP_y,SVYID_y,REG_ID_y,Svy_Map_y,MULTLEVEL_y,LEVELRNK_y,REGVAR_y,REGCODE_y,REGNAME_y,OTHREGVAR_y,OTHREGCO_y,OTHREGNA_y,LEVELCO_y,LEVELNA_y,REPALLIND_y,REGNOTES_y,SVYNOTES_y,CMECMRCNNR_1998,CMECMRCPNR_1998,CMECMRCIMR_1998,CMECMRCCMR_1998,CMECMRCU5M_1998,CHVACCCBAS_1998,CNNUTSCHA2_1998,CNNUTSCWH2_1998,CNNUTSCWA2_1998,MLNETCCITN,HACPHTWT1R,HACPHTMT1R,HAHIVPWHIV,HAHIVPMHIV,coords_y,CHVACCCBAS_2022,CMECMRCCMR_2022,CMECMRCIMR_2022,CMECMRCNNR_2022,CMECMRCPNR_2022,CMECMRCU5M_2022,CNNUTSCHA2_2022,CNNUTSCWA2_2022,CNNUTSCWH2_2022,ISO,FIPS,DHSCC,SVYTYPE,SVYYEAR,CNTRYNAMEE,CNTRYNAMEF,CNTRYNAMES,DHSREGFR,DHSREGSP,SVYID,REG_ID,Svy_Map,MULTLEVEL,LEVELRNK,REGVAR,REGCODE,REGNAME,OTHREGVAR,OTHREGCO,OTHREGNA,LEVELCO,LEVELNA,REPALLIND,REGNOTES,SVYNOTES,CMECMRCNNR_2003,CMECMRCPNR_2003,CMECMRCIMR_2003,CMECMRCCMR_2003,CMECMRCU5M_2003,CHVACCCBAS_2003,CNNUTSCHA2_2003,CNNUTSCWH2_2003,CNNUTSCWA2_2003,EDEDUCWSPR_2003,EDEDUCWCPR_2003,EDEDUCWSSC_2003,EDEDUCWCSC_2003,EDEDUCMSPR_2003,EDEDUCMCPR_2003,EDEDUCMSSC_2003,EDEDUCMCSC_2003,coords
0,Central,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.144348,10,1,1,0,1,0,0,1,0,0,1,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,4.7,29.1,32.0,12.6,20.5,1.1,9999.0,100.0,61.1,34.2,7.1,834.0,787.0,1.1,22.3,34.5,15.3,26.1,0.7,9999.0,100.0,56.8,42.2,"[(36.49808883575042, 0.12191009497144023), (36...",20.54,25.14,14.08,20.62,23.28,24.94,10.1,20.72,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",18.0,10.0,27.0,6.0,33.0,70.8,33.4,6.1,11.6,9999.0,9999.0,9999.0,9999.0,9999.0,"[(36.49808883575042, 0.12191009497144023), (36...",88.7,5.4,43.4,29.6,13.6,48.0,13.38,5.12,2.34,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416002,yes,no,1.0,hv024,2.0,central,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,27.0,17.0,44.0,10.0,54.0,70.8,31.1,4.5,11.3,22.3,33.6,15.2,18.3,24.6,32.9,10.5,19.0,"[(36.49808883575042, 0.12191009497144023), (36..."
1,Coast,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,2,1,0,1,0,0,1,0,1,0,0,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,30.6,26.5,21.8,7.8,12.2,1.1,9999.0,100.0,48.3,21.1,5.8,605.0,1226.0,6.7,27.2,27.7,9.9,25.3,3.2,9999.0,100.0,54.9,38.4,"[(39.37152862564932, -4.657112122005799), (39....",22.56,17.92,25.06,14.1,20.4,12.9,26.72,12.26,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",28.0,42.0,70.0,28.0,96.0,67.7,42.9,5.5,21.9,9999.0,9999.0,9999.0,9999.0,9999.0,"[(39.37152862564932, -4.657112122005799), (39....",79.92,7.6,35.8,24.8,10.6,43.0,22.08,14.46,6.56,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416003,yes,no,1.0,hv024,3.0,coast,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,45.0,33.0,78.0,41.0,116.0,62.1,41.2,5.7,19.0,28.6,21.9,5.8,9.9,29.9,30.5,9.4,17.6,"[(39.37152862564932, -4.657112122005799), (39...."
2,Eastern,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0.076242,14,1,1,0,1,0,0,1,0,1,0,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,10.0,39.3,25.7,9.9,13.8,1.3,9999.0,100.0,64.9,25.1,6.5,1386.0,1186.0,2.4,40.8,26.1,9.9,19.4,1.5,9999.0,100.0,66.8,30.8,"[(36.24938774059518, 4.455442427656521), (36.2...",19.1625,17.675,19.95,21.8375,21.3125,16.175,16.5,19.15,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",23.0,31.0,53.0,26.0,78.0,68.6,43.3,5.3,22.7,9999.0,9999.0,9999.0,9999.0,9999.0,"[(36.24938774059518, 4.455442427656521), (36.2...",81.0375,6.125,26.125,18.375,7.5,32.25,19.9625,12.2375,6.8,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416004,yes,no,1.0,hv024,4.0,eastern,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,32.0,24.0,56.0,29.0,84.0,59.3,39.2,4.7,17.3,37.8,31.6,7.8,10.4,47.4,19.4,10.7,12.9,"[(36.24938774059518, 4.455442427656521), (36.2..."
3,Nairobi,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0.058236,52,1,1,0,0,1,0,1,0,1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,30.9,4.5,12.0,14.7,28.6,6.6,13.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,77.2,4.0,40.0,20.0,20.0,44.0,11.1,5.3,2.5,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416001,yes,no,1.0,hv024,1.0,nairobi,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,32.0,35.0,67.0,30.0,95.0,54.9,23.5,4.4,5.4,12.5,25.6,11.7,26.1,10.9,16.8,13.4,26.3,"[(36.924489975361666, -1.1947898874492466), (3..."
4,Northern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.25,5,1,1,1,0,0,1,0,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.7,16.4,19.3,17.7,5.3,7.566667,12.266667,10.333333,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,33.533333,6.333333,34.666667,27.0,7.666667,40.666667,13.966667,17.833333,18.466667,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416008,yes,no,1.0,hv024,8.0,north eastern,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,50.0,41.0,91.0,79.0,163.0,7.5,27.9,25.8,29.6,4.0,1.8,0.1,0.3,11.9,8.8,3.1,7.2,"[(40.815277099931336, 4.251943587622918), (40...."
5,Nyanza,0,1,1,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0.097516,21,1,1,0,0,1,0,1,0,1,0,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,10.7,47.2,16.6,13.2,11.4,0.9,9999.0,100.0,63.8,25.4,6.2,1690.0,1390.0,2.2,37.3,21.1,16.3,19.2,3.8,9999.0,100.0,58.5,39.3,"[(34.294889449630375, 0.31460952802649444), (3...",20.066667,18.166667,22.3,22.816667,23.333333,16.95,22.3,25.566667,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",38.0,97.0,135.0,73.0,199.0,44.4,35.8,8.4,18.7,9999.0,9999.0,9999.0,9999.0,9999.0,"[(34.294889449630375, 0.31460952802649444), (3...",83.466667,15.5,39.166667,27.0,12.5,54.0,14.233333,5.466667,2.166667,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416005,yes,no,1.0,hv024,5.0,nyanza,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,27.0,106.0,133.0,84.0,206.0,32.7,36.0,3.0,11.8,44.9,21.7,15.6,7.2,43.6,20.7,15.2,13.3,"[(34.294889449630375, 0.31460952802649444), (3..."
6,Rift Valley,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0875,4,1,1,1,0,0,1,0,0,1,0,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,14.5,39.2,21.7,9.3,13.4,1.9,9999.0,100.0,60.9,24.6,6.4,1696.0,1977.0,5.8,30.6,25.7,11.8,20.9,5.3,9999.0,100.0,56.3,37.9,"[(35.421245574729994, 4.988193511865063), (35....",15.171429,19.535714,19.421429,19.971429,16.757143,17.514286,18.607143,17.378571,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",28.0,22.0,50.0,18.0,68.0,66.9,36.6,8.1,19.3,9999.0,9999.0,9999.0,9999.0,9999.0,"[(35.421245574729994, 4.988193511865063), (35....",76.142857,7.571429,33.071429,21.428571,11.642857,40.214286,20.678571,14.6,7.121429,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416006,yes,no,1.0,hv024,6.0,rift valley,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,37.0,25.0,61.0,17.0,77.0,51.5,37.0,7.8,18.8,33.6,23.9,9.4,11.3,35.1,22.7,7.9,15.7,"[(35.421245574729994, 4.988193511865063), (35...."
7,Western,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,1,1,0,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0.121937,44,1,1,0,0,1,0,1,0,0,1,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0,12.2,35.7,21.6,15.3,12.2,3.1,9999.0,100.0,57.2,30.5,6.7,899.0,896.0,5.3,33.1,18.7,15.8,22.5,4.5,9999.0,100.0,51.8,42.8,"[(34.62709236137175, 1.1038837441481633), (34....",18.225,14.85,28.525,24.225,19.95,15.575,25.35,25.35,KE,KE,KE,DHS,1998.0,Kenya,,,,,115.0,KEDHS1998416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,"7 regions as 8 admin1 areas, minus the North E...",20.0,44.0,64.0,63.0,122.0,51.2,40.0,4.8,15.5,9999.0,9999.0,9999.0,9999.0,9999.0,"[(34.62709236137175, 1.1038837441481633), (34....",89.175,17.75,33.75,19.75,14.0,51.0,15.525,7.875,2.25,KE,KE,KE,DHS,2003.0,Kenya,,,,,216.0,KEDHS2003416007,yes,no,1.0,hv024,7.0,western,,9999.0,,Admin1,Provinces,no,,8 regions as the 8 admin1 areas,25.0,54.0,80.0,70.0,144.0,45.4,35.4,5.8,15.9,47.6,19.3,13.8,7.7,44.2,20.5,15.3,11.3,"[(34.62709236137175, 1.1038837441481633), (34...."


# Descriptive Analysis

## Summary Statistics

### Treatment Variables

In [822]:
restricted_prov[['Province', 'Number Projects', 'Coverage']]

Unnamed: 0,Province,Number Projects,Coverage
0,Central,10,0.144348
1,Coast,2,0.0
2,Eastern,14,0.076242
3,Nairobi,52,0.058236
4,Northern,5,0.25
5,Nyanza,21,0.097516
6,Rift Valley,4,0.0875
7,Western,44,0.121937


### Outcome Variables

#### 1. Education

In [865]:
# 1998 data
df_educ_1998[[var1e+'_1998', var2e+'_1998', var3e+'_1998', var4e+'_1998', var5e+'_1998', var6e+'_1998', var7e+'_1998', var8e+'_1998']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
EDEDUCMCPR_1998,7.0,24.728571,5.569475,18.7,20.2,25.7,26.9,34.5
EDEDUCMCSC_1998,7.0,25.185714,8.261644,19.2,20.15,22.5,25.7,42.9
EDEDUCMSPR_1998,7.0,29.185714,9.410885,13.0,24.75,30.6,35.2,40.8
EDEDUCMSSC_1998,7.0,13.142857,2.724492,9.9,10.85,13.0,15.55,16.3
EDEDUCWCPR_1998,7.0,23.257143,4.726471,16.6,21.65,21.8,24.55,32.0
EDEDUCWCSC_1998,7.0,16.8,8.216041,11.4,12.2,13.4,17.15,34.1
EDEDUCWSPR_1998,7.0,34.0,8.958422,21.0,27.8,35.7,39.25,47.2
EDEDUCWSSC_1998,7.0,11.257143,2.577374,7.8,9.6,10.7,12.9,15.3


In [866]:
# 2003 data
df_2003[[var1e+'_2003', var2e+'_2003', var5e+'_2003', var6e+'_2003']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
EDEDUCMCPR_2003,8.0,21.5375,7.576456,8.8,18.75,20.6,24.65,32.9
EDEDUCMCSC_2003,8.0,15.4125,5.749643,7.2,12.5,14.5,17.95,26.3
EDEDUCWCPR_2003,8.0,22.425,9.690017,1.8,21.1,22.9,27.1,33.6
EDEDUCWCSC_2003,8.0,11.4,7.759786,0.3,7.575,10.15,13.05,26.1


In [867]:
#2022 data
df_educ_2022_grp[[var1e+'_2022', var2e+'_2022', var5e+'_2022', var6e+'_2022']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
EDEDUCMCPR_2022,8.0,17.053199,4.654302,9.7,14.128571,18.69375,20.185,22.56
EDEDUCMCSC_2022,8.0,20.073423,5.318132,14.85,17.35625,18.043333,20.936786,30.9
EDEDUCWCPR_2022,8.0,18.129122,5.981511,5.3,16.242857,20.175,21.804375,23.333333
EDEDUCWCSC_2022,8.0,17.527619,6.591689,7.566667,14.90625,16.5625,19.370714,28.6


#### 2. Health

In [868]:
# 1998 data
df_health_1998[[var1h+'_1998', var2h+'_1998', var3h+'_1998', var4h+'_1998', var5h+'_1998', var6h+'_1998', var7h+'_1998', var8h+'_1998', var9h+'_1998']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CHVACCCBAS_1998,7.0,59.942857,10.941337,44.4,50.6,66.9,68.15,70.8
CMECMRCCMR_1998,7.0,34.285714,24.376999,6.0,22.0,26.0,45.5,73.0
CMECMRCIMR_1998,7.0,62.857143,34.839769,27.0,45.5,53.0,67.0,135.0
CMECMRCNNR_1998,7.0,24.857143,7.081162,18.0,19.5,23.0,28.0,38.0
CMECMRCPNR_1998,7.0,38.285714,28.511485,10.0,22.0,31.0,43.0,97.0
CMECMRCU5M_1998,7.0,94.571429,53.615474,33.0,67.0,78.0,109.0,199.0
CNNUTSCHA2_1998,7.0,37.242857,5.268414,28.7,34.6,36.6,41.45,43.3
CNNUTSCWA2_1998,7.0,16.771429,5.510812,7.7,13.55,18.7,20.6,22.7
CNNUTSCWH2_1998,7.0,6.657143,1.58625,4.8,5.4,6.1,8.25,8.4


In [869]:
# 2003 data
df_2003[[var1h+'_2003', var2h+'_2003', var7h+'_2003', var8h+'_2003']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CHVACCCBAS_2003,8.0,48.025,19.954573,7.5,42.225,53.2,60.0,70.8
CMECMRCCMR_2003,8.0,45.0,28.804762,10.0,26.0,35.5,72.25,84.0
CNNUTSCHA2_2003,8.0,33.9125,5.971943,23.5,30.3,35.7,37.55,41.2
CNNUTSCWA2_2003,8.0,16.1375,7.125796,5.4,11.675,16.6,18.85,29.6


In [870]:
# 2022 data
df_health_2022_grp[[var1h+'_2022', var2h+'_2022', var7h+'_2022', var8h+'_2022']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CHVACCCBAS_2022,8.0,76.14692,17.872845,33.533333,76.935714,80.47875,84.775,89.175
CMECMRCCMR_2022,8.0,8.78497,5.011133,4.0,5.94375,6.952381,9.575,17.75
CNNUTSCHA2_2022,8.0,16.365759,3.997181,11.1,13.82,14.879167,20.141518,22.08
CNNUTSCWA2_2022,8.0,10.361562,5.03338,5.12,5.425,10.05625,14.495,17.833333


## Data Visualization

# Econometric Analysis

## Differences-in-Differences Regression

In [820]:
# educ: 1,2,5,6
# health: 1,2,7,8

In [941]:
# Define function for creabting the regression table
def create_reg_table1(var, new_var, x1, x2=None):
    restricted_prov_outcome.dropna(inplace=True)
    # Pre-treatment data
    if (x2 != None):
        cols = [var + '_2003', x1, x2]
    else:
        cols = [var + '_2003', x1]
    df_before = restricted_prov_outcome[cols]
    df_before['t'] = 0
    df_before.rename({var + '_2003': new_var}, axis=1, inplace=True)

    # Post-treatment data
    if (x2 != None):
        cols = [var + '_2022', x1, x2]
    else:
        cols = [var + '_2022', x1]
    df_after = restricted_prov_outcome[cols]
    df_after['t'] = 1
    df_after.rename({var + '_2022': new_var}, axis=1, inplace=True)

    # Merged data
    df_reg = pd.concat([df_before, df_after])
    
    return df_reg

### 1. Education

In [944]:
# Run regressions

# var1
m11 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var1e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m12 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var1e, 'educ', 'treat')).fit(cov_type='HC1')
m13 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var1e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var2
m21 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var2e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m22 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var2e, 'educ', 'treat')).fit(cov_type='HC1')
m23 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var2e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var3
m31 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var3e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m32 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var3e, 'educ', 'treat')).fit(cov_type='HC1')
m33 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var3e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var4
m41 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var4e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m42 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var4e, 'educ', 'treat')).fit(cov_type='HC1')
m43 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var4e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var5
m51 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var5e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m52 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var5e, 'educ', 'treat')).fit(cov_type='HC1')
m53 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var5e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var6
m61 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var6e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m62 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var6e, 'educ', 'treat')).fit(cov_type='HC1')
m63 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var6e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var7
m71 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var7e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m72 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var7e, 'educ', 'treat')).fit(cov_type='HC1')
m73 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var7e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var8
m81 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var8e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m82 = ols('educ ~ treat + t + treat*t', data=create_reg_table1(var8e, 'educ', 'treat')).fit(cov_type='HC1')
m83 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var8e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

In [956]:
m11.mse_resid

42.62214865480917

In [957]:
m13.mse_resid

120.74527793524815

In [946]:
# Variable 1 results
print(m11.summary())
print(m13.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.453
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                     1.371
Date:                Fri, 26 Apr 2024   Prob (F-statistic):              0.352
Time:                        14:03:46   Log-Likelihood:                -32.242
No. Observations:                  12   AIC:                             76.48
Df Residuals:                       6   BIC:                             79.39
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     26.6000      3.900      6.821      0.0

In [958]:
m21.mse_resid

94.88241177721089

In [959]:
m23.mse_resid

746.6460430839002

In [948]:
# Variable 1 results
print(m21.summary())
print(m23.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.617
Model:                            OLS   Adj. R-squared:                  0.297
Method:                 Least Squares   F-statistic:                     5.401
Date:                Fri, 26 Apr 2024   Prob (F-statistic):             0.0317
Time:                        14:03:52   Log-Likelihood:                -26.384
No. Observations:                  12   AIC:                             64.77
Df Residuals:                       6   BIC:                             67.68
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     16.6500      0.950     17.526      0.0

In [960]:
m51.mse_resid

238.12896258503403

In [961]:
m53.mse_resid

854.5909977324262

In [949]:
# Variable 1 results
print(m51.summary())
print(m53.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.919
Model:                            OLS   Adj. R-squared:                  0.852
Method:                 Least Squares   F-statistic:                     19.46
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00120
Time:                        14:04:11   Log-Likelihood:                -20.435
No. Observations:                  12   AIC:                             52.87
Df Residuals:                       6   BIC:                             55.78
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     22.9000      1.000     22.900      0.0

In [962]:
m61.mse_resid

544.1518920068028

In [963]:
m63.mse_resid

2470.224348072562

In [950]:
# Variable 1 results
print(m61.summary())
print(m63.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.717
Model:                            OLS   Adj. R-squared:                  0.481
Method:                 Least Squares   F-statistic:                     34.10
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           0.000248
Time:                        14:04:13   Log-Likelihood:                -28.581
No. Observations:                  12   AIC:                             69.16
Df Residuals:                       6   BIC:                             72.07
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     10.6000      0.700     15.143      0.0

### 2. Health

In [973]:
# Run regressions

# var1
m11 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var1h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m12 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var1h, 'health', 'treat')).fit(cov_type='HC1')
m13 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var1h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var2
m21 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var2h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m22 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var2h, 'health', 'treat')).fit(cov_type='HC1')
m23 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var2h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var3
m31 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var3h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m32 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var3h, 'health', 'treat')).fit(cov_type='HC1')
m33 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var3h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var4
m41 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var4h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m42 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var4h, 'health', 'treat')).fit(cov_type='HC1')
m43 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var4h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var5
m51 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var5h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m52 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var5h, 'health', 'treat')).fit(cov_type='HC1')
m53 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var5h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var6
m61 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var6h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m62 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var6h, 'health', 'treat')).fit(cov_type='HC1')
m63 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var6h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var7
m71 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var7h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m72 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var7h, 'health', 'treat')).fit(cov_type='HC1')
m73 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var7h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var8
m81 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var8h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m82 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var8h, 'health', 'treat')).fit(cov_type='HC1')
m83 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var8h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var9
m91 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table1(var9h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m92 = ols('health ~ treat + t + treat*t', data=create_reg_table1(var9h, 'health', 'treat')).fit(cov_type='HC1')
m93 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table1(var9h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

In [980]:
var8h

'CNNUTSCWA2'

In [974]:
m11.mse_resid

42.62214865480917

In [975]:
m13.mse_resid

120.74527793524815

In [976]:
# Variable 1 results
print(m11.summary())
print(m13.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.930
Model:                            OLS   Adj. R-squared:                  0.871
Method:                 Least Squares   F-statistic:                     13.95
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00298
Time:                        14:33:07   Log-Likelihood:                -35.383
No. Observations:                  12   AIC:                             82.77
Df Residuals:                       6   BIC:                             85.67
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     56.8000      5.300     10.717      0.0

In [966]:
m21.mse_resid

94.88241177721089

In [967]:
m23.mse_resid

746.6460430839002

In [953]:
# Variable 1 results
print(m21.summary())
print(m23.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.925
Model:                            OLS   Adj. R-squared:                  0.862
Method:                 Least Squares   F-statistic:                     38.59
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           0.000174
Time:                        14:04:17   Log-Likelihood:                -40.184
No. Observations:                  12   AIC:                             92.37
Df Residuals:                       6   BIC:                             95.28
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     29.0000     12.000      2.417      0.0

In [968]:
m71.mse_resid

10.880975922382847

In [969]:
m73.mse_resid

6.977430032753838

In [954]:
# Variable 1 results
print(m71.summary())
print(m73.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.946
Model:                            OLS   Adj. R-squared:                  0.901
Method:                 Least Squares   F-statistic:                     218.0
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           1.07e-06
Time:                        14:04:18   Log-Likelihood:                -27.190
No. Observations:                  12   AIC:                             66.38
Df Residuals:                       6   BIC:                             69.29
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     39.1000      2.100     18.619      0.0

In [970]:
m81.mse_resid

9.110706307870371

In [971]:
m83.mse_resid

14.414911419753084

In [955]:
# Variable 1 results
print(m81.summary())
print(m83.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.782
Model:                            OLS   Adj. R-squared:                  0.601
Method:                 Least Squares   F-statistic:                     269.3
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           5.70e-07
Time:                        14:04:19   Log-Likelihood:                -26.125
No. Observations:                  12   AIC:                             64.25
Df Residuals:                       6   BIC:                             67.16
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     18.9000      0.100    189.000      0.0

---

In [983]:
var1h, var2h, var7h, var8h

('CHVACCCBAS', 'CMECMRCCMR', 'CNNUTSCHA2', 'CNNUTSCWA2')

## Parallel Trends

In [996]:
# Define function for creabting the regression table
def create_reg_table(var, new_var, x1, x2=None):
    restricted_prov_outcome.dropna(inplace=True)
    # Pre-treatment data
    if (x2 != None):
        cols = [var + '_1998', x1, x2]
    else:
        cols = [var + '_1998', x1]
    df_before = restricted_prov_outcome[cols]
    df_before['t'] = 0
    df_before.rename({var + '_1998': new_var}, axis=1, inplace=True)

    # Post-treatment data
    if (x2 != None):
        cols = [var + '_2003', x1, x2]
    else:
        cols = [var + '_2003', x1]
    df_after = restricted_prov_outcome[cols]
    df_after['t'] = 1
    df_after.rename({var + '_2003': new_var}, axis=1, inplace=True)

    # Merged data
    df_reg = pd.concat([df_before, df_after])
    
    return df_reg

In [1003]:
# Run regressions

# var1
m11 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var1e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m12 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var1e, 'educ', 'treat')).fit(cov_type='HC1')
m13 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var1e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var2
m21 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var2e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m22 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var2e, 'educ', 'treat')).fit(cov_type='HC1')
m23 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var2e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var3
m31 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var3e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m32 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var3e, 'educ', 'treat')).fit(cov_type='HC1')
m33 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var3e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var4
m41 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var4e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m42 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var4e, 'educ', 'treat')).fit(cov_type='HC1')
m43 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var4e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var5
m51 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var5e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m52 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var5e, 'educ', 'treat')).fit(cov_type='HC1')
m53 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var5e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var6
m61 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var6e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m62 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var6e, 'educ', 'treat')).fit(cov_type='HC1')
m63 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var6e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var7
m71 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var7e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m72 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var7e, 'educ', 'treat')).fit(cov_type='HC1')
m73 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var7e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

#var8
m81 = ols('educ ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var8e, 'educ', 'num_med', 'num_high')).fit(cov_type='HC1')
m82 = ols('educ ~ treat + t + treat*t', data=create_reg_table(var8e, 'educ', 'treat')).fit(cov_type='HC1')
m83 = ols('educ ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var8e, 'educ', 'cov_med', 'cov_high')).fit(cov_type='HC1')

In [1007]:
m11.mse_resid, m13.mse_resid

(26.954166666666662, 37.11111111111111)

In [1004]:
print(m11.summary())
print(m13.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.498
Model:                            OLS   Adj. R-squared:                  0.080
Method:                 Least Squares   F-statistic:                     9.098
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00906
Time:                        17:01:20   Log-Likelihood:                -32.633
No. Observations:                  12   AIC:                             77.27
Df Residuals:                       6   BIC:                             80.18
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     26.7000      1.000     26.700      0.0

In [1008]:
m21.mse_resid, m23.mse_resid

(9.996666666666671, 7.073055555555555)

In [1005]:
print(m21.summary())
print(m23.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.756
Model:                            OLS   Adj. R-squared:                  0.553
Method:                 Least Squares   F-statistic:                     7.403
Date:                Fri, 26 Apr 2024   Prob (F-statistic):             0.0151
Time:                        17:01:22   Log-Likelihood:                -26.682
No. Observations:                  12   AIC:                             65.36
Df Residuals:                       6   BIC:                             68.27
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     23.1000      2.200     10.500      0.0

In [1010]:
m51.mse_resid, m53.mse_resid

(6.538333333333333, 41.996388888888895)

In [1001]:
print(m51.summary())
print(m53.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.877
Model:                            OLS   Adj. R-squared:                  0.774
Method:                 Least Squares   F-statistic:                     25.20
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           0.000585
Time:                        17:01:09   Log-Likelihood:                -24.134
No. Observations:                  12   AIC:                             60.27
Df Residuals:                       6   BIC:                             63.18
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     21.7500      0.050    435.000      0.0

In [1012]:
m61.mse_resid, m63.mse_resid

(9.299166666666666, 17.20305555555556)

In [1002]:
print(m61.summary())
print(m63.summary())

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.662
Model:                            OLS   Adj. R-squared:                  0.380
Method:                 Least Squares   F-statistic:                     27.68
Date:                Fri, 26 Apr 2024   Prob (F-statistic):           0.000449
Time:                        17:01:13   Log-Likelihood:                -26.248
No. Observations:                  12   AIC:                             64.50
Df Residuals:                       6   BIC:                             67.41
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     12.8000      0.600     21.333      0.0

In [986]:
# Run regressions

# var1
m11 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var1h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m12 = ols('health ~ treat + t + treat*t', data=create_reg_table(var1h, 'health', 'treat')).fit(cov_type='HC1')
m13 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var1h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var2
m21 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var2h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m22 = ols('health ~ treat + t + treat*t', data=create_reg_table(var2h, 'health', 'treat')).fit(cov_type='HC1')
m23 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var2h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var3
m31 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var3h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m32 = ols('health ~ treat + t + treat*t', data=create_reg_table(var3h, 'health', 'treat')).fit(cov_type='HC1')
m33 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var3h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var4
m41 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var4h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m42 = ols('health ~ treat + t + treat*t', data=create_reg_table(var4h, 'health', 'treat')).fit(cov_type='HC1')
m43 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var4h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var5
m51 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var5h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m52 = ols('health ~ treat + t + treat*t', data=create_reg_table(var5h, 'health', 'treat')).fit(cov_type='HC1')
m53 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var5h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var6
m61 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var6h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m62 = ols('health ~ treat + t + treat*t', data=create_reg_table(var6h, 'health', 'treat')).fit(cov_type='HC1')
m63 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var6h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var7
m71 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var7h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m72 = ols('health ~ treat + t + treat*t', data=create_reg_table(var7h, 'health', 'treat')).fit(cov_type='HC1')
m73 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var7h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var8
m81 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var8h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m82 = ols('health ~ treat + t + treat*t', data=create_reg_table(var8h, 'health', 'treat')).fit(cov_type='HC1')
m83 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var8h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

# var9
m91 = ols('health ~ num_med + num_high + t + num_med*t + num_high*t', data=create_reg_table(var9h, 'health', 'num_med', 'num_high')).fit(cov_type='HC1')
m92 = ols('health ~ treat + t + treat*t', data=create_reg_table(var9h, 'health', 'treat')).fit(cov_type='HC1')
m93 = ols('health ~ cov_med + cov_high + t + cov_med*t + cov_high*t', data=create_reg_table(var9h, 'health', 'cov_med', 'cov_high')).fit(cov_type='HC1')

In [922]:
m11.mse_resid

38.135

In [923]:
m13.mse_resid

208.92222222222222

In [988]:
# Variable 1 results
print(m11.summary())
print(m13.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.866
Model:                            OLS   Adj. R-squared:                  0.754
Method:                 Least Squares   F-statistic:                     12.29
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00417
Time:                        16:56:19   Log-Likelihood:                -34.715
No. Observations:                  12   AIC:                             81.43
Df Residuals:                       6   BIC:                             84.34
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     67.3000      0.400    168.250      0.0

In [891]:
# Variable 1 results
print(m21.summary())
print(m23.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.891
Model:                            OLS   Adj. R-squared:                  0.801
Method:                 Least Squares   F-statistic:                     15.76
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00214
Time:                        12:32:24   Log-Likelihood:                -42.705
No. Observations:                  12   AIC:                             97.41
Df Residuals:                       6   BIC:                             100.3
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     23.0000      5.000      4.600      0.0

In [938]:
m53.mse_resid

1507.5277777777776

In [892]:
# Variable 1 results
print(m51.summary())
print(m53.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.681
Model:                            OLS   Adj. R-squared:                  0.415
Method:                 Least Squares   F-statistic:                     2.146
Date:                Fri, 26 Apr 2024   Prob (F-statistic):              0.190
Time:                        12:32:24   Log-Likelihood:                -50.606
No. Observations:                  12   AIC:                             113.2
Df Residuals:                       6   BIC:                             116.1
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     32.0000     10.000      3.200      0.0

In [939]:
m61.mse_resid

1250.25

In [940]:
m63.mse_resid

4861.5277777777765

In [893]:
# Variable 1 results
print(m61.summary())
print(m63.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.767
Model:                            OLS   Adj. R-squared:                  0.572
Method:                 Least Squares   F-statistic:                     3.094
Date:                Fri, 26 Apr 2024   Prob (F-statistic):              0.101
Time:                        12:32:24   Log-Likelihood:                -55.655
No. Observations:                  12   AIC:                             123.3
Df Residuals:                       6   BIC:                             126.2
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     82.0000     14.000      5.857      0.0

In [894]:
# Variable 1 results
print(m11.summary())
print(m13.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.866
Model:                            OLS   Adj. R-squared:                  0.754
Method:                 Least Squares   F-statistic:                     12.29
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00417
Time:                        12:32:24   Log-Likelihood:                -34.715
No. Observations:                  12   AIC:                             81.43
Df Residuals:                       6   BIC:                             84.34
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     67.3000      0.400    168.250      0.0

In [895]:
# Variable 1 results
print(m21.summary())
print(m23.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.891
Model:                            OLS   Adj. R-squared:                  0.801
Method:                 Least Squares   F-statistic:                     15.76
Date:                Fri, 26 Apr 2024   Prob (F-statistic):            0.00214
Time:                        12:32:24   Log-Likelihood:                -42.705
No. Observations:                  12   AIC:                             97.41
Df Residuals:                       6   BIC:                             100.3
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     23.0000      5.000      4.600      0.0

In [896]:
# Variable 1 results
print(m71.summary())
print(m73.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.223
Model:                            OLS   Adj. R-squared:                 -0.425
Method:                 Least Squares   F-statistic:                     1.083
Date:                Fri, 26 Apr 2024   Prob (F-statistic):              0.454
Time:                        12:32:24   Log-Likelihood:                -30.816
No. Observations:                  12   AIC:                             73.63
Df Residuals:                       6   BIC:                             76.54
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     39.7500      3.150     12.619      0.0

In [987]:
# Variable 1 results
print(m81.summary())
print(m83.summary())

                            OLS Regression Results                            
Dep. Variable:                 health   R-squared:                       0.412
Model:                            OLS   Adj. R-squared:                 -0.078
Method:                 Least Squares   F-statistic:                     2.295
Date:                Fri, 26 Apr 2024   Prob (F-statistic):              0.170
Time:                        16:55:50   Log-Likelihood:                -29.537
No. Observations:                  12   AIC:                             71.07
Df Residuals:                       6   BIC:                             73.98
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     20.6000      1.300     15.846      0.0

----