In [1]:
import pandas as pd
import numpy as np

### About the Dataset
This dataset provides health insurance coverage data for each state and the nation as a whole, including variables such as the uninsured rates before and after the Affordable Care Act, estimates of individuals covered by employer and marketplace healthcare plans, and enrollment in Medicare and Medicaid programs.

In [2]:
insure_data_file = pd.read_csv("Data/health_insurance_coverage.csv")
insure_data_file.head(10)

Unnamed: 0,State,Uninsured Rate (2010),Uninsured Rate (2015),Uninsured Rate Change (2010-2015),Health Insurance Coverage Change (2010-2015),Employer Health Insurance Coverage (2015),Marketplace Health Insurance Coverage (2016),Marketplace Tax Credits (2016),Average Monthly Tax Credit (2016),State Medicaid Expansion (2016),Medicaid Enrollment (2013),Medicaid Enrollment (2016),Medicaid Enrollment Change (2013-2016),Medicare Enrollment (2016)
0,Alabama,14.6%,10.1%,-4.5%,215000,2545000,165534,152206,$310,False,799176.0,910775,111599.0,989855
1,Alaska,19.9%,14.9%,-5%,36000,390000,17995,16205,$750,True,122334.0,166625,44291.0,88966
2,Arizona,16.9%,10.8%,-6.1%,410000,3288000,179445,124346,$230,True,1201770.0,1716198,514428.0,1175624
3,Arkansas,17.5%,9.5%,-8%,234000,1365000,63357,56843,$306,True,556851.0,920194,363343.0,606146
4,California,18.5%,8.6%,-9.9%,3826000,19552000,1415428,1239893,$309,True,7755381.0,11843081,4087700.0,5829777
5,Colorado,15.9%,8.1%,-7.8%,419000,2949000,108311,67062,$318,True,783420.0,1375264,591844.0,820234
6,Connecticut,9.1%,6%,-3.1%,110000,2148000,102917,80759,$357,True,,761137,,644136
7,Delaware,9.7%,5.9%,-3.8%,35000,564000,25379,21467,$330,True,223324.0,236702,13378.0,186835
8,District of Columbia,7.6%,3.8%,-3.8%,25000,388000,17666,1224,$183,True,235786.0,255491,19705.0,90492
9,Florida,21.3%,13.3%,-8%,1597000,8847000,1531714,1428712,$305,False,3104996.0,3644673,539677.0,4149593


In [3]:
#make copy of original file before cleaning

insure_data = insure_data_file.copy()

In [4]:
# check size of data

insure_data.shape

(52, 14)

In [5]:
insure_data.columns

Index(['State', 'Uninsured Rate (2010)', 'Uninsured Rate (2015)',
       'Uninsured Rate Change (2010-2015)',
       'Health Insurance Coverage Change (2010-2015)',
       'Employer Health Insurance Coverage (2015)',
       'Marketplace Health Insurance Coverage (2016)',
       'Marketplace Tax Credits (2016)', 'Average Monthly Tax Credit (2016)',
       'State Medicaid Expansion (2016)', 'Medicaid Enrollment (2013)',
       'Medicaid Enrollment (2016)', 'Medicaid Enrollment Change (2013-2016)',
       'Medicare Enrollment (2016)'],
      dtype='object')

In [6]:
#check for nulls
insure_data.isnull().sum()


State                                           0
Uninsured Rate (2010)                           0
Uninsured Rate (2015)                           0
Uninsured Rate Change (2010-2015)               0
Health Insurance Coverage Change (2010-2015)    0
Employer Health Insurance Coverage (2015)       0
Marketplace Health Insurance Coverage (2016)    0
Marketplace Tax Credits (2016)                  0
Average Monthly Tax Credit (2016)               0
State Medicaid Expansion (2016)                 1
Medicaid Enrollment (2013)                      2
Medicaid Enrollment (2016)                      0
Medicaid Enrollment Change (2013-2016)          2
Medicare Enrollment (2016)                      0
dtype: int64

In [7]:
#display rows with null values

insure_data[pd.isnull(insure_data['Medicaid Enrollment (2013)'])]

Unnamed: 0,State,Uninsured Rate (2010),Uninsured Rate (2015),Uninsured Rate Change (2010-2015),Health Insurance Coverage Change (2010-2015),Employer Health Insurance Coverage (2015),Marketplace Health Insurance Coverage (2016),Marketplace Tax Credits (2016),Average Monthly Tax Credit (2016),State Medicaid Expansion (2016),Medicaid Enrollment (2013),Medicaid Enrollment (2016),Medicaid Enrollment Change (2013-2016),Medicare Enrollment (2016)
6,Connecticut,9.1%,6%,-3.1%,110000,2148000,102917,80759,$357,True,,761137,,644136
19,Maine,10.1%,8.4%,-1.7%,22000,702000,75240,63896,$342,False,,273160,,315160


In [8]:
#update Nan columns for Maine and Connecticut using data found on the state websites 
# and various healthcare sites. See explanation and citation in Readme file.

#update Maine
insure_data.at[19, 'Medicaid Enrollment (2013)'] = 286803

insure_data.at[19, 'Medicaid Enrollment Change (2013-2016)' ] = 13643

In [9]:
#update Connecticut

insure_data.at[6, 'Medicaid Enrollment (2013)'] = 634518
insure_data.at[6, 'Medicaid Enrollment Change (2013-2016)' ] = 126619


In [10]:
insure_data[pd.isnull(insure_data['Medicaid Enrollment (2013)'])]

Unnamed: 0,State,Uninsured Rate (2010),Uninsured Rate (2015),Uninsured Rate Change (2010-2015),Health Insurance Coverage Change (2010-2015),Employer Health Insurance Coverage (2015),Marketplace Health Insurance Coverage (2016),Marketplace Tax Credits (2016),Average Monthly Tax Credit (2016),State Medicaid Expansion (2016),Medicaid Enrollment (2013),Medicaid Enrollment (2016),Medicaid Enrollment Change (2013-2016),Medicare Enrollment (2016)


In [11]:
insure_data.head()

Unnamed: 0,State,Uninsured Rate (2010),Uninsured Rate (2015),Uninsured Rate Change (2010-2015),Health Insurance Coverage Change (2010-2015),Employer Health Insurance Coverage (2015),Marketplace Health Insurance Coverage (2016),Marketplace Tax Credits (2016),Average Monthly Tax Credit (2016),State Medicaid Expansion (2016),Medicaid Enrollment (2013),Medicaid Enrollment (2016),Medicaid Enrollment Change (2013-2016),Medicare Enrollment (2016)
0,Alabama,14.6%,10.1%,-4.5%,215000,2545000,165534,152206,$310,False,799176.0,910775,111599.0,989855
1,Alaska,19.9%,14.9%,-5%,36000,390000,17995,16205,$750,True,122334.0,166625,44291.0,88966
2,Arizona,16.9%,10.8%,-6.1%,410000,3288000,179445,124346,$230,True,1201770.0,1716198,514428.0,1175624
3,Arkansas,17.5%,9.5%,-8%,234000,1365000,63357,56843,$306,True,556851.0,920194,363343.0,606146
4,California,18.5%,8.6%,-9.9%,3826000,19552000,1415428,1239893,$309,True,7755381.0,11843081,4087700.0,5829777


In [12]:
insure_data.iloc[19]

State                                            Maine 
Uninsured Rate (2010)                             10.1%
Uninsured Rate (2015)                              8.4%
Uninsured Rate Change (2010-2015)                -1.7% 
Health Insurance Coverage Change (2010-2015)      22000
Employer Health Insurance Coverage (2015)        702000
Marketplace Health Insurance Coverage (2016)      75240
Marketplace Tax Credits (2016)                    63896
Average Monthly Tax Credit (2016)                 $342 
State Medicaid Expansion (2016)                   False
Medicaid Enrollment (2013)                       286803
Medicaid Enrollment (2016)                       273160
Medicaid Enrollment Change (2013-2016)            13643
Medicare Enrollment (2016)                       315160
Name: 19, dtype: object

In [13]:
# extract only needed columns

insure_data = insure_data[['State', 'Uninsured Rate (2010)', 'Uninsured Rate (2015)',
       'Uninsured Rate Change (2010-2015)',
       'Health Insurance Coverage Change (2010-2015)',
       'Employer Health Insurance Coverage (2015)',
       'Marketplace Health Insurance Coverage (2016)',
       'State Medicaid Expansion (2016)', 'Medicaid Enrollment (2013)',
       'Medicaid Enrollment (2016)', 'Medicaid Enrollment Change (2013-2016)']]

In [14]:
insure_data.head()

Unnamed: 0,State,Uninsured Rate (2010),Uninsured Rate (2015),Uninsured Rate Change (2010-2015),Health Insurance Coverage Change (2010-2015),Employer Health Insurance Coverage (2015),Marketplace Health Insurance Coverage (2016),State Medicaid Expansion (2016),Medicaid Enrollment (2013),Medicaid Enrollment (2016),Medicaid Enrollment Change (2013-2016)
0,Alabama,14.6%,10.1%,-4.5%,215000,2545000,165534,False,799176.0,910775,111599.0
1,Alaska,19.9%,14.9%,-5%,36000,390000,17995,True,122334.0,166625,44291.0
2,Arizona,16.9%,10.8%,-6.1%,410000,3288000,179445,True,1201770.0,1716198,514428.0
3,Arkansas,17.5%,9.5%,-8%,234000,1365000,63357,True,556851.0,920194,363343.0
4,California,18.5%,8.6%,-9.9%,3826000,19552000,1415428,True,7755381.0,11843081,4087700.0


In [15]:
insure_data.to_csv("Data/health_cov_clean.csv", index = False)