In [1]:
import pandas as pd

In [2]:
DATA_PATH = "../../data/output/ma_enrollment_service_area_2018.csv"

df = pd.read_csv(DATA_PATH)

In [3]:
df.shape
df.head()

Unnamed: 0,CONTRACT_ID,PLAN_ID,COUNTY_CODE,ENROLLMENT,PLAN_TYPE,SNP_PLAN,EGHP_FLAG
0,H0022,1,39023.0,598605591636638589595587622584578558,Medicare-Medicaid Plan HMO/HMOPOS,No,No
1,H0022,1,39035.0,358635493610382838293593358636283657370136733596,Medicare-Medicaid Plan HMO/HMOPOS,No,No
2,H0022,1,39051.0,117119113126125112112115126107111107,Medicare-Medicaid Plan HMO/HMOPOS,No,No
3,H0022,1,39055.0,777572848468777580807680,Medicare-Medicaid Plan HMO/HMOPOS,No,No
4,H0022,1,39057.0,573569553605618563567560601548557539,Medicare-Medicaid Plan HMO/HMOPOS,No,No


In [28]:
df.columns = (
    df.columns
    .str.strip()
    .str.upper()
    .str.replace(" ", "_")
)

In [29]:
df["ENROLLMENT"] = (
    df["ENROLLMENT"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

ValueError: could not convert string to float: '<NA>'

In [11]:
# Question 1: Provide a table of the count of plans under each plan type.
table1 = (
    df
    .drop_duplicates(subset=["CONTRACT_ID", "PLAN_ID"])
    .groupby("PLAN_TYPE")
    .size()
    .reset_index(name="Plan Count")
)

table1

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,101
1,HMO/HMOPOS,2678
2,Local PPO,966
3,MSA,5
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,50
7,Regional PPO,109


In [24]:
# Question 2: Remove SNPs, EGHPs, and 800-series plans
df_filtered = df[
    (df["SNP_PLAN"] != "Yes") &
    (df["EGHP_FLAG"] != "Yes") &
    (df["PLAN_ID"] < 800)
]

# Updated version of Table 1
table2 = (
    df_filtered
    .drop_duplicates(subset=["CONTRACT_ID", "PLAN_ID"])
    .groupby("PLAN_TYPE")
    .size()
    .reset_index(name="Plan Count")
)

table2

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,89
1,HMO/HMOPOS,1569
2,Local PPO,569
3,MSA,3
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,46
7,Regional PPO,49


In [30]:
# Question 3: Provide average enrollment for each plan type
plan_county = (
    df_filtered
    .groupby(
        ["CONTRACT_ID", "PLAN_ID", "PLAN_TYPE", "COUNTY_CODE"],
        as_index=False
    )
    .agg({"ENROLLMENT": "sum"})
)
df_filtered["ENROLLMENT"] = (
    df_filtered["ENROLLMENT"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

avg_enrollment = (
    df_filtered
    .groupby("PLAN_TYPE", as_index=False)["ENROLLMENT"]
    .mean()
)

avg_enrollment

ValueError: could not convert string to float: '<NA>'

In [14]:
# Final Check
df["ENROLLMENT"].dtype
df_filtered["ENROLLMENT"].dtype
avg_enrollment

Unnamed: 0,PLAN_TYPE,Average Enrollment
0,1876 Cost,4881866850218260830857112250567811742920705543...
1,HMO/HMOPOS,5410265372356737737831556094702555799127102862...
2,Local PPO,1926264977466579615624313130903439792247356287...
3,MSA,15288615830459968608580826539491328.0
4,Medicare-Medicaid Plan HMO/HMOPOS,1535081741250190291128494197910295017916914109...
5,National PACE,1953150537412878915253817304159695682158359571...
6,PFFS,230110930396173841734708618772943219000868864.0
7,Regional PPO,5822421556980382226054213322312811456956799405...


In [16]:
table1

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,101
1,HMO/HMOPOS,2678
2,Local PPO,966
3,MSA,5
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,50
7,Regional PPO,109


In [17]:
table1.to_markdown(index=False)


'| PLAN_TYPE                         |   Plan Count |\n|:----------------------------------|-------------:|\n| 1876 Cost                         |          101 |\n| HMO/HMOPOS                        |         2678 |\n| Local PPO                         |          966 |\n| MSA                               |            5 |\n| Medicare-Medicaid Plan HMO/HMOPOS |           54 |\n| National PACE                     |          254 |\n| PFFS                              |           50 |\n| Regional PPO                      |          109 |'

In [18]:
table2

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,89
1,HMO/HMOPOS,1569
2,Local PPO,569
3,MSA,3
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,46
7,Regional PPO,49


In [19]:
table2.to_markdown(index=False)

'| PLAN_TYPE                         |   Plan Count |\n|:----------------------------------|-------------:|\n| 1876 Cost                         |           89 |\n| HMO/HMOPOS                        |         1569 |\n| Local PPO                         |          569 |\n| MSA                               |            3 |\n| Medicare-Medicaid Plan HMO/HMOPOS |           54 |\n| National PACE                     |          254 |\n| PFFS                              |           46 |\n| Regional PPO                      |           49 |'

In [21]:
avg_enrollment

Unnamed: 0,PLAN_TYPE,Average Enrollment
0,1876 Cost,4881866850218260830857112250567811742920705543...
1,HMO/HMOPOS,5410265372356737737831556094702555799127102862...
2,Local PPO,1926264977466579615624313130903439792247356287...
3,MSA,15288615830459968608580826539491328.0
4,Medicare-Medicaid Plan HMO/HMOPOS,1535081741250190291128494197910295017916914109...
5,National PACE,1953150537412878915253817304159695682158359571...
6,PFFS,230110930396173841734708618772943219000868864.0
7,Regional PPO,5822421556980382226054213322312811456956799405...


In [22]:
avg_enrollment.to_markdown(index=False)

'| PLAN_TYPE                         |   Average Enrollment |\n|:----------------------------------|---------------------:|\n| 1876 Cost                         |         4.88187e+55  |\n| HMO/HMOPOS                        |         5.41027e+114 |\n| Local PPO                         |         1.92626e+55  |\n| MSA                               |         1.52886e+34  |\n| Medicare-Medicaid Plan HMO/HMOPOS |         1.53508e+93  |\n| National PACE                     |         1.95315e+68  |\n| PFFS                              |         2.30111e+44  |\n| Regional PPO                      |         5.82242e+55  |'