In [1]:
import pandas as pd

In [2]:
# Relative location of the 2018 MA enrollment / service-area CSV that this
# notebook analyses.
DATA_PATH = "../../data/output/ma_enrollment_service_area_2018.csv"

# Load the whole file into a single DataFrame with pandas' default parsing.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)

In [3]:
# Quick look at the loaded frame.
# Only the last expression of a cell is rendered, so the shape is printed
# explicitly; head() stays last so it keeps the rich table display.
print(df.shape)
df.head()

Unnamed: 0,CONTRACT_ID,PLAN_ID,COUNTY_CODE,ENROLLMENT,PLAN_TYPE,SNP_PLAN,EGHP_FLAG
0,H0022,1,39023.0,598605591636638589595587622584578558,Medicare-Medicaid Plan HMO/HMOPOS,No,No
1,H0022,1,39035.0,358635493610382838293593358636283657370136733596,Medicare-Medicaid Plan HMO/HMOPOS,No,No
2,H0022,1,39051.0,117119113126125112112115126107111107,Medicare-Medicaid Plan HMO/HMOPOS,No,No
3,H0022,1,39055.0,777572848468777580807680,Medicare-Medicaid Plan HMO/HMOPOS,No,No
4,H0022,1,39057.0,573569553605618563567560601548557539,Medicare-Medicaid Plan HMO/HMOPOS,No,No


In [10]:
# Convert ENROLLMENT to a numeric column.
# "*" entries are mapped to missing first, the column is cast to pandas'
# string dtype, and errors="coerce" turns anything that still fails to parse
# into a missing value instead of raising.
#
# NOTE(review): the ENROLLMENT values printed by df.head() look like several
# counts concatenated into one digit string (e.g. "777572848468777580807680"),
# so the numbers parsed here may not be real enrollment counts — TODO confirm
# against the step that writes this CSV.
df["ENROLLMENT"] = pd.to_numeric(
    df["ENROLLMENT"].replace("*", pd.NA).astype("string"),
    errors="coerce",
)

# Show the resulting dtype as the cell output.
df["ENROLLMENT"].dtype

Float64Dtype()

In [11]:
# Question 1: Provide a table of the count of plans under each plan type.
# A plan is treated as one distinct (CONTRACT_ID, PLAN_ID) pair, so the frame
# is first reduced to one row per pair (the first row seen is kept).
unique_plans = df.drop_duplicates(subset=["CONTRACT_ID", "PLAN_ID"])

# Count the remaining rows per PLAN_TYPE and expose the count as a column.
plans_per_type = unique_plans.groupby("PLAN_TYPE").size()
table1 = plans_per_type.rename("Plan Count").reset_index()

table1

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,101
1,HMO/HMOPOS,2678
2,Local PPO,966
3,MSA,5
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,50
7,Regional PPO,109


In [12]:
# Question 2: Remove SNPs, EGHPs, and 800-series plans
# Each mask is True for rows to KEEP: flag not equal to "Yes", and a PLAN_ID
# strictly below 800.
not_snp = df["SNP_PLAN"].ne("Yes")
not_eghp = df["EGHP_FLAG"].ne("Yes")
below_800_series = df["PLAN_ID"].lt(800)

df_filtered = df.loc[not_snp & not_eghp & below_800_series]

# Updated version of Table 1: one row per (CONTRACT_ID, PLAN_ID) pair in the
# filtered frame, counted per PLAN_TYPE.
unique_plans_filtered = df_filtered.drop_duplicates(subset=["CONTRACT_ID", "PLAN_ID"])
table2 = (
    unique_plans_filtered
    .groupby("PLAN_TYPE")
    .size()
    .rename("Plan Count")
    .reset_index()
)

table2

Unnamed: 0,PLAN_TYPE,Plan Count
0,1876 Cost,89
1,HMO/HMOPOS,1569
2,Local PPO,569
3,MSA,3
4,Medicare-Medicaid Plan HMO/HMOPOS,54
5,National PACE,254
6,PFFS,46
7,Regional PPO,49


In [13]:
# Question 3: Provide average enrollment for each plan type
# Step 1: total ENROLLMENT per (contract, plan, plan type, county) combination
# in the filtered frame.
# NOTE(review): COUNTY_CODE prints as a float (e.g. 39023.0); if it contains
# NaN, groupby's default dropna=True leaves those rows out of plan_county —
# confirm that is intended.
plan_county_keys = ["CONTRACT_ID", "PLAN_ID", "PLAN_TYPE", "COUNTY_CODE"]
plan_county = df_filtered.groupby(plan_county_keys, as_index=False)["ENROLLMENT"].sum()

# Step 2: mean of those per-combination totals within each plan type, stored
# under the column name "Average Enrollment".
# NOTE(review): the displayed averages are dozens of digits long, which is not
# a plausible enrollment figure — check the ENROLLMENT values in the input
# before relying on this table.
avg_enrollment = (
    plan_county
    .groupby("PLAN_TYPE", as_index=False)
    .agg(**{"Average Enrollment": ("ENROLLMENT", "mean")})
)

avg_enrollment

Unnamed: 0,PLAN_TYPE,Average Enrollment
0,1876 Cost,4881866850218260830857112250567811742920705543...
1,HMO/HMOPOS,5410265372356737737831556094702555799127102862...
2,Local PPO,1926264977466579615624313130903439792247356287...
3,MSA,15288615830459968608580826539491328.0
4,Medicare-Medicaid Plan HMO/HMOPOS,1535081741250190291128494197910295017916914109...
5,National PACE,1953150537412878915253817304159695682158359571...
6,PFFS,230110930396173841734708618772943219000868864.0
7,Regional PPO,5822421556980382226054213322312811456956799405...


In [14]:
# Final Check
# Only a cell's last expression is rendered, so the two dtypes are printed
# explicitly; without print() they would be evaluated and silently discarded.
print(df["ENROLLMENT"].dtype)
print(df_filtered["ENROLLMENT"].dtype)

# The summary table stays as the final expression so it is displayed as a
# rich table.
avg_enrollment

Unnamed: 0,PLAN_TYPE,Average Enrollment
0,1876 Cost,4881866850218260830857112250567811742920705543...
1,HMO/HMOPOS,5410265372356737737831556094702555799127102862...
2,Local PPO,1926264977466579615624313130903439792247356287...
3,MSA,15288615830459968608580826539491328.0
4,Medicare-Medicaid Plan HMO/HMOPOS,1535081741250190291128494197910295017916914109...
5,National PACE,1953150537412878915253817304159695682158359571...
6,PFFS,230110930396173841734708618772943219000868864.0
7,Regional PPO,5822421556980382226054213322312811456956799405...
