# EOPS Cleanup

The EOPS files were taken from the California Community Colleges Chancellor's Office online [Data Mart](https://datamart.cccco.edu/Services/EOPS_CARE_Status.aspx). The server throws an error when a user attempts to download a file containing multiple reporting years, so each year was requested separately. The per-year files are joined and denormalized here.

In [1]:
import pandas as pd

In [2]:
years = range(2012, 2024)

# Program labels as they appear in the export files, mapped to the column
# names used in the cleaned output.
program_columns = {
    "EOPS participant": "eops_headcount",
    "EOPS and CARE participant": "eops_care_headcount",
}

# Collect one cleaned frame per year and concatenate once after the loop;
# growing a DataFrame with concat on every iteration re-copies all prior rows.
yearly_frames = []

for year in years:
    df = pd.read_csv(f"EOPSSumm{year}.csv", skiprows=3, header=None)

    df = df.drop(columns=[2])  # This column is empty
    df.columns = ["college", "program", "headcount"]

    # Clean up the formatting of the CSV's: rows that carry a college name are
    # labelled "Total"; the name is then forward-filled onto the rows below it
    # and the literal " Total" text is removed from the name.
    df.loc[df["college"].notna(), "program"] = "Total"
    df["college"] = df["college"].ffill().str.replace(" Total", "", regex=False)

    # Keep only the rows that contain substantive information. The explicit
    # copy makes the filtered frame independent of the original, so the
    # column assignment below does not raise SettingWithCopyWarning.
    df = df[df["program"].isin(list(program_columns))].copy()

    # Strip thousands separators before converting. Casting to str first keeps
    # this working even if read_csv already parsed the column as numeric, in
    # which case the .str accessor would otherwise raise.
    df["headcount"] = pd.to_numeric(
        df["headcount"].astype(str).str.replace(",", "", regex=False),
        errors="coerce",
    )

    # Pivot to denormalize for easier analysis: one row per college, one
    # column per program. pivot_table aggregates with the mean by default, so
    # the headcounts come out as floats.
    df = (
        df.pivot_table(index="college", columns="program", values="headcount")
        .reset_index()
        .rename_axis(None, axis="columns")
        .rename(columns=program_columns)
        .assign(year=int(year))
    )

    yearly_frames.append(
        df[["year", "college", "eops_headcount", "eops_care_headcount"]]
    )

eops = pd.concat(yearly_frames, ignore_index=True)

In [3]:
# Order the rows by reporting year, then by college name, and preview the top.
eops = eops.sort_values(by=["year", "college"])
eops.head()

Unnamed: 0,year,college,eops_headcount,eops_care_headcount
0,2012,Alameda,628.0,23.0
1,2012,Allan Hancock,846.0,187.0
2,2012,American River,897.0,109.0
3,2012,Antelope Valley,528.0,103.0
4,2012,Bakersfield,635.0,154.0


In [4]:
# Write the combined table to the processed-data folder, leaving out the
# DataFrame index so only the four data columns are saved.
output_path = "../../ml/q2_persistence_after_transfer/processed_data/cc_eops.csv"
eops.to_csv(output_path, index=False)