In [8]:
import pandas as pd
import numpy as np
import us

In [9]:
def state_to_fips(state_name_or_abbreviation):
    """
    Converts a US state name or abbreviation to its FIPS code.

    Args:
        state_name_or_abbreviation: The name or abbreviation of the state.

    Returns:
        The FIPS code of the state as a string, or None if not found.
    """
    state = us.states.lookup(state_name_or_abbreviation)
    if state:
        return state.fips
    else:
        return None

### Enrollment Data Cleaning

In [10]:
def get_enrollments_for_year_xlsx(year: int):
    """
    Creates a dataframe containing the total number of individuals enrolled in SNAP
    statewide for the given year.  
    Args:
        year (int): an int from 20-24
    """
    file_path_xlsx = f"../data/state_snap_enrollments/original_data/FY{year}.xlsx"

    # Load the Excel file
    xls_xlsx = pd.ExcelFile(file_path_xlsx)

    total_state_enrollments = pd.DataFrame()

    for sheet in xls_xlsx.sheet_names:
        if sheet != "US Summary":
            df = pd.read_excel(file_path_xlsx, sheet_name=sheet)
            
            totals_indices = df[df.columns[0]] == "Total"
            totals = df[totals_indices][df.columns[2]].values

            state_indices = np.roll(totals_indices, -13)
            states = df[state_indices][df.columns[0]].values
            
            sheet_df = pd.DataFrame()
            sheet_df["State"] = states
            sheet_df["Enrolled Individuals"] = totals
            
            total_state_enrollments = pd.concat([total_state_enrollments, sheet_df])

    total_state_enrollments["Year"] = 2000 + year

    total_state_enrollments["State"] = total_state_enrollments["State"].apply(state_to_fips)
    total_state_enrollments.dropna(inplace = True)

    return total_state_enrollments


def get_enrollments_for_year_xls(year: int):
    """
    Creates a dataframe containing the total number of individuals enrolled in SNAP
    statewide for the given year.  
    Args:
        year (int): an int from 20-24
    """
    if year < 10:
        year_str = '0' + str(year)
    else:
        year_str = str(year)
    file_path_xls = f"../data/state_snap_enrollments/original_data/FY{year_str}.xls"

    # Load the Excel file
    xls_xls = pd.ExcelFile(file_path_xls)

    total_state_enrollments = pd.DataFrame()

    for sheet in xls_xls.sheet_names:
        if sheet != "US Summary":
            df = pd.read_excel(file_path_xls, sheet_name=sheet)
            #print(df)
            
            totals_indices = df[df.columns[0]] == "Total"
            totals = df[totals_indices][df.columns[2]].values

            state_indices = np.roll(totals_indices, -13)
            states = df[state_indices][df.columns[0]].values
            
            sheet_df = pd.DataFrame()
            sheet_df["State"] = states
            sheet_df["Enrolled Individuals"] = totals
            
            total_state_enrollments = pd.concat([total_state_enrollments, sheet_df])

    total_state_enrollments["Year"] = 2000 + year

    total_state_enrollments["State"] = total_state_enrollments["State"].apply(state_to_fips)
    total_state_enrollments.dropna(inplace = True)

    return total_state_enrollments

In [11]:
total_enrollments = pd.DataFrame()

for year in [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]:
    year_enrollments = get_enrollments_for_year_xls(year)
    total_enrollments = pd.concat([total_enrollments, year_enrollments])
for year in [20, 21, 22, 23, 24]:
    year_enrollments = get_enrollments_for_year_xlsx(year)
    total_enrollments = pd.concat([total_enrollments, year_enrollments])

In [23]:
total_enrollments = total_enrollments[~(total_enrollments['Enrolled Individuals'] == '--')]

In [24]:
total_enrollments['Enrolled Individuals'] = total_enrollments['Enrolled Individuals'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_enrollments['Enrolled Individuals'] = total_enrollments['Enrolled Individuals'].astype(float)


In [25]:
total_enrollments.to_csv("../data/state_snap_enrollments/cleaned_enrollments/2005-2024.csv")