# Active Members Extra Data

In [None]:
# Import the shared config
import config
import pandas as pd  # type: ignore

# Read the raw data: load the CSV referenced by config.ACTIVE_MEMBERS_EXTRA_RAW
# into a dataframe using pandas' default parsing options.
active_members_extra_raw_df = pd.read_csv(config.ACTIVE_MEMBERS_EXTRA_RAW)
# Uncomment to preview the first rows of the raw data:
# active_members_extra_raw_df.head()

In [None]:
def clean_data(df):
    """Return a cleaned copy of the raw active-members-extra dataframe.

    The input is never modified. The cleaning steps are:

    1. Uppercase every column name.
    2. Drop columns that contain no values at all.
    3. Parse the known date / datetime columns into datetime64 columns.
       Values that do not match the expected format become ``NaT``.

    Args:
        df: Raw dataframe; column names may be in any letter case.

    Returns:
        A new dataframe with uppercased column names, all-empty columns
        removed, and the date columns that are still present converted to
        datetime dtype.
    """
    # Don't work on the original dataframe
    df_copy = df.copy()

    # Uppercase column names
    df_copy.columns = df_copy.columns.str.upper()

    # Drop columns with no values
    df_copy = df_copy.dropna(axis=1, how="all")

    # Expected text format for each column that has to become a datetime
    date_format = "%Y-%m-%d"  # 2024-12-31
    date_time_format = "%Y-%m-%d %I:%M:%S %p"  # 2024-12-31 11:59:59 PM
    column_formats = {
        "DATE_JOINED": date_format,
        "DATE_EXPIRED": date_format,
        "LAST_RENEWAL_DATE": date_format,
        "DATE_OF_BIRTH": date_format,
        "DATE_MODIFIED": date_format,
        "LAST_LOGIN_DATETIME": date_time_format,
    }

    for col, fmt in column_formats.items():
        # A column that was completely empty has just been dropped above
        # (or may be missing from the export); there is nothing to convert.
        if col not in df_copy.columns:
            continue
        # Replace the whole column instead of writing through .loc[:, col]:
        # .loc sets the values into the existing (object) column in place,
        # which leaves the dtype unchanged instead of becoming datetime64.
        df_copy[col] = pd.to_datetime(df_copy[col], format=fmt, errors="coerce")

    return df_copy


# clean_data() copies its argument before changing anything, so the raw
# dataframe can be passed directly without an extra defensive copy.
active_members_extra_clean_df = clean_data(active_members_extra_raw_df)
# Preview the first rows of the cleaned data
active_members_extra_clean_df.head()

In [None]:
# Persist the interim data: write the cleaned dataframe as CSV to the location
# given by config.ACTIVE_MEMBERS_EXTRA_INTERIM. index=False leaves the
# dataframe index out of the file.
active_members_extra_clean_df.to_csv(config.ACTIVE_MEMBERS_EXTRA_INTERIM, index=False)