# Import Required Libraries

In [None]:
from IPython.display import Markdown, display
import matplotlib.pyplot as plt  # type: ignore
import pandas as pd  # type: ignore
import config

# Load the active members data

In [None]:
# Read the active-members CSV from the path held in config.ACTIVE_MEMBERS_EXTRA_INTERIM.
# Every later cell derives its tables and charts from this DataFrame.
active_members_extra_interim_df = pd.read_csv(config.ACTIVE_MEMBERS_EXTRA_INTERIM)
# Trailing expression: previews the first rows (presumably shown as the notebook cell output).
active_members_extra_interim_df.head()

# Gender Demographic Table

In [None]:
def totalMemberCount(df):
    """Return the sum of the ``MEMBER_COUNT`` column of ``df``.

    Args:
        df: DataFrame that contains a ``MEMBER_COUNT`` column.

    Returns:
        The column total as produced by ``Series.sum()``.
    """
    member_counts = df.loc[:, "MEMBER_COUNT"]
    return member_counts.sum()


def createGenderData(df):
    """Summarise the membership by gender.

    Args:
        df: Member-level DataFrame with ``GENDER`` and ``AME_PRIMARY_KEY`` columns.
            It is only read, never modified.

    Returns:
        A new DataFrame with one row per distinct ``GENDER`` value and the columns
        ``GENDER``, ``MEMBER_COUNT`` (number of non-null ``AME_PRIMARY_KEY`` values
        in that group) and ``PERCENTAGE`` (that count as a share, 0-100, of all
        counted members).
    """
    # count() tallies non-null keys per gender; reset_index(name=...) turns the
    # grouped Series into a two-column frame in a single step.
    keys_per_gender = df.groupby("GENDER")["AME_PRIMARY_KEY"].count()
    gender_data_df = keys_per_gender.reset_index(name="MEMBER_COUNT")

    # Share of each gender relative to the overall counted membership
    overall_count = totalMemberCount(gender_data_df)
    share_of_total = gender_data_df["MEMBER_COUNT"] / overall_count
    gender_data_df["PERCENTAGE"] = share_of_total * 100
    return gender_data_df


def renderGenderData(df):
    """Display the gender summary as a Markdown table.

    Args:
        df: DataFrame with ``GENDER``, ``MEMBER_COUNT`` and ``PERCENTAGE`` columns,
            one row per gender.

    The table has the columns Gender, Member Count and Percentage, followed by a
    bold total row. The total percentage is always printed as 100.0%.
    """
    table_lines = [
        "| Gender | Member Count | Percentage |",
        "|--------|--------------|------------|",
    ]
    # One table row per gender, percentage rounded to one decimal place
    for _, record in df.iterrows():
        table_lines.append(
            f"| {record['GENDER']} | {record['MEMBER_COUNT']} | {record['PERCENTAGE']:.1f}% |"
        )
    table_lines.append(f"| **Total** | **{totalMemberCount(df)}** | **100.0%** |")
    display(Markdown("\n".join(table_lines)))


# Build the per-gender summary once; the chart cell below reuses gender_data_df.
gender_data_df = createGenderData(active_members_extra_interim_df)
# Show the summary as a Markdown table.
renderGenderData(gender_data_df)

# For TextWrangler
# NOTE(review): the purpose of the "TextWrangler" remark is not evident from this file;
# the trailing expression previews the summary frame (presumably as the cell output).
gender_data_df.head()

# Gender Demographic Chart

In [None]:
# Slice colour for each gender shown in the pie chart
colors = {"Male": config.GENDER_BLUE, "Female": config.GENDER_PINK}

# Member counts keyed by gender, taken from the summary table built above
gender_demographic_df = gender_data_df.set_index("GENDER")["MEMBER_COUNT"]

# One colour per slice, in the same order as the slices; slice labels are left
# blank so that only the percentages are printed on the chart
slice_colors = [colors[gender] for gender in gender_demographic_df.index]
blank_labels = ["" for _ in range(len(gender_data_df))]

# Draw the pie with a one-decimal percentage on every slice
gender_demographic_df.plot(
    kind="pie",
    labels=blank_labels,
    colors=slice_colors,
    autopct="%1.1f%%",
)

# Title directly above the pie, and no axis decorations
plt.title("Club Member Gender Distribution", pad=0)
plt.axis("off")

# Write the image to disk before showing the figure
plt.savefig(config.CLUB_MEMBER_GENDER_DISTRIBUTION)
plt.show()

gender_demographic_df.head()

# Age & Gender Demographic Table

In [None]:
def createAgeAndGenderData(df, as_of=None):
    """Build a member-level frame holding each member's age in completed years.

    Args:
        df: Member-level DataFrame with ``AME_PRIMARY_KEY``, ``DATE_OF_BIRTH`` and
            ``GENDER`` columns. It is not modified.
        as_of: Optional reference date (anything ``pd.Timestamp`` accepts) on which
            ages are evaluated. Defaults to the current date, which keeps the
            original call ``createAgeAndGenderData(df)`` working unchanged.

    Returns:
        A new DataFrame with the columns ``AME_PRIMARY_KEY``, ``DATE_OF_BIRTH``,
        ``AGE`` and ``GENDER`` (in that order). ``AGE`` is an integer; members with
        a missing date of birth get ``AGE`` 0.
    """
    # Subset first, then copy, so the result is an independent frame
    age_gender_data_df = df[["AME_PRIMARY_KEY", "DATE_OF_BIRTH", "GENDER"]].copy()

    reference_date = pd.Timestamp("now") if as_of is None else pd.Timestamp(as_of)
    birth_dates = pd.to_datetime(age_gender_data_df["DATE_OF_BIRTH"])

    # A plain year difference overstates the age of everyone whose birthday is
    # still to come in the reference year, so subtract one year for those members.
    # Missing birth dates compare as False here and are zero-filled below.
    birthday_pending = (birth_dates.dt.month > reference_date.month) | (
        (birth_dates.dt.month == reference_date.month)
        & (birth_dates.dt.day > reference_date.day)
    )
    ages = (
        (reference_date.year - birth_dates.dt.year - birthday_pending.astype(int))
        .fillna(0)
        .astype(int)
    )

    # Place AGE directly after DATE_OF_BIRTH
    age_gender_data_df.insert(age_gender_data_df.columns.get_loc("DATE_OF_BIRTH") + 1, "AGE", ages)
    return age_gender_data_df


def addAgeBinsToData(df):
    """Insert an ``AGE_BUCKET`` categorical column right after ``AGE``.

    Args:
        df: DataFrame with a numeric ``AGE`` column. It is modified in place.

    Returns:
        The same ``df`` object, now with an ``AGE_BUCKET`` column whose categories
        are the labelled age ranges plus ``"Unknown"``. Ages outside 18-100
        (including the AGE 0 that createAgeAndGenderData assigns to a missing
        date of birth) are placed in ``"Unknown"``.
    """
    labels = ["18-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91-100"]
    # Left-closed intervals [18, 31), [31, 41), ..., [91, 101): each upper edge is
    # one past the last age named in its label, so 30 falls in "18-30", 40 in
    # "31-40" and 100 in "91-100", exactly as the labels read.
    bins = [18, 31, 41, 51, 61, 71, 81, 91, 101]
    age_buckets = pd.cut(df["AGE"], bins=bins, labels=labels, right=False)

    # Ages that fall outside every interval come back as NaN; report them as 'Unknown'
    age_buckets = age_buckets.cat.add_categories("Unknown").fillna("Unknown")

    df.insert(df.columns.get_loc("AGE") + 1, "AGE_BUCKET", age_buckets)
    return df


def groupByAgeBinsAndGender(df):
    """Count members per age bucket and gender.

    Args:
        df: Member-level DataFrame with ``AGE_BUCKET`` and ``GENDER`` columns.

    Returns:
        A DataFrame indexed by ``AGE_BUCKET`` with one column per ``GENDER`` value,
        holding integer member counts (0 where a combination has no members).
    """
    # observed=False asks groupby to keep categorical buckets that have no members
    counts = df.groupby(["AGE_BUCKET", "GENDER"], observed=False).size()
    # Fill missing combinations during the unstack itself so the counts stay
    # integers instead of being upcast to float by an intermediate NaN
    return counts.unstack(fill_value=0)


def addTotalColumn(df):
    """Add a ``Total`` column holding the sum of each row.

    Args:
        df: DataFrame of numeric columns. It is modified in place.

    Returns:
        The same ``df`` object with the ``Total`` column set.
    """
    # Sum across the columns of every row (all columns present at call time)
    row_totals = df.sum(axis="columns")
    df["Total"] = row_totals
    return df


def getAgeAndGenderData(df):
    """Run the full age-and-gender pipeline on the member-level data.

    Args:
        df: Member-level DataFrame accepted by ``createAgeAndGenderData``.

    Returns:
        The table produced by applying, in order, ``createAgeAndGenderData``,
        ``addAgeBinsToData``, ``groupByAgeBinsAndGender`` and ``addTotalColumn``.
    """
    pipeline = (
        createAgeAndGenderData,
        addAgeBinsToData,
        groupByAgeBinsAndGender,
        addTotalColumn,
    )
    # Feed the output of each step into the next one
    result = df
    for step in pipeline:
        result = step(result)
    return result


def renderAgeAndGenderData(df):
    """Display the age-and-gender table as Markdown.

    Args:
        df: DataFrame indexed by age bucket with ``Female``, ``Male`` and ``Total``
            columns.

    The table has the columns Age Bucket, Female, Male and Total; a final bold
    row shows the sum of each column.
    """
    table_lines = [
        "| Age Bucket | Female | Male | Total |",
        "|------------|--------|------|-------|",
    ]
    # One table row per age bucket
    table_lines.extend(
        f"| {bucket} | {counts['Female']} | {counts['Male']} | {counts['Total']} |"
        for bucket, counts in df.iterrows()
    )

    # Column sums for the closing row
    female_sum = df["Female"].sum()
    male_sum = df["Male"].sum()
    overall_sum = df["Total"].sum()
    table_lines.append(f"| **Total** | **{female_sum}** | **{male_sum}** | **{overall_sum}** |")

    display(Markdown("\n".join(table_lines)))


# Build the age-bucket x gender count table once; the chart cell below reuses it.
age_gender_data_df = getAgeAndGenderData(active_members_extra_interim_df)
# Show the table, including the column totals, as Markdown.
renderAgeAndGenderData(age_gender_data_df)

# Trailing expression: previews the first rows (presumably shown as the notebook cell output).
age_gender_data_df.head()

# Age & Gender Demographic Chart

In [None]:
# Select the Male and Female count columns from age_gender_data_df; the age
# bucket stays as the index. The column order fixes the stacking order and must
# match the order of the colours passed to plot() below.
age_gender_series_df = age_gender_data_df[["Male", "Female"]]

# Plot the age and gender distribution as stacked bars. The chart carries no
# legend, so it is switched off here directly instead of being created and
# then hidden again.
age_gender_series_df.plot(
    kind="bar", stacked=True, color=[config.GENDER_BLUE, config.GENDER_PINK], legend=False
)
plt.title("Club Member Demographics")
plt.xlabel("Age Bucket")
plt.ylabel("Number of Members")

# Save and show the plot (save first, then show the figure)
plt.savefig(config.CLUB_MEMBER_AGE_GENDER_DISTRIBUTION)
plt.show()