# In [23]:
import pandas as pd
import altair as alt

# Select Altair's 'default' renderer.
alt.renderers.enable('default')

# Load the data and label every row: income text containing '<=50K' becomes
# "Low Income"; anything else becomes "High Income".
df = pd.read_csv("adult_clean.csv")
df['income_group'] = df['income'].apply(
    lambda label: 'High Income' if '<=50K' not in label else 'Low Income'
)

# Number of rows for every gender / income group / race combination.
grouped = (
    df.groupby(['gender', 'income_group', 'race'])
    .size()
    .reset_index(name='count')
)

# Number of rows per gender / income group, joined back onto every race row
# as the `total` column.
total_counts = (
    grouped.groupby(['gender', 'income_group'])['count']
    .sum()
    .reset_index(name='total')
)
grouped = grouped.merge(total_counts, on=['gender', 'income_group'])

# `percentage` is each race's share of its gender / income group, stored as a
# fraction (the tooltip formats it as a percent); `group` is the bar label.
grouped = grouped.assign(
    percentage=lambda table: table['count'] / table['total'],
    group=lambda table: table['gender'] + " - " + table['income_group'],
)

# Explicit ordering of the bars along the y axis.
sort_order = [
    'Female - Low Income',
    'Male - Low Income',
    'Female - High Income',
    'Male - High Income',
]

# Dropdown choices: "All" first, then every race present in the aggregated data.
race_options = ["All", *grouped['race'].unique()]

# Parameter bound to a select widget; it starts on "All".
race_param = alt.param(
    name="RaceSelection",
    value="All",
    bind=alt.binding_select(options=race_options, name='Race: '),
)

# Horizontal bars: one per gender / income group, colored by race.
chart = alt.Chart(grouped).mark_bar()
chart = chart.encode(
    y=alt.Y('group:N', title='Gender and Income', sort=sort_order),
    x=alt.X('sum(count):Q', title='Count'),
    color=alt.Color(
        'race:N',
        title='Race',
        scale=alt.Scale(scheme='category10'),
    ),
    tooltip=[
        alt.Tooltip('group:N', title='Group'),
        alt.Tooltip('sum(count):Q', title='Count'),
        alt.Tooltip('max(total):Q', title='Total Count'),
        alt.Tooltip('sum(percentage):Q', title='Percentage', format='.2%'),
    ],
)
chart = chart.add_params(race_param)

# Keep only the selected race, or every race while the dropdown is on "All".
chart = chart.transform_filter(
    "datum.race == RaceSelection || RaceSelection == 'All'"
)
chart = chart.properties(
    title=dict(
        text="Income by Gender and Race (Low vs. High Income Groups)",
        subtitle="Low Income - <= $50,000 | High Income - > $50,000",
    ),
    width=1100,
    height=600,
)

# Write the interactive chart to an HTML file.
chart.save('visualization-2-updated.html')
