# In [9]:
import altair as alt
import pandas as pd

# Build an interactive "Age vs. Capital Gain" scatter plot from
# adult_clean.csv, filterable by education and income through two dropdown
# widgets, and write it to a standalone HTML file.
df = pd.read_csv("adult_clean.csv")

# Force both plotted columns to numeric; values that cannot be parsed become
# NaN (errors='coerce') instead of raising.
df['age'] = pd.to_numeric(df['age'], errors='coerce')
df['capital-gain'] = pd.to_numeric(df['capital-gain'], errors='coerce')

# Dropdown choices: an 'All' sentinel (meaning "do not filter") followed by
# the distinct non-null values actually present in each column. Deriving both
# lists from the data keeps every choice in step with the values the filter
# expressions below compare against.
edu_options = ['All'] + sorted(df['education'].dropna().unique().tolist())
income_options = ['All'] + sorted(df['income'].dropna().unique().tolist())

# Parameters bound to <select> widgets; both start on 'All'.
education_param = alt.param(
    name='education_param',
    value='All',
    bind=alt.binding_select(options=edu_options, name='Education')
)

income_param = alt.param(
    name='income_param',
    value='All',
    bind=alt.binding_select(options=income_options, name='Income')
)


# Each filter keeps a row when it matches the selected value, or keeps every
# row while the corresponding dropdown is still on 'All'.
scatter = alt.Chart(df).mark_circle(size=60).encode(
    x=alt.X('age:Q', title='Age'),
    y=alt.Y('capital-gain:Q', title='Capital Gain'),
    tooltip=['age', 'capital-gain', 'education', 'income']
).add_params(
    education_param,
    income_param
).transform_filter(
    (alt.datum.education == education_param) | (education_param == 'All')
).transform_filter(
    (alt.datum.income == income_param) | (income_param == 'All')
).properties(
    title='Interactive Scatter Plot: Age vs. Capital Gain',
    width=1000,
    height=600
)

# Saving embeds the whole DataFrame in the chart spec, and Altair's default
# data transformer raises MaxRowsError above 5000 rows. Lift the limit only
# for the duration of the save; the previous transformer settings are
# restored when the `with` block exits.
with alt.data_transformers.disable_max_rows():
    scatter.save('visualization-4.html')
