In [18]:
import altair as alt
import pandas as pd

In [19]:
# Load the housing data and down-sample it so Altair can plot it.
# Altair has a default maximum of 5000 rows per chart and raises MaxRowsError
# beyond that; the full file (about 10k entries) is therefore reduced to at
# most 3000 rows.
housing = pd.read_csv("housing-subset.csv")
# Cap n at the number of rows actually loaded: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling is without
# replacement). random_state is fixed so the same subset is drawn on each run.
housing = housing.sample(n=min(3000, len(housing)), random_state=42)

In [20]:
## SCATTERPLOT WITH RADIO BUTTON
## Lets users pick an ocean_proximity category: points in the selected
## category stay fully opaque, all other points are dimmed.

# Categories present in the sampled data, in order of first appearance.
region_options = housing["ocean_proximity"].unique().tolist()

# Point selection keyed on ocean_proximity and driven by radio buttons.
# A radio group cannot be un-checked once an option has been clicked, so a
# leading None option (labelled "All") is offered: choosing it empties the
# selection, which brings every point back to full opacity.
region_selection = alt.selection_point(
    fields=["ocean_proximity"],
    bind=alt.binding_radio(
        options=[None] + region_options,
        labels=["All"] + region_options,
        name="Select Region",
    ),
    name="highlight_region",
)

scatter = alt.Chart(housing).mark_circle(size=50).encode(
    x=alt.X("median_income", title="Median Income"),
    y=alt.Y("median_house_value", title="Median House Value"),
    color=alt.Color("ocean_proximity", legend=alt.Legend(title="Ocean Proximity")),
    tooltip=["ocean_proximity", "median_house_value"],
    # Points matching the selection are drawn at opacity 1, all others at 0.2.
    opacity=alt.condition(
        region_selection,
        alt.value(1),
        alt.value(0.2)
    )
).add_params(
    region_selection
).properties(
    title="Scatterplot of Median Income vs. Median House Value by Ocean Proximity"
)

# Bare last expression so the notebook renders the chart.
scatter