In [1]:
import pandas as pd
import altair as alt

# Read the cleaned rental dataset and shorten the two cost column names
# so they double as readable legend labels later on.
RENAMED_COLUMNS = {"Total Rent Avg": "Rent", "Utility Cost Avg": "Utilities"}
df = pd.read_csv("cleaned_rental_data.csv").rename(columns=RENAMED_COLUMNS)

# Reshape wide -> long: each row becomes one (Neighborhood, Cost Type, Cost)
# observation, which is the layout a stacked bar chart expects.
melted_df = pd.melt(
    df,
    id_vars=["Neighborhood"],
    value_vars=["Rent", "Utilities"],
    var_name="Cost Type",
    value_name="Cost",
)

# Mean cost for every neighborhood / cost-type pair; reset_index() turns the
# group keys back into ordinary columns.
avg_costs = (
    melted_df
    .groupby(["Neighborhood", "Cost Type"])["Cost"]
    .mean()
    .reset_index()
)

# Rank neighborhoods from lowest to highest combined average
# (Rent + Utilities). A missing average is counted as 0 so it does not
# turn the total into NaN.
cost_table = avg_costs.pivot(
    index="Neighborhood", columns="Cost Type", values="Cost"
).fillna(0)
cost_table["Total"] = cost_table["Rent"] + cost_table["Utilities"]
sorted_neighborhoods = cost_table.sort_values("Total").index.tolist()

# Two-tone blue palette: lighter blue for Rent, darker blue for Utilities.
# The explicit domain pins each cost type to its color regardless of data order.
color_scale = alt.Scale(domain=["Rent", "Utilities"], range=["#5a93f2", "#0a357f"])

# Stacked bar chart: one bar per neighborhood (ordered by sorted_neighborhoods),
# with the Rent and Utilities averages stacked from a zero baseline.
chart = alt.Chart(avg_costs).mark_bar().encode(
    x=alt.X("Neighborhood:N", sort=sorted_neighborhoods, title="Neighborhood"),
    y=alt.Y("Cost:Q", stack="zero", title="Average Monthly Housing Cost"),
    color=alt.Color("Cost Type:N", title="Cost Type", scale=color_scale),
    tooltip=[
        alt.Tooltip("Neighborhood:N"),
        alt.Tooltip("Cost Type:N"),
        # Cost is a computed mean, so round it for display instead of
        # showing the full unrounded float.
        alt.Tooltip("Cost:Q", format=",.2f"),
    ],
).properties(
    title="Average Rent and Utility Costs by Neighborhood",
    width=750,
    background="#FFFFFF",
).configure_axisX(
    # Tilt the neighborhood labels so long names do not overlap.
    labelAngle=-40
).configure_title(
    fontSize=18,
    anchor="start",
    color="black",
).configure_view(
    # Remove the default border drawn around the plotting area.
    stroke=None
)

# Last expression of the cell: renders the chart inline.
chart


In [2]:
# Export the chart to an HTML file (output format is taken from the
# file extension).
OUTPUT_HTML = "rent_utilities_by_neighborhood.html"
chart.save(OUTPUT_HTML)
