In [1]:
import altair as alt

def custom_altair_theme():
    return {
        "config": {
            "view": {
                "stroke": "transparent"
            },
            "background": "#f9f9f9",  
            "title": {
                "fontSize": 20,
                "anchor": "start",
                "color": "#104e8b"  
            },
            "axis": {
                "labelFontSize": 12,
                "titleFontSize": 14,
                "titleColor": "#104e8b",
                "labelColor": "#555",
                "gridColor": "#ddd",
                "gridDash": [2, 2],  
                "tickColor": "#bbb"
            },
            "legend": {
                "labelFontSize": 12,
                "titleFontSize": 14,
                "labelColor": "#104e8b",
                "titleColor": "#444"
            },
            "mark": {
                "color": "#104e8b",  
                "tooltip": True
            },
            "bar": {
                "fill": "#376b9e",  
                "stroke": "#104e8b",
                "strokeWidth": 0.5
            },
            "line": {
                "stroke": "#b22222",  
                "strokeWidth": 2
            },
            "point": {
                "filled": True,
                "fill": "#b22222",
                "stroke": "#d09090",
                "size": 50
            },
            "area": {
                "fill": "#e5b5b5",  
                "opacity": 0.5
            }
        }
    }

alt.themes.register("custom_theme", custom_altair_theme)
alt.themes.enable("custom_theme")


ThemeRegistry.enable('custom_theme')

# Interactive Design 1

In [2]:
import pandas as pd 
nces_public_data = pd.read_csv('data/NCES_public_clean.csv')
nces_private_data = pd.read_csv('data/NCES_private_clean.csv')
nces_public_data.head()

Unnamed: 0.1,Unnamed: 0,School Name,State Name [Public School] Latest available year,State Name [Public School] 2015-16,State Abbr [Public School] Latest available year,School Name [Public School] 2015-16,School ID - NCES Assigned [Public School] Latest available year,Agency Name [Public School] 2015-16,Agency ID - NCES Assigned [Public School] Latest available year,County Name [Public School] 2015-16,...,American Indian/Alaska Native Students [Public School] 2015-16,Asian or Asian/Pacific Islander Students [Public School] 2015-16,Hispanic Students [Public School] 2015-16,Black or African American Students [Public School] 2015-16,White Students [Public School] 2015-16,Nat. Hawaiian or Other Pacific Isl. Students [Public School] 2015-16,Two or More Races Students [Public School] 2015-16,Total Race/Ethnicity [Public School] 2015-16,Full-Time Equivalent (FTE) Teachers [Public School] 2015-16,Pupil/Teacher Ratio [Public School] 2015-16
0,0,1 LT CHARLES W. WHITCOMB SCHOOL,Massachusetts,MASSACHUSETTS,MA,1 LT Charles W. Whitcomb School,250732000000.0,Marlborough,2507320.0,Middlesex County,...,1,34,540,38,648,0,37,1298.0,112.82,11.51
1,1,10TH STREET SCHOOL,Washington,WASHINGTON,WA,10th Street School,530486000000.0,Marysville School District,5304860.0,Snohomish County,...,8,5,20,1,122,0,15,171.0,7.30,23.42
2,2,112 ALC INDEPENDENT STUDY,Minnesota,MINNESOTA,MN,112 ALC AFTER SCHOOL & SUMMER SCH,270819000000.0,EASTERN CARVER COUNTY PUBLIC SCHOOL,2708190.0,Carver County,...,0,1,10,0,22,0,1,34.0,–,–
3,3,112 ALC MIDDLE SCHOOL,Minnesota,MINNESOTA,MN,112 ALC MIDDLE SCHOOL,270819000000.0,EASTERN CARVER COUNTY PUBLIC SCHOOL,2708190.0,Carver County,...,0,5,32,8,82,0,3,130.0,2.70,48.15
4,4,12TH STREET ELEMENTARY,Michigan,MICHIGAN,MI,12th Street Elementary,262895000000.0,Portage Public Schools,2628950.0,Kalamazoo County,...,0,68,27,48,406,0,45,594.0,31.50,18.86


In [3]:
nces_private_data.head()

Unnamed: 0.1,Unnamed: 0,Private School Name,State Name [Private School] Latest available year,State Name [Private School] 2015-16,ANSI/FIPS State Code [Private School] Latest available year,Private School Name [Private School] 2015-16,School ID - NCES Assigned [Private School] Latest available year,County Name [Private School] 2015-16,Days per School Year [Private School] 2015-16,Length of School Day in Total Hours (Including reported minutes) [Private School] 2015-16,...,Black or African American Students [Private School] 2015-16,Percentage of Black Students [Private School] 2015-16,White Students [Private School] 2015-16,Percentage of White Students [Private School] 2015-16,Nat. Hawaiian or Other Pacific Isl. Students [Private School] 2015-16,Percentage of Nat. Hawaiian or Other Pacific Isl. Students [Private School] 2015-16,Two or More Races Students [Private School] 2015-16,Percentage of Two or More Races Students [Private School] 2015-16,Pupil/Teacher Ratio [Private School] 2015-16,Full-Time Equivalent (FTE) Teachers [Private School] 2015-16
0,0,'HMAN'SHAWA DAY SCHOOL,ARIZONA,ARIZONA,4.0,'HMAN'SHAWA DAY SCHOOL,A1500070,MARICOPA,180,7.5,...,–,–,–,–,–,–,–,–,23.0,1.0
1,1,123 YOU N ME PRESCHOOL,ILLINOIS,ILLINOIS,17.0,123 YOU N ME PRESCHOOL,A0103186,PEORIA,180,6.5,...,2,20.00,2,20.00,0,0.00,2,20.00,10.0,1.0
2,2,1408 S. GOLIAD ST.,TEXAS,TEXAS,48.0,HERITAGE CHRISTIAN ACADEMY,A9703331,ROCKWALL,175,8.17,...,14,4.03,300,86.46,0,0.00,4,1.15,9.61,36.1
3,3,1ST CEREBRAL PALSY OF NJ,NEW JERSEY,NEW JERSEY,34.0,1ST CEREBRAL PALSY OF NJ,02043767,ESSEX,180,6.0,...,21,36.84,4,7.02,0,0.00,0,0.00,4.75,12.0
4,4,1ST CLASS MONTESSORI,TENNESSEE,TENNESSEE,47.0,1ST CLASS MONTESSORI,A1102054,SHELBY,180,8.0,...,13,86.67,0,0.00,0,0.00,0,0.00,3.0,5.0


In [4]:
nyc_public_schools = nces_public_data[
    nces_public_data["County Name [Public School] 2015-16"].str.contains("New York|Kings|Queens|Bronx|Richmond", na=False, case=False)
].copy()
public_columns_to_convert = [
    "Total Race/Ethnicity [Public School] 2015-16",
    "Black or African American Students [Public School] 2015-16",
    "Hispanic Students [Public School] 2015-16",
    "White Students [Public School] 2015-16",
    "American Indian/Alaska Native Students [Public School] 2015-16",
    "Nat. Hawaiian or Other Pacific Isl. Students [Public School] 2015-16",
    "Asian or Asian/Pacific Islander Students [Public School] 2015-16",
    "Two or More Races Students [Public School] 2015-16",
]
nyc_public_schools[public_columns_to_convert] = nyc_public_schools[public_columns_to_convert].apply(pd.to_numeric, errors="coerce")
total_black_public = nyc_public_schools["Black or African American Students [Public School] 2015-16"].sum()
total_hispanic_public = nyc_public_schools["Hispanic Students [Public School] 2015-16"].sum()
total_white_public = nyc_public_schools["White Students [Public School] 2015-16"].sum()
total_asian_public = nyc_public_schools["Asian or Asian/Pacific Islander Students [Public School] 2015-16"].sum()
total_multiracial_public = nyc_public_schools["Two or More Races Students [Public School] 2015-16"].sum()
total_american_indian_public = nyc_public_schools["American Indian/Alaska Native Students [Public School] 2015-16"].sum()
total_hawaiian_public = nyc_public_schools["Nat. Hawaiian or Other Pacific Isl. Students [Public School] 2015-16"].sum()
total_students_public = nyc_public_schools["Total Race/Ethnicity [Public School] 2015-16"].sum()


nyc_private_schools = nces_private_data[
    nces_private_data["County Name [Private School] 2015-16"].str.contains("New York|Kings|Queens|Bronx|Richmond", na=False, case=False)
].copy()
private_columns_to_convert = [
    "Black or African American Students [Private School] 2015-16",
    "Hispanic Students [Private School] 2015-16",
    "White Students [Private School] 2015-16",
    "Asian or Asian/Pacific Islander Students [Private School] 2015-16",
    "American Indian/Alaska Native Students [Private School] 2015-16",
    "Nat. Hawaiian or Other Pacific Isl. Students [Private School] 2015-16",
    "Two or More Races Students [Private School] 2015-16",
]
nyc_private_schools[private_columns_to_convert] = nyc_private_schools[private_columns_to_convert].apply(pd.to_numeric, errors="coerce")
total_black_private = nyc_private_schools["Black or African American Students [Private School] 2015-16"].sum()
total_hispanic_private = nyc_private_schools["Hispanic Students [Private School] 2015-16"].sum()
total_white_private = nyc_private_schools["White Students [Private School] 2015-16"].sum()
total_asian_private = nyc_private_schools["Asian or Asian/Pacific Islander Students [Private School] 2015-16"].sum()
total_multiracial_private = nyc_private_schools["Two or More Races Students [Private School] 2015-16"].sum()
total_american_indian_private = nyc_private_schools["American Indian/Alaska Native Students [Private School] 2015-16"].sum()
total_hawaiian_private = nyc_private_schools["Nat. Hawaiian or Other Pacific Isl. Students [Private School] 2015-16"].sum()
total_students_private = nyc_private_schools[private_columns_to_convert].sum().sum()


black_public = (total_black_public / total_students_public) * 100
hispanic_public = (total_hispanic_public / total_students_public) * 100
white_public = (total_white_public / total_students_public) * 100
asian_public = (total_asian_public / total_students_public) * 100
multiracial_public = (total_multiracial_public / total_students_public) * 100
american_indian_public = (total_american_indian_public / total_students_public) * 100
hawaiian_public = (total_hawaiian_public / total_students_public) * 100

black_private = (total_black_private / total_students_private) * 100
hispanic_private = (total_hispanic_private / total_students_private) * 100
white_private = (total_white_private / total_students_private) * 100
asian_private = (total_asian_private / total_students_private) * 100
multiracial_private = (total_multiracial_private / total_students_private) * 100
american_indian_private = (total_american_indian_private / total_students_private) * 100
hawaiian_private = (total_hawaiian_private / total_students_private) * 100

race_distribution = pd.DataFrame({
    "School Type": ["Public", "Private"],
    
    # Percentage columns
    "Black (%)": [black_public, black_private],
    "Hispanic (%)": [hispanic_public, hispanic_private],
    "White (%)": [white_public, white_private],
    "Asian (%)": [asian_public, asian_private],
    "Multiracial (%)": [multiracial_public, multiracial_private],
    "Other (%)": [american_indian_public + hawaiian_public, american_indian_private + hawaiian_private],
    
    # Count columns
    "Black (Count)": [total_black_public, total_black_private],
    "Hispanic (Count)": [total_hispanic_public, total_hispanic_private],
    "White (Count)": [total_white_public, total_white_private],
    "Asian (Count)": [total_asian_public, total_asian_private],
    "Multiracial (Count)": [total_multiracial_public, total_multiracial_private],
    "Other (Count)": [total_american_indian_public + total_hawaiian_public,
                      total_american_indian_private + total_hawaiian_private],
})

race_distribution

Unnamed: 0,School Type,Black (%),Hispanic (%),White (%),Asian (%),Multiracial (%),Other (%),Black (Count),Hispanic (Count),White (Count),Asian (Count),Multiracial (Count),Other (Count)
0,Public,28.747224,39.311397,15.358263,14.376298,1.257006,0.949812,337075.0,460945.0,180083.0,168569.0,14739.0,11137.0
1,Private,12.533154,13.657719,64.484936,5.679912,3.383803,0.260476,21075.0,22966.0,108434.0,9551.0,5690.0,438.0


In [None]:
race_distribution2 = pd.DataFrame({
    "School Type": ["Public", "Private"],
    "Black (%)": [black_public, black_private],
    "Hispanic (%)": [hispanic_public, hispanic_private],
    "White (%)": [white_public, white_private],
    "Asian (%)": [asian_public, asian_private],
    "Multiracial (%)": [multiracial_public, multiracial_private],
    "Other (%)": [american_indian_public + hawaiian_public, american_indian_private + hawaiian_private],
    
})

race_distribution_melted = race_distribution2.melt(id_vars=["School Type"], var_name="Race", value_name="Percentage")
race_distribution_melted

Unnamed: 0,School Type,Race,Percentage
0,Public,Black (%),28.747224
1,Private,Black (%),12.533154
2,Public,Hispanic (%),39.311397
3,Private,Hispanic (%),13.657719
4,Public,White (%),15.358263
5,Private,White (%),64.484936
6,Public,Asian (%),14.376298
7,Private,Asian (%),5.679912
8,Public,Multiracial (%),1.257006
9,Private,Multiracial (%),3.383803


In [6]:
melted_percentage = race_distribution.melt(id_vars=["School Type"], 
                                           value_vars=["Black (%)", "Hispanic (%)", "White (%)", "Asian (%)", "Multiracial (%)", "Other (%)"],
                                           var_name="Race", value_name="Value")
melted_percentage["Measure"] = "Percentage"
melted_percentage["Race"] = melted_percentage["Race"].str.replace(" \\(\\%\\)", "", regex=True)

melted_count = race_distribution.melt(id_vars=["School Type"], 
                                      value_vars=["Black (Count)", "Hispanic (Count)", "White (Count)", "Asian (Count)", "Multiracial (Count)", "Other (Count)"],
                                      var_name="Race", value_name="Value")
melted_count["Measure"] = "Count"
melted_count["Race"] = melted_count["Race"].str.replace(" \\(Count\\)", "", regex=True)
race_distribution_long = pd.concat([melted_percentage, melted_count], ignore_index=True)
race_distribution_long["Year"] = "2015-16"

race_distribution_long

Unnamed: 0,School Type,Race,Value,Measure,Year
0,Public,Black,28.747224,Percentage,2015-16
1,Private,Black,12.533154,Percentage,2015-16
2,Public,Hispanic,39.311397,Percentage,2015-16
3,Private,Hispanic,13.657719,Percentage,2015-16
4,Public,White,15.358263,Percentage,2015-16
5,Private,White,64.484936,Percentage,2015-16
6,Public,Asian,14.376298,Percentage,2015-16
7,Private,Asian,5.679912,Percentage,2015-16
8,Public,Multiracial,1.257006,Percentage,2015-16
9,Private,Multiracial,3.383803,Percentage,2015-16


In [7]:
import altair as alt

color_scale = alt.Scale(
    domain=["Public","Private"],
    range=["#104E8B","#d89090"]
)

alt.Chart(race_distribution_melted).mark_bar().encode(
    x=alt.X("Race:N", title="Race", sort=["White", "Black", "Hispanic", "Asian", "Multiracial", "Other"],axis=alt.Axis(labelAngle=0)),
    xOffset="School Type:N", 
    y=alt.Y("Percentage:Q", title="Percentage of Students"),
    color=alt.Color("School Type:N", scale=color_scale, legend=alt.Legend(title="School Type")),
    tooltip=["School Type", "Race", alt.Tooltip("Percentage:Q", format=".1f")]
).properties(
    title="Racial Distribution in NYC Public vs Private Schools (2015-16)",
    width=500,
    height=400
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
    anchor="start"
)

In [8]:
toggle = alt.binding_radio(options=["Percentage", "Count"], name="Show: ")
toggle_selection = alt.param(name="MeasureSelector", bind=toggle, value="Percentage")

highlight = alt.selection_point(fields=["School Type"])

color_scale = alt.Scale(domain=["Public", "Private"], range=["#104E8B", "#d89090"])

chart = alt.Chart(race_distribution_long).mark_bar().encode(
    x=alt.X("Race:N", title="Race",
            sort=["White", "Black", "Hispanic", "Asian", "Multiracial", "Other"], axis=alt.Axis(labelAngle=0)),
    xOffset="School Type:N",
    y=alt.Y("Value:Q", title=None),
    color=alt.Color("School Type:N", scale=color_scale, legend=alt.Legend(title="School Type")),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.25)),
    tooltip=["Race", "School Type", "Measure", alt.Tooltip("Value:Q", format=",.0f")]
).add_params(
    toggle_selection,
    highlight,
).transform_filter(
    "datum.Measure === MeasureSelector"
).properties(
    title={
        "text": "Racial Distribution in NYC Public vs Private Schools (2015-16)",
        "subtitle": "Click to highlight | Toggle to switch Count vs Percentage",
        "subtitleFontSize": 12
    },
    width=650,
    height=400,
    padding={"left": 80, "right": 20, "top": 20, "bottom": 20}
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
)

chart

## Final Interactive Design 

In [9]:
toggle = alt.binding_radio(options=["Percentage", "Count"], name="Show: ")
toggle_selection = alt.param(name="MeasureSelector", bind=toggle, value="Percentage")

highlight = alt.selection_point(fields=["School Type"])

black_param = alt.param(name="BlackSelect", bind=alt.binding_checkbox(name="Black"), value=False)
hispanic_param = alt.param(name="HispanicSelect", bind=alt.binding_checkbox(name="Hispanic"), value=False)
asian_param = alt.param(name="AsianSelect", bind=alt.binding_checkbox(name="Asian"), value=False)
multi_param = alt.param(name="MultiSelect", bind=alt.binding_checkbox(name="Multiracial"), value=False)
other_param = alt.param(name="OtherSelect", bind=alt.binding_checkbox(name="Other"), value=False)


color_scale = alt.Scale(domain=["Public", "Private"], range=["#104E8B", "#d89090"])

base = alt.Chart(race_distribution_long).add_params(
    toggle_selection
).transform_filter("datum.Measure === MeasureSelector").transform_filter(
    "(datum.Race === 'White') || " +
    "(!BlackSelect && datum.Race === 'Black') || " +
    "(!HispanicSelect && datum.Race === 'Hispanic') || " +
    "(!AsianSelect && datum.Race === 'Asian') || " +
    "(!MultiSelect && datum.Race === 'Multiracial') || " +
    "(!OtherSelect && datum.Race === 'Other')"
).mark_bar().encode(
    x=alt.X("Race:N", title="Race", sort=["White", "Black", "Hispanic", "Asian", "Multiracial", "Other"], axis=alt.Axis(labelAngle=0)),
    xOffset="School Type:N",
    y=alt.Y("Value:Q", title=None),
    color=alt.Color("School Type:N", scale=color_scale, legend=alt.Legend(title="School Type")),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.25)),
    tooltip=["Race", "School Type", "Measure", alt.Tooltip("Value:Q", format=",.0f")]
)

combined = alt.Chart(race_distribution_long).add_params(toggle_selection).transform_filter(
    "datum.Measure === MeasureSelector").transform_filter(
    "(datum.Race !== 'White') && " +
    "( (BlackSelect && datum.Race === 'Black') || " +
    "(HispanicSelect && datum.Race === 'Hispanic') || " +
    "(AsianSelect && datum.Race === 'Asian') || " +
    "(MultiSelect && datum.Race === 'Multiracial') || " +
    "(OtherSelect && datum.Race === 'Other') )"
).transform_aggregate(
    aggregate=[{"op": "sum", "field": "Value", "as": "Value"}],
    groupby=["School Type", "Measure"]
).transform_calculate(
    Race='"Combined"'
).mark_bar().encode(
    x=alt.X("Race:N", title="Race", sort=["White", "Combined"], axis=alt.Axis(labelAngle=0)),
    xOffset="School Type:N",
    y=alt.Y("Value:Q"),
    color=alt.Color("School Type:N", scale=color_scale),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.25)),
    tooltip=["Race", "School Type", "Measure", alt.Tooltip("Value:Q", format=",.0f")]
)

final_chart = (base + combined).add_params(
    toggle_selection,
    highlight,
    black_param,
    hispanic_param,
    asian_param,
    multi_param,
    other_param
).resolve_scale(
    y='shared'
).properties(
    title={
        "text": "Racial Distribution in NYC Public vs Private Schools (2015-16)",
        "subtitle": "Click to highlight | Toggle to switch Count vs Percentage | Use checkboxes to customize groupings" ,
        "subtitleFontSize": 16
    },
    width=720,
    height=400,
    padding={"left": 80, "right": 20, "top": 20, "bottom": 20}
)

final_chart

# Interactive Design 2

In [10]:

nces_public_df = pd.read_csv("data/NCES_public_clean.csv")

segregation_columns = [
    "School Name", 
    "Black or African American Students [Public School] 2015-16", 
    "Hispanic Students [Public School] 2015-16", 
    "White Students [Public School] 2015-16", 
    "Total Students All Grades (Excludes AE) [Public School] 2015-16"
]
nces_public_df = nces_public_df.rename(columns={
    "Pupil/Teacher Ratio [Public School] 2015-16": "Pupil-Teacher Ratio",
    "Free and Reduced Lunch Students [Public School] 2015-16": "Low-Income Students",
    "Full-Time Equivalent (FTE) Teachers [Public School] 2015-16": "Full-Time Teachers"
})
fairness_columns = [
    "Pupil-Teacher Ratio",
    "Low-Income Students",
    "Full-Time Teachers"
]

for col in segregation_columns[1:] + fairness_columns:
    nces_public_df[col] = pd.to_numeric(nces_public_df[col], errors="coerce")


nces_public_df["Segregation Index"] = (
    nces_public_df["Black or African American Students [Public School] 2015-16"] + 
    nces_public_df["Hispanic Students [Public School] 2015-16"]
) / nces_public_df["Total Students All Grades (Excludes AE) [Public School] 2015-16"]


correlation_df = nces_public_df[["Segregation Index"] + fairness_columns].dropna()

correlation_matrix = correlation_df.corr()
correlation_matrix = correlation_matrix.reset_index().melt(id_vars="index")
correlation_matrix.columns = ["Variable 1", "Variable 2", "Correlation"]

correlation_matrix

Unnamed: 0,Variable 1,Variable 2,Correlation
0,Segregation Index,Segregation Index,1.0
1,Pupil-Teacher Ratio,Segregation Index,0.037286
2,Low-Income Students,Segregation Index,0.468758
3,Full-Time Teachers,Segregation Index,0.11518
4,Segregation Index,Pupil-Teacher Ratio,0.037286
5,Pupil-Teacher Ratio,Pupil-Teacher Ratio,1.0
6,Low-Income Students,Pupil-Teacher Ratio,0.043123
7,Full-Time Teachers,Pupil-Teacher Ratio,-0.027839
8,Segregation Index,Low-Income Students,0.468758
9,Pupil-Teacher Ratio,Low-Income Students,0.043123


In [11]:
heatmap = alt.Chart(correlation_matrix).mark_rect().encode(
    x=alt.X("Variable 1:N", title=None, sort=None, axis=alt.Axis(labelAngle=0)),
    y=alt.Y("Variable 2:N", title=None, sort=None),
    color=alt.Color("Correlation:Q", scale=alt.Scale(range=['#d7e1eb','#104e8b']), title="Correlation"),
    tooltip=["Variable 1", "Variable 2", "Correlation"]
).properties(
    title="Correlation of Segregation Index and Educational Fairness Indicators",
    width=600,
    height=400
)

text_labels = alt.Chart(correlation_matrix).mark_text(baseline='middle').encode(
    x=alt.X("Variable 1:N", sort=None),
    y=alt.Y("Variable 2:N", sort=None),
    text=alt.Text("Correlation:Q", format=".2f"),
    color=alt.condition(
        "datum.Correlation > 0.5", alt.value("white"), alt.value("black")
    )
)

heatmap + text_labels

In [None]:
var_dropdown = alt.binding_select(
    options=correlation_matrix["Variable 1"].unique().tolist(),
    name="Select variable: "
)
var_param = alt.param(bind=var_dropdown, name="SelectedVar", value="Segregation Index")
bar_chart = alt.Chart(correlation_matrix).transform_filter(
    "datum['Variable 1'] === SelectedVar"
).mark_bar().encode(
    y=alt.Y("Variable 2:N", sort="-x", title="Compared Variable"),
    x=alt.X("Correlation:Q", title="Correlation with Selected Variable"),
    color=alt.condition("datum.Correlation > 0", alt.value("#104E8B"), alt.value("#d89090")),
    tooltip=["Variable 1", "Variable 2", alt.Tooltip("Correlation:Q", format=".3f")]
).add_params(
    var_param
).properties(
    width=500,
    height=250
)

bar_chart

## Final interactive design

In [13]:
dropdown = alt.binding_select(
    options=sorted(correlation_matrix["Variable 1"].unique()),
    name="Select Variable: "
)
focus = alt.param(name="focus", bind=dropdown, value="Segregation Index")
bar_chart = alt.Chart(correlation_matrix).transform_filter(
    "datum['Variable 1'] === focus && datum['Variable 1'] !== datum['Variable 2']"
).mark_bar().encode(
    x=alt.X("Correlation:Q", scale=alt.Scale(domain=[-1, 1])),
    y=alt.Y("Variable 2:N", sort="-x", title=""),
    color=alt.condition("datum.Correlation > 0", alt.value("#104e8b"), alt.value("#d89090")),
    tooltip=["Variable 1", "Variable 2", alt.Tooltip("Correlation:Q", format=".2f")]
).add_params(
    focus
).properties(
    width=400,
    height=300
)

threshold = alt.param(
    bind=alt.binding_range(min=0, max=1, step=0.05, name="Min |Correlation|: "),
    value=0
)

filtered_heatmap = alt.Chart(correlation_matrix).transform_filter(
    abs(alt.datum.Correlation) >= threshold
).mark_rect().encode(
   x=alt.X("Variable 1:N", title=None, sort=None, axis=alt.Axis(labelAngle=30)),
    y=alt.Y("Variable 2:N", title=None, sort=None),
    color=alt.Color("Correlation:Q", scale=alt.Scale(domain=[-1,1],range=['#d89090','#e5b5b5','#ffffff','#afc3d8','#104e8b']), title="Correlation"),
    tooltip=["Variable 1", "Variable 2", alt.Tooltip("Correlation:Q", format=".2f")]
).add_params(
    threshold
).properties(
    width=300,
    height=300
)
text = alt.Chart(correlation_matrix).mark_text(baseline='middle').encode(
    x="Variable 1:N",
    y="Variable 2:N",
    text=alt.Text("Correlation:Q", format=".2f"),
    color=alt.condition(abs(alt.datum.Correlation) < threshold, alt.value("white"), alt.value("black"))
)

heatmap_chart = (filtered_heatmap + text)
final_chart2 = alt.hconcat(
    heatmap_chart,
    bar_chart
).resolve_scale(color='independent')

caption_text = pd.DataFrame({
    "text": [
        "Explanation: This chart uses the Pearson correlation coefficient to show how strongly two variables are related. The value ranges from -1 to +1:",
        "Posiive values indicate a positive linear relationship(both increase together),while negative values indicate a negative linear relationship(one increases while the other decreases).",
        "0 means no linear relationship. The closer the value is to ±1, the stronger the relationship.",
        "For example, here the correlation cofficient between Segregation Index and Low-Income Students is 0.47, indicating a moderate positive relationship."
    ],
    "line": [1, 2, 3, 4, ]
})

caption = alt.Chart(caption_text).mark_text(
    align='left',
    fontSize=12,
    dx=5
).encode(
    x=alt.value(0),
    y=alt.Y("line:O", axis=None),
    text="text:N"
).properties(
    width=700,
    height=80
)
final_chart_with_caption = alt.vconcat(
    final_chart2,
    caption
).configure_title(
    fontSize=20,
    subtitleFontSize=16
).configure_axis(labelFontSize=12).properties(
    title={
        "text": "Correlation of Segregation and Educational Equity Indicators (2015-16)",
        "subtitle": "Use slider to filter weak correlations | Select a variable to see how it correlates with others",
    },
    padding={"top": 20, "bottom": 10, "left": 20, "right": 20}
)

final_chart_with_caption

In [14]:
final_chart.save("final_chart1.html")
final_chart_with_caption.save("final_chart2.html")