# Combine Data by Schools

In [2]:
%run notebooks/Setup.ipynb

import polars as polars

## Augment Component Scores with Supervisor, Demographic, Facility, Capacity, Location, and Test-Score Data

In [3]:
def _read_workspace_csv(relative_path):
    """Read the CSV at ``relative_path`` (resolved against ``workspace_path``) with polars."""
    # NOTE(review): workspace_path is not defined in this notebook; presumably it
    # comes from the `%run notebooks/Setup.ipynb` cell - confirm.
    return polars.read_csv(workspace_path.joinpath(relative_path))


# Only the school name and its coordinates are kept from the raw composite scores file.
geo = _read_workspace_csv('data/processed/composite_scores_raw.csv').select(
    ['school_name', 'latitude', 'longitude']
)

# Base table that every other input is joined onto in the next cell.
component_scores = _read_workspace_csv('data/processed/component_scores.csv')

# The supervisors file is the only input read from data/raw; the rest come from data/processed.
supervisors = _read_workspace_csv('data/raw/supervisors.csv')
demographics = _read_workspace_csv('data/processed/race_ethnicity_demographics.csv')
facility = _read_workspace_csv('data/processed/facilities.csv')
capacity = _read_workspace_csv('data/processed/capacity.csv')

# Test result files.
caaspp_ela = _read_workspace_csv('data/processed/caaspp_ela.csv')
caaspp_math = _read_workspace_csv('data/processed/caaspp_math.csv')
cast = _read_workspace_csv('data/processed/cast.csv')

In [4]:
# Start from the component scores and left-join every other input on school_name,
# in this order, so each row of component_scores is kept even when a lookup
# table has no matching school.
combined_data = component_scores
for lookup_frame in (
    supervisors,
    demographics,
    facility,
    capacity,
    geo,
    caaspp_ela,
    caaspp_math,
    cast,
):
    combined_data = combined_data.join(lookup_frame, on="school_name", how="left")

# Normalize every column name: lowercase, with spaces replaced by underscores.
combined_data = combined_data.rename(
    {name: name.lower().replace(" ", "_") for name in combined_data.columns}
)

combined_data

school_name,composite_score,equity_rank,excellence_rank,efficiency_rank,enrollment,equity_score_p,excellence_score_p,efficiency_score_p,composite_score_p,district,supervisor,asian_students,african_american_students,not_reported_students,filipino_students,hispanic_or_latino_students,two_or_more_races_students,white_students,pacific_islander_students,american_indian_or_alaska_native_students,asian_percent,african_american_percent,not_reported_percent,filipino_percent,hispanic_or_latino_percent,two_or_more_races_percent,white_percent,pacific_islander_percent,american_indian_or_alaska_native_percent,replacement_costs_in_years_1—5,overall_campus_fci,building_fci,exterior_site_fci,electrical_system,equipment,exterior_enclosure,fire_protection,furnishings,hvac_system,student_enrollment,enrollment_capacity,bond_investments_since_2003,classrooms,lot_sq_ft,year_built,building_sq_ft,primary_program,latitude,longitude,ela_total_students_tested,ela_percentage_standard_exceeded,ela_percentage_standard_met,ela_percentage_standard_met_and_above,ela_percentage_standard_nearly_met,ela_percentage_standard_not_met,math_total_students_tested,math_percentage_standard_exceeded,math_percentage_standard_met,math_percentage_standard_met_and_above,math_percentage_standard_nearly_met,math_percentage_standard_not_met,science_total_students_tested,science_percentage_standard_exceeded,science_percentage_standard_met,science_percentage_standard_met_and_above,science_percentage_standard_nearly_met,science_percentage_standard_not_met
str,f64,i64,i64,i64,i64,f64,f64,f64,f64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,str,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64
"""Washington (George) High""",72.91,81,52,98,2091,63.294783,37.851138,93.061238,64.375485,1,"""Connie Chan""",1018,87,69,62,370,126,262,11,3,50.697211,4.332669,3.436255,3.087649,18.426295,6.2749,13.047809,0.547809,0.149402,18427650,0.14,0.12,0.27,0.12,0.0,0.0,0.41,0.41,,2068,2101,97404162,100,679002,1936,247800,"""HS""",37.777765,-122.492016,421,43.61,35.66,79.28,13.49,7.23,407,29.6,28.61,58.21,23.13,18.66,366,9.92,34.99,44.9,46.56,8.54
"""Presidio Middle""",51.16,52,44,94,996,39.098491,31.936443,80.958686,47.773028,1,"""Connie Chan""",343,28,65,29,151,135,226,1,2,35.0,2.857143,6.632653,2.959184,15.408163,13.77551,23.061224,0.102041,0.204082,25167306,0.25,0.24,0.35,0.1,0.39,0.39,,,0.38,987,1015,19362502,47,144000,1930,140000,"""MS""",37.780842,-122.490093,938,31.73,35.9,67.63,18.16,14.21,932,27.1,24.3,51.4,25.05,23.55,309,17.8,27.83,45.63,45.63,8.74
"""Lafayette Elementary""",23.61,7,96,48,468,0.0,86.009098,34.900858,30.227489,1,"""Connie Chan""",173,7,28,12,62,72,118,2,,36.49789,1.476793,5.907173,2.531646,13.080169,15.189873,24.894515,0.421941,,7482095,0.19,0.18,0.29,0.1,,0.03,,,,494,531,26530616,26,74400,1927,54283,"""ES""",37.777548,-122.496864,233,51.93,26.61,78.54,14.59,6.87,237,51.05,29.54,80.59,11.81,7.59,83,26.51,42.17,68.67,27.71,3.61
"""Alamo Elementary""",14.13,6,74,35,390,0.0,55.243183,25.054181,20.074341,1,"""Connie Chan""",130,5,27,10,59,48,114,,,33.07888,1.272265,6.870229,2.544529,15.012723,12.21374,29.007634,,,14767840,0.34,0.39,0.06,0.81,,0.13,0.0,0.0,1.05,435,413,2862675,27,60000,1926,50420,"""ES""",37.783034,-122.482727,194,43.81,24.23,68.04,16.49,15.46,194,43.3,26.8,70.1,18.56,11.34,67,22.39,34.33,56.72,34.33,8.96
"""Argonne Elementary""",11.46,4,75,40,389,0.0,56.150348,28.927687,21.269509,1,"""Connie Chan""",118,12,33,13,43,39,108,2,,32.065217,3.26087,8.967391,3.532609,11.684783,10.597826,29.347826,0.543478,,9395256,0.325,0.315,0.3,0.69,0.2,0.175,0.0,0.27,0.0,389,398,300000,20,68520,1997,52000,"""ES""",37.775159,-122.476784,170,39.05,21.89,60.95,21.89,17.16,169,31.95,31.36,63.31,23.67,13.02,60,26.67,30.0,56.67,38.33,5.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Milk (Harvey) Civil Rights Ele…",21.5,9,73,71,133,0.0,54.352953,52.617792,26.742686,8,"""Rafael Mandelman""",1,22,7,,32,20,70,2,,0.649351,14.285714,4.545455,,20.779221,12.987013,45.454545,1.298701,,489965,0.02,0.0,0.42,0.0,,0.42,0.0,0.0,,187,198,20716327,11,31405,1956,30560,"""ES""",37.759142,-122.436348,89,42.7,28.09,70.79,8.99,20.22,89,30.34,17.98,48.31,20.22,31.46,30,26.67,46.67,73.33,16.67,10.0
"""Everett Middle""",15.2,79,1,4,404,61.228763,0.0,0.0,30.614381,8,"""Rafael Mandelman""",7,26,16,5,358,18,35,,2,1.498929,5.567452,3.426124,1.070664,76.659529,3.85439,7.494647,,0.428266,50951547,0.53,0.54,0.23,0.74,0.93,0.36,0.0,1.23,1.04,598,637,12809961,38,178298,1928,155370,"""MS""",37.764027,-122.428692,409,3.93,11.79,15.72,17.69,66.58,481,3.33,6.04,9.38,15.0,75.63,182,1.65,4.4,6.04,53.85,40.11
"""Lilienthal (Claire) Elementary""",40.88,31,87,61,674,23.063878,69.158131,44.586996,39.968221,2,"""Catherine Stefani""",194,47,50,13,78,114,174,,,28.955224,7.014925,7.462687,1.940299,11.641791,17.014925,25.970149,,,7443225,0.21,0.205,0.325,0.44,1.25,0.14,0.0,0.625,0.0,645,657,27650231,37,119230,1920,24919,"""ES""",37.803091,-122.443403,412,57.04,26.7,83.74,10.44,5.83,412,60.44,19.9,80.34,10.92,8.74,129,29.46,33.33,62.79,30.23,6.98
"""Marina Middle""",35.98,65,17,49,666,48.945168,8.509072,35.638621,35.509507,2,"""Catherine Stefani""",383,37,30,18,113,25,48,,,58.562691,5.657492,4.587156,2.752294,17.278287,3.82263,7.33945,,,32251032,0.3,0.29,0.89,0.21,0.06,0.19,0.6,0.73,0.73,684,739,44021773,47,281180,1930,152900,"""MS""",37.801764,-122.436362,627,14.51,34.77,49.28,24.72,26.0,629,23.89,17.52,41.4,24.52,34.08,209,12.92,31.58,44.5,45.45,10.05


## Modeled Data
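Rank schools by their composite score, compute an equal-weighted composite score (the plain average of the equity, excellence, and efficiency scores) with its own rank, and record how the score and rank differ between the two versions.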

In [5]:
# Sorting and adding columns never change the row count, so one height serves
# both rank columns.
school_count = combined_data.height

# Equal-weighted composite: the plain mean of the equity, excellence, and
# efficiency percentile scores.
equal_weighted_score = (
    polars.col("equity_score_p")
    + polars.col("excellence_score_p")
    + polars.col("efficiency_score_p")
) / 3.0

modeled_data = (
    combined_data
    # composite_rank: 0-based position after an ascending sort on composite_score.
    .sort("composite_score")
    .with_columns(
        polars.arange(0, school_count, eager=True).alias("composite_rank")
    )
    .with_columns(equal_weighted_score.alias("equal_weighted_composite_score"))
    # composite_score_change: composite score minus its equal-weighted counterpart.
    .with_columns(
        (
            polars.col("composite_score")
            - polars.col("equal_weighted_composite_score")
        ).alias("composite_score_change")
    )
    # equal_weighted_composite_rank: 0-based position after an ascending sort on
    # the equal-weighted score; the frame is then ordered highest rank first.
    .sort("equal_weighted_composite_score")
    .with_columns(
        polars.arange(0, school_count, eager=True).alias("equal_weighted_composite_rank")
    )
    .sort("equal_weighted_composite_rank", descending=True)
    # composite_rank_change: equal-weighted rank minus composite rank.
    .with_columns(
        (
            polars.col("equal_weighted_composite_rank")
            - polars.col("composite_rank")
        ).alias("composite_rank_change")
    )
)

modeled_data

school_name,composite_score,equity_rank,excellence_rank,efficiency_rank,enrollment,equity_score_p,excellence_score_p,efficiency_score_p,composite_score_p,district,supervisor,asian_students,african_american_students,not_reported_students,filipino_students,hispanic_or_latino_students,two_or_more_races_students,white_students,pacific_islander_students,american_indian_or_alaska_native_students,asian_percent,african_american_percent,not_reported_percent,filipino_percent,hispanic_or_latino_percent,two_or_more_races_percent,white_percent,pacific_islander_percent,american_indian_or_alaska_native_percent,replacement_costs_in_years_1—5,overall_campus_fci,building_fci,exterior_site_fci,electrical_system,equipment,exterior_enclosure,fire_protection,furnishings,hvac_system,student_enrollment,enrollment_capacity,bond_investments_since_2003,classrooms,lot_sq_ft,year_built,building_sq_ft,primary_program,latitude,longitude,ela_total_students_tested,ela_percentage_standard_exceeded,ela_percentage_standard_met,ela_percentage_standard_met_and_above,ela_percentage_standard_nearly_met,ela_percentage_standard_not_met,math_total_students_tested,math_percentage_standard_exceeded,math_percentage_standard_met,math_percentage_standard_met_and_above,math_percentage_standard_nearly_met,math_percentage_standard_not_met,science_total_students_tested,science_percentage_standard_exceeded,science_percentage_standard_met,science_percentage_standard_met_and_above,science_percentage_standard_nearly_met,science_percentage_standard_not_met,composite_rank,equal_weighted_composite_score,composite_score_change,equal_weighted_composite_rank,composite_rank_change
str,f64,i64,i64,i64,i64,f64,f64,f64,f64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,str,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,i64,i64
"""Lowell High""",90.97,60,88,100,2593,45.071663,70.531761,100.0,65.168772,7,"""Myrna Melgar""",1185,66,68,145,450,210,409,4,3,46.653543,2.598425,2.677165,5.708661,17.716535,8.267717,16.102362,0.15748,0.11811,32726196,0.2,0.2,0.18,0.55,,,0.0,0.08,,2652,2661,22578221,99,1189188,1970,245000,"""HS""",37.731019,-122.483607,531,57.09,27.41,84.5,10.4,5.1,525,40.38,25.9,66.29,19.24,14.48,532,23.5,39.1,62.59,34.96,2.44,99,71.867808,19.102192,100,1
"""Roosevelt Middle""",74.97,78,78,96,659,60.236857,58.989504,86.008981,66.36805,2,"""Catherine Stefani""",318,26,33,11,70,67,129,2,1,48.401826,3.957382,5.022831,1.674277,10.65449,10.197869,19.634703,0.304414,0.152207,14388157,0.2,0.2,0.19,0.18,0.08,,,,,670,682,11051694,32,94469,1928,121000,"""MS""",37.782287,-122.458868,625,36.48,37.92,74.4,15.84,9.76,621,38.23,23.39,61.61,21.61,16.77,217,21.66,32.72,54.38,41.01,4.61,98,68.411781,6.558219,99,1
"""Yu (Alice Fong) Elementary""",66.85,69,98,78,595,52.182942,93.061355,58.989386,64.104156,4,"""Joel Engardio""",352,33,42,18,29,83,29,,3,59.762309,5.602716,7.13073,3.056027,4.923599,14.091681,4.923599,,0.509338,6768445,0.21,0.23,0.02,0.41,0.81,0.02,0.0,0.67,,586,594,4378810,24,78007,1995,42978,"""ES""",37.759396,-122.469281,390,53.85,28.21,82.05,13.33,4.62,389,54.76,22.11,76.86,15.42,7.71,128,35.94,28.91,64.84,35.16,0.0,94,68.077895,-1.227895,98,4
"""Burton (Phillip and Sala) Acad…",69.96,97,60,85,1053,90.419241,43.824309,66.599007,72.81545,10,"""Shamann Walton""",321,92,17,105,414,41,18,20,3,31.134821,8.923375,1.648885,10.184287,40.155189,3.976722,1.745878,1.939864,0.29098,30758948,0.19,0.18,0.3,0.35,0.08,0.08,0.8,0.11,,1144,1228,16676468,70,774000,1961,230000,"""HS""",37.720642,-122.40515,226,30.8,32.59,63.39,19.64,16.96,219,12.84,17.43,30.28,19.72,50.0,225,13.78,30.22,44.0,44.89,11.11,96,66.947519,3.012481,97,1
"""Wallenberg (Raoul) Traditional…",68.34,75,80,92,528,57.397701,60.999819,76.927007,63.180557,5,"""Dean Preston""",203,52,13,19,128,43,89,3,1,36.842105,9.437387,2.359347,3.448276,23.23049,7.803993,16.15245,0.544465,0.181488,9577470,0.22,0.24,0.01,0.31,0.24,0.24,0.17,0.17,,636,686,15139396,26,144184,1951,62840,"""HS""",37.780136,-122.445816,110,28.44,21.1,49.54,20.18,30.28,107,22.43,19.63,42.06,14.02,43.93,117,11.11,25.64,36.75,41.03,22.22,95,65.108176,3.231824,96,1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Sherman Elementary""",13.85,15,64,14,293,7.400342,46.909982,4.901997,16.653166,2,"""Catherine Stefani""",39,26,27,9,53,25,100,2,1,13.829787,9.219858,9.574468,3.191489,18.794326,8.865248,35.460993,0.70922,0.35461,11291026,0.3,0.35,0.03,0.74,,0.06,,,0.0,285,322,1171140,26,75625,1928,56200,"""ES""",37.798182,-122.426452,102,31.37,19.61,50.98,21.57,27.45,108,25.0,22.22,47.22,30.56,22.22,38,18.42,23.68,42.11,50.0,7.89,3,19.73744,-5.88744,4,1
"""Parker (Jean) Elementary""",13.88,24,56,3,123,16.9235,40.81567,0.0,18.665668,3,"""Aaron Peskin""",80,12,4,1,14,7,6,,1,64.0,9.6,3.2,0.8,11.2,5.6,4.8,,0.8,5957958,0.26,0.27,,0.72,0.42,0.42,0.01,0.0,0.0,168,231,300000,16,26888,1996,46000,"""ES""",37.797388,-122.411294,53,28.3,30.19,58.49,15.09,26.42,54,29.63,27.78,57.41,20.37,22.22,17,5.88,41.18,47.06,29.41,23.53,4,19.24639,-5.36639,3,-1
"""Parks (Rosa) Elementary""",19.84,68,13,2,328,51.358968,3.593982,0.0,26.577979,5,"""Dean Preston""",51,67,20,11,90,68,33,4,,14.825581,19.476744,5.813953,3.197674,26.162791,19.767442,9.593023,1.162791,,20315195,0.49,0.5,0.23,0.77,0.16,0.44,,1.25,1.0,380,460,4158677,30,133884,1924,66900,"""ES""",37.783532,-122.430297,162,14.81,16.67,31.48,21.6,46.91,165,8.48,17.58,26.06,18.79,55.15,55,7.27,14.55,21.82,43.64,34.55,15,18.31765,1.52235,2,-13
"""Visitacion Valley Elementary""",23.23,58,16,13,237,43.559875,7.35341,3.593864,24.516756,10,"""Shamann Walton""",112,23,6,10,73,14,6,19,,42.585551,8.745247,2.281369,3.802281,27.756654,5.323194,2.281369,7.224335,,3311584,0.08,0.11,0.0,0.0,,0.01,0.0,0.0,,297,343,11880310,22,84552,1937,51400,"""ES""",37.712688,-122.41035,118,17.95,19.66,37.61,19.66,42.74,117,11.11,18.8,29.91,24.79,45.3,46,8.7,17.39,26.09,43.48,30.43,19,18.16905,5.06095,1,-18


In [6]:
# Save the augmented, ranked table as a CSV under the workspace's data/processed directory.
augmented_csv_path = workspace_path.joinpath('data/processed/component_scores_augmented.csv')
modeled_data.write_csv(augmented_csv_path)