# Combine Data by Schools

In [2]:
%run notebooks/Setup.ipynb

import polars as polars

## Augment Composite Scores with Demographics

In [6]:
geo = polars.read_csv(workspace_path.joinpath('data/processed/composite_scores_raw.csv')).select(['school_name', 'latitude', 'longitude'])
component_scores = polars.read_csv(workspace_path.joinpath('data/processed/component_scores.csv'))
supervisors = polars.read_csv(workspace_path.joinpath('data/raw/supervisors.csv'))
demographics = polars.read_csv(workspace_path.joinpath('data/processed/race_ethnicity_demographics.csv'))
facility = polars.read_csv(workspace_path.joinpath('data/processed/facilities.csv'))
capacity = polars.read_csv(workspace_path.joinpath('data/processed/capacity.csv'))
caaspp_ela = polars.read_csv(workspace_path.joinpath('data/processed/caaspp_ela.csv'))
caaspp_math = polars.read_csv(workspace_path.joinpath('data/processed/caaspp_math.csv'))
cast = polars.read_csv(workspace_path.joinpath('data/processed/cast.csv'))

In [19]:
# join all dataframes by school_name
combined_data = (component_scores
    .join(supervisors, on="school_name", how="left")
    .join(demographics, on="school_name", how="left")
    .join(facility, on="school_name", how="left")
    .join(capacity, on="school_name", how="left")
    .join(geo, on="school_name", how="left")
    .join(caaspp_ela, on="school_name", how="left")
    .join(caaspp_math, on="school_name", how="left")
    .join(cast, on="school_name", how="left")
)

# normalize column names to lowercase and replace spaces with underscores
combined_data = combined_data.select([
    polars.col(col).alias(col.lower().replace(" ", "_")) for col in combined_data.columns
])

combined_data.write_csv(workspace_path.joinpath('data/processed/component_scores_augmented.csv'))
combined_data

school_name,composite_score,equity_rank,excellence_rank,efficiency_rank,enrollment,equity_score_p,excellence_score_p,efficiency_score_p,composite_score_p,district,supervisor,asian_students,african_american_students,not_reported_students,filipino_students,hispanic_or_latino_students,two_or_more_races_students,white_students,pacific_islander_students,american_indian_or_alaska_native_students,asian_percent,african_american_percent,not_reported_percent,filipino_percent,hispanic_or_latino_percent,two_or_more_races_percent,white_percent,pacific_islander_percent,american_indian_or_alaska_native_percent,replacement_costs_in_years_1—5,overall_campus_fci,building_fci,exterior_site_fci,electrical_system,equipment,exterior_enclosure,fire_protection,furnishings,hvac_system,student_enrollment,enrollment_capacity,bond_investments_since_2003,classrooms,lot_sq_ft,year_built,building_sq_ft,primary_program,latitude,longitude,ela_total_students_tested,ela_percentage_standard_exceeded,ela_percentage_standard_met,ela_percentage_standard_met_and_above,ela_percentage_standard_nearly_met,ela_percentage_standard_not_met,math_total_students_tested,math_percentage_standard_exceeded,math_percentage_standard_met,math_percentage_standard_met_and_above,math_percentage_standard_nearly_met,math_percentage_standard_not_met,science_total_students_tested,science_percentage_standard_exceeded,science_percentage_standard_met,science_percentage_standard_met_and_above,science_percentage_standard_nearly_met,science_percentage_standard_not_met
str,f64,i64,i64,i64,i64,f64,f64,f64,f64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,str,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64
"""Key (Francis Scott) Elementary""",12.29,0,84,68,552,0.0,65.398704,50.111497,28.87755,4,"""Joel Engardio""",238,2,21,23,56,60,162,,,42.348754,0.355872,3.736655,4.092527,9.964413,10.676157,28.825623,,,9592457,0.24,0.24,0.27,0.71,,0.04,,,,543,582,11051316,29,90000,1935,54321,"""ES""",37.758111,-122.502468,269,45.72,26.77,72.49,14.87,12.64,271,39.11,32.1,71.22,18.82,9.96,94,22.34,34.04,56.38,38.3,5.32
"""McCoppin (Frank) Elementary""",19.41,1,86,90,260,0.0,67.849999,73.518349,35.342087,1,"""Connie Chan""",71,6,15,8,46,36,58,,2,29.338843,2.479339,6.198347,3.305785,19.008264,14.876033,23.966942,,0.826446,2052859,0.11,0.1,0.15,0.21,0.69,0.04,0.0,0.2,,198,243,11614738,17,42000,1970,32799,"""ES""",37.776281,-122.464016,103,30.1,25.24,55.34,21.36,23.3,104,38.46,19.23,57.69,23.08,19.23,31,22.58,25.81,48.39,41.94,9.68
"""Peabody (George) Elementary""",15.6,2,100,30,268,0.0,100.0,20.981299,30.245325,1,"""Connie Chan""",67,3,21,11,27,39,101,1,,24.814815,1.111111,7.777778,4.074074,10.0,14.444444,37.407407,0.37037,,1583583,0.11,0.1,0.22,0.25,0.48,0.02,0.0,,,272,275,7761238,13,36000,1970,21805,"""ES""",37.783846,-122.464561,128,57.03,23.44,80.47,12.5,7.03,128,50.0,28.13,78.13,14.84,7.03,42,45.24,38.1,83.33,14.29,2.38
"""Sunset Elementary""",14.35,3,91,32,404,0.0,75.160213,22.640381,24.450149,4,"""Joel Engardio""",222,3,26,7,26,48,71,,,55.086849,0.744417,6.451613,1.736973,6.451613,11.91067,17.617866,,,11206738,0.33,0.28,0.48,0.66,,0.04,0.18,1.15,1.15,401,406,5414329,16,204592,1956,37115,"""ES""",37.750881,-122.49981,191,65.45,21.47,86.91,8.38,4.71,191,63.35,26.18,89.53,8.9,1.57,63,38.1,42.86,80.95,19.05,0.0
"""Argonne Elementary""",11.46,4,75,40,389,0.0,56.150348,28.927687,21.269509,1,"""Connie Chan""",118,12,33,13,43,39,108,2,,32.065217,3.26087,8.967391,3.532609,11.684783,10.597826,29.347826,0.543478,,9395256,0.325,0.315,0.3,0.69,0.2,0.175,0.0,0.27,0.0,389,398,300000,20,68520,1997,52000,"""ES""",37.775159,-122.476784,170,39.05,21.89,60.95,21.89,17.16,169,31.95,31.36,63.31,23.67,13.02,60,26.67,30.0,56.67,38.33,5.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Academy (The)- SF @McAteer""",53.99,96,25,57,126,87.256452,16.601765,41.562364,58.169258,7,"""Myrna Melgar""",32,36,6,15,124,18,32,2,,12.075472,13.584906,2.264151,5.660377,46.792453,6.792453,12.075472,0.754717,,,,,,,,,,,,,,,,,,,,37.745563,-122.451186,60,20.0,36.67,56.67,26.67,16.67,57,0.0,19.3,19.3,22.81,57.89,59,5.17,32.76,37.93,51.72,10.34
"""Burton (Phillip and Sala) Acad…",69.96,97,60,85,1053,90.419241,43.824309,66.599007,72.81545,10,"""Shamann Walton""",321,92,17,105,414,41,18,20,3,31.134821,8.923375,1.648885,10.184287,40.155189,3.976722,1.745878,1.939864,0.29098,30758948,0.19,0.18,0.3,0.35,0.08,0.08,0.8,0.11,,1144,1228,16676468,70,774000,1961,230000,"""HS""",37.720642,-122.40515,226,30.8,32.59,63.39,19.64,16.96,219,12.84,17.43,30.28,19.72,50.0,225,13.78,30.22,44.0,44.89,11.11
"""Balboa High""",62.04,98,24,89,1262,94.308709,15.676147,71.980882,69.068612,11,"""Ahsha Safaí""",545,68,24,85,410,52,41,24,2,43.565148,5.435651,1.918465,6.794564,32.773781,4.156675,3.277378,1.918465,0.159872,83040670,0.53,0.54,0.17,1.03,1.07,0.34,1.19,0.74,1.16,1262,1296,23128311,70,446040,1928,261700,"""HS""",37.720905,-122.440571,246,31.71,37.8,69.51,20.73,9.76,256,15.23,28.52,43.75,25.39,30.86,259,6.56,34.75,41.31,52.51,6.18
"""Francisco Middle""",58.93,99,27,28,502,99.480905,18.399161,19.273717,59.158672,3,"""Aaron Peskin""",266,27,25,9,112,25,44,2,1,52.054795,5.283757,4.892368,1.761252,21.917808,4.892368,8.610568,0.391389,0.195695,22515157,0.31,0.32,0.17,0.7,,0.05,0.23,0.23,0.88,548,566,16164724,38,113020,1924,122834,"""MS""",37.804785,-122.411671,480,13.0,27.25,40.25,14.88,44.86,491,13.96,15.61,29.57,21.97,48.46,173,5.26,17.54,22.81,52.05,25.15


In [23]:
modeled_data = combined_data.sort("composite_score")\
    .with_columns(polars.arange(0, combined_data.height, eager=True).alias("equal_weighted_composite_rank"))

# calc equal_weighted_composite_score by averaging the equity, excellence, and access scores
modeled_data = modeled_data\
    .with_columns(
        ((polars.col("equity_score_p") + polars.col("excellence_score_p") + polars.col("efficiency_score_p")) / 3.0)
            .alias("equal_weighted_composite_score")
    )\
    .with_columns(
        (polars.col("composite_score") - polars.col("equal_weighted_composite_score")).alias("composite_score_change")
    )

# sort by equity_rank asc
modeled_data = modeled_data.sort("equal_weighted_composite_score")\
    .with_columns(polars.arange(0, modeled_data.height, eager=True).alias("equal_weighted_composite_rank"))\
    .sort("equal_weighted_composite_rank", descending=True)

# calculate the composite_rank_change by taking 101 - row_number() - composite_rank
modeled_data = modeled_data.with_columns(
    (polars.col("equal_weighted_composite_rank") - polars.col("composite_rank")).alias("composite_rank_change")
)

modeled_data

ColumnNotFoundError: composite_rank

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'with_columns' <---
DF ["school_name", "composite_score", "equity_rank", "excellence_rank"]; PROJECT */71 COLUMNS; SELECTION: None