# Facilities

Normalize facility condition and capacity data so it can be merged with RAI data.

In [52]:
%run notebooks/Setup.ipynb

import polars
import Levenshtein

## Normalize Condition Data

In [80]:
# Lookup table for the facility *condition* data: each key is a "SITE NAME"
# spelling from the raw facilities file, each value is the school_name
# spelling used in component_scores. Lookups are exact-match, so keys must
# reproduce the raw spelling character-for-character (this table uses an em
# dash "—" in the multi-campus names). Several sites may map to one school
# (e.g. both Rooftop campuses); those rows are combined when the data is
# grouped by school_name.
normalized_school_names = {
    "Rooftop Alternative School — Mayeda": "Rooftop Elementary",
    "Rooftop Alternative School — Twin Peaks": "Rooftop Elementary",
    "Mission High School": "Mission High",
    "Everett Middle School": "Everett Middle",
    "Balboa High School": "Balboa High",
    "George R. Moscone ES/Las Americas EES": "Moscone (George R.) Elementary",
    "Bryant Elementary School": "Bryant Elementary",
    "Rosa Parks Elementary School": "Parks (Rosa) Elementary",
    "Malcolm X Academy Elementary School": "Malcolm X Academy",
    "Commodore Sloat Elementary School": "Sloat (Commodore) Elementary",
    "San Francisco International High School": "S.F. International High",
    "Galileo Academy of Science & Technology": "Galileo High",
    "Alvarado Elementary School": "Alvarado Elementary",
    "West Portal Elementary School": "West Portal Elementary",
    "Buena Vista Horace Mann Community School": "Buena Vista/ Horace Mann K-8",
    "Grattan Elementary School": "Grattan Elementary",
    "James Denman Middle School": "Denman (James) Middle",
    "New Traditions Creative Arts Elementary School": "New Traditions Elementary",
    "Argonne Early Education School": "Argonne Elementary",
    "Argonne Elementary School": "Argonne Elementary",
    "James Lick Middle School": "Lick (James) Middle",
    "Jefferson Elementary School": "Jefferson Elementary",
    "Claire Lilienthal Alternative School — Madison": "Lilienthal (Claire) Elementary",
    # NOTE(review): "Alte" looks like a truncation of "Alternative" — confirm
    # this matches the raw SITE NAME exactly, otherwise the row is dropped.
    "Claire Lilienthal Alte School — Winfield Scott": "Lilienthal (Claire) Elementary",
    "Alamo Elementary School": "Alamo Elementary",
    "Paul Revere School": "Revere (Paul) Elementary",
    "Paul Revere Elementary School (Annex)": "Revere (Paul) Elementary",
    "Cleveland Elementary School": "Cleveland Elementary",
    "Sunset Elementary School": "Sunset Elementary",
    "Abraham Lincoln High School": "Lincoln (Abraham) High",
    "Marshall Elementary School": "Marshall Elementary",
    "Ulloa Elementary School": "Ulloa Elementary",
    "Francisco Middle School": "Francisco Middle",
    "Marina Middle School": "Marina Middle",
    "Sherman Elementary School": "Sherman Elementary",
    "John O'Connell High School": "O'Connell (John) High",
    "Herbert Hoover Middle School": "Hoover (Herbert) Middle",
    "Dolores Huerta Elementary School": "Huerta (Dolores) Elementary",
    "Bret Harte Elementary School": "Harte (Bret) Elementary",
    "Tenderloin Community Elementary School": "Tenderloin Community",
    "Bessie Carmichael Elementary School": "Carmichael (Bessie)/FEC",
    "Bessie Carmichael Middle School": "Carmichael (Bessie)/FEC",
    "Cesar Chavez Elementary School": "Chavez (Cesar) Elementary",
    "McKinley Elementary School": "McKinley Elementary",
    "Jean Parker Elementary School": "Parker (Jean) Elementary",
    "Ida B. Wells High School": "Wells (Ida B.) High",
    "John Muir Elementary School": "Muir (John) Elementary",
    "Spring Valley Science Elementary School": "Spring Valley Elementary",
    "Dianne Feinstein Elementary School": "Feinstein (Dianne) Elementary",
    "Presidio Middle School": "Presidio Middle",
    "Lakeshore Alternative Elementary School": "Lakeshore Alternative Elementary",
    "Glen Park Elementary School": "Glen Park Elementary",
    "San Francisco Public Montessori ES": "San Francisco Public Montessori",
    "Francis Scott Key Elementary School": "Key (Francis Scott) Elementary",
    "E.R. Taylor Elementary School": "Taylor (Edward R.) Elementary",
    "Aptos Middle School": "Aptos Middle",
    "San Francisco Community School": "San Francisco Community Alternative",
    "Visitacion Valley Middle School": "Visitacion Valley Middle",
    "Dr. Martin Luther King, Jr. Academic MS": "King Jr. (Martin Luther) Academic Middle",
    "Dr. George Washington Carver ES": "Carver (George Washington) Elementary",
    "Raoul Wallenberg Traditional High School": "Wallenberg (Raoul) Traditional High",
    "June Jordan School for Equity": "Jordan (June) School for Equity",
    "Alice Fong Yu Alternative School": "Yu (Alice Fong) Elementary",
    "Mission Education Center Elementary School": "Mission Education Center",
    "Leonard R. Flynn Elementary School": "Flynn (Leonard R.) Elementary",
    "Hillcrest Elementary School": "Hillcrest Elementary",
    "Roosevelt Middle School": "Roosevelt Middle",
    "Lowell High School": "Lowell High",
    "Phillip and Sala Burton Academic High School": "Burton (Phillip and Sala) Academic High",
    "Miraloma Elementary School": "Miraloma Elementary",
    "Lafayette Elementary School": "Lafayette Elementary",
    "Robert Louis Stevenson Elementary School": "Stevenson (Robert Louis) Elementary",
    "Lawton Alternative School": "Lawton Alternative",
    "Sunnyside Elementary School": "Sunnyside Elementary",
    "Ruth Asawa School of the Arts / The Academy SF": "Asawa (Ruth) SF Sch of the Arts A Public School",
    "Thurgood Marshall Academic High School": "Marshall (Thurgood) High",
    "Gordon J. Lau Elementary School": "Lau (Gordon J.) Elementary",
    "A.P. Giannini Middle School": "Giannini (A.P.) Middle",
    "Dr. William L. Cobb Elementary School": "Cobb (William L.) Elementary",
    "Sanchez Elementary School": "Sanchez Elementary",
    "Redding Elementary School": "Redding Elementary",
    "Downtown High School": "Downtown High",
    "Starr King Elementary School": "King (Thomas Starr) Elementary",
    "George Washington High School": "Washington (George) High",
    "Daniel Webster Elementary School": "Webster (Daniel) Elementary",
    "Chinese Immersion School at DeAvila": "Chinese Immersion School at DeAvila",
    "Jose Ortega Elementary School": "Ortega (Jose) Elementary",
    "Sheridan Elementary School": "Sheridan Elementary",
    "Frank McCoppin Elementary School": "McCoppin (Frank) Elementary",
    "George Peabody Elementary School": "Peabody (George) Elementary",
    "Guadalupe Elementary School": "Guadalupe Elementary",
    "Independence High School": "Independence High",
    "Sutro Elementary School": "Sutro Elementary",
    "Clarendon Alternative Elementary School": "Clarendon Alternative Elementary",
    "Visitacion Valley Elementary School": "Visitacion Valley Elementary",
    "Junipero Serra Elementary School": "Serra (Junipero) Elementary",
    "Longfellow Elementary School": "Longfellow Elementary",
    "John Yehall Chin Elementary School": "Chin (John Yehall) Elementary",
    "Willie L. Brown Jr. Middle School": "Brown Jr. (Willie L) Middle",
    "Garfield Elementary School": "Garfield Elementary",
    "Yick Wo Alternative Elementary School": "Yick Wo Elementary",
    "Harvey Milk Civil Rights Academy": "Milk (Harvey) Civil Rights Elementary",
    "Monroe Elementary School": "Monroe Elementary",
    "Dr. Charles Drew College Preparatory Academy": "Drew (Charles) College Preparatory Academy",
    "El Dorado Elementary School": "El Dorado Elementary",
}

# Coverage check: report every school_name present in component_scores that
# no entry of normalized_school_names maps to.
component_scores = polars.read_csv(
    workspace_path.joinpath('data/processed/component_scores.csv')
)
missing_schools = set(component_scores['school_name']).difference(
    normalized_school_names.values()
)

print("Schools in component_scores but missing from normalized_school_names:")
for school in sorted(missing_schools):
    print("-", school)

Schools in component_scores but missing from normalized_school_names:
- Academy (The)- SF @McAteer


In [43]:
# Column that is totalled per school; every other numeric column is averaged.
replacement_cost = "REPLACEMENT COSTS IN YEARS 1—5"

# Load the raw condition data, attach the normalized school name to each
# site, and keep only the sites that have a mapping (unmapped sites resolve
# to null and are filtered out).
facilities = (
    polars.read_csv(workspace_path.joinpath('data/raw/boarddocs/facilities.csv'))
    .with_columns(
        polars.col("SITE NAME")
        .map_elements(lambda site: normalized_school_names.get(site), return_dtype=polars.String)
        .alias("school_name")
    )
    .filter(polars.col('school_name').is_not_null())
    .drop(['SITE NAME', 'PRIMARY BUILDING USE'])
)

# Collapse multi-site schools into one row each: replacement costs are
# summed, the remaining columns are averaged. Sorting by school_name keeps
# the written file stable between runs.
facilities = (
    facilities
    .group_by("school_name")
    .agg(
        polars.col(replacement_cost).sum().alias(replacement_cost),
        polars.exclude(["school_name", replacement_cost]).mean(),
    )
    .sort("school_name")
)

facilities.write_csv(workspace_path.joinpath('data/processed/facilities.csv'))
facilities

school_name,REPLACEMENT COSTS IN YEARS 1—5,OVERALL CAMPUS FCI,BUILDING FCI,EXTERIOR SITE FCI,Electrical System,Equipment,Exterior Enclosure,Fire Protection,Furnishings,HVAC System
str,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""Alamo Elementary""",14767840,0.34,0.39,0.06,0.81,,0.13,0.0,0.0,1.05
"""Alvarado Elementary""",17036439,0.43,0.45,0.25,0.55,0.09,0.45,0.0,0.32,1.14
"""Aptos Middle""",28452813,0.24,0.24,0.11,0.52,,0.18,,,
"""Argonne Elementary""",9395256,0.325,0.315,0.3,0.69,0.2,0.175,0.0,0.27,0.0
"""Asawa (Ruth) SF Sch of the Art…",22858974,0.18,0.14,0.49,0.04,1.09,0.05,0.0,0.17,
…,…,…,…,…,…,…,…,…,…,…
"""Webster (Daniel) Elementary""",3446405,0.13,0.13,0.11,0.22,0.17,0.2,1.25,0.1,
"""Wells (Ida B.) High""",7106546,0.26,0.29,0.16,0.63,0.29,0.29,,0.8,
"""West Portal Elementary""",13978910,0.42,0.45,0.33,1.12,0.0,0.23,0.03,0.0,1.14
"""Yick Wo Elementary""",575331,0.04,0.02,0.22,0.01,,0.04,0.0,0.25,


## Normalize Capacity Data

In [83]:
# Lookup table for the facility *capacity* data: each key is a "SITE NAME"
# spelling from the raw capacity file, each value is the school_name spelling
# used in component_scores. The capacity file spells some sites differently
# from the condition file, hence a separate table: multi-campus names here
# use a plain hyphen " - " and O’Connell uses a curly apostrophe. Lookups are
# exact-match, so keys must reproduce the raw spelling character-for-character.
#
# NOTE: this rebinds normalized_school_names, replacing the condition-data
# table defined earlier in the notebook. Re-run that earlier cell before
# re-running the facilities cell, or it will use this table instead.
normalized_school_names = {
    "A.P. Giannini Middle School": "Giannini (A.P.) Middle",
    "Abraham Lincoln High School": "Lincoln (Abraham) High",
    "Alamo Elementary School": "Alamo Elementary",
    "Alice Fong Yu Alternative School": "Yu (Alice Fong) Elementary",
    "Alvarado Elementary School": "Alvarado Elementary",
    "Aptos Middle School": "Aptos Middle",
    "Argonne Elementary School": "Argonne Elementary",
    "Balboa High School": "Balboa High",
    "Bessie Carmichael Elementary School": "Carmichael (Bessie)/FEC",
    "Bessie Carmichael Middle School": "Carmichael (Bessie)/FEC",
    "Bret Harte Elementary School": "Harte (Bret) Elementary",
    "Bryant Elementary School": "Bryant Elementary",
    "Buena Vista Horace Mann Community School": "Buena Vista/ Horace Mann K-8",
    "Cesar Chavez Elementary School": "Chavez (Cesar) Elementary",
    "Chinese Immersion School at DeAvila": "Chinese Immersion School at DeAvila",
    "Claire Lilienthal Alternative School - Madison": "Lilienthal (Claire) Elementary",
    "Claire Lilienthal Alternative School - Winfield Scott": "Lilienthal (Claire) Elementary",
    "Clarendon Alternative Elementary School": "Clarendon Alternative Elementary",
    "Cleveland Elementary School": "Cleveland Elementary",
    "Commodore Sloat Elementary School": "Sloat (Commodore) Elementary",
    "Daniel Webster Elementary School": "Webster (Daniel) Elementary",
    "Dianne Feinstein Elementary School": "Feinstein (Dianne) Elementary",
    "Dolores Huerta Elementary School": "Huerta (Dolores) Elementary",
    "Downtown High School": "Downtown High",
    "Dr. Charles Drew College Preparatory Academy": "Drew (Charles) College Preparatory Academy",
    "Dr. George Washington Carver Elementary School": "Carver (George Washington) Elementary",
    "Dr. Martin Luther King, Jr. Academic Middle School": "King Jr. (Martin Luther) Academic Middle",
    "Dr. William L. Cobb Elementary School": "Cobb (William L.) Elementary",
    "E.R. Taylor Elementary School": "Taylor (Edward R.) Elementary",
    "El Dorado Elementary School": "El Dorado Elementary",
    "Everett Middle School": "Everett Middle",
    "Francis Scott Key Elementary School": "Key (Francis Scott) Elementary",
    "Francisco Middle School": "Francisco Middle",
    "Frank McCoppin Elementary School": "McCoppin (Frank) Elementary",
    "Galileo Academy of Science & Technology": "Galileo High",
    "Garfield Elementary School": "Garfield Elementary",
    "George Peabody Elementary School": "Peabody (George) Elementary",
    "George R. Moscone ES/Las Americas EES": "Moscone (George R.) Elementary",
    "George Washington High School": "Washington (George) High",
    "Glen Park Elementary School": "Glen Park Elementary",
    "Gordon J. Lau Elementary School": "Lau (Gordon J.) Elementary",
    "Grattan Elementary School": "Grattan Elementary",
    "Guadalupe Elementary School": "Guadalupe Elementary",
    "Harvey Milk Civil Rights Academy": "Milk (Harvey) Civil Rights Elementary",
    "Herbert Hoover Middle School": "Hoover (Herbert) Middle",
    "Hillcrest Elementary School": "Hillcrest Elementary",
    "Ida B. Wells High School": "Wells (Ida B.) High",
    "Independence High School": "Independence High",
    "James Denman Middle School": "Denman (James) Middle",
    "James Lick Middle School": "Lick (James) Middle",
    "Jean Parker Elementary School": "Parker (Jean) Elementary",
    "Jefferson Elementary School": "Jefferson Elementary",
    "John Muir Elementary School": "Muir (John) Elementary",
    "John O’Connell High School": "O'Connell (John) High",
    "John Yehall Chin Elementary School": "Chin (John Yehall) Elementary",
    "Jose Ortega Elementary School": "Ortega (Jose) Elementary",
    "June Jordan School for Equity": "Jordan (June) School for Equity",
    "Junipero Serra Elementary School": "Serra (Junipero) Elementary",
    "Lafayette Elementary School": "Lafayette Elementary",
    "Lakeshore Alternative Elementary School": "Lakeshore Alternative Elementary",
    "Lawton Alternative School": "Lawton Alternative",
    "Leonard R. Flynn Elementary School": "Flynn (Leonard R.) Elementary",
    "Longfellow Elementary School": "Longfellow Elementary",
    "Lowell High School": "Lowell High",
    "Malcolm X Academy Elementary School": "Malcolm X Academy",
    "Marina Middle School": "Marina Middle",
    "Marshall Elementary School": "Marshall Elementary",
    "Ruth Asawa School of the Arts / The Academy SF": "Asawa (Ruth) SF Sch of the Arts A Public School",
    "McKinley Elementary School": "McKinley Elementary",
    "Miraloma Elementary School": "Miraloma Elementary",
    "Mission Education Center Elementary School": "Mission Education Center",
    "Mission High School": "Mission High",
    "Monroe Elementary School": "Monroe Elementary",
    "New Traditions Creative Arts Elementary School": "New Traditions Elementary",
    "Paul Revere School": "Revere (Paul) Elementary",
    "Phillip and Sala Burton Academic High School": "Burton (Phillip and Sala) Academic High",
    "Presidio Middle School": "Presidio Middle",
    "Raoul Wallenberg Traditional High School": "Wallenberg (Raoul) Traditional High",
    "Redding Elementary School": "Redding Elementary",
    "Robert Louis Stevenson Elementary School": "Stevenson (Robert Louis) Elementary",
    "Rooftop Alternative School - Mayeda": "Rooftop Elementary",
    "Rooftop Alternative School - Twin Peaks": "Rooftop Elementary",
    "Roosevelt Middle School": "Roosevelt Middle",
    "Rosa Parks Elementary School": "Parks (Rosa) Elementary",
    "San Francisco Community School": "San Francisco Community Alternative",
    "San Francisco International High School": "S.F. International High",
    "San Francisco Public Montessori Elementary School": "San Francisco Public Montessori",
    "Sanchez Elementary School": "Sanchez Elementary",
    "Sheridan Elementary School": "Sheridan Elementary",
    "Sherman Elementary School": "Sherman Elementary",
    "Spring Valley Science Elementary School": "Spring Valley Elementary",
    "Starr King Elementary School": "King (Thomas Starr) Elementary",
    "Sunnyside Elementary School": "Sunnyside Elementary",
    "Sunset Elementary School": "Sunset Elementary",
    "Sutro Elementary School": "Sutro Elementary",
    "Tenderloin Community Elementary School": "Tenderloin Community",
    "Thurgood Marshall Academic High School": "Marshall (Thurgood) High",
    "Ulloa Elementary School": "Ulloa Elementary",
    "Visitacion Valley Elementary School": "Visitacion Valley Elementary",
    "Visitacion Valley Middle School": "Visitacion Valley Middle",
    "West Portal Elementary School": "West Portal Elementary",
    "Willie L. Brown Jr. Middle School": "Brown Jr. (Willie L) Middle",
    "Yick Wo Alternative Elementary School": "Yick Wo Elementary",
}

# Coverage check for the capacity mapping: report every school_name present
# in component_scores that no entry of normalized_school_names maps to.
component_scores = polars.read_csv(
    workspace_path.joinpath('data/processed/component_scores.csv')
)
missing_schools = set(component_scores['school_name']).difference(
    normalized_school_names.values()
)

print("Schools in component_scores but missing from normalized_school_names:")
for school in sorted(missing_schools):
    print("-", school)

Schools in component_scores but missing from normalized_school_names:
- Academy (The)- SF @McAteer


In [84]:
# Load the raw capacity data, coerce ENROLLMENT CAPACITY to integers
# (strict=False turns values that cannot be cast into nulls instead of
# raising), attach the normalized school name, and keep only mapped sites.
capacity = (
    polars.read_csv(workspace_path.joinpath('data/raw/boarddocs/capacity.csv'))
    .with_columns(
        polars.col("ENROLLMENT CAPACITY").cast(polars.Int64, strict=False).alias("ENROLLMENT CAPACITY"),
        polars.col("SITE NAME")
        .map_elements(lambda site: normalized_school_names.get(site), return_dtype=polars.String)
        .alias("school_name"),
    )
    .filter(polars.col('school_name').is_not_null())
    .drop(['SITE NAME', 'ADDRESS'])
)

# Columns that are totalled across a school's sites; every remaining column
# keeps the value from the first site in the group.
# NOTE(review): BUILDING SQ FT is not in this list, so multi-site schools keep
# only the first site's building area while LOT SQ FT is summed — confirm
# that is intended.
columns_to_sum = ["STUDENT ENROLLMENT", "ENROLLMENT CAPACITY", "BOND INVESTMENTS SINCE 2003", "CLASSROOMS", "LOT SQ FT"]
summed_columns = [polars.col(col).sum().alias(col) for col in columns_to_sum]
first_of_rest = polars.exclude(["school_name", *columns_to_sum]).first()

# One row per school, sorted by school_name so the written file is stable
# between runs.
capacity = (
    capacity
    .group_by("school_name")
    .agg([*summed_columns, first_of_rest])
    .sort("school_name")
)

capacity.write_csv(workspace_path.joinpath('data/processed/capacity.csv'))
capacity

school_name,STUDENT ENROLLMENT,ENROLLMENT CAPACITY,BOND INVESTMENTS SINCE 2003,CLASSROOMS,LOT SQ FT,YEAR BUILT,BUILDING SQ FT,PRIMARY PROGRAM
str,i64,i64,i64,i64,i64,i64,i64,str
"""Alamo Elementary""",435,413,2862675,27,60000,1926,50420,"""ES"""
"""Alvarado Elementary""",472,518,2851168,33,79247,1924,51700,"""ES"""
"""Aptos Middle""",855,904,20231647,49,121974,1931,168000,"""MS"""
"""Argonne Elementary""",389,398,300000,20,68520,1997,52000,"""ES"""
"""Asawa (Ruth) SF Sch of the Art…",1097,1065,40052770,71,1086169,1974,243882,"""HS"""
…,…,…,…,…,…,…,…,…
"""Webster (Daniel) Elementary""",341,382,12074323,20,55000,1924,42800,"""ES"""
"""Wells (Ida B.) High""",133,220,17129836,17,60671,1911,34300,"""HS"""
"""West Portal Elementary""",533,616,5450883,28,122324,1926,44700,"""ES"""
"""Yick Wo Elementary""",215,252,4926277,14,30612,1983,26000,"""ES"""
