# Composite Scores Map Processing

A breakdown of composite scores for schools can be found at: https://pyj2z6-michael-chrzan.shinyapps.io/Map-App/.

The xhr_streaming response contains the actual data, which needs some processing.

In [89]:
%run notebooks/Setup.ipynb

import json
import polars as polars

In [90]:
# Read the saved xhr_streaming response. The encoding is pinned to UTF-8 (the
# standard encoding for JSON) so the result does not depend on the platform's
# locale default.
response_path = workspace_path.joinpath('data/raw/rai/composite_scores_map.response')
with open(response_path, 'r', encoding='utf-8') as f:
    composite_scores_string = f.read()

# need to parse twice since the payload is a json encoded string:
# the first pass yields a str, the second pass yields the actual object
composite_scores = json.loads(json.loads(composite_scores_string))


In [91]:
# There appear to be multiple layers of data; the one at index 8 is the one we care about.
# Sanity check: the number of text entries should match the number of schools.
school_layer_text = composite_scores['values']['school_map']['x']['data'][8]['text']
len(school_layer_text)

101

## Process JSON into CSV

In [94]:
# Pull the layer of interest out of the parsed response
data_layer = composite_scores['values']['school_map']['x']['data'][8]

# Each entry of the text attribute is an html string such as:
# Lilienthal (Claire) Elementary <br /> Composite Score:  40.88 <br /> Composite Score Rank:  65 <br /> Equity Rank:  31 <br /> Excellence Rank:  87 <br /> Efficiency Rank:  61
# Splitting on '<br />' puts the school name first, followed by one score/rank field per position
text_fields = polars.col('text').str.split('<br />')

data_layer_df = (
    # start from the x, y, and text attributes in data_layer
    polars.DataFrame({
        'x': data_layer['x'],
        'y': data_layer['y'],
        'text': data_layer['text'],
    })
    # pull each field out of the text and convert it to its numeric type
    .with_columns(
        school_name=text_fields.list.get(0).str.strip_chars(),
        composite_score=(
            text_fields.list.get(1)
            .str.extract(r'Composite Score:\s*([\d.]+)')
            .cast(polars.Float64)
        ),
        composite_score_rank=(
            text_fields.list.get(2)
            .str.extract(r'Composite Score Rank:\s*(\d+)')
            .cast(polars.Int64)
        ),
        equity_rank=(
            text_fields.list.get(3)
            .str.extract(r'Equity Rank:\s*(\d+)')
            .cast(polars.Int64)
        ),
        excellence_rank=(
            text_fields.list.get(4)
            .str.extract(r'Excellence Rank:\s*(\d+)')
            .cast(polars.Int64)
        ),
        efficiency_rank=(
            text_fields.list.get(5)
            .str.extract(r'Efficiency Rank:\s*(\d+)')
            .cast(polars.Int64)
        ),
    )
    # the raw text is redundant once its fields are separate columns
    .drop('text')
    # rename columns 'x' to 'longitude' and 'y' to 'latitude'
    .rename({'x': 'longitude', 'y': 'latitude'})
)

data_layer_df

longitude,latitude,school_name,composite_score,composite_score_rank,equity_rank,excellence_rank,efficiency_rank
f64,f64,str,f64,i64,i64,i64,i64
-122.492016,37.777765,"""Washington (George) High""",72.91,97,81,52,98
-122.490093,37.780842,"""Presidio Middle""",51.16,83,52,44,94
-122.496864,37.777548,"""Lafayette Elementary""",23.61,20,7,96,48
-122.482727,37.783034,"""Alamo Elementary""",14.13,5,6,74,35
-122.476784,37.775159,"""Argonne Elementary""",11.46,1,4,75,40
…,…,…,…,…,…,…,…
-122.436348,37.759142,"""Milk (Harvey) Civil Rights Ele…",21.5,17,9,73,71
-122.428692,37.764027,"""Everett Middle""",15.2,8,79,1,4
-122.443403,37.803091,"""Lilienthal (Claire) Elementary""",40.88,65,31,87,61
-122.436362,37.801764,"""Marina Middle""",35.98,54,65,17,49


In [95]:
# Persist the parsed frame as the initial (raw) csv under the workspace's processed data folder
composite_scores_csv_path = workspace_path.joinpath('data/processed/composite_scores_raw.csv')
data_layer_df.write_csv(composite_scores_csv_path)