# Summarize Demographics By Plan

In [2]:
from pathlib import Path
import geopandas as gpd
import pandas as pd
# Show every column when a DataFrame is displayed (None removes pandas' column cap).
pd.set_option('display.max_columns', None)

# Base data directory, given relative to the current working directory.
DATA_DIR = Path("../data")

In [3]:
def get_demography(df, id_col="ID"):
  """Return the ID column plus a collapsed set of demographic columns.

  ``id_col``, ``HISP``, ``NH_WHITE``, ``NH_BLACK`` and ``NH_ASIAN`` are carried
  through unchanged. ``NH_OTHER`` is replaced by the row-wise sum of
  ``NH_AMIN``, ``NH_NHPI``, ``NH_OTHER`` and ``NH_2MORE``, and the three
  folded-in columns are left out of the result.

  Args:
    df: Table containing ``id_col`` and all eight demographic columns.
    id_col: Name of the identifier column to keep.

  Returns:
    A new DataFrame with columns ``[id_col, 'HISP', 'NH_WHITE', 'NH_BLACK',
    'NH_ASIAN', 'NH_OTHER']``. The input ``df`` is not modified.

  Raises:
    KeyError: If any of the required columns is missing from ``df``.
  """
  kept_cols = [id_col, 'HISP', 'NH_WHITE', 'NH_BLACK', 'NH_ASIAN']
  folded_cols = ['NH_AMIN', 'NH_NHPI', 'NH_OTHER', 'NH_2MORE']

  # Take an explicit copy so the assignment below targets an independent
  # frame; assigning into a column subset of `df` is what triggers
  # pandas' SettingWithCopyWarning.
  out = df[kept_cols].copy()
  # Computed from the untouched source columns, so the original NH_OTHER
  # value is counted exactly once.
  out['NH_OTHER'] = df[folded_cols].sum(axis=1)
  return out

# 2012 Chicago precincts. The ID is "C" followed by the ward number
# zero-padded to 2 characters and the precinct number zero-padded to 3.
precincts12 = gpd.read_file(
  DATA_DIR.joinpath("chicago_precincts_2012", "chicago_precincts_2012.shp")
)
precincts12['ID'] = (
  "C"
  + precincts12['WARD_1'].astype(str).str.zfill(2)
  + precincts12['PRECINCT_1'].astype(str).str.zfill(3)
)

# Community areas, with the 'area_numbe' column exposed as ID.
ca = gpd.read_file(
  DATA_DIR.joinpath("chicago_community_areas", "chicago_community_areas.shp")
).rename(columns=dict(area_numbe='ID'))

# Wards, with the 'ward' column exposed as ID.
# NOTE(review): loaded here but not added to `demography` below.
wards = gpd.read_file(
  DATA_DIR.joinpath("chicago_wards", "chicago_wards.shp")
).rename(columns=dict(ward='ID'))

# Demographic tables keyed by the geography a plan is built from.
demography = dict(
  ca=get_demography(ca),
  precinct=get_demography(precincts12),
)


In [8]:
from pathlib import Path
import json
# Directory holding the plan JSON files, relative to the current working directory.
PLAN_DIR = Path("../plans_from_figures")
# Path.glob returns a one-shot generator: once iterated it is exhausted and
# has to be re-created before the plans can be looped over again.
plans_path = PLAN_DIR.glob("*.json")


def get_plan_type(stem):
  """Pick the demography-table key for a plan from its file stem.

  Stems that begin with "ca" map to "ca". Every other stem, including those
  that begin with "prec", maps to "precinct".
  """
  return "ca" if stem.startswith("ca") else "precinct"


def summarize_plan_demographics(plan_object, plan_type, demog_df=None):
  """Total the demographic columns per district for one plan.

  Args:
    plan_object: Mapping with an ``'assignment'`` entry that maps unit ID to
      district label.
    plan_type: Key into the module-level ``demography`` dict; only consulted
      when ``demog_df`` is not supplied.
    demog_df: Optional demographic table to use instead of
      ``demography[plan_type]``. Must contain ``ID``, ``HISP``, ``NH_WHITE``,
      ``NH_BLACK`` and ``NH_ASIAN`` columns.

  Returns:
    DataFrame with one row per district and columns ``DISTRICT``, ``HISP``,
    ``NH_WHITE``, ``NH_BLACK``, ``NH_ASIAN`` holding the per-district sums.
    Units that appear in only one of the two tables are left out.
  """
  if demog_df is None:
    demog_df = demography[plan_type]

  assignment = plan_object['assignment']
  plan_df = pd.DataFrame({
    "ID": list(assignment.keys()),
    "DISTRICT": list(assignment.values()),
  })

  # Inner join: a left join would give units absent from the plan a NaN
  # DISTRICT, which upcasts integer district labels to float (1 -> 1.0) and
  # relies on groupby silently dropping the NaN group. The per-district sums
  # are the same either way.
  merged = demog_df.merge(plan_df, on="ID", how="inner")

  summed_cols = ["HISP", "NH_WHITE", "NH_BLACK", "NH_ASIAN"]
  return merged.groupby("DISTRICT")[summed_cols].sum().reset_index()


# Write one per-district demographic summary CSV for every plan JSON file.
out_dir = DATA_DIR / "plan_demography"
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before the first write.
out_dir.mkdir(parents=True, exist_ok=True)

for plan in plans_path:
  # Only parsing needs the file handle; release it before the merge and write.
  with open(plan, "r", encoding="utf-8") as f:
    data = json.load(f)
  plan_type = get_plan_type(plan.stem)
  district_demog = summarize_plan_demographics(data, plan_type)
  district_demog.to_csv(out_dir / f"{plan.stem}.csv", index=False)