In [7]:
from pathlib import Path
import pandas as pd
import json
DATA_DIR = Path("../data")
PLAN_DIR = Path(DATA_DIR / "plan_demography")


In [8]:
plan_demographies = [
  *[z for z in PLAN_DIR.glob("ca10*.csv")],
  # *[z for z in PLAN_DIR.glob("prec10*.csv")],
]

In [9]:
def generate_configuration(
  district_row,  
  candidate_count,
  cohesion_dict,
  alphas_dict,
):
  total_voters = district_row['HISP'] + district_row['NH_WHITE'] + district_row['NH_BLACK'] + district_row['NH_ASIAN'] + district_row['NH_OTHER']
  bloc_names = ["NH_WHITE", "NH_BLACK", "NH_ASIAN", "NH_OTHER", "HISP"]
  
  bloc_voter_prop = {
    name: district_row[name] / total_voters for name in bloc_names
  }
  cohesion_parameters = {
    name: cohesion_dict[name] for name in bloc_names
  }
  alphas = {
    name: alphas_dict[name] for name in bloc_names
  }
  slate_to_candidates = {
    name: [f"{name}_{i}" for i in range(candidate_count)] for name in bloc_names
  }
  return dict(
    bloc_voter_prop=bloc_voter_prop,
    cohesion_parameters=cohesion_parameters,
    alphas=alphas,
    slate_to_candidates=slate_to_candidates
  )
  

In [10]:
cohesion_matrix = [
  .9,
  .6
]

alphas_matrix = [
  .5,
  1,
  2
]

RACES = [
  "NH_WHITE",
  "NH_BLACK",
  "NH_ASIAN",
  "NH_OTHER",
  "HISP" 
]
def generate_cohesion_subdict(bloc, bloc_value):
  return {
    "NH_WHITE": bloc_value if bloc == 'NH_WHITE' else (1-bloc_value)/4,
    "NH_BLACK":bloc_value if bloc == 'NH_BLACK' else (1-bloc_value)/4, 
    "NH_ASIAN":bloc_value if bloc == 'NH_ASIAN' else (1-bloc_value)/4, 
    "NH_OTHER":bloc_value if bloc == 'NH_OTHER' else (1-bloc_value)/4, 
    "HISP":bloc_value if bloc == 'HISP' else (1-bloc_value)/4,  
  }

def generate_cohesion(cohesion):
  output = {}
  for race in RACES:
    output[race] = generate_cohesion_subdict(race, cohesion)
  return output
  
def generate_alphas_subdict(alpha):
  return {
    "NH_WHITE": alpha,
    "NH_BLACK": alpha,
    "NH_ASIAN": alpha,
    "NH_OTHER": alpha,
    "HISP": alpha,
  }
def generate_alphas(alpha):
  output = {}
  for race in RACES:
    output[race] = generate_alphas_subdict(alpha)
  return output


In [12]:
NUM_RACES = 5
set_idx = 0
print("Running...")
for candidate_count in [5]:
  for csv_path in plan_demographies:
    print(csv_path)
    df = pd.read_csv(csv_path)
    PLAN = csv_path.stem
    for cohesion in cohesion_matrix:
      cohesion_dict = generate_cohesion(cohesion)
      for alpha in alphas_matrix:
        alphas_dict = generate_alphas(alpha)

        for idx, row in df.iterrows():
          DISTRICT = str(row['DISTRICT'])
          config = generate_configuration(row, candidate_count, cohesion_dict, alphas_dict)
          config_name = f"{PLAN}_{DISTRICT}_{candidate_count}_{cohesion}_{alpha}"    
          if set_idx > 240:
            config_set = 'set1'
          elif set_idx > 120:
            config_set = 'set2'
          else:
            config_set = 'set3'

          set_idx+=1;
          with open(DATA_DIR / "plan_configs" / config_set / f"{config_name}.json", "w") as f:
              json.dump(config, f)

Running...
../data/plan_demography/ca10-5848.csv
../data/plan_demography/ca10-1042.csv
../data/plan_demography/ca10-937.csv
../data/plan_demography/ca10-87557.csv
../data/plan_demography/ca10-5304.csv
../data/plan_demography/ca10-25218.csv
