In [1]:
from pathlib import Path
import pandas as pd
import csv

# Input/output locations, all anchored at the "data" folder next to the working directory
data_path = Path.cwd().parent.joinpath("data")
fips_csv_path = data_path.joinpath("fips_states_2010.csv")
csv_path = data_path.joinpath("score", "csv")

In [2]:
# Load the EJSCREEN 2020 nationwide csv; the ID column is parsed as a string, not a number
ejscreen_csv = data_path.joinpath("dataset", "ejscreen_2020", "usa.csv")
df = pd.read_csv(
    ejscreen_csv,
    low_memory=False,
    dtype={"ID": "string"},
)
df.head()

Unnamed: 0,ID,ACSTOTPOP,LESSHSPCT,LOWINCPCT
0,10010201001,636,0.208134,0.38522
1,10010201002,1287,0.040678,0.16317
2,10010202001,810,0.135563,0.501247
3,10010202002,1218,0.192,0.393701
4,10010203001,2641,0.125473,0.308217


In [3]:
# Percentile rank (rank expressed as a 0-1 fraction) of each input indicator
df["lesshs_percentile"] = df["LESSHSPCT"].rank(pct=True)
df["lowin_percentile"] = df["LOWINCPCT"].rank(pct=True)

In [4]:
# Candidate scores built from the two indicator percentiles:
#   score_a = row-wise mean, score_b = product
df["score_a"] = df[["lesshs_percentile", "lowin_percentile"]].mean(axis="columns")
df["score_b"] = df["lesshs_percentile"].mul(df["lowin_percentile"])

# Percentile rank of each score
df["score_a_percentile"] = df["score_a"].rank(pct=True)
df["score_b_percentile"] = df["score_b"].rank(pct=True)

# Flag rows whose score percentile is at or above 0.75 (the top 25%)
df["score_a_top_percentile_25"] = df["score_a_percentile"].ge(0.75)
df["score_b_top_percentile_25"] = df["score_b_percentile"].ge(0.75)
df.head()

Unnamed: 0,ID,ACSTOTPOP,LESSHSPCT,LOWINCPCT,lesshs_percentile,lowin_percentile,score_a,score_b,score_a_percentile,score_b_percentile,score_a_top_percentile_25,score_b_top_percentile_25
0,10010201001,636,0.208134,0.38522,0.793292,0.625015,0.709154,0.49582,0.73954,0.743311,False,False
1,10010201002,1287,0.040678,0.16317,0.23855,0.246722,0.242636,0.058856,0.206805,0.24959,False,False
2,10010202001,810,0.135563,0.501247,0.63439,0.772002,0.703196,0.48975,0.733009,0.738859,False,False
3,10010202002,1218,0.192,0.393701,0.765126,0.637158,0.701142,0.487506,0.730848,0.737357,False,False
4,10010203001,2641,0.125473,0.308217,0.603841,0.504977,0.554409,0.304925,0.568571,0.586058,False,False


In [5]:
# Drop the intermediate calculation columns: keep ID, ACSTOTPOP and the score outputs
output_columns = [
    "ID",
    "ACSTOTPOP",
    "score_a",
    "score_b",
    "score_a_percentile",
    "score_b_percentile",
    "score_a_top_percentile_25",
    "score_b_top_percentile_25",
]
df = df[output_columns]

In [6]:
# write nationwide csv
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
csv_path.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path / "usa.csv", index=False)

In [7]:
# write per state csvs
# Make sure the output folder exists so this cell also works on a fresh checkout.
csv_path.mkdir(parents=True, exist_ok=True)

# The first two characters of ID are compared against each state code; this
# prefix is the same for every state, so compute it once outside the loop.
state_prefix = df["ID"].str[:2]

# newline="" is the mode the csv module documents for files handed to csv.reader.
with open(fips_csv_path, newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    next(csv_reader, None)  # skip the header row; harmless if the file is empty

    for row in csv_reader:
        # Blank lines come back as empty rows; skip them instead of failing on row[0].
        if not row or not row[0].strip():
            continue

        fips = row[0].strip()
        print(f"Generating data{fips} csv")
        df1 = df[state_prefix == fips]
        # we need to name the file data01.csv for ogr2ogr csv merge to work
        df1.to_csv(csv_path / f"data{fips}.csv", index=False)

Generating data01 csv
Generating data02 csv
Generating data04 csv
Generating data05 csv
Generating data06 csv
Generating data08 csv
Generating data09 csv
Generating data10 csv
Generating data11 csv
Generating data12 csv
Generating data13 csv
Generating data15 csv
Generating data16 csv
Generating data17 csv
Generating data18 csv
Generating data19 csv
Generating data20 csv
Generating data21 csv
Generating data22 csv
Generating data23 csv
Generating data24 csv
Generating data25 csv
Generating data26 csv
Generating data27 csv
Generating data28 csv
Generating data29 csv
Generating data30 csv
Generating data31 csv
Generating data32 csv
Generating data33 csv
Generating data34 csv
Generating data35 csv
Generating data36 csv
Generating data37 csv
Generating data38 csv
Generating data39 csv
Generating data40 csv
Generating data41 csv
Generating data42 csv
Generating data44 csv
Generating data45 csv
Generating data46 csv
Generating data47 csv
Generating data48 csv
Generating data49 csv
Generating