In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
# load the lookup table
df = pd.read_csv('/Users/joshpaul/epa-justice/repo/epa-justice/tbl/NCRPlaces_Census_04192024.csv')
df[['id','name','PLACENAME','AREATYPE','COMMENT']]

Unnamed: 0,id,name,PLACENAME,AREATYPE,COMMENT
0,BORO1,Aleutians East Borough,Aleutians East Borough,County,
1,BORO19,Municipality of Anchorage,Anchorage Municipality,County,
2,AK15,Anchorage,Anchorage Municipality,County,Data represent information from nearest [COLUM...
3,CENS9,Bethel Census Area,Bethel Census Area,County,
4,BORO17,Bristol Bay Borough,Bristol Bay Borough,County,
...,...,...,...,...,...
423,AK430,Willow,Willow CDP,Census designated place,Data represent information from nearest [COLUM...
424,AK556,Willow Creek,Willow Creek CDP,Census designated place,Data represent information from nearest [COLUM...
425,AK431,Wiseman,Wiseman CDP,Census designated place,Data represent information from nearest [COLUM...
426,AK432,Womens Bay,Womens Bay CDP,Census designated place,Data represent information from nearest [COLUM...


In [3]:
# create new comment strings from the relationships in the table
# add results to a comment dict, which will be copied to luts.py

comment_dict = {}

for index, row in df.iterrows():
    # exclude NaNs from commenting
    if not isinstance(row['COMMENT'], float):

        # deal with one-to-many tract situation first
        if row['AREATYPE'] == 'Census tract':
            # if >1 GVV place associated with this row, list place names (census tracts) in the comment
            sub_df = df[df['name'] == row['name']]
            if len(sub_df) > 1:
                tract_list = sub_df['PLACENAME'].tolist()
                if len(tract_list) == 2:
                    tracts = (" and ").join(tract_list)
                    comment = f"Data for this place represent multiple merged census tracts: {tracts}"
                elif len(tract_list) > 2:
                    tract_list[-1] = str("and " + tract_list[-1])
                    tracts = (", ").join(tract_list)
                    comment = f"Data for this place represent multiple merged census tracts: {tracts}"
        else:
            sub_df = df[df['PLACENAME'] == row['PLACENAME']]
            name_list = sub_df['name'].tolist()

            if len(name_list) == 1:
                comment = f"Data represent information from nearest {row['AREATYPE'].lower()} ({row['PLACENAME']}), which includes {name_list[0]}."
            elif len(name_list) == 2:
                names = (" and ").join(name_list)
                comment = f"Data represent information from nearest {row['AREATYPE'].lower()} ({row['PLACENAME']}), which includes {names}."
            elif len(name_list) > 2:
                name_list[-1] = str("and " + name_list[-1])
                names = (", ").join(name_list)
                comment = f"Data represent information from nearest {row['AREATYPE'].lower()} ({row['PLACENAME']}), which includes {names}."
        
        comment_dict[row['id']] = comment
    
    else:

        comment_dict[row['id']] = ""


In [4]:
comment_dict

{'BORO1': '',
 'BORO19': '',
 'AK15': 'Data represent information from nearest county (Anchorage Municipality), which includes Municipality of Anchorage and Anchorage.',
 'CENS9': '',
 'BORO17': '',
 'CENS11': '',
 'CENS10': '',
 'BORO18': '',
 'CENS3': '',
 'BORO8': '',
 'AK124': 'Data represent information from nearest county (Fairbanks North Star Borough), which includes Fairbanks North Star Borough and Fairbanks.',
 'BORO16': '',
 'CENS4': '',
 'BORO9': '',
 'AK172': 'Data represent information from nearest county (Juneau City and Borough), which includes City and Borough of Juneau, Juneau, Lena Beach, Auke Bay, Tee Harbor, Thane, and Douglas.',
 'AK225': 'Data represent information from nearest county (Juneau City and Borough), which includes City and Borough of Juneau, Juneau, Lena Beach, Auke Bay, Tee Harbor, Thane, and Douglas.',
 'AK27': 'Data represent information from nearest county (Juneau City and Borough), which includes City and Borough of Juneau, Juneau, Lena Beach, Auk

In [5]:
# test 5 random GVV IDs to make sure the comments make sense
gvv_ids = random.sample(df['id'].tolist(), 5)

for gvv_id in gvv_ids:
    print(df[df['id'] == gvv_id][['id','name','PLACENAME','AREATYPE']])
    print(comment_dict[gvv_id])


        id         name        PLACENAME                 AREATYPE
331  AK331  Prudhoe Bay  Prudhoe Bay CDP  Census designated place
Data represent information from nearest census designated place (Prudhoe Bay CDP), which includes Prudhoe Bay and Deadhorse.
        id                     name                PLACENAME AREATYPE
20  BORO15  Kenai Peninsula Borough  Kenai Peninsula Borough   County

        id             name             PLACENAME            AREATYPE
390  AK392  Tenakee Springs  Tenakee Springs city  Incorporated place
Data represent information from nearest incorporated place (Tenakee Springs city), which includes Tenakee Springs.
        id             name    PLACENAME                 AREATYPE
374  AK379  Susitna Station  Susitna CDP  Census designated place
Data represent information from nearest census designated place (Susitna CDP), which includes Alexander Creek, Susitna Station, and Susitna.
       id         name    PLACENAME                 AREATYPE
104  AK63  Ch

In [6]:
# test the 2 known 1-to-many places to make sure the comments make sense

sub_df = df[df['name'].isin(['Eagle River', 'Joint Base Elmendorf-Richardson'])][['id','name','PLACENAME','AREATYPE']]
print(sub_df)
for gvv_id in sub_df['id'].tolist():
    print(comment_dict[gvv_id])

       id                             name          PLACENAME      AREATYPE
45  AK103                      Eagle River  Census Tract 2.01  Census tract
46  AK103                      Eagle River  Census Tract 2.02  Census tract
47  AK103                      Eagle River  Census Tract 2.04  Census tract
48  AK103                      Eagle River  Census Tract 2.05  Census tract
49  AK103                      Eagle River  Census Tract 2.06  Census tract
55  AK439  Joint Base Elmendorf-Richardson  Census Tract 9801  Census tract
56  AK439  Joint Base Elmendorf-Richardson  Census Tract 9802  Census tract
Data for this place represent multiple merged census tracts: Census Tract 2.01, Census Tract 2.02, Census Tract 2.04, Census Tract 2.05, and Census Tract 2.06
Data for this place represent multiple merged census tracts: Census Tract 2.01, Census Tract 2.02, Census Tract 2.04, Census Tract 2.05, and Census Tract 2.06
Data for this place represent multiple merged census tracts: Census Tract 