In [76]:
import pandas as pd

In [77]:
# load "original" NCR - census lookup table.... it's been slightly modified:
# a number of communities already in GVV were missing from this table and so were added manually
# king cove, chignik, craig, port protection, gambell, elfin cove, klawock, metlakatla
tbl = pd.read_csv('/Users/joshpaul/epa-justice/repo/epa-justice/tbl/NCRPlaces_Census.csv')
# drop any rows without an ID ... these are the rows of places to add to NCR, 
# which have been processed separately and will be added later in this notebook
tbl = tbl[~tbl['id'].isna()]
# also drop any rows with no GEOIDFQ ... these are places with no equivalent census area assigned
tbl = tbl[~tbl['GEOIDFQ'].isna()]

In [78]:
tbl.sort_values(by=['id'])

Unnamed: 0,id,name,alt_name,region,country,latitude,longitude,type,GEOIDFQ,PLACENAME,AREATYPE,COMMENT
42,AK1,Afognak,Agw’aneq,Alaska,US,58.0078,-152.768,community,1600000US0201560,Aleneva CDP,Census designated place,
44,AK100,Dry Creek,,Alaska,US,63.7000,-144.567,community,1600000US0220020,Dry Creek CDP,Census designated place,
45,AK102,Eagle,Tthee T'äwdlenn,Alaska,US,64.7880,-141.200,community,1600000US0220380,Eagle city,Incorporated place,
47,AK104,Eagle Village,,Alaska,US,64.7805,-141.114,community,1600000US0220600,Eagle Village CDP,Census designated place,
48,AK105,Edna Bay,,Alaska,US,55.9489,-133.662,community,1600000US0220970,Edna Bay city,Incorporated place,
...,...,...,...,...,...,...,...,...,...,...,...,...
24,CENS5,Kusilvak Census Area,,,,,,census_area,0500000US02158,Kusilvak Census Area,County,
25,CENS6,Nome Census Area,,,,,,census_area,0500000US02180,Nome Census Area,County,
26,CENS7,Prince of Wales-Hyder Census Area,,,,,,census_area,0500000US02198,Prince of Wales-Hyder Census Area,County,
27,CENS8,Yukon-Koyukuk Census Area,,,,,,census_area,0500000US02290,Yukon-Koyukuk Census Area,County,


In [79]:
# load new NCR - census lookup table
# this table was created by manually identifying coordinates for places not found in NCR, but desired for their census data
# Adak and Eareckson Station are not included, because no NCR data is available
to_add = pd.read_csv('/Users/joshpaul/epa-justice/repo/epa-justice/tbl/new_NCRPlaces_Census.csv')
# remove Adak and Eareckson Station from this list, since their coordinates do not return any data in NCR
droplist = ["Adak", "Eareckson Station"]
to_add = to_add[~to_add['name'].isin(droplist)]

# we also need to load the GVV community table to get IDs
# right now we load from dev branch:
gvv = pd.read_csv('https://raw.githubusercontent.com/ua-snap/geospatial-vector-veracity/epa-justice/vector_data/point/alaska_point_locations.csv')
# this should be on the main branch soon, so this URL will need to be updated to:
# https://raw.githubusercontent.com/ua-snap/geospatial-vector-veracity/main/vector_data/point/alaska_point_locations.csv


In [80]:
# look up the new GVV ids and populate the to_add table with them
for index, row in to_add.iterrows():
    id = gvv[gvv['name'] == row['name']]['id'].values[0]
    to_add.at[index, 'id'] = id

to_add

  to_add.at[index, 'id'] = id


Unnamed: 0,id,name,alt_name,region,country,latitude,longitude,type,GEOIDFQ,PLACENAME,AREATYPE,COMMENT
0,AK487,Susitna,,Alaska,US,61.578610,-150.609051,community,1600000US0274340,Susitna CDP,Census designated place,
2,AK488,Alcan Border,,Alaska,US,62.685631,-141.125333,community,1600000US0201390,Alcan Border CDP,Census designated place,
3,AK489,Aleneva,,Alaska,US,58.004623,-152.882483,community,1600000US0201560,Aleneva CDP,Census designated place,
4,AK490,Badger,,Alaska,US,64.805794,-147.403888,community,1600000US0205000,Badger CDP,Census designated place,
5,AK491,Bear Creek,,Alaska,US,60.183879,-149.388613,community,1600000US0205585,Bear Creek CDP,Census designated place,
...,...,...,...,...,...,...,...,...,...,...,...,...
67,AK552,Tolsona,,Alaska,US,62.099027,-146.044410,community,1600000US0278297,Tolsona CDP,Census designated place,
68,AK553,Trapper Creek,,Alaska,US,62.311386,-150.245805,community,1600000US0278680,Trapper Creek CDP,Census designated place,
69,AK554,Whitestone,,Alaska,US,64.153587,-145.886317,community,1600000US0284120,Whitestone CDP,Census designated place,
70,AK555,Whitestone Logging Camp,,Alaska,US,58.083142,-135.436672,community,1600000US0284200,Whitestone Logging Camp CDP,Census designated place,


In [81]:
# concatenate the additional places to the original table
df = pd.concat([tbl, to_add])
df

Unnamed: 0,id,name,alt_name,region,country,latitude,longitude,type,GEOIDFQ,PLACENAME,AREATYPE,COMMENT
0,BORO1,Aleutians East Borough,,,,,,borough,0500000US02013,Aleutians East Borough,County,
1,BORO10,City and Borough of Sitka,,,,,,borough,0500000US02220,Sitka City and Borough,County,
2,BORO11,Petersburg Borough,,,,,,borough,0500000US02195,Petersburg Borough,County,
3,BORO12,City and Borough of Wrangell,,,,,,borough,0500000US02275,Wrangell City and Borough,County,
4,BORO13,Matanuska-Susitna Borough,,,,,,borough,0500000US02170,Matanuska-Susitna Borough,County,
...,...,...,...,...,...,...,...,...,...,...,...,...
67,AK552,Tolsona,,Alaska,US,62.099027,-146.044410,community,1600000US0278297,Tolsona CDP,Census designated place,
68,AK553,Trapper Creek,,Alaska,US,62.311386,-150.245805,community,1600000US0278680,Trapper Creek CDP,Census designated place,
69,AK554,Whitestone,,Alaska,US,64.153587,-145.886317,community,1600000US0284120,Whitestone CDP,Census designated place,
70,AK555,Whitestone Logging Camp,,Alaska,US,58.083142,-135.436672,community,1600000US0284200,Whitestone Logging Camp CDP,Census designated place,


In [82]:
# check for errors

# dups?
print(any(df['id'].duplicated()))
print(any(df['name'].duplicated()))
# nans?
print(any(df['id'].isna()))
print(any(df['name'].isna()))

False
False
False
False


In [83]:
# save to CSV
# this CSV will need additional multi-row entries for Fairbanks, Anchorage, Juneau, etc 
# where there will be multiple GEOIDS per place - these may contain ZCTAs
# this CSV will also need additional single-row entries for certain communities with special situations (Eagle River, Sitka, etc)

df.to_csv('tbl/geoid_lookup.csv', index=False)