In [4]:
import pandas as pd
pd.set_option('display.width', 1000)
import geopandas as gpd
from pathlib import Path

In [5]:
dir = Path(".")

# list shapefiles by category and print the filenames
shp_dict = {}

shp_dict["birds"] = list(dir.glob("**/*Bird*.shp"))
shp_dict["mammals"] = list(dir.glob("**/*Mammal*.shp"))
shp_dict["amphibians"] = list(dir.glob("**/*Amphibian*.shp"))
shp_dict["SGCN"] = list(dir.glob("**/*SGCN Richness*.shp"))

count = 0

for category in shp_dict.keys():
    print(category, ":")
    for path in shp_dict[category]:
        print(path.name)
        count = count+1
    print("\n")
print(f"\nTotal of {count} shapefiles found.")

birds :
High Bird Richness.shp
Moderate Bird Richness.shp
Low Bird Richness.shp
SGCN Bird Richness.shp
Declining Birds Richness.shp
ModHigh Bird Richness.shp
High_Bird_Richness.shp
Moderate_Bird_Richness.shp
Declining_Birds_Richness.shp
SGCN_Bird_Richness.shp
Low_Bird_Richness.shp
ModHigh_Bird_Richness.shp


mammals :
Moderate Mammals Richness.shp
High Mammal Richness.shp
Low Mammal Richness.shp
SGCN Mammal Richness.shp
ModHigh_Mammal_Richness.shp
Declining Mammal Richness.shp
Low_Mammal_Richness.shp
SGCN_Mammal_Richness.shp
Moderate_Mammals_Richness.shp
Declining_Mammal_Richness.shp
ModHigh_Mammal_Richness.shp
High_Mammal_Richness.shp


amphibians :
Moderate Amphibian Richness.shp
ModHigh Amphibian Richness.shp
SGCN Amphibian Richness.shp
ModHigh_Amphibian_Richness.shp
Moderate_Amphibian_Richness.shp
SGCN_Amphibian_Richness.shp


SGCN :
SGCN Richness.shp



Total of 31 shapefiles found.


In [6]:
# we need to correct some of the bird species IDs
# use a dict with the incorrect ID as the key and correct ID as the value
# current as of 5/21/23, confirmed with Andy Baltensperger

id_dict = {
    "ameri_kest" : "am_kestrel",
    "am_pipit" : "amer_pipit",
    "am_redstrt" : "amer_redst",
    "bank_swall" : "bank_swalo",
    "bnk_swalow" : "bank_swalo",
    "barn_swall" : "barn_swalo",
    "brn_swalow" : "barn_swalo",
    "bb_sandpip" : "bb_sndpipr",
    "black_swif" : "blak_swift",
    "chba_chkde" : "cb_chicdee",
    "chp_sparow" : "ch_sparrow",
    "cmn_redpol" : "com_redpol",
    "gh_chickad" : "gh_chicade",
    "hering_gul" : "herng_gull",
    "her_thrush" : "hrmt_thrsh",
    "l_yelolegs" : "les_yelolg",
    "les_yelowl" : "les_yelolg",
    "ocr_warblr" : "oc_warbler",
    "os_flycatc" : "os_flyctch",
    "pac_wren" : "pacif_wren",
    "pine_siskn" : "pin_siskin",
    "ps_flyctch" : "ps_flycach",
    "red_phalar" : "red_phlrop",
    "rn_phalaro" : "rn_phalrop",
    "rst_blkbrd" : "ru_blckbrd",
    "ruf_huming" : "ruf_humbrd",
    "rw_blkbird" : "rw_blakbrd",
    "sb_dowitch" : "sb_dowichr", 
    "shrtearowl" : "shtear_owl",
    "shrter_owl" : "shtear_owl",
    "song_sparr" : "sng_sparow",
    "swa_thrush" : "swn_thrush",
    "tre_swalow" : "tree_swalo",
    "tree_swall" : "tree_swalo",
    "yb_loon" : "yelobil_ln",
    "yebil_loon" : "yelobil_ln",
    "yelo_wrblr" : "ye_warbler",
}



In [7]:
# create a function that will rename columns if they match keys in the dict

def rename_bird_species(gdf, id_dict, crs):
    for col in gdf.columns:
        try:
            new_col = id_dict[col]
            gdf.rename(columns={col : new_col}, inplace=True)
            # if columns are renamed, theres a chance for duplicate column names...
            # drop any dup columns by name, keeping the first record by default
            gdf = gpd.GeoDataFrame(gdf.T.drop_duplicates().T, geometry="geometry", crs=crs)
        except:
            pass
    return gdf

In [8]:
# create a function that will list all unique species in each category
# and also return a list of geodataframes by category (EPSG 3338)
# this is also where species names can be corrected!

def list_species_by_category(shp_dict, category):

    crs = "EPSG:3338"

    names = []
    polys = []
    for shp in shp_dict[category]:
        names.append(shp.name)
        polys.append(gpd.read_file(shp).to_crs(crs))        
    
    species = []
    polys_renamed = []
    for poly in polys:
        if category == "birds":
            poly_rn = rename_bird_species(poly, id_dict, crs)
            polys_renamed.append(poly_rn)
        else:
            polys_renamed.append(poly)
        for col in poly.columns:
            if col not in ['HUC_8', 'HUC_10', 'HUC_12', 'SppRichnes', 'geometry']:
                species.append(col)
                
    return list(set(species)), polys_renamed, names

In [9]:
# list em

birds, birds_gdfs, birds_shps = list_species_by_category(shp_dict, "birds")
mammals, mammals_gdfs, mammals_shps = list_species_by_category(shp_dict, "mammals")
amphibians, amphibians_gdfs, amphibians_shps = list_species_by_category(shp_dict, "amphibians")
sgcn, sgcn_gdfs, sgcn_shps = list_species_by_category(shp_dict, "SGCN")

In [10]:
# count em

print("Total unique bird species in shapefiles: ", len(birds))
print("Total unique mammal species in shapefiles: ", len(mammals))
print("Total unique amphibian species in shapefiles: ", len(amphibians))
print("Total unique terrestrial SGCN species in shapefiles: ", len(sgcn))
print("\n")
print("Sum of unique bird, mammal, amphibian species counts in shapefiles: ", (len(birds) + len(mammals) + len(amphibians)))
print("\n")
print("Total unique terrestrial SGCN species referenced in report: 268")

Total unique bird species in shapefiles:  196
Total unique mammal species in shapefiles:  45
Total unique amphibian species in shapefiles:  7
Total unique terrestrial SGCN species in shapefiles:  209


Sum of unique bird, mammal, amphibian species counts in shapefiles:  248


Total unique terrestrial SGCN species referenced in report: 268


In [11]:
# optionally, export species lists as CSVs for easier viewing / crosswalk creation
for species_list, name in zip([birds, mammals, amphibians, sgcn], ["birds", "mammals", "amphibians", "sgcn"]):
     pd.DataFrame(data=species_list, columns=['species']).sort_values(by='species').to_csv(f"tbl/{name}.csv", index=False)

In [12]:
# optionally, rewrite the shapefiles to incorporate any renamed columns
# these are the "clean" copies that should be hosted by SNAP 
export_dir = Path('/Users/joshpaul/species/export/to_host')

all_shps = birds_shps + mammals_shps + amphibians_shps + sgcn_shps
all_gdfs = birds_gdfs + mammals_gdfs + amphibians_gdfs + sgcn_gdfs

for shp, gdf in zip(all_shps, all_gdfs):
    outfile = Path.joinpath(export_dir, ("_").join(shp.split(" ")))
    print(f"Saving {outfile}...")
    gdf.to_file(outfile)


Saving /Users/joshpaul/species/export/to_host/High_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Low_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/SGCN_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Declining_Birds_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/ModHigh_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/High_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Declining_Birds_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/SGCN_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Low_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/ModHigh_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Mammals_Richness.shp...
Saving /Users/joshpaul/species/ex

In [13]:
print(sgcn)
for gdf, shp in zip(sgcn_gdfs, sgcn_shps): 
    print(shp)    
    print(gdf.head())

['par_auklet', 'ncol_lemng', 'woodpeewee', 'ru_blckbrd', 'alxdr_wolf', 'sb_dowichr', 'lngbl_dowi', 'lt_salamnd', 'no_wheater', 'ak_marmot', 'spec_eider', 'beard_seal', 'pi_grosbek', 'wandr_tatt', 'glauc_gull', 'blcp_chkde', 'st_sealion', 'pi_guilmot', 'bargd_shrw', 'woodfrog', 'se_sndpipr', 'west_toad', 'gyrfalcon', 'la_longspr', 'ruflg_hawk', 'root_vole', 'com_loon', 'lt_jaeger', 'sno_buntng', 'ciner_shrw', 'cb_chicdee', 'an_murelet', 'stel_eider', 'blak_swift', 'am_gld_plo', 'kingfisher', 'blk_scoter', 'sa_sparrow', 'nw_dermous', 'tree_swalo', 'al_ck_goos', 'ruf_humbrd', 'brant', 'ha_wdpeckr', 'comm_raven', 'hornd_lark', 'blk_trnstn', 'horned_puf', 'dusky_shrw', 'n_fur_seal', 'mbl_murlet', 'rn_grebe', 'nwsalamndr', 'peal_falcn', 'amer_redst', 'yelobil_ln', 'li_sparrow', 'sng_sparow', 'oc_warbler', 'bald_eagle', 'c_yelothrt', 'lesr_scaup', 'tft_puffin', 'aleut_tern', 'pahbr_seal', 'gc_sparrow', 'redlg_frog', 'sanderling', 'ww_scoter', 'w_scrchowl', 'sp_sndpipr', 'keen_mytis', 'rw_blak

In [14]:
print(birds)
for gdf, shp in zip(birds_gdfs, birds_shps): 
    print(shp)    
    print(gdf.head())

['par_auklet', 'woodpeewee', 'ru_blckbrd', 'gh_chickad', 'sb_dowichr', 'lngbl_dowi', 'ps_flyctch', 'no_wheater', 'spec_eider', 'tree_swall', 'pi_grosbek', 'wandr_tatt', 'glauc_gull', 'blcp_chkde', 'pi_guilmot', 'se_sndpipr', 'chp_sparow', 'gyrfalcon', 'la_longspr', 'ruflg_hawk', 'com_loon', 'bb_sandpip', 'lt_jaeger', 'black_swif', 'sno_buntng', 'cb_chicdee', 'an_murelet', 'stel_eider', 'blak_swift', 'am_gld_plo', 'kingfisher', 'blk_scoter', 'sa_sparrow', 'tree_swalo', 'al_ck_goos', 'bank_swall', 'ruf_humbrd', 'brant', 'ha_wdpeckr', 'comm_raven', 'hornd_lark', 'blk_trnstn', 'horned_puf', 'yelothroat', 'ruf_huming', 'mbl_murlet', 'rn_grebe', 'brn_swalow', 'rst_blkbrd', 'peal_falcn', 'amer_redst', 'yelobil_ln', 'li_sparrow', 'sng_sparow', 'oc_warbler', 'c_yelothrt', 'bald_eagle', 'lesr_scaup', 'tft_puffin', 'aleut_tern', 'gc_sparrow', 'yebil_loon', 'ww_scoter', 'sanderling', 'w_scrchowl', 'sp_sndpipr', 'tre_swalow', 'rw_blakbrd', 'l_yelolegs', 'os_flycatc', 'bl_gilemot', 'lea_auklet', 'hr

In [15]:
print(mammals)
for gdf, shp in zip(mammals_gdfs, mammals_shps): 
    print(shp)    
    print(gdf.head())

['n_fur_seal', 'taiga_vole', 'lole_mytis', 'ncol_lemng', 'sing_vole', 'sredbk_vol', 'nrdbck_vol', 'alxdr_wolf', 'sihair_bat', 'ak_marmot', 'ag_squirel', 'pahbr_seal', 'beard_seal', 'pac_walrus', 'ringd_seal', 'arctic_fox', 'keen_mytis', 'mjmp_mouse', 'st_sealion', 'bargd_shrw', 'nfl_squirl', 'snosh_hare', 'polar_bear', 'pygmy_shrw', 'hry_marmot', 'root_vole', 'harbr_seal', 'nbog_lemng', 'nbrn_lemng', 'ak_hare', 'red_squirl', 'tndra_shrw', 'ciner_shrw', 'amwtr_shrw', 'ca_myotis', 'meadow_vol', 'aktny_shrw', 'lilb_mytis', 'nw_dermous', 'lgtail_vol', 'woodchuck', 'colard_pik', 'bt_woodrat', 'dusky_shrw', 'spotd_seal']
Moderate Mammals Richness.shp
      HUC_8      HUC_10        HUC_12  nbog_lemng  st_sealion  woodchuck  SppRichnes                                           geometry
0  19030102  1903010215  190301021506           0           1          0           1  POLYGON ((-979806.523 441870.783, -979752.322 ...
1  19030102  1903010214  190301021406           0           1          0   

In [16]:
print(amphibians)
for gdf, shp in zip(amphibians_gdfs, amphibians_shps): 
    print(shp)    
    print(gdf.head())

['columbiasp', 'redleggedf', 'westerntoa', 'nwsalamand', 'woodfrog', 'roughskinn', 'longtoedsa']
Moderate Amphibian Richness.shp
      HUC_8      HUC_10        HUC_12  SppRichnes  roughskinn                                           geometry
0  19030102  1903010215  190301021506           0           0  POLYGON ((-979806.523 441870.783, -979752.322 ...
1  19030102  1903010214  190301021406           0           0  POLYGON ((-987830.627 440768.120, -988047.792 ...
2  19030102  1903010215  190301021505           0           0  POLYGON ((-969780.978 442087.182, -969798.483 ...
3  19030102  1903010214  190301021405           0           0  MULTIPOLYGON (((-991838.223 445056.182, -99188...
4  19030102  1903010214  190301021404           0           0  POLYGON ((-969970.132 455008.363, -969933.319 ...
ModHigh Amphibian Richness.shp
      HUC_8      HUC_10        HUC_12  woodfrog  westerntoa  columbiasp  longtoedsa  nwsalamand  SppRichnes                                           geometry
0  

In [17]:
# melt all gdfs together and return a long format table indexed by HUC and species type

def melt_gdfs_and_concatenate(gdfs, types):
    to_concat = []
    for gdf, type in zip(gdfs, types):
        gdf["type"] = type
        melted = pd.melt(gdf.drop(columns=["geometry", "SppRichnes"], axis=0), id_vars=["HUC_8", "HUC_10", "HUC_12", "type"])
        melted.drop_duplicates(inplace=True)
        to_concat.append(melted.where(melted["value"] != 0).dropna())
    
    return pd.concat(to_concat)
    

In [18]:
types = ["birds", "mammals", "amphibians"]
gdfs = [birds_gdfs[3], mammals_gdfs[3], amphibians_gdfs[2]] # full sgcn lists only, not using concern level for now

lookup_df = melt_gdfs_and_concatenate(gdfs, types).reset_index()
lookup_df

Unnamed: 0,index,HUC_8,HUC_10,HUC_12,type,variable,value
0,432,19020702,1902070216,190207021608,birds,al_ck_goos,1
1,14474,19010103,1901010302,190101030206,birds,al_flyctch,1
2,14477,19010103,1901010302,190101030203,birds,al_flyctch,1
3,14486,19010203,1901020321,190102032106,birds,al_flyctch,1
4,14487,19010204,1901020405,190102040501,birds,al_flyctch,1
...,...,...,...,...,...,...,...
1134489,87882,19010304,1901030402,190103040208,amphibians,longtoedsa,1.0
1134490,87894,19010304,1901030402,190103040201,amphibians,longtoedsa,1.0
1134491,87914,19010304,1901030401,190103040105,amphibians,longtoedsa,1.0
1134492,87935,19010304,1901030401,190103040102,amphibians,longtoedsa,1.0


In [19]:
# export the lookup df as csv (2 copies, one for repo and one for export)
lookup_df.rename(columns={"variable" : "species_ID"}, inplace=True)
lookup_df[["HUC_8",	"HUC_10", "HUC_12",	"type",	"species_ID"]].to_csv("tbl/huc_species_lookup.csv", index=False)
lookup_df[["HUC_8",	"HUC_10", "HUC_12",	"type",	"species_ID"]].to_csv("export/huc_species_lookup.csv", index=False)

# add a dummy geometry column and export as shapefile
lookup_df['lat'] = 64.84
lookup_df['lon'] = -147.72

lookup_gdf = gpd.GeoDataFrame(lookup_df[["HUC_8",	"HUC_10", "HUC_12",	"type",	"species_ID"]], geometry=gpd.points_from_xy(lookup_df.lon, lookup_df.lat), crs="EPSG:4326")
lookup_gdf.to_file('export/huc_species_lookup.shp')

In [20]:
lookup_gdf.head()

Unnamed: 0,HUC_8,HUC_10,HUC_12,type,species_ID,geometry
0,19020702,1902070216,190207021608,birds,al_ck_goos,POINT (-147.72000 64.84000)
1,19010103,1901010302,190101030206,birds,al_flyctch,POINT (-147.72000 64.84000)
2,19010103,1901010302,190101030203,birds,al_flyctch,POINT (-147.72000 64.84000)
3,19010203,1901020321,190102032106,birds,al_flyctch,POINT (-147.72000 64.84000)
4,19010204,1901020405,190102040501,birds,al_flyctch,POINT (-147.72000 64.84000)


In [23]:
# proof of concept function to pull species data given any HUC ID as input

def get_species_by_huc(huc, lookup_df, print_it=True):
    if len(huc) == 8:
        sub_df = lookup_df[lookup_df["HUC_8"]==huc]
    elif len(huc) == 10:
        sub_df = lookup_df[lookup_df["HUC_10"]==huc]
    elif len(huc) == 12:
        sub_df = lookup_df[lookup_df["HUC_12"]==huc]

    huc_birds = list(set(sub_df[sub_df["type"] == "birds"]["species_ID"].tolist()))
    huc_mammals = list(set(sub_df[sub_df["type"] == "mammals"]["species_ID"].tolist()))
    huc_amphibs = list(set(sub_df[sub_df["type"] == "amphibians"]["species_ID"].tolist()))

    if print_it == True:
        print(f"Modeled habitat data indicate that the following species of greatest conservation need are present in HUC {huc}:")
        print("\n")
        print(f"Birds: {huc_birds}")
        print("\n")
        print(f"Mammals: {huc_mammals}")
        print("\n")
        print(f"Amphibians: {huc_amphibs}")

    return huc_birds, huc_mammals, huc_amphibs

In [24]:
huc = '190103010713'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 190103010713:


Birds: ['woodpeewee', 'ru_blckbrd', 'sb_dowichr', 'lngbl_dowi', 'pi_grosbek', 'wandr_tatt', 'blcp_chkde', 'pi_guilmot', 'se_sndpipr', 'gyrfalcon', 'com_loon', 'sno_buntng', 'cb_chicdee', 'an_murelet', 'am_gld_plo', 'kingfisher', 'sa_sparrow', 'tree_swalo', 'ruf_humbrd', 'brant', 'ha_wdpeckr', 'comm_raven', 'mbl_murlet', 'peal_falcn', 'amer_redst', 'li_sparrow', 'sng_sparow', 'oc_warbler', 'c_yelothrt', 'bald_eagle', 'lesr_scaup', 'gc_sparrow', 'w_scrchowl', 'rw_blakbrd', 'hry_redpol', 'no_shrike', 'red_knot', 'arctc_tern', 'grgray_owl', 'to_warbler', 'whimbrel', 'ps_flycach', 'no_harrier', 'os_flyctch', 'boreal_owl', 'barn_swalo', 'per_falcon', 'nor_hawk_o', 'at_wdpeckr', 'ch_sparrow', 'no_flicker', 'red_crsbil', 'gldcrn_kin', 'wlsn_wrblr', 'bb_plover', 'snowy_owl', 'var_thrush', 'mew_gull', 'do_wdpeckr', 'glwi_gull', 'mac_warblr', 'bo_waxwing', 'les_yelolg', 'brown

In [25]:
huc = '1901030107'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 1901030107:


Birds: ['woodpeewee', 'ru_blckbrd', 'sb_dowichr', 'lngbl_dowi', 'pi_grosbek', 'wandr_tatt', 'blcp_chkde', 'pi_guilmot', 'se_sndpipr', 'gyrfalcon', 'com_loon', 'sno_buntng', 'cb_chicdee', 'an_murelet', 'am_gld_plo', 'kingfisher', 'sa_sparrow', 'tree_swalo', 'ruf_humbrd', 'brant', 'ha_wdpeckr', 'comm_raven', 'mbl_murlet', 'rn_grebe', 'peal_falcn', 'amer_redst', 'li_sparrow', 'sng_sparow', 'oc_warbler', 'c_yelothrt', 'bald_eagle', 'lesr_scaup', 'gc_sparrow', 'w_scrchowl', 'sp_sndpipr', 'rw_blakbrd', 'hry_redpol', 'no_shrike', 'red_knot', 'arctc_tern', 'grgray_owl', 'to_warbler', 'whimbrel', 'ps_flycach', 'no_harrier', 'os_flyctch', 'boreal_owl', 'barn_swalo', 'per_falcon', 'nor_hawk_o', 'at_wdpeckr', 'ch_sparrow', 'swn_thrush', 'no_flicker', 'red_crsbil', 'gldcrn_kin', 'wlsn_wrblr', 'bb_plover', 'snowy_owl', 'qchr_gshwk', 'var_thrush', 'mew_gull', 'do_wdpeckr', 'glwi_gul

In [26]:
huc = '19010301'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 19010301:


Birds: ['woodpeewee', 'ru_blckbrd', 'sb_dowichr', 'lngbl_dowi', 'pi_grosbek', 'wandr_tatt', 'blcp_chkde', 'pi_guilmot', 'se_sndpipr', 'gyrfalcon', 'com_loon', 'sno_buntng', 'cb_chicdee', 'an_murelet', 'am_gld_plo', 'kingfisher', 'sa_sparrow', 'tree_swalo', 'ruf_humbrd', 'brant', 'ha_wdpeckr', 'comm_raven', 'hornd_lark', 'mbl_murlet', 'rn_grebe', 'peal_falcn', 'amer_redst', 'li_sparrow', 'sng_sparow', 'oc_warbler', 'c_yelothrt', 'bald_eagle', 'lesr_scaup', 'gc_sparrow', 'w_scrchowl', 'sp_sndpipr', 'rw_blakbrd', 'hry_redpol', 'no_shrike', 'red_knot', 'arctc_tern', 'grgray_owl', 'to_warbler', 'whimbrel', 'ps_flycach', 'no_harrier', 'os_flyctch', 'boreal_owl', 'barn_swalo', 'per_falcon', 'nor_hawk_o', 'at_wdpeckr', 'ch_sparrow', 'swn_thrush', 'no_flicker', 'red_crsbil', 'gldcrn_kin', 'wlsn_wrblr', 'bb_plover', 'snowy_owl', 'qchr_gshwk', 'amer_pipit', 'var_thrush', 'bp_warbl