In [88]:
import pandas as pd
pd.set_option('display.width', 1000)
import geopandas as gpd
from pathlib import Path

In [89]:
dir = Path(".")

# list shapefiles by category and print the filenames
shp_dict = {}

shp_dict["birds"] = list(dir.glob("**/*Bird*.shp"))
shp_dict["mammals"] = list(dir.glob("**/*Mammal*.shp"))
shp_dict["amphibians"] = list(dir.glob("**/*Amphibian*.shp"))
shp_dict["SGCN"] = list(dir.glob("**/*SGCN Richness*.shp"))

count = 0

for category in shp_dict.keys():
    print(category, ":")
    for path in shp_dict[category]:
        print(path.name)
        count = count+1
    print("\n")
print(f"\nTotal of {count} shapefiles found.")

birds :
High Bird Richness.shp
Moderate Bird Richness.shp
Low Bird Richness.shp
SGCN Bird Richness.shp
Declining Birds Richness.shp
ModHigh Bird Richness.shp


mammals :
Moderate Mammals Richness.shp
High Mammal Richness.shp
Low Mammal Richness.shp
SGCN Mammal Richness.shp
ModHigh_Mammal_Richness.shp
Declining Mammal Richness.shp


amphibians :
Moderate Amphibian Richness.shp
ModHigh Amphibian Richness.shp
SGCN Amphibian Richness.shp


SGCN :
SGCN Richness.shp



Total of 16 shapefiles found.


In [90]:
# we need to correct some of the bird species IDs
# use a dict with the incorrect ID as the key and correct ID as the value
# current as of 5/21/23, confirmed with Andy Baltensperger

id_dict = {
    "ameri_kest" : "am_kestrel",
    "am_pipit" : "amer_pipit",
    "am_redstrt" : "amer_redst",
    "bank_swall" : "bank_swalo",
    "bnk_swalow" : "bank_swalo",
    "barn_swall" : "barn_swalo",
    "brn_swalow" : "barn_swalo",
    "bb_sandpip" : "bb_sndpipr",
    "black_swif" : "blak_swift",
    "chba_chkde" : "cb_chicdee",
    "chp_sparow" : "ch_sparrow",
    "cmn_redpol" : "com_redpol",
    "gh_chickad" : "gh_chicade",
    "hering_gul" : "herng_gull",
    "her_thrush" : "hrmt_thrsh",
    "l_yelolegs" : "les_yelolg",
    "les_yelowl" : "les_yelolg",
    "ocr_warblr" : "oc_warbler",
    "os_flycatc" : "os_flyctch",
    "pac_wren" : "pacif_wren",
    "pine_siskn" : "pin_siskin",
    "ps_flyctch" : "ps_flycach",
    "red_phalar" : "red_phlrop",
    "rn_phalaro" : "rn_phalrop",
    "rst_blkbrd" : "ru_blckbrd",
    "ruf_huming" : "ruf_humbrd",
    "rw_blkbird" : "rw_blakbrd",
    "sb_dowitch" : "sb_dowichr", 
    "shrtearowl" : "shtear_owl",
    "shrter_owl" : "shtear_owl",
    "song_sparr" : "sng_sparow",
    "swa_thrush" : "swn_thrush",
    "tre_swalow" : "tree_swalo",
    "tree_swall" : "tree_swalo",
    "yb_loon" : "yelobil_ln",
    "yebil_loon" : "yelobil_ln",
    "yelo_wrblr" : "ye_warbler",
}



In [91]:
# create a function that will rename columns if they match keys in the dict

def rename_bird_species(gdf, id_dict, crs):
    for col in gdf.columns:
        try:
            new_col = id_dict[col]
            gdf.rename(columns={col : new_col}, inplace=True)
            # if columns are renamed, theres a chance for duplicate column names...
            # drop any dup columns by name, keeping the first record by default
            gdf = gpd.GeoDataFrame(gdf.T.drop_duplicates().T, geometry="geometry", crs=crs)
        except:
            pass
    return gdf

In [92]:
# create a function that will list all unique species in each category
# and also return a list of geodataframes by category (EPSG 3338)
# this is also where species names can be corrected!

def list_species_by_category(shp_dict, category):

    crs = "EPSG:3338"

    names = []
    polys = []
    for shp in shp_dict[category]:
        names.append(shp.name)
        polys.append(gpd.read_file(shp).to_crs(crs))        
    
    species = []
    polys_renamed = []
    for poly in polys:
        if category == "birds":
            poly_rn = rename_bird_species(poly, id_dict, crs)
            polys_renamed.append(poly_rn)
        else:
            polys_renamed.append(poly)
        for col in poly.columns:
            if col not in ['HUC_8', 'HUC_10', 'HUC_12', 'SppRichnes', 'geometry']:
                species.append(col)
                
    return list(set(species)), polys_renamed, names

In [93]:
# list em

birds, birds_gdfs, birds_shps = list_species_by_category(shp_dict, "birds")
mammals, mammals_gdfs, mammals_shps = list_species_by_category(shp_dict, "mammals")
amphibians, amphibians_gdfs, amphibians_shps = list_species_by_category(shp_dict, "amphibians")
sgcn, sgcn_gdfs, sgcn_shps = list_species_by_category(shp_dict, "SGCN")

In [94]:
# count em

print("Total unique bird species in shapefiles: ", len(birds))
print("Total unique mammal species in shapefiles: ", len(mammals))
print("Total unique amphibian species in shapefiles: ", len(amphibians))
print("Total unique terrestrial SGCN species in shapefiles: ", len(sgcn))
print("\n")
print("Sum of unique bird, mammal, amphibian species counts in shapefiles: ", (len(birds) + len(mammals) + len(amphibians)))
print("\n")
print("Total unique terrestrial SGCN species referenced in report: 268")

Total unique bird species in shapefiles:  196
Total unique mammal species in shapefiles:  45
Total unique amphibian species in shapefiles:  7
Total unique terrestrial SGCN species in shapefiles:  209


Sum of unique bird, mammal, amphibian species counts in shapefiles:  248


Total unique terrestrial SGCN species referenced in report: 268


In [95]:
# optionally, export species lists as CSVs for easier viewing / crosswalk creation
for species_list, name in zip([birds, mammals, amphibians, sgcn], ["birds", "mammals", "amphibians", "sgcn"]):
     pd.DataFrame(data=species_list, columns=['species']).sort_values(by='species').to_csv(f"tbl/{name}.csv", index=False)

In [96]:
# optionally, rewrite the shapefiles to incorporate any renamed columns
# these are the "clean" copies that should be hosted by SNAP 
export_dir = Path('/Users/joshpaul/species/export/to_host')

all_shps = birds_shps + mammals_shps + amphibians_shps + sgcn_shps
all_gdfs = birds_gdfs + mammals_gdfs + amphibians_gdfs + sgcn_gdfs

for shp, gdf in zip(all_shps, all_gdfs):
    outfile = Path.joinpath(export_dir, ("_").join(shp.split(" ")))
    print(f"Saving {outfile}...")
    gdf.to_file(outfile)


Saving /Users/joshpaul/species/export/to_host/High_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Low_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/SGCN_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Declining_Birds_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/ModHigh_Bird_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Mammals_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/High_Mammal_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Low_Mammal_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/SGCN_Mammal_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/ModHigh_Mammal_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Declining_Mammal_Richness.shp...
Saving /Users/joshpaul/species/export/to_host/Moderate_Amphibian_Richness.shp...
Saving /Users/joshp

In [97]:
print(sgcn)
for gdf, shp in zip(sgcn_gdfs, sgcn_shps): 
    print(shp)    
    print(gdf.head())

['ww_scoter', 'yelobil_ln', 'no_flicker', 'no_shrike', 'nbog_lemng', 'am_kestrel', 'kingfisher', 'cre_auklet', 'st_sealion', 'gold_eagle', 'lngbl_dowi', 'snosh_hare', 'bb_wdpeckr', 'bt_woodrat', 'comm_murre', 'borl_chkde', 'les_yelolg', 'herng_gull', 'pe_cormrnt', 'horn_grebe', 'pahbr_seal', 'keen_mytis', 'nwsalamndr', 'at_wdpeckr', 'so_sndpipr', 'ag_squirel', 'amer_pipit', 'trmpt_swan', 'bl_gilemot', 'ch_sparrow', 'mbl_godwit', 'ak_marmot', 'sb_dowichr', 'blak_swift', 'bp_warbler', 'beard_seal', 'song_sparr', 'amer_redst', 'stel_eider', 'tft_puffin', 'ca_myotis', 'ringd_seal', 'rf_cormrnt', 'aktny_shrw', 'spotd_seal', 'to_warbler', 'peal_falcn', 'shtear_owl', 'her_thrush', 'pe_sndpipr', 'red_crsbil', 'arctc_loon', 'upld_sandp', 'w_scrchowl', 'ciner_shrw', 'red_knot', 'gc_rsyfnch', 'pi_grosbek', 'oc_warbler', 'al_ck_goos', 'blcp_chkde', 'no_wheater', 'hud_godwit', 'grgray_owl', 'pac_walrus', 'gyrfalcon', 'sa_sparrow', 'bargd_shrw', 'hrmt_thrsh', 'sing_vole', 'blklgd_kit', 'gh_chicade',

In [98]:
print(birds)
for gdf, shp in zip(birds_gdfs, birds_shps): 
    print(shp)    
    print(gdf.head())

['ww_scoter', 'yelobil_ln', 'no_flicker', 'no_shrike', 'am_kestrel', 'kingfisher', 'cre_auklet', 'rw_blkbird', 'gold_eagle', 'lngbl_dowi', 'bb_wdpeckr', 'comm_murre', 'borl_chkde', 'les_yelolg', 'herng_gull', 'pe_cormrnt', 'horn_grebe', 'so_sndpipr', 'at_wdpeckr', 'amer_pipit', 'trmpt_swan', 'bl_gilemot', 'ch_sparrow', 'shrtearowl', 'mbl_godwit', 'sb_dowichr', 'blak_swift', 'bb_sandpip', 'bp_warbler', 'les_yelowl', 'song_sparr', 'hering_gul', 'stel_eider', 'amer_redst', 'tft_puffin', 'rf_cormrnt', 'to_warbler', 'peal_falcn', 'shtear_owl', 'pe_sndpipr', 'red_crsbil', 'upld_sandp', 'arctc_loon', 'w_scrchowl', 'red_knot', 'gc_rsyfnch', 'pi_grosbek', 'oc_warbler', 'yelothroat', 'al_ck_goos', 'blcp_chkde', 'no_wheater', 'hud_godwit', 'grgray_owl', 'barn_swall', 'rn_phalaro', 'gyrfalcon', 'sa_sparrow', 'hrmt_thrsh', 'sav_sparow', 'blklgd_kit', 'gh_chicade', 'brant', 'com_loon', 'pine_siskn', 'gldcrn_kin', 'tre_sparow', 'pac_goldnp', 'hry_redpol', 'nor_hawk_o', 'thbi_murre', 'tre_swalow', 'ru

In [99]:
print(mammals)
for gdf, shp in zip(mammals_gdfs, mammals_shps): 
    print(shp)    
    print(gdf.head())

['pac_walrus', 'arctic_fox', 'bargd_shrw', 'sing_vole', 'taiga_vole', 'nbog_lemng', 'mjmp_mouse', 'st_sealion', 'snosh_hare', 'meadow_vol', 'harbr_seal', 'bt_woodrat', 'lole_mytis', 'sihair_bat', 'sredbk_vol', 'nbrn_lemng', 'nrdbck_vol', 'tndra_shrw', 'nfl_squirl', 'root_vole', 'pahbr_seal', 'keen_mytis', 'ag_squirel', 'red_squirl', 'hry_marmot', 'ak_hare', 'ak_marmot', 'alxdr_wolf', 'lgtail_vol', 'ncol_lemng', 'n_fur_seal', 'beard_seal', 'nw_dermous', 'ca_myotis', 'lilb_mytis', 'ringd_seal', 'aktny_shrw', 'spotd_seal', 'dusky_shrw', 'pygmy_shrw', 'woodchuck', 'amwtr_shrw', 'ciner_shrw', 'colard_pik', 'polar_bear']
Moderate Mammals Richness.shp
      HUC_8      HUC_10        HUC_12  nbog_lemng  st_sealion  woodchuck  SppRichnes                                           geometry
0  19030102  1903010215  190301021506           0           1          0           1  POLYGON ((-979806.523 441870.783, -979752.322 ...
1  19030102  1903010214  190301021406           0           1          0   

In [100]:
print(amphibians)
for gdf, shp in zip(amphibians_gdfs, amphibians_shps): 
    print(shp)    
    print(gdf.head())

['redleggedf', 'westerntoa', 'nwsalamand', 'roughskinn', 'longtoedsa', 'columbiasp', 'woodfrog']
Moderate Amphibian Richness.shp
      HUC_8      HUC_10        HUC_12  SppRichnes  roughskinn                                           geometry
0  19030102  1903010215  190301021506           0           0  POLYGON ((-979806.523 441870.783, -979752.322 ...
1  19030102  1903010214  190301021406           0           0  POLYGON ((-987830.627 440768.120, -988047.792 ...
2  19030102  1903010215  190301021505           0           0  POLYGON ((-969780.978 442087.182, -969798.483 ...
3  19030102  1903010214  190301021405           0           0  MULTIPOLYGON (((-991838.223 445056.182, -99188...
4  19030102  1903010214  190301021404           0           0  POLYGON ((-969970.132 455008.363, -969933.319 ...
ModHigh Amphibian Richness.shp
      HUC_8      HUC_10        HUC_12  woodfrog  westerntoa  columbiasp  longtoedsa  nwsalamand  SppRichnes                                           geometry
0  

In [101]:
# melt all gdfs together and return a long format table indexed by HUC and species type

def melt_gdfs_and_concatenate(gdfs, types):
    to_concat = []
    for gdf, type in zip(gdfs, types):
        gdf["type"] = type
        melted = pd.melt(gdf.drop(columns=["geometry", "SppRichnes"], axis=0), id_vars=["HUC_8", "HUC_10", "HUC_12", "type"])
        melted.drop_duplicates(inplace=True)
        to_concat.append(melted.where(melted["value"] != 0).dropna())
    
    return pd.concat(to_concat)
    

In [102]:
types = ["birds", "mammals", "amphibians"]
gdfs = [birds_gdfs[3], mammals_gdfs[3], amphibians_gdfs[2]] # full sgcn lists only, not using concern level for now

lookup_df = melt_gdfs_and_concatenate(gdfs, types).reset_index()
lookup_df

Unnamed: 0,index,HUC_8,HUC_10,HUC_12,type,variable,value
0,432,19020702,1902070216,190207021608,birds,al_ck_goos,1
1,14474,19010103,1901010302,190101030206,birds,al_flyctch,1
2,14477,19010103,1901010302,190101030203,birds,al_flyctch,1
3,14486,19010203,1901020321,190102032106,birds,al_flyctch,1
4,14487,19010204,1901020405,190102040501,birds,al_flyctch,1
...,...,...,...,...,...,...,...
1134489,87882,19010304,1901030402,190103040208,amphibians,longtoedsa,1.0
1134490,87894,19010304,1901030402,190103040201,amphibians,longtoedsa,1.0
1134491,87914,19010304,1901030401,190103040105,amphibians,longtoedsa,1.0
1134492,87935,19010304,1901030401,190103040102,amphibians,longtoedsa,1.0


In [103]:
# export the lookup df as csv
lookup_df.to_csv("export/huc_species_lookup.csv", index=False)

# add a dummy geometry column and export as shapefile
lookup_df['lat'] = 64.84
lookup_df['lon'] = -147.72

lookup_gdf = gpd.GeoDataFrame(lookup_df, geometry=gpd.points_from_xy(lookup_df.lon, lookup_df.lat), crs="EPSG:4326")
lookup_gdf.to_file('export/huc_species_lookup.shp')

In [104]:
lookup_gdf.head()

Unnamed: 0,index,HUC_8,HUC_10,HUC_12,type,variable,value,lat,lon,geometry
0,432,19020702,1902070216,190207021608,birds,al_ck_goos,1,64.84,-147.72,POINT (-147.72000 64.84000)
1,14474,19010103,1901010302,190101030206,birds,al_flyctch,1,64.84,-147.72,POINT (-147.72000 64.84000)
2,14477,19010103,1901010302,190101030203,birds,al_flyctch,1,64.84,-147.72,POINT (-147.72000 64.84000)
3,14486,19010203,1901020321,190102032106,birds,al_flyctch,1,64.84,-147.72,POINT (-147.72000 64.84000)
4,14487,19010204,1901020405,190102040501,birds,al_flyctch,1,64.84,-147.72,POINT (-147.72000 64.84000)


In [105]:
# proof of concept function to pull species data given any HUC ID as input

def get_species_by_huc(huc, lookup_df, print_it=True):
    if len(huc) == 8:
        sub_df = lookup_df[lookup_df["HUC_8"]==huc]
    elif len(huc) == 10:
        sub_df = lookup_df[lookup_df["HUC_10"]==huc]
    elif len(huc) == 12:
        sub_df = lookup_df[lookup_df["HUC_12"]==huc]

    huc_birds = list(set(sub_df[sub_df["type"] == "birds"]["variable"].tolist()))
    huc_mammals = list(set(sub_df[sub_df["type"] == "mammals"]["variable"].tolist()))
    huc_amphibs = list(set(sub_df[sub_df["type"] == "amphibians"]["variable"].tolist()))

    if print_it == True:
        print(f"Modeled habitat data indicate that the following species of greatest conservation need are present in HUC {huc}:")
        print("\n")
        print(f"Birds: {huc_birds}")
        print("\n")
        print(f"Mammals: {huc_mammals}")
        print("\n")
        print(f"Amphibians: {huc_amphibs}")

    return huc_birds, huc_mammals, huc_amphibs

In [106]:
huc = '190103010713'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 190103010713:


Birds: ['no_flicker', 'no_shrike', 'kingfisher', 'gold_eagle', 'lngbl_dowi', 'les_yelolg', 'herng_gull', 'at_wdpeckr', 'trmpt_swan', 'ch_sparrow', 'sb_dowichr', 'amer_redst', 'to_warbler', 'peal_falcn', 'shtear_owl', 'w_scrchowl', 'red_crsbil', 'red_knot', 'pi_grosbek', 'oc_warbler', 'blcp_chkde', 'grgray_owl', 'gyrfalcon', 'sa_sparrow', 'hrmt_thrsh', 'brant', 'com_loon', 'gldcrn_kin', 'pac_goldnp', 'hry_redpol', 'nor_hawk_o', 'rubcr_king', 'com_redpol', 'per_falcon', 'whimbrel', 'c_yelothrt', 'sng_sparow', 'al_flyctch', 'cb_chicdee', 'lesr_scaup', 'cas_auklet', 'surfbird', 'an_murelet', 'arctc_tern', 'kit_murlet', 'do_wdpeckr', 'bb_plover', 'boreal_owl', 'mbl_murlet', 'comm_raven', 'pacif_wren', 'ha_wdpeckr', 'wandr_tatt', 'se_sndpipr', 'li_sparrow', 'am_gld_plo', 'ru_blckbrd', 'blk_oystca', 'grwf_goose', 'pacif_loon', 'ps_flycach', 'ye_warbler', 'mac_warblr', 'fox

In [107]:
huc = '1901030107'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 1901030107:


Birds: ['no_flicker', 'no_shrike', 'kingfisher', 'gold_eagle', 'lngbl_dowi', 'bb_wdpeckr', 'les_yelolg', 'herng_gull', 'at_wdpeckr', 'trmpt_swan', 'ch_sparrow', 'sb_dowichr', 'amer_redst', 'to_warbler', 'peal_falcn', 'shtear_owl', 'w_scrchowl', 'red_crsbil', 'red_knot', 'pi_grosbek', 'oc_warbler', 'blcp_chkde', 'grgray_owl', 'gyrfalcon', 'sa_sparrow', 'hrmt_thrsh', 'brant', 'com_loon', 'gldcrn_kin', 'pac_goldnp', 'hry_redpol', 'nor_hawk_o', 'rubcr_king', 'com_redpol', 'per_falcon', 'whimbrel', 'c_yelothrt', 'sng_sparow', 'al_flyctch', 'cb_chicdee', 'lesr_scaup', 'cas_auklet', 'surfbird', 'an_murelet', 'arctc_tern', 'kit_murlet', 'do_wdpeckr', 'bb_plover', 'boreal_owl', 'mbl_murlet', 'comm_raven', 'pacif_wren', 'ha_wdpeckr', 'wandr_tatt', 'se_sndpipr', 'li_sparrow', 'am_gld_plo', 'ru_blckbrd', 'blk_oystca', 'grwf_goose', 'pacif_loon', 'ps_flycach', 'ye_warbler', 'mac_w

In [108]:
huc = '19010301'
huc_birds_, huc_mammals_, huc_amphibs_ = get_species_by_huc(huc, lookup_df)

Modeled habitat data indicate that the following species of greatest conservation need are present in HUC 19010301:


Birds: ['no_flicker', 'no_shrike', 'kingfisher', 'gold_eagle', 'lngbl_dowi', 'bb_wdpeckr', 'les_yelolg', 'herng_gull', 'horn_grebe', 'at_wdpeckr', 'amer_pipit', 'trmpt_swan', 'ch_sparrow', 'sb_dowichr', 'bp_warbler', 'amer_redst', 'to_warbler', 'peal_falcn', 'shtear_owl', 'w_scrchowl', 'red_crsbil', 'red_knot', 'pi_grosbek', 'oc_warbler', 'blcp_chkde', 'grgray_owl', 'gyrfalcon', 'sa_sparrow', 'hrmt_thrsh', 'brant', 'com_loon', 'gldcrn_kin', 'pac_goldnp', 'hry_redpol', 'nor_hawk_o', 'rubcr_king', 'com_redpol', 'per_falcon', 'whimbrel', 'c_yelothrt', 'sng_sparow', 'al_flyctch', 'cb_chicdee', 'lesr_scaup', 'cas_auklet', 'surfbird', 'an_murelet', 'arctc_tern', 'kit_murlet', 'do_wdpeckr', 'bb_plover', 'boreal_owl', 'mbl_murlet', 'comm_raven', 'pacif_wren', 'ha_wdpeckr', 'wandr_tatt', 'se_sndpipr', 'li_sparrow', 'am_gld_plo', 'ru_blckbrd', 'blk_oystca', 'grwf_goose', 'pacif_l