A survey of USGS Science Centers and university Cooperative Research Units was conducted to identify where USGS research addresses issues associated with species on the work plan. The survey results are captured in the source spreadsheet. This notebook extracts a summary of that knowledge to serve as an additional source of information in the dataset.

In [1]:
import requests
import pandas as pd
import numpy as np
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import json
import random

In [2]:
# Name each source workbook once so the reads below cannot drift apart if a
# file is renamed or replaced by a newer dated export.
PRELISTING_WORKBOOK = "sources/Prelisting Science USGS Master_19Mar2018.xlsx"
ADDITIONAL_SOURCE_WORKBOOK = "sources/AdditionalSourceData.xlsx"

# Open the survey workbook a single time and parse every sheet from the same
# handle, rather than re-opening and re-parsing the file for each sheet.
with pd.ExcelFile(PRELISTING_WORKBOOK) as prelisting_xlsx:
    # Only the first 21 data rows of this sheet are read.
    usgs_sc_summary = pd.read_excel(
        prelisting_xlsx,
        sheet_name="Center_Information",
        nrows=21,
    )
    # The first spreadsheet row is skipped; only column C and columns I
    # through AA are kept.
    usgs_survey_responses = pd.read_excel(
        prelisting_xlsx,
        sheet_name="USGS_Responses",
        usecols="C,I:AA",
        skiprows=1,
    )
    # Only column C and columns H through K are kept.
    cru_survey_responses = pd.read_excel(
        prelisting_xlsx,
        sheet_name="CRU_responses",
        usecols="C,H:K",
    )
    # This sheet is read without a header row: the first three rows are
    # skipped and columns A:B are given explicit names. The sheet name is
    # spelled exactly as passed in the original code ("Capabilites").
    general_capabilities = pd.read_excel(
        prelisting_xlsx,
        sheet_name="USGS General Capabilites",
        skiprows=3,
        header=None,
        usecols="A:B",
        names=["Center", "Guild/Expertise"],
    )

# Table of organizations whose records are fetched later in the notebook.
usgs_org_links = pd.read_excel(
    ADDITIONAL_SOURCE_WORKBOOK,
    sheet_name="USGS Org Links",
)


In [3]:
def get_sb_org(acronym, url, timeout=30):
    """Fetch one organization record as JSON and tag it with its acronym.

    Parameters
    ----------
    acronym : str
        Short identifier stored on the returned record under the
        ``"acronym"`` key.
    url : str
        Address of the organization record. ``format=json`` is sent as a
        query parameter so the service responds with JSON.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang a worker indefinitely. Defaults to 30.

    Returns
    -------
    dict
        The decoded JSON response with an added ``"acronym"`` entry
        (assumes the service returns a JSON object -- TODO confirm).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        For connection problems or timeouts.
    """
    # Passing the query through `params` lets requests build the URL
    # correctly even when `url` already carries a query string.
    response = requests.get(url, params={"format": "json"}, timeout=timeout)
    # Fail loudly on error responses instead of caching an error payload
    # (or hitting an opaque JSON decode error) as if it were a record.
    response.raise_for_status()
    sb_result = response.json()
    sb_result["acronym"] = acronym
    return sb_result

In [4]:
# Use joblib (8 workers) to fetch the organization records in parallel.
# Each itertuples() row is (index, first column, second column, ...); the
# first two data columns are handed to get_sb_org as (acronym, url).
fetch_jobs = (
    delayed(get_sb_org)(acronym, url)
    for _index, acronym, url, *_rest in usgs_org_links.itertuples()
)
org_cache = Parallel(n_jobs=8)(fetch_jobs)

In [5]:
# Persist the fetched organization records as indented JSON so later runs
# can reuse them from disk.
with open("cache/usgs_orgs.json", 'w') as cache_file:
    json.dump(org_cache, cache_file, indent=4)

In [6]:
# Reload the cached JSON file to confirm it round-trips, then report how
# many records it holds and show one picked at random.
with open("cache/usgs_orgs.json", "r") as cache_file:
    org_cache = json.load(cache_file)

print(len(org_cache))
sample_position = random.randrange(len(org_cache))
display(org_cache[sample_position])

42


{'_classSimpleName': 'UsgsOrganization',
 'acronym': 'VACFWRU',
 'active': True,
 'aliases': [{'annotation': 'old FFS Name',
   'id': 743,
   'name': 'VIRGINIA COOP F&W UNIT'},
  {'annotation': 'FBMS Name (and old FPPS)',
   'id': 129,
   'name': 'VIRGINIA CF & WRU'}],
 'description': 'Virginia Cooperative Fish and Wildlife Research Unit',
 'displayText': 'Virginia Cooperative Fish and Wildlife Research Unit',
 'extensions': {'usgsOrganization': {'fbmsCode': 'GGHEBD1700',
   'fbmsName': 'VIRGINIA CF & WRU',
   'oldFfsName': 'VIRGINIA COOP F&W UNIT',
   'oldFppsCode': '133170',
   'oldFppsName': 'VIRGINIA CF & WRU',
   'oldOrgCode': '13524450',
   'orgCode': 'GGHEBD1700'}},
 'fbmsCodes': [],
 'id': 17067,
 'link': {'href': 'https://www.sciencebase.gov/directory/organization/17067',
  'rel': 'self',
  'url': 'https://www.sciencebase.gov/directory/organization/17067'},
 'links': [{'rel': 'self',
   'url': 'https://www.sciencebase.gov/directory/organization/17067'}],
 'logoUrl': 'http://my