Once we have the best available assertion of what FWS considers the appropriate ITIS identifier for as many species as possible (via ECOS scraping and TESS search), we can consult the ITIS API to retrieve information and cache it for later processing. This is done via the ITIS module in the bispy package, which uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [1]:
import requests
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed

# ITIS client from bispy; its search method is used below to look up each TSN or name
itis = bispy.itis.Itis()
# bispy utilities; doc_cache is used below to write the retrieved documents to a cache file
bis_utils = bispy.bis.Utils()

In [2]:
# Load the cached workplan species records from disk
with open("cache/workplan_species.json", "r") as f:
    # json.load parses directly from the open file handle
    workplan_species = json.load(f)

In [3]:
# The ITIS search function is intelligent enough to determine if a TSN is provided as search criteria.
# Build one search list: known ITIS TSNs first, then the lookup name for every record that has no TSN.
tsn_list = [r["ITIS TSN"] for r in workplan_species if r["ITIS TSN"] is not None]
names_without_tsns = [r["Lookup Name"] for r in workplan_species if r["ITIS TSN"] is None]
# Concatenate into a new list instead of aliasing tsn_list and extending it in place,
# so that tsn_list continues to hold only TSNs.
itis_search_list = tsn_list + names_without_tsns

In [4]:
%%time
# Use joblib to run multiple requests for ITIS documents in parallel via known ITIS TSNs
itis_cache = Parallel(n_jobs=8)(delayed(itis.search)(name_or_tsn) for name_or_tsn in itis_search_list)

CPU times: user 1.05 s, sys: 124 ms, total: 1.17 s
Wall time: 1min 8s


In [6]:
# Write the retrieved documents to the cache file, then display whatever doc_cache
# returns (in the captured output below: the cache file name plus one sample document)
display(
    bis_utils.doc_cache("cache/itis.json", itis_cache)
)

{'Doc Cache File': 'cache/itis.json',
 'Document Number 148': {'Processing Metadata': {'Date Processed': '2019-07-03T21:50:19.063333',
   'Detailed Results': [{'Exact Match': 'http://services.itis.gov/?wt=json&rows=10&q=tsn:36804'}],
   'Status': 'Exact Match',
   'Summary Result': 'Exact Match'},
  'itisData': [{'cacheDate': '2019-07-03T21:50:50.085684',
    'commonnames': [{'language': 'English',
      'name': 'purpledisk honeycombhead'}],
    'createDate': '1996-06-13 14:51:08',
    'hierarchy': ['Plantae',
     'Viridiplantae',
     'Streptophyta',
     'Embryophyta',
     'Tracheophyta',
     'Spermatophytina',
     'Magnoliopsida',
     'Asteranae',
     'Asterales',
     'Asteraceae',
     'Balduina',
     'Balduina atropurpurea'],
    'kingdom': 'Plantae',
    'nameWInd': 'Balduina atropurpurea',
    'nameWOInd': 'Balduina atropurpurea',
    'parentTSN': '36803',
    'rank': 'Species',
    'synonymTSNs': ['36804:$512874$'],
    'synonyms': ['36804:$Endorima atropurpurea$'],
   