Once we have the best available assertion of which ITIS identifier FWS considers appropriate for as many species as possible (via ECOS scraping and TESS search), we can consult the ITIS API to retrieve information and cache it for later processing. This is done via the ITIS module in the bispy package, which uses various search mechanisms against ITIS and slightly repackages the returned data for use.

In [1]:
import requests
import json
import bispy
from IPython.display import display
from joblib import Parallel, delayed
import random

# ITIS helper from the bispy package; its search() method is called further
# down for every TSN or scientific name that needs an ITIS lookup.
itis = bispy.itis.Itis()

In [2]:
# Load the previously cached workplan species records from disk
with open("cache/workplan_species.json", "r") as species_file:
    workplan_species = json.load(species_file)

In [3]:
# Split the workplan records into two lists: the TSNs already known from ECOS
# scraping, and the scientific names of the records that have no TSN yet.
tsn_list = [r["ITIS TSN"] for r in workplan_species if r["ITIS TSN"] is not None]
names_without_tsns = [r["Scientific Name"] for r in workplan_species if r["ITIS TSN"] is None]

# Build the combined search list as a NEW list. Assigning tsn_list directly and
# then calling extend() would alias it, appending the names to tsn_list as well.
itis_search_list = tsn_list + names_without_tsns


In [4]:
# Use joblib to run the ITIS searches in parallel (8 jobs); the search list
# holds both known ITIS TSNs and scientific names that have no TSN.
search_jobs = (delayed(itis.search)(search_term) for search_term in itis_search_list)
itis_cache = Parallel(n_jobs=8)(search_jobs)

In [5]:
# Persist the ITIS search results as indented JSON in the cache file
with open("cache/itis.json", "w") as cache_file:
    json.dump(itis_cache, cache_file, indent=4)

In [6]:
# Read the cache file back in to confirm it round-trips through JSON
with open("cache/itis.json", "r") as cache_file:
    itis_cache = json.load(cache_file)

# Report how many records were cached, then show one picked at random
record_count = len(itis_cache)
print(record_count)
sample_index = random.randint(0, record_count - 1)
display(itis_cache[sample_index])

363


{'Processing Metadata': {'Date Processed': '2019-07-01T18:46:49.465344',
  'Detailed Results': [{'Exact Match': 'http://services.itis.gov/?wt=json&rows=10&q=tsn:178684'}],
  'Status': 'Exact Match',
  'Summary Result': 'Exact Match'},
 'itisData': [{'cacheDate': '2019-07-01T18:46:57.597814',
   'commonnames': [{'language': 'English', 'name': 'Tinian Monarch'}],
   'createDate': '1996-06-13 14:51:08',
   'hierarchy': ['Animalia',
    'Bilateria',
    'Deuterostomia',
    'Chordata',
    'Vertebrata',
    'Gnathostomata',
    'Tetrapoda',
    'Aves',
    'Passeriformes',
    'Monarchidae',
    'Monarcha',
    'Monarcha takatsukasae'],
   'kingdom': 'Animalia',
   'nameWInd': 'Monarcha takatsukasae',
   'nameWOInd': 'Monarcha takatsukasae',
   'parentTSN': '178682',
   'rank': 'Species',
   'taxonomy': [{'name': 'Animalia', 'rank': 'Kingdom'},
    {'name': 'Bilateria', 'rank': 'Subkingdom'},
    {'name': 'Deuterostomia', 'rank': 'Infrakingdom'},
    {'name': 'Chordata', 'rank': 'Phylum'},