In [93]:
import requests, json
import random, scipy
from scipy.optimize import curve_fit
import numpy as np

In [94]:
# --- GBIF occurrence-search configuration ---------------------------------

# Endpoint queried for occurrence records.
API_URL = 'https://api.gbif.org/v1/occurrence/search/'

# Dataset to restrict the search to: iNaturalist research-grade observations.
DATASETKEY = '50c9509d-22c7-4a22-a47d-8c48425ef4a7'

# Taxon label -> taxon key sent to the API; one query series is run per entry.
TAXONKEYS = {
    'Arthropoda': '54',
    'Aves': '212',
    'Tracheophyta': '7707728',
}

# First and last year of the observation window (sent as "year=START,END").
STARTYEAR, ENDYEAR = '2016', '2021'

# Page size: number of records requested per call, and the offset step between calls.
LIMIT = 300

In [107]:
# Number of curve-fit asymptote estimates collected per taxon; the estimation cell
# re-randomizes the observation order for each one and averages the results.
NUM_CURVEFITS = 200

In [100]:
# Sample search geometry: a rectangle that includes San Jose, Costa Rica.
# The geometry is passed to the API as a WKT polygon string: a single closed ring
# (first and last vertex identical) of "x y" pairs -- presumably "longitude latitude";
# confirm against the GBIF geometry parameter documentation before changing it.

geom = 'POLYGON((-84.41345 9.59533,-83.68835 9.59533,-83.68835 10.24465,-84.41345 10.24465,-84.41345 9.59533))'

In [101]:
# Get observation records from GBIF
#
# For every taxon in TAXONKEYS, page through the occurrence-search endpoint
# (LIMIT records per request) until the API reports 'endOfRecords', and keep the
# species name and coordinates of every record that has a 'species' field.
#
# The query is passed via `params=` so that requests URL-encodes it; the WKT
# geometry contains spaces, commas and parentheses that must not be sent raw.

REQUEST_TIMEOUT_S = 60   # fail instead of hanging forever on a stalled connection

observations = {taxon: [] for taxon in TAXONKEYS}

for taxon, taxon_key in TAXONKEYS.items():
    offset = 0
    while True:
        query = {
            'dataset_key': DATASETKEY,
            'taxon_key': taxon_key,
            'year': '{0},{1}'.format(STARTYEAR, ENDYEAR),   # inclusive year range
            'geometry': geom,
            'limit': LIMIT,
            'offset': offset,
            'hasCoordinate': 'true',
        }
        resp = requests.get(API_URL, params=query, timeout=REQUEST_TIMEOUT_S)
        resp.raise_for_status()   # surface HTTP errors here rather than as a KeyError below
        results = resp.json()
        print('{0}: {1}/{2}'.format(taxon, results['offset'], results['count']))

        # Records identified only above species level carry no 'species' key; skip them.
        observations[taxon] += [{
            'species': record['species'],
            'lat': record['decimalLatitude'],
            'lon': record['decimalLongitude'],
        } for record in results['results'] if 'species' in record]

        # Stop on the last page; also stop on an empty page so that a response which
        # never sets 'endOfRecords' cannot cause an endless loop.
        if results['endOfRecords'] or not results['results']:
            break
        offset += LIMIT

Arthropoda: 0/5733
Arthropoda: 300/5733
Arthropoda: 600/5733
Arthropoda: 900/5733
Arthropoda: 1200/5733
Arthropoda: 1500/5733
Arthropoda: 1800/5733
Arthropoda: 2100/5733
Arthropoda: 2400/5733
Arthropoda: 2700/5733
Arthropoda: 3000/5733
Arthropoda: 3300/5733
Arthropoda: 3600/5733
Arthropoda: 3900/5733
Arthropoda: 4200/5733
Arthropoda: 4500/5733
Arthropoda: 4800/5733
Arthropoda: 5100/5733
Arthropoda: 5400/5733
Arthropoda: 5700/5733
Aves: 0/7240
Aves: 300/7240
Aves: 600/7240
Aves: 900/7240
Aves: 1200/7240
Aves: 1500/7240
Aves: 1800/7240
Aves: 2100/7240
Aves: 2400/7240
Aves: 2700/7240
Aves: 3000/7240
Aves: 3300/7240
Aves: 3600/7240
Aves: 3900/7240
Aves: 4200/7240
Aves: 4500/7240
Aves: 4800/7240
Aves: 5100/7240
Aves: 5400/7240
Aves: 5700/7240
Aves: 6000/7240
Aves: 6300/7240
Aves: 6600/7240
Aves: 6900/7240
Aves: 7200/7240
Tracheophyta: 0/4342
Tracheophyta: 300/4342
Tracheophyta: 600/4342
Tracheophyta: 900/4342
Tracheophyta: 1200/4342
Tracheophyta: 1500/4342
Tracheophyta: 1800/4342
Tracheophy

In [108]:
# Estimate species counts by estimating the asymptote of the species-accumulation
# curve (SAC) created when the observation order is randomized.
# Different observation orders give different curves, so the final estimate is the
# average over NUM_CURVEFITS successful curve fits.

MIN_SAC_POINTS = 6                               # curves with 5 or fewer points are not fitted
MAX_FIT_TRIES = max(1000, 5 * NUM_CURVEFITS)     # attempt budget per taxon (1000 for 200 fits)


def _sac_model(x, a, b, c):
    """Curve fitted to the SAC: -(a*exp(-b*x) + c).

    For b > 0 the curve tends to -c as x grows, so the fitted ``c`` is the
    negated asymptote (the sign is flipped back when the estimate is reported).
    """
    return -((a * np.exp(-b * x)) + c)


def _species_accumulation_curve(species_sequence):
    """Return the running count of distinct species after each observation.

    Element k-1 is the number of unique species among the first k observations.
    The result has one point per observation (including the last one) and is
    built in a single pass with a running set.
    """
    seen = set()
    curve = []
    for species in species_sequence:
        seen.add(species)
        curve.append(len(seen))
    return curve


def _fit_asymptote_params(species, num_fits, max_tries):
    """Collect ``num_fits`` fitted ``c`` parameters from randomized SACs.

    Each attempt shuffles the observations, builds the accumulation curve and
    fits ``_sac_model`` to it. Attempts whose fit fails are skipped.

    Returns the list of fitted ``c`` values, or None when there are too few
    observations or ``max_tries`` attempts did not yield ``num_fits`` fits.
    """
    shuffled = list(species)            # work on a copy; the caller's list is not reordered
    if len(shuffled) < MIN_SAC_POINTS:
        return None

    x_values = np.arange(1, len(shuffled) + 1)
    fitted = []
    for _ in range(max_tries):
        if len(fitted) >= num_fits:
            break
        random.shuffle(shuffled)        # randomize order of observations
        sac = _species_accumulation_curve(shuffled)
        try:
            params, _covariance = curve_fit(_sac_model, x_values, sac)
        except (RuntimeError, ValueError):
            # curve_fit signals a failed / non-convergent fit this way; try another order.
            continue
        fitted.append(params[2])
    return fitted if len(fitted) >= num_fits else None


count_estimates = {taxon: None for taxon in TAXONKEYS}
for taxon in TAXONKEYS:
    taxon_species = [obs['species'] for obs in observations[taxon]]
    fitted_params = _fit_asymptote_params(taxon_species, NUM_CURVEFITS, MAX_FIT_TRIES)
    if fitted_params is None:
        count_estimates[taxon] = "insufficient data"
    else:
        # Fitted c is the negated asymptote, hence the leading minus sign.
        count_estimates[taxon] = -round(np.mean(fitted_params))
    print('{0}: {1}'.format(taxon, str(count_estimates[taxon])))

  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])
  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])


Arthropoda: 2123
Aves: 514
Tracheophyta: 1503
