In [59]:
import requests, json, geojson
import random, scipy
from collections import defaultdict
from scipy.optimize import curve_fit
import numpy as np
from shapely.geometry import Point, Polygon, shape
from shapely.ops import unary_union

In [2]:
API_URL = 'https://api.gbif.org/v1/occurrence/search/'
DATASETKEY = '50c9509d-22c7-4a22-a47d-8c48425ef4a7'  # iNaturalist research-grade observations
TAXONKEYS = {'Arthropoda': '54', 'Aves': '212', 'Tracheophyta': '7707728'}
STARTYEAR = '2016'
ENDYEAR = '2021'
LIMIT = 300

NUM_CURVEFITS = 200

In [3]:
BOUNDARIES_URL = 'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/ADM2/boundary-CRI-San_Jose-ADM2.geojson'

In [4]:
def geojson_to_polygons(g):
    result = []
    for feature in g['features']:
        name = feature['properties']['shapeName']
        coordpairs = [(float(i[0]), float(i[1])) for i in feature['geometry']['coordinates'][0][0]]
        result.append((name, Polygon(coordpairs)))
    return result

In [11]:
def do_one_polygon(name, poly):
    
    def boundingbox_wkt(p):
        # Returns WKT for bounding box.
        # Necessary because GBIF API won't accept complex polygons.
        minx, miny, maxx, maxy = p.bounds
        return 'POLYGON (({0} {3}, {0} {2}, {1} {2}, {0} {3}))'.format(str(minx), str(maxx), str(miny), str(maxy))
    
    outputs = []
    # Get observation records from GBIF
    
    observations = {taxon: [] for taxon in TAXONKEYS.keys()}
    species_by_redlistcat = {taxon: [] for taxon in TAXONKEYS.keys()}
    
    if poly.type == 'MultiPolygon':
        poly = unary_union(poly)

    for taxon in TAXONKEYS.keys():
        offset = -LIMIT
        while offset == -LIMIT or not results['endOfRecords']:
            offset += LIMIT
            url = '{0}?dataset_key={1}&taxon_key={2}&year={3},{4}&geometry={5}&limit={6}&offset={7}&hasCoordinate=true'.format(API_URL, DATASETKEY, TAXONKEYS[taxon], STARTYEAR, ENDYEAR, boundingbox_wkt(poly), str(LIMIT), str(offset))
            resp = requests.get(url)
            results = resp.json()
            print('  {0}: {1}/{2}'.format(taxon, results['offset'], results['count']))
            # Note spatial subsetting of points happens below (twice) as part of the conditions in the list comprehensions
            observations[taxon] += [{
                'species': i['species'],
                'lat': i['decimalLatitude'],  # We don't really need to save lat/lon for this
                'lon': i['decimalLongitude'],
            } for i in results['results'] if 'species' in i.keys() and Point(float(i['decimalLongitude']), float(i['decimalLatitude'])).within(poly)]
            species_by_redlistcat[taxon] += [{
                'species': i['species'],
                'iucnRedListCategory': i['iucnRedListCategory']
            } for i in results['results'] if 'iucnRedListCategory' in i.keys() and 'species' in i.keys() and Point(float(i['decimalLongitude']), float(i['decimalLatitude'])).within(poly)]
    
    # Estimate species counts by estimating asymptote of species-accumulation curve created when observation order is randomized
    # Final estimate is average over NUM_CURVEFITS estimates

    count_estimates = {taxon: None for taxon in TAXONKEYS.keys()}
    for taxon in TAXONKEYS.keys():
        taxon_observations = [i['species'] for i in observations[taxon]]
        asymptotes = []
        tries = 0
        while len(asymptotes) < NUM_CURVEFITS:   # Different observation-orders give different results, so average over many
            tries += 1
            taxon_observations.sort(key=lambda x: random.random())                    # Randomize order of observations
            sac = []                                                            # Initialize species accumulation curve data
            for obs_count in range(1, len(taxon_observations)):                       # Go through observation list from beginning
                sac.append(len(set(taxon_observations[:obs_count])))                  # and count unique species from start to index
            if len(sac) > 5 and tries <= 1000:          # Avoid letting infinite-species errors stop the process
                try:
                    asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])
                except:
                    pass
            else:
                asymptotes.append(-1)
        if -1 in asymptotes:
            count_estimates[taxon] = "insufficient data"
        else:
            count_estimates[taxon] = -round(np.mean(asymptotes))
        outputs.append((name, 'speciescount', taxon, str(count_estimates[taxon])))
        
    # Count observed species tagged with IUCN Red List categories

    for taxon in TAXONKEYS.keys():
        redlistcats = defaultdict(list)
        for i in species_by_redlistcat[taxon]:
            redlistcats[i['iucnRedListCategory']].append(i['species'])
        for cat in redlistcats:
            redlistcats[cat] = list(set(redlistcats[cat]))
        for cat in redlistcats:
            outputs.append((name, 'redlistcount', cat, str(len(redlistcats[cat]))))
    return outputs

In [6]:
allbounds = requests.get(BOUNDARIES_URL).json()
muni_polygons = geojson_to_polygons(allbounds)
outputs_allpolygons = []
for polygon in muni_polygons:
    print(polygon[0])
    outputs_allpolygons += do_one_polygon(polygon[0], polygon[1])
polygon_union = unary_union([i[1] for i in muni_polygons])
print('ALL')
outputs_allpolygons += do_one_polygon('ALL', polygon_union)

San Jose
Arthropoda: 0/140
Aves: 0/112
Tracheophyta: 0/154


  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])
  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])


Alajuela
Arthropoda: 0/682
Arthropoda: 300/682
Arthropoda: 600/682
Aves: 0/828
Aves: 300/828
Aves: 600/828
Tracheophyta: 0/371
Tracheophyta: 300/371
Moravia
Arthropoda: 0/273
Aves: 0/76
Tracheophyta: 0/210
Paraíso
Arthropoda: 0/414
Arthropoda: 300/414
Aves: 0/1826
Aves: 300/1826
Aves: 600/1826
Aves: 900/1826
Aves: 1200/1826
Aves: 1500/1826
Aves: 1800/1826
Tracheophyta: 0/862
Tracheophyta: 300/862
Tracheophyta: 600/862
Poás
Arthropoda: 0/100
Aves: 0/122
Tracheophyta: 0/73
Mora
Arthropoda: 0/12
Aves: 0/6
Tracheophyta: 0/22
Alvarado
Arthropoda: 0/7
Aves: 0/33
Tracheophyta: 0/29
Oreamuno
Arthropoda: 0/62
Aves: 0/228
Tracheophyta: 0/116
Heredia Urban
Arthropoda: 0/18
Aves: 0/70
Tracheophyta: 0/28
Heredia Rural
Arthropoda: 0/115
Aves: 0/489
Aves: 300/489
Tracheophyta: 0/156
Tibás
Arthropoda: 0/17
Aves: 0/29
Tracheophyta: 0/22
Vasquez de Coronado
Arthropoda: 0/103
Aves: 0/145
Tracheophyta: 0/176
Atenas
Arthropoda: 0/73
Aves: 0/19
Tracheophyta: 0/39
Desamparados
Arthropoda: 0/46
Aves: 0/52
Tra

In [71]:
# Write outputs to csv
ofile = open('speciescount_outputs.csv', 'w')
for line in outputs_allpolygons:
    ofile.write(','.join(line))
    ofile.write('\n')
ofile.close()

In [69]:
polygon_union = unary_union([i[1] for i in muni_polygons])
print('ALL')
outputs_allpolygons += do_one_polygon('ALL', polygon_union)

ALL
  Arthropoda: 0/3874
  Arthropoda: 300/3874
  Arthropoda: 600/3874
  Arthropoda: 900/3874
  Arthropoda: 1200/3874
  Arthropoda: 1500/3874
  Arthropoda: 1800/3874
  Arthropoda: 2100/3874
  Arthropoda: 2400/3874
  Arthropoda: 2700/3874
  Arthropoda: 3000/3874
  Arthropoda: 3300/3874
  Arthropoda: 3600/3874
  Aves: 0/5029
  Aves: 300/5029
  Aves: 600/5029
  Aves: 900/5029
  Aves: 1200/5029
  Aves: 1500/5029
  Aves: 1800/5029
  Aves: 2100/5029
  Aves: 2400/5029
  Aves: 2700/5029
  Aves: 3000/5029
  Aves: 3300/5029
  Aves: 3600/5029
  Aves: 3900/5029
  Aves: 4200/5029
  Aves: 4500/5029
  Aves: 4800/5029
  Tracheophyta: 0/2929
  Tracheophyta: 300/2929
  Tracheophyta: 600/2929
  Tracheophyta: 900/2929
  Tracheophyta: 1200/2929
  Tracheophyta: 1500/2929
  Tracheophyta: 1800/2929
  Tracheophyta: 2100/2929
  Tracheophyta: 2400/2929
  Tracheophyta: 2700/2929


  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])
  asymptotes.append(scipy.optimize.curve_fit(lambda x,a,b,c: -((a*np.exp(-b*x))+c), list(range(1, len(sac)+1)), sac)[0][2])
