In [None]:
# import sys
# !{sys.executable} -m pip install pip geemap

In [None]:
import warnings
# Install a blanket 'ignore' filter: every warning raised after this cell is
# suppressed for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings from the geo stack
# (shapely / geopandas) -- consider narrowing it to specific categories.
warnings.filterwarnings(action='ignore')

In [None]:
import os, requests, json, geojson
import random, scipy
from collections import defaultdict
from scipy.optimize import curve_fit
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon, MultiPoint, shape
from shapely.ops import unary_union

import shapely
from shapely.ops import transform

import pandas as pd
import geopandas as gpd
import ee
import geemap

import boto3

In [None]:
# --- GBIF query configuration -------------------------------------------
# Endpoint of the GBIF occurrence search.
API_URL = 'https://api.gbif.org/v1/occurrence/search/'

# GBIF dataset to query: iNaturalist research-grade observations.
DATASETKEY = '50c9509d-22c7-4a22-a47d-8c48425ef4a7'

# Taxon group name -> GBIF taxon key (kept as strings, they go into the URL).
TAXONKEYS = dict(
    Arthropoda='54',
    Aves='212',
    Tracheophyta='7707728',
)

# First and last observation year requested (sent as "year=START,END").
STARTYEAR, ENDYEAR = '2016', '2021'

# Number of records requested per page; also the step used for the offset.
LIMIT = 300

In [None]:
# Cities to leave out of the run (matched against the 'geo_name' column).
# The list is currently empty, so every city is kept. Earlier exclusion list:
#tml_not_available_cities = ['BRA-Salvador','MEX-Monterrey']
tml_not_available_cities = []

# Load the table of urbanshift city boundaries from the public bucket.
boundary_georef = pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')

# Drop the excluded cities and renumber the rows 0..n-1, which the
# positional `.loc[i, ...]` lookups further down rely on.
is_excluded = boundary_georef['geo_name'].isin(tml_not_available_cities)
boundary_georef = boundary_georef.loc[~is_excluded].reset_index(drop=True)
boundary_georef

In [None]:
def boundingbox_wkt(p):
    """Return the WKT polygon of the bounding box of ``p``.

    Used because the GBIF API won't accept complex polygons, so the query
    is made with the enclosing rectangle and the points are filtered
    against the real geometry afterwards.

    Parameters
    ----------
    p : object exposing ``bounds`` as ``(minx, miny, maxx, maxy)``
        (e.g. a shapely geometry).

    Returns
    -------
    str
        ``'POLYGON ((x y, x y, x y, x y, x y))'`` -- a closed ring through
        all four corners of the box.
    """
    minx, miny, maxx, maxy = p.bounds
    # The previous ring omitted (maxx, maxy), which made it a triangle that
    # covered only half of the box. Corners are listed top-left ->
    # bottom-left -> bottom-right -> top-right, i.e. counter-clockwise,
    # and the first corner is repeated to close the ring.
    corners = [
        (minx, maxy),
        (minx, miny),
        (maxx, miny),
        (maxx, maxy),
        (minx, maxy),
    ]
    ring = ', '.join('{0} {1}'.format(str(x), str(y)) for x, y in corners)
    return 'POLYGON (({0}))'.format(ring)

In [None]:
# --- S3 connection ------------------------------------------------------
# Credentials are read from the first row of a local CSV export.
# NOTE(review): absolute, machine-specific path -- the notebook only runs
# where this file exists.
aws_credentials = pd.read_csv('/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv')
first_credential = aws_credentials.iloc[0]
aws_key, aws_secret = first_credential['Access key ID'], first_credential['Secret access key']

# Resource-level boto3 handle, used below for uploads and ACL changes.
s3 = boto3.resource(
    's3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret,
)

# Target bucket and the public HTTPS prefix of its data folder.
bucket_name = 'cities-urbanshift'
aws_s3_dir = "https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data"

In [None]:
def do_one_geom(poly, taxonname, boundary_id):
    """Download GBIF observations of one taxon group for one boundary and publish them.

    Pages through the GBIF occurrence search using the bounding box of
    ``poly``, keeps the records that carry a species name and lie within
    ``poly`` itself, writes them to a local GeoJSON file and uploads that
    file to S3 with a public-read ACL.

    Parameters
    ----------
    poly : shapely geometry
        Boundary of the area of interest. A MultiPolygon is merged with
        ``unary_union`` first. An empty geometry makes the function return
        without querying or writing anything.
    taxonname : str
        Key of ``TAXONKEYS`` selecting the taxon group.
    boundary_id : str
        Identifier used in the progress output, the local file name and
        the S3 object key.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If GBIF answers a page request with an error status.
    """
    print(boundary_id, taxonname)

    # `geom_type` replaces the `.type` alias, which Shapely 2.0 deprecates.
    if poly.geom_type == 'MultiPolygon':
        poly = unary_union(poly)

    # `is_empty` covers every kind of empty geometry, not only the one whose
    # WKT happens to read 'GEOMETRYCOLLECTION EMPTY'.
    if poly.is_empty:
        return

    # Let requests build and percent-encode the query string; the WKT
    # contains spaces, commas and parentheses.
    base_params = {
        'dataset_key': DATASETKEY,
        'taxon_key': TAXONKEYS[taxonname],
        'year': '{0},{1}'.format(STARTYEAR, ENDYEAR),
        'geometry': boundingbox_wkt(poly),
        'limit': LIMIT,
        'hasCoordinate': 'true',
    }

    observations = []
    offset = 0
    while True:
        resp = requests.get(API_URL, params=dict(base_params, offset=offset), timeout=60)
        # Fail loudly on an HTTP error instead of tripping over a missing
        # key in an error payload further down.
        resp.raise_for_status()
        results = resp.json()
        print('  {0}: {1}/{2}'.format(taxonname, results['offset'], results['count']))

        # The query used the bounding box, so the exact spatial subsetting
        # happens here: keep only points within the real boundary.
        for record in results['results']:
            if 'species' not in record:
                continue
            location = Point(float(record['decimalLongitude']), float(record['decimalLatitude']))
            if location.within(poly):
                observations.append({
                    'species': record['species'],
                    'lat': record['decimalLatitude'],  # We don't really need to save lat/lon for this
                    'lon': record['decimalLongitude'],
                })

        if results['endOfRecords']:
            break
        offset += LIMIT

    g = gpd.GeoDataFrame(geometry=[Point(obs['lon'], obs['lat']) for obs in observations])
    g['species'] = [obs['species'] for obs in observations]

    # NOTE: the year span in the file name is hard-coded; keep it in sync
    # with STARTYEAR / ENDYEAR if those change.
    filepath = "{0}_extracts/{1}-{0}-2016-2021.geojson".format(taxonname, boundary_id)
    # Make sure the per-taxon output folder exists before writing into it.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    g.to_file(filepath, driver='GeoJSON')

    # upload in s3
    s3.meta.client.upload_file(
        filepath,
        bucket_name,
        'data/biodiversity/GBIF/{0}-{1}.geojson'.format(taxonname, boundary_id),
        ExtraArgs={'ACL': 'public-read'}
    )

In [None]:
# Run the GBIF extraction for every boundary row and every taxon group.
for i in range(len(boundary_georef)):
    georef_row = boundary_georef.loc[i]

    # "<geo_name>-<aoi_boundary_name>" identifies the boundary file on S3.
    boundary_id = '-'.join([georef_row['geo_name'], georef_row['aoi_boundary_name']])
    boundary_path = ''.join([aws_s3_dir, '/boundaries/v_0/boundary-', boundary_id, '.geojson'])

    # Fetch the boundary GeoJSON and use the geometry of its first feature.
    boundary_geo = requests.get(boundary_path).json()
    temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)
    boundary_geometry = temp_gdf.iloc[0]['geometry']

    for taxonname in TAXONKEYS:
        do_one_geom(boundary_geometry, taxonname, boundary_id)

In [None]:
# Select the columns that make up the published indicator table.
# NOTE(review): `result` is not defined in any cell visible in this notebook,
# so this line raises NameError after Restart & Run All. The cell(s) that
# compute `result` (including the 'SICB-3' column) appear to be missing --
# restore them or load `result` explicitly before this point.
outdf = result[['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'creation_date', 'SICB-3']]

In [None]:
# Publish the indicator table as a CSV object on S3.
bucket_name = 'cities-urbanshift'
key_data = 'indicators/biodiversity/BIO-3.csv'

# pandas writes straight to the s3:// URL using these credentials.
s3_credentials = {"key": aws_key, "secret": aws_secret}
outdf.to_csv(f"s3://{bucket_name}/{key_data}", index=False, storage_options=s3_credentials)

# Switch the uploaded object's ACL to public-read.
object_acl = s3.ObjectAcl(bucket_name, key_data)
response = object_acl.put(ACL='public-read')

In [None]:
# Write a one-line scratch CSV used by the upload test cells below.
# The context manager closes the handle even if the write raises.
with open('test.csv', 'w') as ofile:
    ofile.write('test,test,test\n')

In [None]:
import pandas as pd

In [None]:
# Read the scratch file back in.
# NOTE(review): this rebinds `outdf`, replacing the indicator table built
# earlier with the contents of test.csv.
outdf = pd.read_csv('test.csv')

In [None]:
# Scratch upload of the test frame.
# Pin the object key to the test location first: when the notebook runs top
# to bottom, `key_data` would otherwise still be
# 'indicators/biodiversity/BIO-3.csv' here, and the published indicator file
# would be overwritten with the scratch data.
key_data = 'indicators/biodiversity/test.csv'
outdf.to_csv(
    f"s3://{bucket_name}/{key_data}",
    index=False,
    storage_options={
        "key": aws_key,
        "secret": aws_secret
    },
)

In [None]:
# Point at the scratch object in the bucket.
bucket_name = 'cities-urbanshift'
key_data = 'indicators/biodiversity/test.csv'

# Rebuild the S3 resource from the credentials loaded earlier and get a
# handle on the scratch object's ACL (nothing is changed here).
s3 = boto3.resource('s3', aws_access_key_id=aws_key, aws_secret_access_key=aws_secret)
object_acl = s3.ObjectAcl(bucket_name, key_data)

In [None]:
# Bucket-level handle for `bucket_name`, used below to list its objects.
my_bucket = s3.Bucket(bucket_name)

In [None]:
# Print the key of every object in the bucket, one per line.
all_bucket_objects = my_bucket.objects.all()
for my_bucket_object in all_bucket_objects:
    print(my_bucket_object.key)