In [None]:
import warnings
warnings.filterwarnings(action='ignore')

In [None]:
import os, requests, json, geojson
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely.ops import unary_union
from shapely.ops import transform
from shapely.prepared import prep
import fiona
import shapely
import pyproj
import boto3

In [None]:
# Working directory and S3 locations
bucket_name = "cities-indicators"
out_dir = os.getcwd()
# Public HTTPS endpoint of the bucket (region eu-west-3)
aws_s3_dir = f"https://{bucket_name}.s3.eu-west-3.amazonaws.com"
# Path under the bucket endpoint where boundary files live
boundary_ext = "/data/boundaries/"
indicators_file_aws = "indicators/indicators.csv"

In [None]:
# Object-key stems of the source shapefiles in the bucket (no file extension)
WDPA_FILENAME = "data/WDPA_data/WDPA_Oct2022_Public_shp-polygons"
KBA_FILENAME = "data/KBA_data/KBAsGlobal_2022_March_01_POL"

# Local file stems the downloaded copies are saved under (no file extension)
WDPA_TEMPFILENAME = "temp_wdpadata"
KBA_TEMPFILENAME = "temp_kbadata"

In [None]:
# connect to s3
# The credentials CSV location can be overridden with the AWS_CREDENTIALS_CSV
# environment variable so the notebook is not tied to one user's home
# directory; when the variable is unset the original path is used.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv'
)
aws_credentials = pd.read_csv(aws_credentials_path)

# Only the first row of the CSV is used.
first_credential_row = aws_credentials.iloc[0]
aws_key = first_credential_row['Access key ID']
aws_secret = first_credential_row['Secret access key']

# boto3 resource handle used for both downloads and uploads
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Fetch the .shp and .shx parts of both source datasets from the bucket
# into the current working directory (no other shapefile parts are fetched).
source_bucket = s3.Bucket(bucket_name)
remote_to_local = (
    (WDPA_FILENAME, WDPA_TEMPFILENAME),
    (KBA_FILENAME, KBA_TEMPFILENAME),
)
for ext in ('shp', 'shx'):
    for remote_stem, local_stem in remote_to_local:
        source_bucket.download_file(f'{remote_stem}.{ext}', f'{local_stem}.{ext}')

In [None]:
def _read_polygons(shapefile_path):
    """Read every polygon from a shapefile into one flat MultiPolygon.

    Multi-part features are expanded into their individual polygons, because
    the MultiPolygon constructor expects Polygon (or shell/holes) elements and
    does not accept MultiPolygon elements. Features with a null or empty
    geometry are skipped.

    Parameters:
        shapefile_path: path to the ``.shp`` file to read.

    Returns:
        shapely.geometry.MultiPolygon holding all polygons of the file.

    Raises:
        TypeError: if a feature carries a non-polygonal geometry.
    """
    polygons = []
    # The context manager closes the dataset handle once reading is done.
    with fiona.open(shapefile_path) as features:
        for feature in features:
            geometry = feature['geometry']
            if geometry is None:
                continue
            geom = shapely.geometry.shape(geometry)
            if geom.is_empty:
                continue
            if geom.geom_type == 'MultiPolygon':
                polygons.extend(geom.geoms)
            elif geom.geom_type == 'Polygon':
                polygons.append(geom)
            else:
                raise TypeError(
                    'Unexpected geometry type {0} in {1}'.format(geom.geom_type, shapefile_path)
                )
    return shapely.geometry.MultiPolygon(polygons)


wdpa_multi = _read_polygons('{0}.shp'.format(WDPA_TEMPFILENAME))
kba_multi = _read_polygons('{0}.shp'.format(KBA_TEMPFILENAME))

In [None]:
# Load the table listing the city boundaries from the bucket
boundary_georef_url = f'{aws_s3_dir}{boundary_ext}boundary_georef.csv'
boundary_georef = pd.read_csv(boundary_georef_url)
boundary_georef

In [None]:
# download extracts and upload to AWS
#
# For every boundary listed in boundary_georef and every source dataset,
# dissolve the source polygons that intersect the boundary into a single
# geometry, write it to a local GeoJSON file, and upload that file to S3.

SOURCEDATA = {
    'KBA': kba_multi,
    'WDPA': wdpa_multi
}

# Create the local output folder up front so to_file() does not fail on a
# fresh checkout.
os.makedirs('data', exist_ok=True)

# Iterate over column values directly rather than positional .loc[i] lookups,
# which only work when the frame has a default integer index.
for geo_name, aoi_boundary_name in zip(boundary_georef['geo_name'], boundary_georef['aoi_boundary_name']):
    boundary_id = geo_name + '-' + aoi_boundary_name
    boundary_path = aws_s3_dir + boundary_ext + 'boundary-' + boundary_id + '.geojson'

    # Fail with the HTTP error itself instead of a JSON decoding error when
    # the boundary file cannot be fetched.
    response = requests.get(boundary_path)
    response.raise_for_status()
    boundary_geo = response.json()
    temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)

    # NOTE(review): only the first feature of the boundary file is used;
    # confirm that boundary files are always single-feature.
    boundary_geom = temp_gdf.iloc[0]['geometry']
    # Preparing the boundary once speeds up the repeated intersects() tests
    # against every source polygon without changing their results.
    prepared_boundary = prep(boundary_geom)

    for sourcename, sourcegeom in SOURCEDATA.items():
        intersecting = [part for part in sourcegeom.geoms if prepared_boundary.intersects(part)]
        result = unary_union(MultiPolygon(intersecting))
        g = gpd.GeoDataFrame(geometry=[result])
        g['boundary_id'] = [boundary_id]
        filepath = "data/{1}-{0}-2022.geojson".format(sourcename, boundary_id)
        g.to_file(filepath, driver='GeoJSON')

        # upload in s3, readable by anyone
        s3.meta.client.upload_file(
            filepath,
            bucket_name,
            'data/biodiversity/{0}/{1}-{0}-2022.geojson'.format(sourcename, boundary_id),
            ExtraArgs={'ACL':'public-read'}
        )
            
