In [None]:
# import sys
# !{sys.executable} -m pip install pip earthengine-api
# !{sys.executable} -m pip install pip geemap
# !{sys.executable} -m pip install pip wget
# !{sys.executable} -m pip install pip fiona[s3]

In [None]:
import os, requests, json, geojson
import pandas as pd
import geopandas as gpd
#import ee
#import geemap
import fiona
import boto3
from fiona.session import AWSSession
import shapely

In [None]:
# define directory
# Working directory of the kernel at the time this cell runs.
out_dir = os.getcwd()

# S3 bucket that stores the boundaries and the indicator table, and its public HTTPS endpoint.
bucket_name = 'cities-indicators'
aws_s3_dir = f"https://{bucket_name}.s3.eu-west-3.amazonaws.com"

# Locations inside the bucket: boundary files (URL path suffix) and the indicator table (object key).
boundary_ext = '/data/boundaries/'
indicators_file_aws = 'indicators/indicators.csv'

In [None]:
# Local CSV (relative to the working directory) that the processing loop rewrites after
# each city and that is read back afterwards to merge the results into the indicator table.
OUTPUT_FILENAME = 'LND-7-protected-KBAs.csv'

In [None]:
# Object keys (without file extension) of the WDPA and KBA polygon shapefiles in the S3 bucket.
WDPA_FILENAME = 'data/WDPA_data/WDPA_Oct2022_Public_shp-polygons'
KBA_FILENAME = 'data/KBA_data/KBAsGlobal_2022_March_01_POL'
# Local file stems (without extension) the shapefile parts are downloaded to before being read.
WDPA_TEMPFILENAME = 'temp_wdpadata'
KBA_TEMPFILENAME = 'temp_kbadata'

In [None]:
# connect to s3
# The credentials CSV location can be overridden with the AWS_CREDENTIALS_CSV environment
# variable so the notebook is not tied to one user's home directory; when the variable is
# unset the original path is used, so existing setups keep working unchanged.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)

# Only the first row of the CSV is used; it must provide these two columns.
first_credential = aws_credentials.iloc[0]
aws_key = first_credential['Access key ID']
aws_secret = first_credential['Secret access key']

# boto3 resource used below to download the source shapefiles and to set the object ACL.
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Fetch the .shp and .shx parts of both source shapefiles from the bucket into local temp files.
source_bucket = s3.Bucket(bucket_name)
shapefile_pairs = (
    (WDPA_FILENAME, WDPA_TEMPFILENAME),
    (KBA_FILENAME, KBA_TEMPFILENAME),
)
for ext in ('shp', 'shx'):
    for remote_stem, local_stem in shapefile_pairs:
        source_bucket.download_file(f'{remote_stem}.{ext}', f'{local_stem}.{ext}')

In [None]:
def _read_polygons(shapefile_path):
    """Return every polygon stored in a shapefile as a flat list of shapely Polygons.

    The fiona collection is opened in a ``with`` block so the file handle is closed
    once the features have been read. Features without a geometry are skipped, and
    MultiPolygon features are split into their member polygons because
    ``shapely.geometry.MultiPolygon`` does not accept a sequence of MultiPolygons.
    """
    polygons = []
    with fiona.open(shapefile_path) as features:
        for feature in features:
            geometry = feature['geometry']
            if geometry is None:
                # Null shapes cannot be converted by shapely.geometry.shape
                continue
            geom = shapely.geometry.shape(geometry)
            if isinstance(geom, shapely.geometry.MultiPolygon):
                polygons.extend(geom.geoms)
            else:
                polygons.append(geom)
    return polygons


# One MultiPolygon per dataset, built from the locally downloaded shapefiles.
wdpa_multi = shapely.geometry.MultiPolygon(_read_polygons('{0}.shp'.format(WDPA_TEMPFILENAME)))
kba_multi = shapely.geometry.MultiPolygon(_read_polygons('{0}.shp'.format(KBA_TEMPFILENAME)))

In [None]:
# get list of cities
# The table is read straight from the bucket's public HTTPS endpoint.
boundary_georef_url = f"{aws_s3_dir}{boundary_ext}boundary_georef.csv"
boundary_georef = pd.read_csv(boundary_georef_url)
boundary_georef

In [None]:
def do_one_geom(row):
    """Return the share of KBA area inside one geometry that is also covered by WDPA polygons.

    Parameters
    ----------
    row : pandas.Series or sequence
        Its first element (by position) is the shapely geometry to evaluate and its
        second element is only used as a label in the progress message. When used
        with ``DataFrame.apply(..., axis=1)`` this means the first two columns.

    Returns
    -------
    float or int
        ``protected KBA area / KBA area`` within the geometry, i.e. a fraction
        (not a percentage), or the sentinel ``-9999`` when the geometry contains
        no KBA area at all.

    Notes
    -----
    Reads the module-level ``wdpa_multi`` and ``kba_multi`` geometries and prints
    one progress line per call.
    """
    # Integer keys on a label-indexed Series are deprecated positional access in pandas,
    # so use .iloc when it is available; plain tuples/lists are indexed directly.
    positional = getattr(row, 'iloc', row)
    poly = positional[0]
    label = positional[1]

    # Clip the global layers to the geometry's bounding box first so the exact
    # intersections below only have to deal with nearby polygons.
    box = poly.bounds
    protected_box = shapely.ops.clip_by_rect(wdpa_multi, *box)
    protected_box = shapely.ops.unary_union(protected_box)    # unary_union repairs invalid geometries
    kba_box = shapely.ops.clip_by_rect(kba_multi, *box)
    kba_box = shapely.ops.unary_union(kba_box)

    key_poly = kba_box.intersection(poly)                      # KBA area inside the geometry
    protectedkey_poly = protected_box.intersection(key_poly)   # ...that is also protected
    keypoly_area = key_poly.area
    if keypoly_area > 0:
        result = protectedkey_poly.area/keypoly_area
    else:
        # No KBA inside this geometry: the ratio is undefined
        result = -9999
    print('{0}: {1}'.format(label, result))
    return result

In [None]:
# Compute the indicator one city at a time, rewriting OUTPUT_FILENAME after each city so an
# interrupted run can be resumed. Features whose geo_id is already in the checkpoint are
# skipped instead of being recomputed and appended a second time; delete OUTPUT_FILENAME
# to force a full recalculation.
output_columns = ['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'LND_7_percentKBAsProtected']
for i in range(0,len(boundary_georef)):
    # Reload the checkpoint: it was rewritten at the end of the previous iteration.
    if os.path.isfile(OUTPUT_FILENAME):
        so_far_df = pd.read_csv(OUTPUT_FILENAME)
    else:
        so_far_df = pd.DataFrame()
    so_far = list(so_far_df['geo_id']) if 'geo_id' in so_far_df.columns else []
    # Compare ids as strings so values that the CSV round trip turned into numbers still match.
    done_ids = {str(geo_id) for geo_id in so_far}

    most_recent = []
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        boundary_value = boundary_georef.loc[i, boundary_name]
        if not isinstance(boundary_value, str):  # missing boundary names are read as float NaN
            continue
        boundary_id = boundary_georef.loc[i, 'geo_name']+'-' + boundary_value
        boundary_path = aws_s3_dir + boundary_ext +'boundary-'+boundary_id+'.geojson'
        response = requests.get(boundary_path)
        # Fail with the HTTP status instead of a confusing JSON decoding error
        response.raise_for_status()
        boundary_geo = response.json()
        temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)
        if 'geo_id' in temp_gdf.columns:
            already_done = temp_gdf['geo_id'].astype(str).isin(done_ids)
            temp_gdf = temp_gdf[~already_done].copy()
        if temp_gdf.empty:
            continue
        temp_gdf['LND_7_percentKBAsProtected'] = temp_gdf.apply(do_one_geom, axis=1)
        most_recent.append(temp_gdf)

    if not most_recent:
        # Nothing new for this city: the checkpoint on disk is already up to date
        continue
    previous = [] if so_far_df.empty else [so_far_df]
    result = pd.concat(previous + most_recent, axis=0)
    result[output_columns].to_csv(OUTPUT_FILENAME)

In [None]:
# Read the results back from the local CSV named by OUTPUT_FILENAME and display them.
processedcities = pd.read_csv(OUTPUT_FILENAME)
processedcities

In [None]:
# delete temporary files
# Both the WDPA and the KBA shapefile parts were downloaded, so both are removed here.
# Files that are already gone are skipped so the cell can be re-run safely.
for ext in ('shp', 'shx'):
    for temp_stem in (WDPA_TEMPFILENAME, KBA_TEMPFILENAME):
        temp_path = '{0}.{1}'.format(temp_stem, ext)
        if os.path.exists(temp_path):
            os.remove(temp_path)

# Merge with indicator table

In [None]:
# read indicator table
# Current version of the shared indicator table, fetched over the bucket's HTTPS endpoint.
indicators_url = f"{aws_s3_dir}/{indicators_file_aws}"
cities_indicators = pd.read_csv(indicators_url)
cities_indicators

In [None]:
def merge_indicators(indicator_table, new_indicator_table, indicator_name):
    """Return ``indicator_table`` with ``indicator_name`` taken from ``new_indicator_table``.

    Parameters
    ----------
    indicator_table : pandas.DataFrame
        Existing indicator table; must contain a ``geo_id`` column. It is not modified.
    new_indicator_table : pandas.DataFrame
        Table holding the freshly computed values; must contain ``geo_id`` and
        ``indicator_name`` columns.
    indicator_name : str
        Name of the indicator column to add or replace.

    Returns
    -------
    pandas.DataFrame
        A new table with one row per row of ``indicator_table`` (left join on
        ``geo_id``). Rows whose ``geo_id`` is absent from ``new_indicator_table``
        get NaN for the indicator.
    """
    if indicator_name in indicator_table.columns:
        print("replace with new calculations")
        # Drop on a copy: an in-place drop would silently alter the caller's DataFrame
        base_table = indicator_table.drop(columns=indicator_name)
    else:
        print("add new indicators")
        base_table = indicator_table

    # A geo_id that appears several times in the new table would duplicate rows in
    # the left join, so keep only its last (most recently appended) value.
    new_values = new_indicator_table[["geo_id", indicator_name]].drop_duplicates(
        subset="geo_id", keep="last"
    )
    cities_indicators_df = base_table.merge(new_values, on='geo_id', how='left')
    return cities_indicators_df

In [None]:
# Bring the freshly computed LND-7 values into the shared indicator table.
cities_indicators_merged = merge_indicators(
    indicator_table=cities_indicators,
    new_indicator_table=processedcities,
    indicator_name='LND_7_percentKBAsProtected',
)

In [None]:
# Display the merged table so it can be checked visually before it is uploaded.
cities_indicators_merged

## Upload to AWS S3

In [None]:
# upload to aws
# Write the merged table (without the DataFrame index) over the indicator file in the bucket.
key_data = indicators_file_aws
s3_storage_options = {"key": aws_key, "secret": aws_secret}
cities_indicators_merged.to_csv(
    "s3://{0}/{1}".format(bucket_name, key_data),
    index=False,
    storage_options=s3_storage_options,
)

In [None]:
# make it public
# Set the uploaded object's ACL to 'public-read'. The indicator table is read earlier in
# this notebook through the bucket's plain HTTPS URL without credentials, which relies
# on the object being publicly readable.
# NOTE(review): this exposes the whole CSV to anyone with the URL - confirm that is intended.
object_acl = s3.ObjectAcl(bucket_name,key_data)
response = object_acl.put(ACL='public-read')