In [None]:
# import sys
# !{sys.executable} -m pip install --extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-packages/simple terracatalogueclient

In [None]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): with no category/module filter this hides all Warning subclasses,
# including deprecation notices from the libraries imported below.
warnings.filterwarnings(action='ignore')

In [None]:
import os, requests, json, geojson, gc
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely.ops import unary_union
import rasterio
from rasterio.features import shapes
from shapely.ops import transform
import numpy as np
import numpy.ma as ma
from rasterio.merge import merge as rmerge
import rasterio.mask
import fiona
import shapely
import pyproj
from rasterstats import zonal_stats
from terracatalogueclient import Catalogue as Terracat
import boto3

In [None]:
# Locations used by the rest of the notebook: the S3 bucket (and its public
# HTTPS endpoint), the key prefixes inside it, and the local working directory.
bucket_name = 'cities-indicators'
aws_s3_dir = f"https://{bucket_name}.s3.eu-west-3.amazonaws.com"
boundary_ext = '/data/boundaries/'
indicators_file_aws = 'indicators/indicators.csv'
out_dir = os.getcwd()

In [None]:
# Local CSV (relative to the working directory) that accumulates per-feature results.
OUTPUT_FILENAME = "BIO-1-natural-areas.csv"

In [None]:
# Coordinate transformer in (x, y) = (lon, lat) axis order.
# NOTE(review): source and destination are both EPSG:4326, so this is an
# identity transform as written -- confirm whether a projected target CRS was intended.
project = pyproj.Transformer.from_crs(
    crs_from=pyproj.CRS.from_epsg(4326),
    crs_to=pyproj.CRS.from_epsg(4326),
    always_xy=True,
)
# Get ESA WorldCover

In [None]:
# Log in to the Terrascope catalogue without user interaction.
# Credentials are read from the environment (TERRASCOPE_USERNAME / TERRASCOPE_PASSWORD)
# instead of being written into the notebook; a missing variable raises KeyError naming it.
# NOTE(review): the password that was previously committed here should be rotated.
# The earlier interactive `authenticate()` call is dropped: the non-interactive login
# below is the one whose result is used by the rest of the notebook.
worldcover_catalogue = Terracat()
terracat = worldcover_catalogue.authenticate_non_interactive(
    os.environ['TERRASCOPE_USERNAME'],
    os.environ['TERRASCOPE_PASSWORD'],
)

In [None]:
# Load the table of cities and their boundary file names from the public bucket,
# then display it.
boundary_georef = pd.read_csv(f"{aws_s3_dir}{boundary_ext}boundary_georef.csv")
boundary_georef

In [None]:
def boundingbox_wkt(p):
    """Return the WKT polygon of the axis-aligned bounding box of ``p``.

    The result is used as a simple geometry filter for catalogue queries,
    which is why a plain rectangle is produced instead of the full shape.

    Args:
        p: any object exposing ``bounds`` as ``(minx, miny, maxx, maxy)``
           (e.g. a shapely geometry).

    Returns:
        str: ``'POLYGON ((...))'`` with a closed five-vertex ring.
    """
    minx, miny, maxx, maxy = p.bounds
    # Walk all four corners and close the ring on the starting vertex.
    # (The previous ring skipped the (maxx, maxy) corner, describing a triangle
    # that covered only half of the bounding box.)
    return 'POLYGON (({0} {3}, {0} {2}, {1} {2}, {1} {3}, {0} {3}))'.format(
        str(minx), str(maxx), str(miny), str(maxy)
    )

def classify_naturalarea(r):
    """Reclassify a land-cover code array, in place, to 1 (natural) / 0 (other).

    Mapping applied to the incoming codes:
        0            -> 0  (no data)
        1 .. 30      -> 1  (10 tree cover, 20 shrubland, 30 grassland)
        31 .. 80     -> 0  (40 cropland, 50 built-up, 60 bare / sparse vegetation,
                            70 snow and ice, 80 permanent water bodies)
        above 80     -> 1  (90 herbaceous wetland, 95 mangroves, 100 moss and lichen)

    Args:
        r: numpy array of class codes; it is modified in place.

    Returns:
        The same array object ``r`` after reclassification.
    """
    r[r == 0] = 0   # no data stays 0

    # Evaluate every range test on the incoming codes before writing anything.
    # The written values (0 and 1) fall outside the two upper ranges, so this
    # gives the same result as testing and assigning one range at a time.
    low_codes = (r > 0) & (r <= 30)
    middle_codes = (r > 30) & (r <= 80)
    high_codes = r > 80

    r[low_codes] = 1
    r[middle_codes] = 0
    r[high_codes] = 1
    return r

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    """Clip/mask an in-memory raster with geometries via rasterio.mask.mask.

    The array is written to a single-band in-memory GeoTIFF so that
    rasterio.mask.mask, which needs an open dataset, can be applied to it.
    Adapted from https://gis.stackexchange.com/a/387772

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): raster to be masked with dim: [H, W]
        transform (affine.Affine): the transform of the raster
        shapes, **kwargs: passed to rasterio.mask.mask

    Returns:
        masked: numpy.ndarray or numpy.ma.MaskedArray with dim: [H, W], and new affine transform
    """
    # Profile of the temporary single-band dataset holding `raster`.
    profile = dict(
        driver='GTiff',
        height=raster.shape[0],
        width=raster.shape[1],
        count=1,
        dtype=raster.dtype,
        transform=transform,
    )
    with rasterio.io.MemoryFile() as memfile:
        # First pass: write the array into band 1.
        with memfile.open(**profile) as writer:
            writer.write(raster, 1)
        # Second pass: reopen for reading and apply the geometries.
        with memfile.open() as reader:
            masked, masked_transform = rasterio.mask.mask(reader, shapes, **kwargs)
    # Drop the leading band axis so the result is [H, W] again.
    return masked.squeeze(0), masked_transform

def do_one_geom(row):
    """Compute the natural-area share of one boundary feature.

    Downloads (if not already on disk) the ESA WorldCover 2020 tiles that
    intersect the feature's bounding box, mosaics them over the feature's
    bounds, reclassifies the pixels to natural (1) / other (0) and averages
    the pixels that fall inside the feature's polygon.

    Relies on the module-level authenticated catalogue ``terracat`` and on the
    helpers ``boundingbox_wkt``, ``mask_raster_with_geometry`` and
    ``classify_naturalarea``.

    Args:
        row: one record of the boundary table, as passed by
            ``DataFrame.apply(..., axis=1)``. The first field is the geometry
            and the second field is only used as a label in the progress line.

    Returns:
        Mean of the 0/1 raster over the pixels inside the polygon, i.e. a
        fraction between 0 and 1. No-data pixels (code 0) inside the polygon
        count as non-natural.

    Raises:
        ValueError: if the catalogue returns no product for the bounding box.
    """
    # Keep the positional contract (geometry first, label second) but go through
    # .iloc when it exists: integer keys on a label-indexed Series are deprecated.
    fields = getattr(row, 'iloc', row)
    poly = unary_union(fields[0])
    label = fields[1]

    box = boundingbox_wkt(poly)
    products = terracat.get_products('urn:eop:VITO:ESA_WorldCover_10m_2020_V1', geometry=box)

    # Create mosaic raster of WorldCover rasters downloaded from ESA.
    rasters_to_mosaic = []
    try:
        for product in products:
            tile_dir = '{0}dir'.format(product.title)
            if not os.path.exists(tile_dir):
                terracat.download_file(product.data[0], tile_dir)
            rasters_to_mosaic.append(rasterio.open('{0}dir/{0}_Map.tif'.format(product.title)))
        if not rasters_to_mosaic:
            raise ValueError('No WorldCover product found for {0}'.format(label))
        mosaic, mosaic_transform = rmerge(rasters_to_mosaic, bounds=poly.bounds)
    finally:
        # Close the tile datasets explicitly so the files can be deleted afterwards
        # and no file handles are left to the garbage collector.
        for tile in rasters_to_mosaic:
            tile.close()
    del rasters_to_mosaic
    gc.collect()

    # Class codes cropped to the polygon's window.
    clipped_raster, _ = mask_raster_with_geometry(mosaic[0], mosaic_transform, [poly], crop=True)

    # Companion raster that is 1 inside the polygon and 0 outside. It is built on
    # the same grid and transform as the mosaic so that it is guaranteed to line
    # up, pixel for pixel, with clipped_raster.
    inside, _ = mask_raster_with_geometry(np.ones_like(mosaic[0]), mosaic_transform, [poly], crop=True)
    del mosaic
    gc.collect()

    # Explicitly mask the clipped raster: cropping alone leaves the pixels outside
    # the polygon in the array (filled with 0), which would bias the mean.
    # A boolean comparison is used because negating an unsigned array with `* -1`
    # is rejected by NumPy 2.
    naturalarea_raster = ma.masked_array(classify_naturalarea(clipped_raster), mask=(inside == 0))

    result = np.mean(naturalarea_raster)

    print('{0}: {1}'.format(label, result))
    return result


In [None]:
# Process every city in boundary_georef, checkpointing to OUTPUT_FILENAME after
# each city so that an interrupted run can be resumed.
for i in range(0, len(boundary_georef)):
    # Reload the checkpoint at the start of every iteration.
    if not os.path.exists(OUTPUT_FILENAME):
        so_far_df = pd.DataFrame()
        so_far_df.to_csv(OUTPUT_FILENAME)
    else:
        so_far_df = pd.read_csv(OUTPUT_FILENAME)
    # geo_ids already present in the checkpoint (empty for a fresh/placeholder file).
    so_far = so_far_df['geo_id'].tolist() if 'geo_id' in so_far_df.columns else []

    # NOTE(review): do_units is computed below but nothing in this cell consults it;
    # unit boundaries are always processed.
    do_units = True
    most_recent = []
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        boundary_value = boundary_georef.loc[i, boundary_name]
        if not isinstance(boundary_value, str):  # sometimes the boundary name is nan
            continue
        boundary_id = boundary_georef.loc[i, 'geo_name'] + '-' + boundary_value
        boundary_path = aws_s3_dir + boundary_ext + 'boundary-' + boundary_id + '.geojson'
        boundary_response = requests.get(boundary_path)
        # Fail with the HTTP error instead of an opaque JSON decoding error.
        boundary_response.raise_for_status()
        boundary_geo = boundary_response.json()
        temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)

        # Skip features already stored in the checkpoint; otherwise every rerun
        # recomputes them and appends duplicate rows to the output file.
        temp_gdf = temp_gdf[~temp_gdf['geo_id'].isin(so_far)].copy()
        if temp_gdf.empty:
            continue

        temp_gdf['BIO_1_percentNaturalArea'] = temp_gdf.apply(do_one_geom, axis=1)

        most_recent.append(temp_gdf.copy())
        if boundary_name == 'aoi_boundary_name':
            do_units = most_recent[-1]['BIO_1_percentNaturalArea'].iloc[0] > 0

    # Delete WorldCover rasters to prevent filling up disk
    for directoryname in os.listdir('.'):
        if directoryname.startswith('ESA_WorldCover_') and os.path.isdir(directoryname):
            for filename in os.listdir(directoryname):
                os.remove('{0}/{1}'.format(directoryname, filename))
            os.rmdir(directoryname)

    # Only rewrite the checkpoint when this city produced new rows; with nothing
    # new the file is already up to date (and an empty checkpoint has no columns
    # to select from).
    if most_recent:
        result = pd.concat([so_far_df] + most_recent, axis=0)
        result[['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'BIO_1_percentNaturalArea']].to_csv(OUTPUT_FILENAME)

In [None]:
# Reload the per-feature results that the processing loop wrote to OUTPUT_FILENAME.
processedcities = pd.read_csv(OUTPUT_FILENAME)
# The file name is relative, so it is resolved against the current working directory.
processedcities

# Merge with indicator table

In [None]:
# Fetch the shared indicator table from the bucket and display it.
cities_indicators = pd.read_csv(f"{aws_s3_dir}/{indicators_file_aws}")
cities_indicators

In [None]:
def merge_indicators(indicator_table, new_indicator_table, indicator_name):
    """Add or replace one indicator column in the indicator table.

    The values are joined on ``geo_id`` with a left merge, so every row of
    ``indicator_table`` is kept and rows without a new value get NaN.

    Args:
        indicator_table (pandas.DataFrame): existing indicators, one row per
            ``geo_id``. It is not modified.
        new_indicator_table (pandas.DataFrame): must contain ``geo_id`` and
            ``indicator_name``. If a ``geo_id`` occurs more than once, the last
            occurrence is used so the merge cannot multiply rows.
        indicator_name (str): name of the indicator column to add or replace.

    Returns:
        pandas.DataFrame: a new table with the refreshed indicator column.
    """
    if indicator_name in indicator_table.columns:
        print("replace with new calculations")
        # Drop on a copy: the caller's DataFrame must stay untouched.
        base_table = indicator_table.drop(columns=indicator_name)
    else:
        print("add new indicators")
        base_table = indicator_table

    new_values = new_indicator_table[["geo_id", indicator_name]].drop_duplicates(
        subset="geo_id", keep="last"
    )
    return base_table.merge(new_values, on='geo_id', how='left')

In [None]:
# Refresh the BIO_1_percentNaturalArea column of the indicator table with the
# values computed above.
cities_indicators_merged = merge_indicators(
    cities_indicators,
    processedcities,
    'BIO_1_percentNaturalArea',
)

In [None]:
# Display the merged indicator table for a visual check.
cities_indicators_merged

# Upload to AWS S3

In [None]:
# connect to s3
# The credentials CSV is expected to have the columns 'Access key ID' and
# 'Secret access key'; only its first row is used.
# Its location can be overridden with the AWS_CREDENTIALS_CSV environment
# variable; the previous hard-coded path remains the default.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Overwrite the shared indicator table in the bucket with the merged version.
key_data = indicators_file_aws
cities_indicators_merged.to_csv(
    "s3://" + bucket_name + "/" + key_data,
    storage_options=dict(key=aws_key, secret=aws_secret),
    index=False,
)

In [None]:
# make it public
# Set the ACL of the uploaded indicators object to 'public-read'.
object_acl = s3.ObjectAcl(bucket_name,key_data)
response = object_acl.put(ACL='public-read')