In [1]:
import sys
#!{sys.executable} -m pip install --extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-packages/simple terracatalogueclient

In [3]:
import warnings
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

In [4]:
import os, requests, json, geojson, gc
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely.ops import unary_union
import rasterio
from rasterio.features import shapes
from shapely.ops import transform
import numpy as np
import numpy.ma as ma
from rasterio.merge import merge as rmerge
import rasterio.mask
import fiona
import shapely
import pyproj
from rasterstats import zonal_stats
from terracatalogueclient import Catalogue as Terracat

In [5]:
# Locations used by the notebook:
#   aws_s3_dir - base URL of the UrbanShift data bucket
#   out_dir    - local working directory (where the notebook is running)
aws_s3_dir = "https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data"
out_dir = os.getcwd()

In [55]:
# CSV file (relative to the working directory) that accumulates the BIO-1
# results; the processing loop reads it back and rewrites it for every city.
OUTPUT_FILENAME = 'BIO-1.csv'

In [7]:
# Coordinate transformer in (x, y) = (lon, lat) axis order.
# NOTE(review): source and destination are both EPSG:4326, so this is an
# identity transform as written.
_wgs84 = pyproj.CRS.from_epsg(4326)
project = pyproj.Transformer.from_crs(_wgs84, _wgs84, always_xy=True)
# Get ESA WorldCover

In [8]:
# Authenticate against the Terrascope catalogue that serves ESA WorldCover.
# The account name and password are read from the environment
# (TERRASCOPE_USERNAME / TERRASCOPE_PASSWORD) so that no secret is stored in
# the notebook; a missing variable raises KeyError immediately.
# NOTE(review): the interactive authenticate() call is kept as in the original,
# but it looks redundant next to the non-interactive login - confirm and drop.
worldcover_catalogue = Terracat().authenticate()
terracat = worldcover_catalogue.authenticate_non_interactive(
    os.environ['TERRASCOPE_USERNAME'],
    os.environ['TERRASCOPE_PASSWORD'],
)

In [9]:
# Load the table of UrbanShift city boundaries (one row per city/level).
boundary_georef = pd.read_csv(aws_s3_dir + '/boundaries/v_0/boundary_georef.csv')

# Drop cities for which tree cover data is not available.
# (Currently none; the previously excluded cities are kept for reference.)
#tml_not_available_cities = ['BRA-Salvador','MEX-Monterrey']
tml_not_available_cities = []
has_data = ~boundary_georef['geo_name'].isin(tml_not_available_cities)
boundary_georef = boundary_georef[has_data].reset_index(drop=True)
boundary_georef

Unnamed: 0,geo_name,level,aoi_boundary_name,units_boundary_name,city_name,country_name,country_code,continent
0,ARG-Mendoza,region,ADM3union,ADM3,Mendoza,Argentina,ARG,America
1,ARG-Mar_del_Plata,city,ADM3,ADM4,Mar del Plata city,Argentina,ARG,America
2,ARG-Mar_del_Plata,region,ADM2,,Mar del Plata region,Argentina,ARG,America
3,ARG-Ushuaia,city,ADM4,ADM5,Ushuaia city,Argentina,ARG,America
4,ARG-Ushuaia,region,ADM3,,Ushuaia region,Argentina,ARG,America
5,ARG-Salta,region,ADM2union,ADM3,Salta,Argentina,ARG,America
6,ARG-Buenos_Aires,region,ADM2union,ADM2,Buenos Aires,Argentina,ARG,America
7,BRA-Teresina,city,ADM4union,ADM4,Teresina city,Brazil,BRA,America
8,BRA-Teresina,region,ADM2union,ADM2,Teresina region,Brazil,BRA,America
9,BRA-Florianopolis,city,ADM4union,ADM4,Florianopolis,Brazil,BRA,America


In [54]:
def boundingbox_wkt(p):
    """Return the bounding box of a geometry as a WKT polygon string.

    The box is used instead of the geometry itself because the search API
    being queried does not accept complex polygons (the original note cites
    the GBIF API; in this notebook the result is passed to the WorldCover
    catalogue search).

    Args:
        p: any object exposing ``bounds`` as ``(minx, miny, maxx, maxy)``,
           e.g. a shapely geometry.

    Returns:
        str: ``'POLYGON ((...))'`` with a closed ring of the four corners.
    """
    minx, miny, maxx, maxy = p.bounds
    # Ring order: upper-left, lower-left, lower-right, upper-right, then back
    # to upper-left. The upper-right corner used to be missing, which turned
    # the "box" into a triangle covering only half of the bounds.
    return 'POLYGON (({0} {3}, {0} {2}, {1} {2}, {1} {3}, {0} {3}))'.format(
        str(minx), str(maxx), str(miny), str(maxy)
    )

def classify_naturalarea(r):
    """Reclassify a land-cover array in place to 1 (natural) / 0 (not natural).

    Class codes, as listed in the original notes:
        0                   no data                                 -> 0
        10, 20, 30          tree cover, shrubland, grassland        -> 1
        40, 50, 60, 70, 80  cropland, built-up, bare / sparse
                            vegetation, snow and ice, permanent
                            water bodies                            -> 0
        90, 95, 100         herbaceous wetland, mangroves,
                            moss and lichen                         -> 1

    The classes are matched by value ranges, so any value in (0, 30] or above
    80 becomes 1 and any value in (30, 80] becomes 0. Values matching none of
    the ranges are left untouched.

    Args:
        r (numpy.ndarray): land-cover codes; modified in place.

    Returns:
        numpy.ndarray: the same array object ``r``.
    """
    # Evaluate both selections on the untouched codes before writing anything.
    natural = ((r > 0) & (r <= 30)) | (r > 80)
    not_natural = (r == 0) | ((r > 30) & (r <= 80))
    r[natural] = 1
    r[not_natural] = 0
    return r

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    """Clip and mask an in-memory raster with geometries.

    Thin wrapper around rasterio.mask.mask that avoids touching the disk: the
    array is written to a single-band in-memory GeoTIFF, which is then
    reopened and masked.
    Adapted from https://gis.stackexchange.com/a/387772

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): raster to be masked with dim: [H, W]
        transform (affine.Affine): the transform of the raster
        shapes, **kwargs: passed to rasterio.mask.mask

    Returns:
        masked: numpy.ndarray or numpy.ma.MaskedArray with dim: [H, W], and new affine transform
    """
    profile = dict(
        driver='GTiff',
        height=raster.shape[0],
        width=raster.shape[1],
        count=1,
        dtype=raster.dtype,
        transform=transform,
    )
    with rasterio.io.MemoryFile() as memfile:
        # First pass: write the array into the in-memory file as band 1.
        with memfile.open(**profile) as writer:
            writer.write(raster, 1)
        # Second pass: reopen for reading and let rasterio do the masking.
        with memfile.open() as reader:
            masked, masked_transform = rasterio.mask.mask(reader, shapes, **kwargs)
    # Drop the leading band axis so the result is [H, W] again.
    return masked.squeeze(0), masked_transform

def do_one_geom(row):
    """Compute the natural-area fraction (BIO-1) for one boundary feature.

    Downloads (if not already present in the working directory) the ESA
    WorldCover 2020 tiles returned by the catalogue for the feature's bounding
    box, mosaics them over the feature's bounds, reclassifies the land cover
    to natural / not natural and returns the mean over the pixels inside the
    feature.

    Args:
        row (pandas.Series): one row of the boundary GeoDataFrame; the first
            entry is the geometry and the second entry is the label printed
            next to the result.

    Returns:
        The mean of the 0/1 natural-area raster inside the geometry, as
        returned by numpy.mean on a masked array.
    """
    # Explicit positional access: plain row[0] / row[1] relies on the
    # deprecated integer fallback of label-indexed Series.
    poly = unary_union(row.iloc[0])
    label = row.iloc[1]

    box = boundingbox_wkt(poly)

    products = terracat.get_products('urn:eop:VITO:ESA_WorldCover_10m_2020_V1', geometry=box)
    # Create mosaic raster of WorldCover rasters downloaded from ESA
    rasters_to_mosaic = []
    try:
        for product in products:
            tile_dir = '{0}dir'.format(product.title)
            if not os.path.exists(tile_dir):
                terracat.download_file(product.data[0], tile_dir)
            rasters_to_mosaic.append(
                rasterio.open('{0}/{1}_Map.tif'.format(tile_dir, product.title))
            )
        mosaic, mosaic_transform = rmerge(rasters_to_mosaic, bounds=poly.bounds)
    finally:
        # Close the tile datasets explicitly instead of waiting for the
        # garbage collector; open handles also block deleting the tiles later.
        for raster in rasters_to_mosaic:
            raster.close()
    del rasters_to_mosaic
    gc.collect()

    land_cover = mosaic[0]

    # Raster of ones on the mosaic grid, clipped with the polygon: 1 inside the
    # polygon, 0 (the fill value) outside. It is built on the same array shape
    # and transform as the land-cover clip below, so both outputs line up.
    allone = np.ones_like(land_cover)
    maskras, _mask_transform = mask_raster_with_geometry(allone, mosaic_transform, [poly], crop=True)

    clipped_raster, _clip_transform = mask_raster_with_geometry(land_cover, mosaic_transform, [poly], crop=True)

    # Explicitly mask clipped raster. (Clipping doesn't seem to clip well on its own.)
    # maskras == 0 marks the pixels outside the polygon; the boolean test also
    # avoids multiplying an unsigned array by -1, which NumPy 2 rejects.
    naturalarea_raster = ma.masked_array(classify_naturalarea(clipped_raster), mask=(maskras == 0))
    del mosaic, land_cover, allone
    gc.collect()

    result = np.mean(naturalarea_raster)

    print('{0}: {1}'.format(label, result))
    return result


In [None]:
# Compute BIO-1 for every boundary listed in boundary_georef.
# Results are checkpointed to OUTPUT_FILENAME after each city so that an
# interrupted run can be resumed: features whose geo_id is already in the file
# are skipped instead of being recomputed and appended a second time.
for i in range(len(boundary_georef)):
    # Reload what has been written so far.
    if os.path.exists(OUTPUT_FILENAME):
        so_far_df = pd.read_csv(OUTPUT_FILENAME)
    else:
        so_far_df = pd.DataFrame()
    # A file without any results yet has no geo_id column.
    so_far = so_far_df['geo_id'].tolist() if 'geo_id' in so_far_df.columns else []

    most_recent = []
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        boundary_level = boundary_georef.loc[i, boundary_name]
        if pd.isna(boundary_level):  # sometimes the boundary name is NaN
            continue
        boundary_id = boundary_georef.loc[i, 'geo_name'] + '-' + boundary_level
        boundary_path = aws_s3_dir + '/boundaries/v_0/boundary-' + boundary_id + '.geojson'
        response = requests.get(boundary_path)
        # Fail on HTTP errors here rather than on an unparseable error page.
        response.raise_for_status()
        boundary_geo = response.json()
        temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)

        # Skip features that already have a result in the output file.
        temp_gdf = temp_gdf[~temp_gdf['geo_id'].isin(so_far)].reset_index(drop=True)
        if temp_gdf.empty:
            continue

        temp_gdf['BIO-1'] = temp_gdf.apply(do_one_geom, axis=1)
        most_recent.append(temp_gdf.copy())

    # Delete WorldCover rasters to prevent filling up disk
    for directoryname in os.listdir('.'):
        if directoryname.startswith('ESA_WorldCover_') and os.path.isdir(directoryname):
            for filename in os.listdir(directoryname):
                os.remove(os.path.join(directoryname, filename))
            os.rmdir(directoryname)

    if not most_recent:
        # Nothing new for this city; the output file is already up to date.
        continue

    # Append the new rows to the previous results and rewrite the checkpoint.
    previous = [] if so_far_df.empty else [so_far_df]
    result = pd.concat(previous + most_recent, axis=0)
    result[['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'creation_date', 'BIO-1']].to_csv(OUTPUT_FILENAME)

ARG-Mendoza_ADM-3-union_1: 0.3401021104152014
ARG-Mendoza_ADM-3_1: 0.42228238805265245
ARG-Mendoza_ADM-3_2: 0.28647975777656104
ARG-Mendoza_ADM-3_3: 0.28250828529637334
ARG-Mendoza_ADM-3_4: 0.18553765748936266
ARG-Mendoza_ADM-3_5: 0.26173898811679824
ARG-Mendoza_ADM-3_6: 0.326164621216989
ARG-Mendoza_ADM-3_7: 0.24276496180651375
ARG-Mendoza_ADM-3_8: 0.6017961259292196
ARG-Mendoza_ADM-3_9: 0.6658141619049653
ARG-Mendoza_ADM-3_10: 0.46772228059771886
ARG-Mendoza_ADM-3_11: 0.600048966997
ARG-Mendoza_ADM-3_12: 0.24240256953745726
ARG-Mendoza_ADM-3_13: 0.23427991886409735
ARG-Mendoza_ADM-3_14: 0.06267768536667197
ARG-Mendoza_ADM-3_15: 0.15086418406848046
ARG-Mendoza_ADM-3_16: 0.19052646787512365
ARG-Mendoza_ADM-3_17: 0.2605283234054358
ARG-Mendoza_ADM-3_18: 0.14812074329437483
ARG-Mendoza_ADM-3_19: 0.10585277515556668
ARG-Mendoza_ADM-3_20: 0.19579762619705604
ARG-Mendoza_ADM-3_21: 0.2548070597522696
ARG-Mendoza_ADM-3_22: 0.1634158294535653
ARG-Mendoza_ADM-3_23: 0.1177694583969368
ARG-Mendoz