In [1]:
import sys
!{sys.executable} -m pip install --extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-packages/simple terracatalogueclient

Looking in indexes: https://pypi.org/simple, https://artifactory.vgt.vito.be/api/pypi/python-packages/simple
Collecting terracatalogueclient
  Using cached https://artifactory.vgt.vito.be/api/pypi/python-packages/terracatalogueclient/0.1.14/terracatalogueclient-0.1.14-py3-none-any.whl (12 kB)
Collecting requests-auth>=5.3.0
  Using cached https://artifactory.vgt.vito.be/api/pypi/python-packages/packages/packages/60/af/da90802d91cbc45bdc160e9dcc70a07cbb581d748549edc3d42d25e04c8f/requests_auth-6.0.0-py3-none-any.whl (25 kB)
Installing collected packages: requests-auth, terracatalogueclient
Successfully installed requests-auth-6.0.0 terracatalogueclient-0.1.14


In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
import os, requests, json, geojson
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely.ops import unary_union
import rasterio
from rasterio.features import shapes
from shapely.ops import transform
from rasterio.merge import merge as rmerge
import rasterio.mask
import fiona
import shapely
import pyproj
import boto3
from terracatalogueclient import Catalogue as Terracat

In [4]:
# define directory
out_dir = os.getcwd()
aws_s3_dir = "https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data"

In [3]:
aws_creds = json.loads(open('aws_credentials.json').readline())
ACCESS_KEY = aws_creds['ACCESS_KEY']
SECRET_KEY = aws_creds['SECRET_KEY']
SOURCEDATA_BUCKETNAME = 'cities-urbanshift'

KBA_FILENAME = 'data/KBA_data/KBAsGlobal_2022_March_01_POL'
KBA_TEMPFILENAME = 'temp_kbadata'
OUTPUT_FILENAME = 'LND-8.csv'

In [6]:
project = pyproj.Transformer.from_crs(
    pyproj.CRS.from_epsg(4326), # source coordinate system
    pyproj.CRS.from_epsg(4326), # destination coordinate system
    always_xy=True
)
# Get ESA Wo

In [7]:
worldcover_catalogue = Terracat().authenticate() 
terracat = worldcover_catalogue.authenticate_non_interactive('tedwongwri', 'WRIpass12!')

In [4]:
s3 = boto3.resource(
    service_name = 's3',
    aws_access_key_id = ACCESS_KEY,
    aws_secret_access_key = SECRET_KEY
)

bucket = s3.Bucket(SOURCEDATA_BUCKETNAME)
for ext in ('shp', 'shx'):
    bucket.download_file('{0}.{1}'.format(KBA_FILENAME, ext), '{0}.{1}'.format(KBA_TEMPFILENAME, ext))

kba_multi = shapely.geometry.MultiPolygon([shapely.geometry.shape(poly['geometry']) for poly in fiona.open('{0}.shp'.format(KBA_TEMPFILENAME))]) 

  shell = ob[0]
  holes = ob[1]


In [9]:
# get list of urbanshift cities
boundary_georef = pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')

# remove cities without tree cover data availability
#tml_not_available_cities = ['BRA-Salvador','MEX-Monterrey']
tml_not_available_cities = []
boundary_georef = boundary_georef[~boundary_georef['geo_name'].isin(tml_not_available_cities)].reset_index(drop=True)
boundary_georef

Unnamed: 0,geo_name,level,aoi_boundary_name,units_boundary_name,city_name,country_name,country_code,continent
0,ARG-Mendoza,region,ADM3union,ADM3,Mendoza,Argentina,ARG,America
1,ARG-Mar_del_Plata,city,ADM3,ADM4,Mar del Plata city,Argentina,ARG,America
2,ARG-Mar_del_Plata,region,ADM2,,Mar del Plata region,Argentina,ARG,America
3,ARG-Ushuaia,city,ADM4,ADM5,Ushuaia city,Argentina,ARG,America
4,ARG-Ushuaia,region,ADM3,,Ushuaia region,Argentina,ARG,America
5,ARG-Salta,region,ADM2union,ADM3,Salta,Argentina,ARG,America
6,ARG-Buenos_Aires,region,ADM2union,ADM2,Buenos Aires,Argentina,ARG,America
7,BRA-Teresina,city,ADM4union,ADM4,Teresina city,Brazil,BRA,America
8,BRA-Teresina,region,ADM2union,ADM2,Teresina region,Brazil,BRA,America
9,BRA-Florianopolis,city,ADM4union,ADM4,Florianopolis,Brazil,BRA,America


In [10]:
def boundingbox_wkt(p):
    # Returns WKT for bounding box.
    # Necessary because GBIF API won't accept complex polygons.
    minx, miny, maxx, maxy = p.bounds
    return 'POLYGON (({0} {3}, {0} {2}, {1} {2}, {0} {3}))'.format(str(minx), str(maxx), str(miny), str(maxy))

def classify_builtup(r):
    r[r != 50] = 0
    r[r == 50] = 1    # built up
    return r

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    # This function clips and masks raster
    # Adapted from https://gis.stackexchange.com/a/387772
    """Wrapper for rasterio.mask.mask to allow for in-memory processing.

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): raster to be masked with dim: [H, W]
        transform (affine.Affine): the transform of the raster
        shapes, **kwargs: passed to rasterio.mask.mask

    Returns:
        masked: numpy.ndarray or numpy.ma.MaskedArray with dim: [H, W], and new affine transform
    """
    with rasterio.io.MemoryFile() as memfile:
        with memfile.open(
            driver='GTiff',
            height=raster.shape[0],
            width=raster.shape[1],
            count=1,
            dtype=raster.dtype,
            transform=transform,
        ) as dataset:
            dataset.write(raster, 1)
        with memfile.open() as dataset:
            output, new_transform = rasterio.mask.mask(dataset, shapes, **kwargs)
    return output.squeeze(0), new_transform

def do_one_geom(row):
    poly = row[0]
    shapelybox = poly.bounds
    kba_box = shapely.ops.clip_by_rect(kba_multi, *shapelybox)
    kba_box = shapely.ops.unary_union(kba_box)
    kba_poly = kba_box.intersection(poly)
    kbapoly_area = kba_poly.area
    if kbapoly_area > 0:
    
        box = boundingbox_wkt(poly)

        products = terracat.get_products('urn:eop:VITO:ESA_WorldCover_10m_2020_V1', geometry=box)

        # Create mosaic raster of Worldcover rasters downloaded from ESA
        rasters_to_mosaic = []
        for product in products:
            if not '{0}dir'.format(product.title) in os.listdir('.'):
                terracat.download_file(product.data[0], '{0}dir'.format(product.title))
            raster = rasterio.open('{0}dir/{0}_Map.tif'.format(product.title))
            rasters_to_mosaic.append(raster)

        mosaic, mosaic_transform = rmerge(rasters_to_mosaic)


        # Clip raster to district boundary
        if poly.type == 'Polygon':
            multipoly = MultiPolygon([poly])
        else:
            multipoly = poly

        clipped_raster, clip_transform = mask_raster_with_geometry(mosaic[0], mosaic_transform, multipoly, crop=True)
        # Classify clipped raster as habitat/nonhabitat
        builtup_raster = classify_builtup(clipped_raster)


        # Vectorize and collect only the builtup patches
        with rasterio.Env():
            image = builtup_raster
            results = (
            {'properties': {'raster_val': v}, 'geometry': s}
            for (s, v) 
            in shapes(image, transform=clip_transform) if v == 1)
        shapelist = list(results)
        shapelist = shapelist[:-1]
        all_patches = [  # project to UTM so that spatial unit is meter
            transform(project.transform, Polygon(shapelist[i]['geometry']['coordinates'][0])) for i in range(len(shapelist))
        ]
        patches = [j.simplify(0, preserve_topology=False) for j in all_patches]
        #patchgeoms = gpd.GeoDataFrame(geometry=patches, crs='EPSG:{}'.format(target_epsg), index=range(len(patches)))
        #patchgeoms_sindex = patchgeoms.sindex

        builtup_multi = MultiPolygon(patches)

        builtup_box = shapely.ops.clip_by_rect(builtup_multi, *shapelybox)
        builtup_box = shapely.ops.unary_union(builtup_box)    # unary_union repairs inalid geometries

        builtup_kba_poly = builtup_box.intersection(kba_poly)

        result = builtup_kba_poly.area/kbapoly_area
    else:
        result = -9999

    print('{0}: {1}'.format(row[1], result))
    return result

In [11]:
for i in range(len(boundary_georef)):
    if not OUTPUT_FILENAME in os.listdir('.'):
        so_far_df = pd.DataFrame()
        so_far_df.to_csv(OUTPUT_FILENAME)
        so_far = []
    else:
        so_far_df = pd.read_csv(OUTPUT_FILENAME)
        so_far = [so_far_df.iloc[j]['geo_id'] for j in range(len(so_far_df))]
    
    most_recent = []
    #outputs_df = pd.DataFrame()
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        if type(boundary_georef.loc[i, boundary_name]) != float: # sometimes boundary_id is nan
            boundary_id = boundary_georef.loc[i, 'geo_name']+'-' + boundary_georef.loc[i, boundary_name]
            boundary_path = aws_s3_dir +'/boundaries/v_0/boundary-' + boundary_id + '.geojson'
            boundary_geo = requests.get(boundary_path).json()
            temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)
            temp_gdf['LND-8'] = temp_gdf.apply(do_one_geom, axis=1)
            most_recent.append(temp_gdf.copy())
            
    # Delete WorldCover rasters to prevent filling up disk
    for directoryname in os.listdir('.'):
        if directoryname[:15] == 'ESA_WorldCover_':
            for filename in os.listdir(directoryname):
                os.remove('{0}/{1}'.format(directoryname, filename))
            os.rmdir(directoryname)
            
    result = pd.concat([so_far_df] + most_recent, axis=0)
    result[['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'creation_date', 'LND-8']].to_csv(OUTPUT_FILENAME)

ARG-Mendoza_ADM-3-union_1: 0.0033788518450487887
ARG-Mendoza_ADM-3_1: -9999
ARG-Mendoza_ADM-3_2: -9999
ARG-Mendoza_ADM-3_3: -9999
ARG-Mendoza_ADM-3_4: -9999
ARG-Mendoza_ADM-3_5: -9999
ARG-Mendoza_ADM-3_6: -9999
ARG-Mendoza_ADM-3_7: -9999
ARG-Mendoza_ADM-3_8: -9999
ARG-Mendoza_ADM-3_9: -9999
ARG-Mendoza_ADM-3_10: -9999
ARG-Mendoza_ADM-3_11: -9999
ARG-Mendoza_ADM-3_12: -9999
ARG-Mendoza_ADM-3_13: -9999
ARG-Mendoza_ADM-3_14: -9999
ARG-Mendoza_ADM-3_15: -9999
ARG-Mendoza_ADM-3_16: -9999
ARG-Mendoza_ADM-3_17: -9999
ARG-Mendoza_ADM-3_18: -9999
ARG-Mendoza_ADM-3_19: -9999
ARG-Mendoza_ADM-3_20: -9999
ARG-Mendoza_ADM-3_21: -9999
ARG-Mendoza_ADM-3_22: -9999
ARG-Mendoza_ADM-3_23: -9999
ARG-Mendoza_ADM-3_24: -9999
ARG-Mendoza_ADM-3_25: -9999
ARG-Mendoza_ADM-3_26: -9999
ARG-Mendoza_ADM-3_27: -9999
ARG-Mendoza_ADM-3_28: -9999
ARG-Mendoza_ADM-3_29: -9999
ARG-Mendoza_ADM-3_30: -9999
ARG-Mendoza_ADM-3_31: -9999
ARG-Mendoza_ADM-3_32: -9999
ARG-Mendoza_ADM-3_33: -9999
ARG-Mendoza_ADM-3_34: -9999
ARG-Mend

In [5]:
for ext in ('shp', 'shx'):
    os.remove('{0}.{1}'.format(KBA_TEMPFILENAME, ext))