In [1]:
import sys
#!{sys.executable} -m pip install --extra-index-url https://artifactory.vgt.vito.be/api/pypi/python-packages/simple terracatalogueclient

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
import os, requests, json, geojson, gc
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely.ops import unary_union
import rasterio
from rasterio.features import shapes
from shapely.ops import transform
from rasterio.merge import merge as rmerge
import rasterio.mask
import fiona
import shapely
import pyproj
from terracatalogueclient import Catalogue as Terracat

In [4]:
# define directory
out_dir = os.getcwd()
aws_s3_dir = "https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data"

In [5]:
OUTPUT_FILENAME = 'BIO-1.csv'

In [6]:
project = pyproj.Transformer.from_crs(
    pyproj.CRS.from_epsg(4326), # source coordinate system
    pyproj.CRS.from_epsg(4326), # destination coordinate system
    always_xy=True
)
# Get ESA Wo

In [7]:
worldcover_catalogue = Terracat().authenticate() 
terracat = worldcover_catalogue.authenticate_non_interactive('tedwongwri', 'WRIpass12!')

In [8]:
# get list of urbanshift cities
boundary_georef = pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')

# remove cities without tree cover data availability
#tml_not_available_cities = ['BRA-Salvador','MEX-Monterrey']
tml_not_available_cities = []
boundary_georef = boundary_georef[~boundary_georef['geo_name'].isin(tml_not_available_cities)].reset_index(drop=True)
boundary_georef

Unnamed: 0,geo_name,level,aoi_boundary_name,units_boundary_name,city_name,country_name,country_code,continent
0,ARG-Mendoza,region,ADM3union,ADM3,Mendoza,Argentina,ARG,America
1,ARG-Mar_del_Plata,city,ADM3,ADM4,Mar del Plata city,Argentina,ARG,America
2,ARG-Mar_del_Plata,region,ADM2,,Mar del Plata region,Argentina,ARG,America
3,ARG-Ushuaia,city,ADM4,ADM5,Ushuaia city,Argentina,ARG,America
4,ARG-Ushuaia,region,ADM3,,Ushuaia region,Argentina,ARG,America
5,ARG-Salta,region,ADM2union,ADM3,Salta,Argentina,ARG,America
6,ARG-Buenos_Aires,region,ADM2union,ADM2,Buenos Aires,Argentina,ARG,America
7,BRA-Teresina,city,ADM4union,ADM4,Teresina city,Brazil,BRA,America
8,BRA-Teresina,region,ADM2union,ADM2,Teresina region,Brazil,BRA,America
9,BRA-Florianopolis,city,ADM4union,ADM4,Florianopolis,Brazil,BRA,America


In [9]:
def boundingbox_wkt(p):
    # Returns WKT for bounding box.
    # Necessary because GBIF API won't accept complex polygons.
    minx, miny, maxx, maxy = p.bounds
    return 'POLYGON (({0} {3}, {0} {2}, {1} {2}, {0} {3}))'.format(str(minx), str(maxx), str(miny), str(maxy))

def classify_naturalarea(r):
    r[r == 10] = 1  # Tree cover
    r[r == 20] = 1  # Shrubland
    r[r == 30] = 1  # Grassland
    r[r == 40] = 0  # Cropland
    r[r == 50] = 0  # Built-up
    r[r == 60] = 0  # Bare / sparse vegetation
    r[r == 70] = 0  # Snow and ice
    r[r == 80] = 0  # Permanent water bodies
    r[r == 90] = 1  # Herbaceous wetland
    r[r == 95] = 1  # Mangroves
    r[r == 100] = 1 # Moss and lichen
    return r

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    # This function clips and masks raster
    # Adapted from https://gis.stackexchange.com/a/387772
    """Wrapper for rasterio.mask.mask to allow for in-memory processing.

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): raster to be masked with dim: [H, W]
        transform (affine.Affine): the transform of the raster
        shapes, **kwargs: passed to rasterio.mask.mask

    Returns:
        masked: numpy.ndarray or numpy.ma.MaskedArray with dim: [H, W], and new affine transform
    """
    with rasterio.io.MemoryFile() as memfile:
        with memfile.open(
            driver='GTiff',
            height=raster.shape[0],
            width=raster.shape[1],
            count=1,
            dtype=raster.dtype,
            transform=transform,
        ) as dataset:
            dataset.write(raster, 1)
        with memfile.open() as dataset:
            output, new_transform = rasterio.mask.mask(dataset, shapes, **kwargs)
    return output.squeeze(0), new_transform

def do_one_geom(row):
    poly = row[0]
    
    shapelybox = poly.bounds
    naturalarea_box = shapely.ops.clip_by_rect(naturalarea_multi, *shapelybox)
    naturalarea_box = shapely.ops.unary_union(naturalarea_box)    # unary_union repairs inalid geometries

    naturalarea_poly = naturalarea_box.intersection(poly)

    result = naturalarea_poly.area/poly.area

    print('{0}: {1}'.format(row[1], result))
    return result

In [10]:
for i in range(len(boundary_georef)):
    if not OUTPUT_FILENAME in os.listdir('.'):
        so_far_df = pd.DataFrame()
        so_far_df.to_csv(OUTPUT_FILENAME)
        so_far = []
    else:
        so_far_df = pd.read_csv(OUTPUT_FILENAME)
        so_far = [so_far_df.iloc[j]['geo_id'] for j in range(len(so_far_df))]
    
    most_recent = []

    do_units = True
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        if type(boundary_georef.loc[i, boundary_name]) != float: # sometimes boundary_id is nan
            boundary_id = boundary_georef.loc[i, 'geo_name']+'-' + boundary_georef.loc[i, boundary_name]
            boundary_path = aws_s3_dir +'/boundaries/v_0/boundary-' + boundary_id + '.geojson'
            boundary_geo = requests.get(boundary_path).json()
            temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)
            if boundary_name == 'aoi_boundary_name':  # Calculates naturalarea for aoi, uses it for all unit-of-analysis calculations.
                row = temp_gdf.iloc[0]
                poly = row[0]

                box = boundingbox_wkt(poly)

                products = terracat.get_products('urn:eop:VITO:ESA_WorldCover_10m_2020_V1', geometry=box)

                # Create mosaic raster of Worldcover rasters downloaded from ESA
                rasters_to_mosaic = []
                for product in products:
                    if not '{0}dir'.format(product.title) in os.listdir('.'):
                        terracat.download_file(product.data[0], '{0}dir'.format(product.title))
                    raster = rasterio.open('{0}dir/{0}_Map.tif'.format(product.title))
                    rasters_to_mosaic.append(raster)

                mosaic, mosaic_transform = rmerge(rasters_to_mosaic)

                del rasters_to_mosaic
                gc.collect()

                # Clip raster to district boundary

                clipped_raster, clip_transform = mask_raster_with_geometry(mosaic[0], mosaic_transform, MultiPolygon([shapely.geometry.box(*poly.bounds)]), crop=True)

                naturalarea_raster = classify_naturalarea(clipped_raster)

                with rasterio.Env():
                    image = naturalarea_raster
                    results = (
                    {'properties': {'raster_val': v}, 'geometry': s}
                    for (s, v) 
                    in shapes(image, transform=clip_transform) if v == 1)

                patches = [Polygon(j['geometry']['coordinates'][0]).simplify(0.0001, preserve_topology=False) for j in results][:-1]
                naturalarea_multi = MultiPolygon(patches)
            
            
            
            if boundary_name == 'aoi_boundary_name' or (boundary_name == 'units_boundary_name' and do_units):
                temp_gdf['BIO-1'] = temp_gdf.apply(do_one_geom, axis=1)
            else:
                temp_gdf['BIO-1'] = temp_gdf.apply(lambda x: 0, axis=1)
            most_recent.append(temp_gdf.copy())
            if boundary_name == 'aoi_boundary_name':
                do_units = most_recent[-1]['BIO-1'][0] > 0
                
    result = pd.concat([so_far_df] + most_recent, axis=0)
    result[['geometry', 'geo_id', 'geo_level', 'geo_name', 'geo_parent_name', 'creation_date', 'BIO-1']].to_csv(OUTPUT_FILENAME)

    # Delete WorldCover rasters to prevent filling up disk
    for directoryname in os.listdir('.'):
        if directoryname[:15] == 'ESA_WorldCover_':
            for filename in os.listdir(directoryname):
                os.remove('{0}/{1}'.format(directoryname, filename))
            os.rmdir(directoryname)
            
    

1
2
2.5
3
4
5
6
7
8
9
CHN-Chongqing_ADM-1_1: 0.5730709004769271
CHN-Chongqing_ADM-3_1: 0.9864654740580203
CHN-Chongqing_ADM-3_2: 0.8413766870486638
CHN-Chongqing_ADM-3_3: 0.960723883717128
CHN-Chongqing_ADM-3_4: 0.0
CHN-Chongqing_ADM-3_5: 0.9133046665135954
CHN-Chongqing_ADM-3_6: 0.8375094342514007
CHN-Chongqing_ADM-3_7: 0.6274605023868757
CHN-Chongqing_ADM-3_8: 0.9908784874699175
CHN-Chongqing_ADM-3_9: 0.0
CHN-Chongqing_ADM-3_10: 0.04502804175602456
CHN-Chongqing_ADM-3_11: 0.012911017055749993
CHN-Chongqing_ADM-3_12: 0.9999999999999998
CHN-Chongqing_ADM-3_13: 0.9971717624976001
CHN-Chongqing_ADM-3_14: 0.9921859426395685
CHN-Chongqing_ADM-3_15: 0.7452839743526767
CHN-Chongqing_ADM-3_16: 0.0
CHN-Chongqing_ADM-3_17: 0.7792769184733147
CHN-Chongqing_ADM-3_18: 0.6823137122273611
CHN-Chongqing_ADM-3_19: 0.9999999999999996
CHN-Chongqing_ADM-3_20: 0.9809609877053462
CHN-Chongqing_ADM-3_21: 0.5969925962405979
CHN-Chongqing_ADM-3_22: 0.0
CHN-Chongqing_ADM-3_23: 0.3346192479691558
CHN-Chongqing_

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'ESA_WorldCover_10m_2020_v100_N30E105dir/ESA_WorldCover_10m_2020_v100_N30E105_Map.tif'