In [1]:
import os, json
import pandas as pd
import boto3
import geopandas as gpd

from math import floor
from pystac_client import Client
import planetary_computer as pc
import numpy as np
import rasterio
import requests, pyproj
from pyproj.aoi import AreaOfInterest
from pyproj.database import query_utm_crs_info
from rasterio.merge import merge as rmerge
import rasterio.mask
from rasterio.features import shapes
from shapely.geometry import Polygon, MultiPolygon, box, shape
from shapely.ops import transform, unary_union
import networkx as nx

In [2]:
import warnings
# Silence every warning for the rest of the session (no category, message or module filter is given).
# NOTE(review): this also hides deprecation notices from the imported libraries - confirm that is intended.
warnings.filterwarnings(action='ignore')

In [3]:
CONNECTIVITY_DISTANCE = 100    # Max gap between two habitat patches for them to still count as connected (meters)
MIN_PATCHSIZE = 10000    # Min area a patch must have to be included in the analysis (square meters)

In [4]:
# Load the UrbanShift boundary reference table: one row per city/region, naming its AOI and unit boundary layers
boundary_georef = pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv') 

In [5]:
# Show the boundary reference table as this cell's output
boundary_georef

Unnamed: 0,geo_name,level,aoi_boundary_name,units_boundary_name,city_name,country_name,country_code,continent
0,ARG-Mendoza,region,ADM3union,ADM3,Mendoza,Argentina,ARG,America
1,ARG-Mar_del_Plata,city,ADM3,ADM4,Mar del Plata city,Argentina,ARG,America
2,ARG-Mar_del_Plata,region,ADM2,,Mar del Plata region,Argentina,ARG,America
3,ARG-Ushuaia,city,ADM4,ADM5,Ushuaia city,Argentina,ARG,America
4,ARG-Ushuaia,region,ADM3,,Ushuaia region,Argentina,ARG,America
5,ARG-Salta,region,ADM2union,ADM3,Salta,Argentina,ARG,America
6,ARG-Buenos_Aires,region,ADM2union,ADM2,Buenos Aires,Argentina,ARG,America
7,BRA-Teresina,city,ADM4union,ADM4,Teresina city,Brazil,BRA,America
8,BRA-Teresina,region,ADM2union,ADM2,Teresina region,Brazil,BRA,America
9,BRA-Florianopolis,city,ADM4union,ADM4,Florianopolis,Brazil,BRA,America


In [6]:
# Convert district-boundary geojsons to Shapely polygons

def geojson_to_polygons(g):
    """Build one Shapely ``Polygon`` per feature of a parsed GeoJSON collection.

    Only a single coordinate ring is used per feature. When the coordinates
    are nested one level deeper (the MultiPolygon layout), that ring is the
    first ring of the first member polygon; otherwise it is the first ring
    of the polygon. Any further rings or member polygons are ignored.

    Args:
        g (dict): parsed GeoJSON with a ``'features'`` list. Every feature
            must provide ``properties['geo_name']`` and
            ``geometry['coordinates']``.

    Returns:
        list: ``(geo_name, Polygon)`` tuples, in feature order.
    """
    def first_ring(coords):
        # A list three levels down means there is one extra nesting level.
        return coords[0][0] if type(coords[0][0][0]) is list else coords[0]

    return [
        (
            feat['properties']['geo_name'],
            Polygon([
                (float(pt[0]), float(pt[1]))
                for pt in first_ring(feat['geometry']['coordinates'])
            ]),
        )
        for feat in g['features']
    ]

#district_polys = geojson_to_polygons(Districts_json)

In [7]:
# This function clips and masks raster
# Adapted from https://gis.stackexchange.com/a/387772

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    """Apply rasterio.mask.mask to an in-memory array rather than a file on disk.

    The array is written as band 1 of a temporary in-memory GeoTIFF; that
    dataset is then reopened and passed to rasterio.mask.mask together with
    ``shapes`` and ``kwargs``. No CRS is set on the temporary dataset.

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): single-band raster with dim: [H, W]
        transform (affine.Affine): affine transform of ``raster``
        shapes, **kwargs: forwarded unchanged to rasterio.mask.mask

    Returns:
        tuple: (masked array with the band axis squeezed out, dim [H, W];
        affine transform returned by rasterio.mask.mask)
    """
    profile = dict(
        driver='GTiff',
        height=raster.shape[0],
        width=raster.shape[1],
        count=1,
        dtype=raster.dtype,
        transform=transform,
    )
    with rasterio.io.MemoryFile() as memfile:
        # First pass: materialise the array as a one-band dataset.
        with memfile.open(**profile) as writer:
            writer.write(raster, 1)
        # Second pass: reopen for reading so mask() gets a readable dataset.
        with memfile.open() as reader:
            masked, masked_transform = rasterio.mask.mask(reader, shapes, **kwargs)
    return masked.squeeze(0), masked_transform

In [8]:
# Reclassify from ESA WorldCover classes to habitat/nonhabitat

def classify_habitat(r):
    """Reclassify ESA WorldCover codes to habitat (1) / non-habitat (0).

    The array is modified in place and the same object is returned. Values
    that match none of the listed codes and are below 90 are left unchanged.

    Args:
        r (numpy.ndarray): array of WorldCover class codes.

    Returns:
        numpy.ndarray: ``r`` itself, after reclassification.
    """
    # Everything from 90 upwards: herbaceous wetland, mangrove, lichen & moss.
    r[r >= 90] = 1
    # Tree cover, shrubland, grassland.
    for habitat_code in (10, 20, 30):
        r[r == habitat_code] = 1
    # Cropland, built up, sparse vegetation, snow/ice, permanent open water.
    for nonhabitat_code in (40, 50, 60, 70, 80):
        r[r == nonhabitat_code] = 0
    # No data stays 0 (kept as an explicit statement of intent).
    r[r == 0] = 0
    return r

In [9]:
def within_distance(rownum, gdf, sidx):
    """Return the row positions of patches within CONNECTIVITY_DISTANCE of one patch.

    Candidates are first narrowed with the spatial index, using the bounding
    box of the buffered patch, and then filtered by exact distance.

    Args:
        rownum (int): positional row number of the patch of interest in ``gdf``.
        gdf (geopandas.GeoDataFrame): patch geometries.
        sidx: spatial index built from ``gdf``; assumes ``sidx.intersection``
            yields positional row numbers of ``gdf`` (they are used with
            ``gdf.iloc``) - TODO confirm for the geopandas version in use.

    Returns:
        list: positions of the other patches whose distance to patch
        ``rownum`` is at most CONNECTIVITY_DISTANCE.
    """
    if rownum % 1000 == 0:
        # Progress line; len() avoids counting every column of the frame.
        print('     {0} / {1}'.format(rownum, len(gdf)))
    target = gdf.iloc[rownum].geometry
    # Cheap pre-filter: only patches whose bounds hit the buffered bounding box.
    candidates = list(sidx.intersection(target.buffer(CONNECTIVITY_DISTANCE).bounds))
    # Keep distances positional so they pair with `candidates` whatever the
    # frame's index labels are (label lookup only worked for a 0..n-1 index).
    distances = gdf.iloc[candidates].distance(target).to_numpy()
    return [
        pos
        for pos, dist in zip(candidates, distances)
        if pos != rownum and dist <= CONNECTIVITY_DISTANCE
    ]

In [10]:
def do_one_geom(row):
    """Compute the habitat-connectivity indicator for one boundary feature.

    Steps: fetch the ESA WorldCover tiles that cover the feature, mosaic and
    clip them to the boundary, reclassify to habitat/non-habitat, vectorise
    the habitat cells into patches, project the patches to UTM, drop patches
    smaller than MIN_PATCHSIZE, group patches lying within
    CONNECTIVITY_DISTANCE of each other into clusters, and return

        sum(cluster_area ** 2) / total_patch_area ** 2

    Args:
        row: one row of a boundary GeoDataFrame; must provide ``'geo_id'``,
            ``'geo_name'`` and ``'geometry'`` (a Polygon or MultiPolygon in
            EPSG:4326 lon/lat).

    Returns:
        float: the indicator value, or 0 when no habitat patch remains.
    """
    from contextlib import ExitStack

    print(row['geo_id'], row['geo_name'])
    # Access the geometry by label; positional `row[0]` on a label-indexed
    # Series is deprecated in pandas.
    geom = row['geometry']
    district_poly = geom if isinstance(geom, MultiPolygon) else MultiPolygon([geom])
    bounds = district_poly.bounds
    centroid = district_poly.centroid
    # EPSG is 32600 (or 32700 if lat is negative) + UTM zone. Each zone is six
    # degrees wide and the first zone is 1; clamp so lon == 180 stays in zone 60.
    utm_zone = min(max(floor((180 + centroid.x) / 6) + 1, 1), 60)
    target_epsg = (32700 if centroid.y < 0 else 32600) + utm_zone
    # Transform from EPSG:4326 to target EPSG
    project = pyproj.Transformer.from_crs(
        pyproj.CRS.from_epsg(4326),         # source coordinate system
        pyproj.CRS.from_epsg(target_epsg),  # destination coordinate system
        always_xy=True
    )

    # Get ESA WorldCover raster tiles covering AOI
    catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(
        collections=["esa-worldcover"],
        bbox=bounds,
    )
    items = list(search.get_items())
    signed_hrefs = [pc.sign(item.assets["map"].href) for item in items]

    # Open every tile inside one ExitStack so all datasets are closed once the
    # mosaic array has been built (the original left them open).
    with ExitStack() as stack:
        rasters_to_mosaic = [stack.enter_context(rasterio.open(href)) for href in signed_hrefs]
        # Stitch rasters together
        mosaic, mosaic_transform = rmerge(rasters_to_mosaic)

    # Clip raster to district boundary. rasterio expects an iterable of
    # geometries; wrapping the MultiPolygon in a list avoids iterating the
    # multi-part geometry itself, which Shapely 2 no longer supports.
    clipped_raster, clip_transform = mask_raster_with_geometry(
        mosaic[0], mosaic_transform, [district_poly], crop=True
    )
    # Classify clipped raster as habitat/nonhabitat
    hab_raster = classify_habitat(clipped_raster)

    # Vectorize and collect only the habitat patches (raster value 1).
    # Materialise the list inside the Env block: the original built a lazy
    # generator there and only consumed it after the context had exited.
    with rasterio.Env():
        habitat_shapes = [
            shape(geometry)
            for geometry, value in shapes(hab_raster, transform=clip_transform)
            if value == 1
        ]

    # Project to UTM so that the spatial unit is meter
    all_patches = [transform(project.transform, patch) for patch in habitat_shapes]
    # Simplify (tolerance 100, in the projected CRS units) and remove very
    # small patches from consideration
    simplified = (patch.simplify(100, preserve_topology=False) for patch in all_patches)
    patches = [patch for patch in simplified if patch.area >= MIN_PATCHSIZE]

    patchgeoms = gpd.GeoDataFrame(
        geometry=patches, crs='EPSG:{}'.format(target_epsg), index=range(len(patches))
    )
    patchgeoms_sindex = patchgeoms.sindex

    # Graph with one node per patch and an edge between patches that are
    # within CONNECTIVITY_DISTANCE; its connected components are the clusters.
    G = nx.Graph()
    G.add_nodes_from(range(len(patches)))
    for j in range(len(patches)):
        G.add_edges_from((j, k) for k in within_distance(j, patchgeoms, patchgeoms_sindex))

    # Calculate indicator
    total_area = sum(patch.area for patch in patches)
    if total_area > 0:
        cluster_areas = [
            sum(patches[j].area for j in cluster) for cluster in nx.connected_components(G)
        ]
        return sum(area ** 2 for area in cluster_areas) / (total_area ** 2)
    else:
        return 0

In [11]:
%%time
# Compute BIO-2 for every boundary layer listed in boundary_georef and write
# the accumulated table to BIO-2.csv after each layer, so a partial result
# survives if a later download or computation fails.
results = []
for i, georef_row in boundary_georef.iterrows():
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        # Some rows have no boundary layer of this kind (NaN in the table).
        # pd.isna also catches None / numpy NaN, unlike a `type(...) != float` test.
        if pd.isna(georef_row[boundary_name]):
            continue
        boundary_id = georef_row['geo_name'] + '-' + georef_row[boundary_name]
        # read boundaries
        boundary_path = 'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary-'+boundary_id +'.geojson'
        response = requests.get(boundary_path)
        # Fail with the HTTP status rather than a JSON decode error on an error page.
        response.raise_for_status()
        boundary_geo = response.json()
        gdf = gpd.GeoDataFrame.from_features(boundary_geo)
        gdf['BIO-2'] = gdf.apply(do_one_geom, axis=1)
        results.append(gdf[['geo_id', 'geo_name', 'BIO-2']].copy())
        output = pd.concat(results, axis=0)
        output.to_csv('BIO-2.csv')

CRI-San_Jose_ADM-2-union_1 CRI-San_Jose
     0 / 452
CRI-San_Jose_ADM-2_1 San Jose
     0 / 61
CRI-San_Jose_ADM-2_2 Alajuela
     0 / 72
CRI-San_Jose_ADM-2_3 Moravia
     0 / 13
CRI-San_Jose_ADM-2_4 Paraíso
     0 / 4
CRI-San_Jose_ADM-2_5 Poás
     0 / 4
CRI-San_Jose_ADM-2_6 Mora
     0 / 5
CRI-San_Jose_ADM-2_7 Alvarado
     0 / 2
CRI-San_Jose_ADM-2_8 Oreamuno
     0 / 16
CRI-San_Jose_ADM-2_9 Heredia Urban
     0 / 41
CRI-San_Jose_ADM-2_10 Heredia Rural
     0 / 1
CRI-San_Jose_ADM-2_11 Tibás
     0 / 15
CRI-San_Jose_ADM-2_12 Vasquez de Coronado
     0 / 20
CRI-San_Jose_ADM-2_13 Atenas
     0 / 1
CRI-San_Jose_ADM-2_14 Desamparados
     0 / 44
CRI-San_Jose_ADM-2_15 Aserrí
     0 / 8
CRI-San_Jose_ADM-2_24 Barva
     0 / 9
CRI-San_Jose_ADM-2_25 Belén
     0 / 22
CRI-San_Jose_ADM-2_26 Escazú
     0 / 16
CRI-San_Jose_ADM-2_27 Flores
     0 / 15
CRI-San_Jose_ADM-2_28 La Unión
     0 / 15
CRI-San_Jose_ADM-2_29 San Isidro
     0 / 4
CRI-San_Jose_ADM-2_30 San Rafael
     0 / 12
CRI-San_Jose_ADM-