In [1]:
import os, json
import pandas as pd
import boto3
import geopandas as gpd

from math import floor
from pystac_client import Client
import planetary_computer as pc
import numpy as np
import rasterio
import requests, pyproj
from pyproj.aoi import AreaOfInterest
from pyproj.database import query_utm_crs_info
from rasterio.merge import merge as rmerge
import rasterio.mask
from rasterio.features import shapes
from shapely.geometry import Polygon, MultiPolygon, box
from shapely.ops import transform, unary_union
import networkx as nx

In [2]:
import warnings

# Install a catch-all "ignore" filter: every warning category is silenced
# from here on, until the warning filters are changed again.
warnings.simplefilter('ignore')

In [3]:
# Tunable analysis parameters.

# Max distance two patches can be apart and be considered connected (meter)
CONNECTIVITY_DISTANCE = 100

# Min patch size to be included in analysis (sq meter)
MIN_PATCHSIZE = 1_000

In [4]:
# Load the lookup table of UrbanShift cities (one row per city / boundary level)
boundary_georef = pd.read_csv(
    'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv'
)

In [5]:
# Display the boundary lookup table (last expression of the cell is rendered)
boundary_georef

Unnamed: 0,geo_name,level,aoi_boundary_name,units_boundary_name,city_name,country_name,country_code,continent
0,ARG-Mendoza,region,ADM3union,ADM3,Mendoza,Argentina,ARG,America
1,ARG-Mar_del_Plata,city,ADM3,ADM4,Mar del Plata city,Argentina,ARG,America
2,ARG-Mar_del_Plata,region,ADM2,,Mar del Plata region,Argentina,ARG,America
3,ARG-Ushuaia,city,ADM4,ADM5,Ushuaia city,Argentina,ARG,America
4,ARG-Ushuaia,region,ADM3,,Ushuaia region,Argentina,ARG,America
5,ARG-Salta,region,ADM2union,ADM3,Salta,Argentina,ARG,America
6,ARG-Buenos_Aires,region,ADM2union,ADM2,Buenos Aires,Argentina,ARG,America
7,BRA-Teresina,city,ADM4union,ADM4,Teresina city,Brazil,BRA,America
8,BRA-Teresina,region,ADM2union,ADM2,Teresina region,Brazil,BRA,America
9,BRA-Florianopolis,city,ADM4union,ADM4,Florianopolis,Brazil,BRA,America


In [6]:
# Convert district-boundary geojsons to Shapely polygons

def geojson_to_polygons(g):
    """Turn a GeoJSON FeatureCollection dict into (geo_name, Polygon) pairs.

    Only one ring per feature is used:
      * Polygon-style coordinates   -> coordinates[0]
      * MultiPolygon-style (one extra nesting level) -> coordinates[0][0],
        i.e. the first ring of the first member polygon.
    Any further rings or member polygons are ignored.

    Args:
        g: dict with a 'features' list; each feature needs
           properties['geo_name'] and geometry['coordinates'].

    Returns:
        list of (name, shapely Polygon) tuples, in feature order.
    """
    named_polygons = []
    for feature in g['features']:
        name = feature['properties']['geo_name']
        coords = feature['geometry']['coordinates']
        # In the deeper (MultiPolygon) nesting, what would be an x ordinate
        # is itself a list (a vertex).
        ring = coords[0][0] if type(coords[0][0][0]) is list else coords[0]
        vertices = [(float(pt[0]), float(pt[1])) for pt in ring]
        named_polygons.append((name, Polygon(vertices)))
    return named_polygons

#district_polys = geojson_to_polygons(Districts_json)

In [7]:
# This function clips and masks raster
# Adapted from https://gis.stackexchange.com/a/387772

def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
    """Apply rasterio.mask.mask to an in-memory array.

    The array is written as band 1 of a single-band GeoTIFF held in a
    rasterio MemoryFile, reopened for reading, and masked there.

    Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html

    Args:
        raster (numpy.ndarray): raster to be masked, dim [H, W]
        transform (affine.Affine): affine transform of `raster`
        shapes, **kwargs: forwarded unchanged to rasterio.mask.mask

    Returns:
        (masked, new_transform): the masked raster with the band axis
        squeezed away (dim [H, W]) and the affine transform returned by
        rasterio.mask.mask.
    """
    profile = dict(
        driver='GTiff',
        height=raster.shape[0],
        width=raster.shape[1],
        count=1,
        dtype=raster.dtype,
        transform=transform,
    )
    with rasterio.io.MemoryFile() as memfile:
        # First pass: write the array into the in-memory GeoTIFF.
        with memfile.open(**profile) as writer:
            writer.write(raster, 1)
        # Second pass: reopen for reading and mask.
        with memfile.open() as reader:
            masked, masked_transform = rasterio.mask.mask(reader, shapes, **kwargs)
    return masked.squeeze(0), masked_transform

In [8]:
# Reclassify from ESA WorldCover classes to habitat/nonhabitat

def classify_habitat(r):
    """Reclassify an ESA WorldCover class array to habitat (1) / non-habitat (0), in place.

    Both masks are computed from the untouched input before anything is
    written, so the result does not depend on assignment order:
      * 40 (cropland), 50 (built up), 70 (snow/ice), 80 (permanent open water) -> 0
      * every other value >= 10 (tree, shrub, grassland, sparse veg,
        herbaceous wetland, mangrove, lichen & moss) -> 1
      * values below 10 are left as they are (0 stays 0)

    Args:
        r: numpy array of class codes; it is modified in place.

    Returns:
        The same array object `r`.
    """
    nonhabitat = (r == 40) | (r == 50) | (r == 70) | (r == 80)
    habitat = (r >= 10) & ~nonhabitat
    r[habitat] = 1
    r[nonhabitat] = 0
    return r

In [9]:
def within_distance(rownum, gdf, sidx):
    """Return the positions of patches within CONNECTIVITY_DISTANCE of patch `rownum`.

    Args:
        rownum: integer position (iloc) of the reference patch in `gdf`.
        gdf: GeoDataFrame of patches.
        sidx: spatial index of `gdf`; its `intersection(bounds)` result is
            used as integer positions into `gdf`.

    Returns:
        list of integer positions (never including `rownum`) whose geometry
        is at most CONNECTIVITY_DISTANCE away from the reference geometry.
    """
    if rownum % 1000 == 0:
        print('     {0} / {1}'.format(rownum, gdf.count().geometry))
    target = gdf.iloc[rownum].geometry
    # Cheap pre-filter: candidates whose bounding box meets the buffered target's box.
    candidates = list(sidx.intersection(target.buffer(CONNECTIVITY_DISTANCE).bounds))
    # Pair candidates with their distances by position. The distance Series is
    # labelled with gdf's index, so looking it up with positional integers is
    # only right when that index happens to be 0..n-1.
    distances = gdf.iloc[candidates].distance(target).to_numpy()
    return [
        pos for pos, dist in zip(candidates, distances)
        if dist <= CONNECTIVITY_DISTANCE and pos != rownum
    ]

In [10]:
def do_one_geom(row):
    """Compute the habitat-connectivity indicator for one boundary feature.

    Pipeline:
      1. Pick the UTM zone of the boundary centroid and build a
         EPSG:4326 -> UTM transformer (so lengths/areas are in meters).
      2. Fetch and mosaic the ESA WorldCover tiles intersecting the boundary
         bounding box (Planetary Computer STAC API; requires network access).
      3. Clip to the boundary and reclassify to habitat / non-habitat.
      4. Vectorize habitat pixels into patches, simplify them, and drop
         patches smaller than MIN_PATCHSIZE.
      5. Link patches lying within CONNECTIVITY_DISTANCE of each other and
         take connected components as clusters.
      6. Return sum(cluster_area ** 2) / total_patch_area ** 2.

    Args:
        row: one row of a boundary GeoDataFrame. Must provide 'geo_id',
            'geo_name' and the boundary geometry (Polygon or MultiPolygon in
            lon/lat) under 'geometry', or else as its first element.

    Returns:
        The indicator as a number between 0 and 1; 0 when no habitat patch
        passes the size filter.
    """
    print(row['geo_id'], row['geo_name'])

    # Read the geometry by name; integer keys on a label-indexed Series rely on
    # a positional fallback. The first element is kept as a fallback for callers
    # whose geometry is not labelled 'geometry'.
    geom = row['geometry'] if 'geometry' in row else row.iloc[0]
    district_poly = geom if isinstance(geom, MultiPolygon) else MultiPolygon([geom])
    bounds = district_poly.bounds

    # EPSG is 32600 (or 32700 if lat is neg) + longitude zone.
    # Each zone is six degrees, and first zone is 1; lon == 180 is clamped into zone 60.
    centroid = district_poly.centroid
    utm_zone = min(floor((180 + centroid.x) / 6) + 1, 60)
    target_epsg = (32700 if centroid.y < 0 else 32600) + utm_zone

    # Transform from EPSG:4326 to target EPSG
    project = pyproj.Transformer.from_crs(
        pyproj.CRS.from_epsg(4326),         # source coordinate system
        pyproj.CRS.from_epsg(target_epsg),  # destination coordinate system
        always_xy=True
    )

    # Get ESA WorldCover raster tiles covering AOI
    # NOTE(review): the search is not restricted by date/version; if the
    # collection holds more than one product year, all of them are merged — confirm.
    catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(
        collections=["esa-worldcover"],
        bbox=bounds,
    )
    items = list(search.get_items())

    # Stitch rasters together; always close the remote datasets afterwards.
    rasters_to_mosaic = []
    try:
        for item in items:
            rasters_to_mosaic.append(rasterio.open(pc.sign(item.assets["map"].href)))
        mosaic, mosaic_transform = rmerge(rasters_to_mosaic)
    finally:
        for raster in rasters_to_mosaic:
            raster.close()

    # Clip raster to district boundary. rasterio.mask.mask expects an iterable
    # of geometries, so the MultiPolygon is wrapped in a list.
    clipped_raster, clip_transform = mask_raster_with_geometry(
        mosaic[0], mosaic_transform, [district_poly], crop=True
    )

    # Classify clipped raster as habitat/nonhabitat
    hab_raster = classify_habitat(clipped_raster)

    # Vectorize and collect only the habitat patches (as opposed to nonhabitat).
    # The list is built inside the Env block so the vectorization actually runs
    # there. Every kept shape is a habitat patch, so none is discarded.
    with rasterio.Env():
        shapelist = [
            {'properties': {'raster_val': v}, 'geometry': s}
            for (s, v) in shapes(hab_raster, transform=clip_transform)
            if v == 1
        ]

    # Project to UTM so that spatial unit is meter.
    # NOTE(review): only the exterior ring is used, so holes inside a patch
    # count towards its area — confirm this is intended.
    all_patches = [
        transform(project.transform, Polygon(shape['geometry']['coordinates'][0]))
        for shape in shapelist
    ]

    # Simplify, then remove very small patches from consideration
    patches = [
        patch
        for patch in (p.simplify(10, preserve_topology=False) for p in all_patches)
        if patch.area >= MIN_PATCHSIZE
    ]
    if not patches:
        return 0

    patchgeoms = gpd.GeoDataFrame(
        geometry=patches, crs='EPSG:{}'.format(target_epsg), index=range(len(patches))
    )
    patchgeoms_sindex = patchgeoms.sindex

    # Build the connectivity graph: one node per patch, one edge per pair of
    # patches within CONNECTIVITY_DISTANCE of each other.
    G = nx.Graph()
    G.add_nodes_from(range(len(patches)))
    for i in range(len(patches)):
        G.add_edges_from((i, j) for j in within_distance(i, patchgeoms, patchgeoms_sindex))

    # Calculate indicator
    total_area = sum(patch.area for patch in patches)
    if total_area <= 0:
        return 0
    cluster_areas = [
        sum(patches[j].area for j in cluster)
        for cluster in nx.connected_components(G)
    ]
    return sum(area ** 2 for area in cluster_areas) / (total_area ** 2)

In [11]:
%%time
results = []
for i in range(len(boundary_georef)): 
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        if type(boundary_georef.loc[i, boundary_name]) != float: # sometimes boundary_id is nan
            boundary_id = boundary_georef.loc[i, 'geo_name'] + '-' + boundary_georef.loc[i, boundary_name]
            # read boundaries
            boundary_path = 'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary-'+boundary_id +'.geojson'
            boundary_geo = requests.get(boundary_path).json()
            gdf = gpd.GeoDataFrame.from_features(boundary_geo)
            gdf['BIO-2'] = gdf.apply(do_one_geom, axis=1)
            results.append(gdf[['geo_id', 'geo_name', 'BIO-2']].copy())
            output = pd.concat(results, axis=0)
            output.to_csv('BIO-2.csv')

CHN-Chengdu_ADM-3-union_1 CHN-Chengdu
     0 / 310193
     1000 / 310193
     2000 / 310193
     3000 / 310193
     4000 / 310193
     5000 / 310193
     6000 / 310193
     7000 / 310193
     8000 / 310193
     9000 / 310193
     10000 / 310193
     11000 / 310193
     12000 / 310193
     13000 / 310193
     14000 / 310193
     15000 / 310193
     16000 / 310193
     17000 / 310193
     18000 / 310193
     19000 / 310193
     20000 / 310193
     21000 / 310193
     22000 / 310193
     23000 / 310193
     24000 / 310193
     25000 / 310193
     26000 / 310193
     27000 / 310193
     28000 / 310193
     29000 / 310193
     30000 / 310193
     31000 / 310193
     32000 / 310193
     33000 / 310193
     34000 / 310193
     35000 / 310193
     36000 / 310193
     37000 / 310193
     38000 / 310193
     39000 / 310193
     40000 / 310193
     41000 / 310193
     42000 / 310193
     43000 / 310193
     44000 / 310193
     45000 / 310193
     46000 / 310193
     47000 / 310193
     48000 / 31

In [23]:
# Load the published table of city indicators
cities_indicators = pd.read_csv(
    'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/indicators/cities_indicators_v2.csv'
)

In [24]:
# Display the indicator table (last expression of the cell is rendered)
cities_indicators

Unnamed: 0,geo_id,geo_level,geo_name,geo_parent_name
0,ARG-Mendoza_ADM-3-union_1,ADM-3-union,ARG-Mendoza,ARG-Mendoza
1,ARG-Mendoza_ADM-3_1,ADM-3,Distrito Las Barrancas,ARG-Mendoza
2,ARG-Mendoza_ADM-3_2,ADM-3,Distrito San Roque,ARG-Mendoza
3,ARG-Mendoza_ADM-3_3,ADM-3,Distrito Fray Luis Beltrán,ARG-Mendoza
4,ARG-Mendoza_ADM-3_4,ADM-3,Distrito Rodeo del Medio,ARG-Mendoza
...,...,...,...,...
1935,IDN-Palembang_ADM-3_12,ADM-3,Bukit Kecil,IDN-Palembang
1936,IDN-Palembang_ADM-3_13,ADM-3,Ilir Barat II,IDN-Palembang
1937,IDN-Palembang_ADM-3_14,ADM-3,Ilir Barat I,IDN-Palembang
1938,IDN-Palembang_ADM-3_15,ADM-3,Gandus,IDN-Palembang


In [20]:
# Load the previously computed connectivity results (tab-separated).
# NOTE(review): no visible cell of this notebook writes 'outputs.tsv'; the
# compute cell saves 'BIO-2.csv' — confirm which file is meant to be read here.
sibc2_df = pd.read_csv(
    'outputs.tsv',
    sep='\t',
)

In [21]:
# Keep only the id and the indicator, under the column name used in the
# published indicator table. The cell is safe to re-run: a missing 'geo_name'
# is ignored, and both the 'SICB-2' and the 'BIO-2' spelling of the raw
# indicator column are accepted.
sibc2_df = (
    sibc2_df
    .drop(columns='geo_name', errors='ignore')
    .rename(columns={
        'SICB-2': 'SICB_2_habitat_connectivity',
        'BIO-2': 'SICB_2_habitat_connectivity',
    })
)

In [22]:
# Preview the first rows of the connectivity table
sibc2_df.head()

Unnamed: 0,geo_id,SICB_2_habitat_connectivity
0,ARG-Mendoza_ADM-3-union_1,0.654064
1,ARG-Mendoza_ADM-3_1,0.051461
2,ARG-Mendoza_ADM-3_2,0.910816
3,ARG-Mendoza_ADM-3_3,0.979369
4,ARG-Mendoza_ADM-3_4,0.958122


In [27]:
# Left-join the connectivity values onto the indicator table by geo_id,
# keeping every row of cities_indicators.
cities_indicators_merged = pd.merge(
    cities_indicators,
    sibc2_df[['geo_id', 'SICB_2_habitat_connectivity']],
    how='left',
    on='geo_id',
)

In [28]:
# Preview the first rows of the merged indicator table
cities_indicators_merged.head()

Unnamed: 0,geo_id,geo_level,geo_name,geo_parent_name,SICB_2_habitat_connectivity
0,ARG-Mendoza_ADM-3-union_1,ADM-3-union,ARG-Mendoza,ARG-Mendoza,0.654064
1,ARG-Mendoza_ADM-3_1,ADM-3,Distrito Las Barrancas,ARG-Mendoza,0.051461
2,ARG-Mendoza_ADM-3_2,ADM-3,Distrito San Roque,ARG-Mendoza,0.910816
3,ARG-Mendoza_ADM-3_3,ADM-3,Distrito Fray Luis Beltrán,ARG-Mendoza,0.979369
4,ARG-Mendoza_ADM-3_4,ADM-3,Distrito Rodeo del Medio,ARG-Mendoza,0.958122


## Upload to AWS S3

In [2]:
# connect to s3
#
# Credentials are read from the environment so that no secret is stored in the
# notebook. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting the
# kernel; a missing variable raises KeyError here rather than failing later.
# SECURITY: a key pair was previously hardcoded in this cell — treat it as
# compromised and revoke/rotate it in AWS IAM.
import os

import boto3

aws_key = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret = os.environ["AWS_SECRET_ACCESS_KEY"]

s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [30]:
# Write the merged indicator table to S3 as CSV (without the index column)
bucket_name = 'cities-urbanshift'
key_data = 'indicators/cities_indicators_v2_test-ted.csv'
cities_indicators_merged.to_csv(
    "s3://{}/{}".format(bucket_name, key_data),
    index=False,
    storage_options=dict(key=aws_key, secret=aws_secret),
)

In [31]:
# Set the uploaded object's ACL to 'public-read'
object_acl = s3.ObjectAcl(bucket_name, key_data)
response = object_acl.put(ACL="public-read")

In [None]:
# Public URL of the uploaded file:
# https://cities-urbanshift.s3.eu-west-3.amazonaws.com/indicators/cities_indicators_v2_test-ted.csv