## This script finds the mean distance from a resident of an informal or atomistic area to the nearest public transit stop.

### Method:

1. Reclass ULU: [2, 3] --> [1, 1], and 0 otherwise. This gives 1 for informal/atomistic. Call this **informal**.
1. Multiply WorldPop raster by **informal**. This gives popsize only in informal pixels, zero otherwise. Call this **informal_pop**.
1. Use [Overpass API](https://dev.overpass-api.de/overpass-doc/en/index.html) to retrieve OSM transit-stop points within city bounding box.
1. Create a distance raster giving the distance to the nearest OSM transit-stop feature, computed out to a maximum of 20000 m. Call this **transit_distance**.
1. Multiply **informal_pop** by **transit_distance**. Call this **informal_persondistancetotransit**.
1. Sum over **informal_pop** to get the total number of residents of informal/atomistic areas. Call this **total_informal_pop**.
1. Sum over **informal_persondistancetotransit** and return that sum divided by **total_informal_pop**. This is the average distance from an informal/atomistic dwelling to the nearest transit stop.


In [1]:
import os, requests, json
import overpy
import pandas as pd
import geemap
import ee
import shapely
from shapely.geometry import Polygon, MultiPolygon
from shapely.geometry import shape as Shape
from shapely.ops import unary_union
#ee.Authenticate()

In [2]:
# Initialise the Earth Engine client library before any ee.* object is built.
# If this fails for lack of credentials, run the commented-out
# ee.Authenticate() call in the import cell once and retry.
ee.Initialize()

In [3]:
# Year used to filter the WorldPop total-population collection (see `pop`).
POP_YEAR = 2020

In [4]:
# Load every city boundary stored locally as GeoJSON text.
# File names are expected to look like
#   <prefix>-<country_code>-<cityname>-<level>.<extension>
# where <level> may itself contain dashes (e.g. "city-matchingADM7"), so all
# remaining dash-separated parts are joined back together.
# Each entry of `geos` is (country_code, cityname, level, geojson_text).
boundaries_foldername = 'Deep Dive boundaries'
geos = []
for city_geojson_filename in os.listdir(boundaries_foldername):
    info_part = city_geojson_filename.split('.')[0]
    name_parts = info_part.split('-')
    country_code = name_parts[1]
    cityname = name_parts[2]
    level = '-'.join(name_parts[3:])
    # The context manager closes the file even if reading raises.
    with open('{0}/{1}'.format(boundaries_foldername, city_geojson_filename), encoding="utf-8") as bfile:
        geos.append((country_code, cityname, level, bfile.read()))

#Jakarta
# Jakarta's boundary is not in the local folder; it is fetched from the
# UrbanShift bucket using row `i` of the boundary georeference table.
boundary_georef = pd.read_csv('https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')
i = 25
boundary_id_aoi = '{0}-{1}'.format(boundary_georef.loc[i, 'geo_name'], boundary_georef.loc[i, 'aoi_boundary_name'])
boundary_path = 'https://cities-urbanshift.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary-{0}.geojson'.format(boundary_id_aoi)
# Fail loudly on an HTTP error instead of trying to parse an error page as
# JSON, and do not wait forever on a stalled connection.
boundary_response = requests.get(boundary_path, timeout=60)
boundary_response.raise_for_status()
boundary_geo = boundary_response.json()
# Stored as JSON text so this entry has the same shape as the file-based ones.
geos.append((boundary_georef.loc[i, 'country_code'], boundary_georef.loc[i, 'city_name'], boundary_georef.loc[i, 'units_boundary_name'], json.dumps(boundary_geo)))

In [4]:
# Disabled alternative loader: builds `geos` from the Cities4Forests boundary
# table instead of the local folder. Change the guard to True to use it.
if False:
    # define directory
    out_dir = os.getcwd()
    bucket_name = 'cities-indicators'
    aws_s3_dir = "https://"+bucket_name+".s3.eu-west-3.amazonaws.com"

    # get list of c4f cities
    boundary_georef = pd.read_csv('https://cities-cities4forests.s3.eu-west-3.amazonaws.com/data/boundaries/v_0/boundary_georef.csv')

    geos = []
    # Iterate over index labels, since the rows are read with label-based .loc.
    for i in boundary_georef.index:
        boundary_id_aoi = '{0}-{1}'.format(boundary_georef.loc[i, 'geo_name'], boundary_georef.loc[i, 'aoi_boundary_name'])
        boundary_path = '{0}/data/boundaries/boundary-{1}.geojson'.format(aws_s3_dir, boundary_id_aoi)
        # Surface HTTP errors instead of parsing an error page as JSON.
        boundary_response = requests.get(boundary_path, timeout=60)
        boundary_response.raise_for_status()
        boundary_geo = boundary_response.json()
        # geojson_to_ee() calls json.loads on this element, so store the
        # boundary as JSON text rather than as an already-parsed dict.
        geos.append((boundary_georef.loc[i, 'country_code'], boundary_georef.loc[i, 'city_name'], boundary_georef.loc[i, 'geo_level'], json.dumps(boundary_geo)))

In [4]:
# Rebuild `geos` from the local boundary folder only.
# Each entry is (country_code, cityname, level, geojson_text).
if True:
    boundaries_foldername = 'Deep Dive boundaries'
    geos = []
    for city_geojson_filename in os.listdir(boundaries_foldername):
        # Strip the extension first so it does not end up inside the level
        # label (e.g. "ADM3.geojson").
        name_parts = city_geojson_filename.split('.')[0].split('-')
        country_code = name_parts[1]
        cityname = name_parts[2]
        # A level can contain dashes (e.g. "city-matchingADM7"); keep all of it.
        level = '-'.join(name_parts[3:])
        # The context manager closes the file even if reading raises.
        with open('{0}/{1}'.format(boundaries_foldername, city_geojson_filename), encoding="utf-8") as bfile:
            geos.append((country_code, cityname, level, bfile.read()))

In [5]:
def get_bbox(ee_obj):
    """Return the bounding box of an Earth Engine object.

    The bounds of ``ee_obj.geometry()`` are fetched from the server and the
    first ring of the resulting polygon is read. Vertex 0 supplies the left
    and bottom values, vertex 1 the right value and vertex 2 the top value.

    Returns:
        A tuple ``(bottom, left, top, right)``. This is the order that gets
        formatted into the Overpass bounding-box filter elsewhere in this file.
    """
    ring = ee_obj.geometry().bounds().getInfo()['coordinates'][0]
    west, south = ring[0][0], ring[0][1]
    east, north = ring[1][0], ring[2][1]
    return (south, west, north, east)

In [6]:
def geojson_to_ee(geojson):
    """Convert GeoJSON text into a single-geometry ee.FeatureCollection.

    Args:
        geojson: JSON text with a top-level ``features`` list, where every
            feature has a ``geometry`` member.

    Returns:
        An ``ee.FeatureCollection`` built from the union of all feature
        geometries.
    """
    parsed = json.loads(geojson)
    # Dissolve all features into one shape so the boundary is one geometry.
    merged = unary_union([Shape(feature['geometry']) for feature in parsed['features']])
    # shapely serialises to GeoJSON text; ee.Geometry is given the parsed dict.
    merged_geojson = json.loads(shapely.to_geojson(merged))
    return ee.FeatureCollection(ee.Geometry(merged_geojson))

In [7]:
# Urban Land Use (ULU) classification.
# Three image collections are merged and collapsed into a single 'lulc' band
# by taking, per pixel, the first non-null value across the merged images.

ULU200 = ee.ImageCollection('projects/wri-datalab/urban_land_use/V1')
ULU4000 = ee.ImageCollection('projects/wri-datalab/cities/urban_land_use/V1')
ULUv2 = ee.ImageCollection('projects/wri-datalab/urban_land_use/V2')
ULU = ULU200.merge(ULU4000).merge(ULUv2)
ULU = (
    ULU.select('lulc')
    .reduce(ee.Reducer.firstNonNull())
    .rename('lulc')
)  # previously tried and left disabled: .clip(cityArea) / .updateMask(ULUMexico)

# Classes 2 and 3 (informal / atomistic) become 1, every other class becomes 0.
ulu_valid = ULU.mask().gt(0)
informal = ULU.mask(ulu_valid).remap([2, 3], [1, 1], 0)

In [8]:
# Get WorldPop data
# WorldPop 100 m population image collection, restricted to images whose
# 'year' property equals POP_YEAR. The per-country filter and the band
# selection are applied later, inside fraction_near_transit().

pop = ee.ImageCollection('WorldPop/GP/100m/pop').filter(ee.Filter.equals('year', POP_YEAR))

In [19]:
# WorldPop age/sex bands that make up each demographic group, listed in the
# order in which they are added together.
_AGE_SEX_BANDS = {
    'elderly': ['M_70', 'M_75', 'M_80', 'F_70', 'F_75', 'F_80'],
    'children': ['M_0', 'M_1', 'M_5', 'M_10', 'M_15',
                 'F_0', 'F_1', 'F_5', 'F_10', 'F_15'],
    'female': ['F_0', 'F_1', 'F_5', 'F_10', 'F_15', 'F_20', 'F_25', 'F_30',
               'F_35', 'F_40', 'F_45', 'F_50', 'F_55', 'F_60', 'F_65',
               'F_70', 'F_75', 'F_80'],
}

# OSM (key, value) tags that mark a node as a public-transit stop.
_TRANSIT_NODE_TAGS = (
    ('public_transport', 'platform'),
    ('highway', 'bus_stop'),
    ('highway', 'platform'),
    ('public_transport', 'stop_position'),
    ('railway', 'stop'),
    ('railway', 'platform'),
    ('station', 'subway'),
    ('railway', 'halt'),
    ('railway', 'tram_stop'),
    ('amenity', 'ferry_terminal'),
    ('aerialway', 'station'),
)

# Sentinel returned when there is no population data to compute a fraction.
_NO_DATA = -9999


def _sum_bands(image, band_names):
    """Add the named bands of `image` into a single band called 'population'."""
    total = image.select(band_names[0]).rename("population")
    for band_name in band_names[1:]:
        total = total.add(image.select(band_name))
    return total


def _transit_stop_query(bbox):
    """Build the Overpass QL union query for transit-stop nodes inside `bbox`."""
    bbox_filter = str(bbox)
    clauses = ''.join(
        'node[{0}={1}]{2};'.format(key, value, bbox_filter)
        for key, value in _TRANSIT_NODE_TAGS
    )
    return '(' + clauses + ');out;'


def fraction_near_transit(country_code, geojson, group='all', buffer_distance_m=400):
    """Return the fraction of a population group living near a transit stop.

    Transit-stop nodes are fetched from OpenStreetMap through the Overpass API
    for the bounding box of the boundary. They are buffered by
    `buffer_distance_m` and intersected with the boundary. The population of
    `group` inside that area is divided by the group's population inside the
    whole boundary.

    Args:
        country_code: Value matched against the 'country' property of the
            WorldPop images.
        geojson: Boundary as GeoJSON text, in the form accepted by
            geojson_to_ee().
        group: One of 'all', 'informal', 'elderly', 'children' or 'female'.
        buffer_distance_m: Buffer distance applied around each stop. The
            default of 400 reproduces the previously hard-coded value.

    Returns:
        The fraction as a float; 0.0 when Overpass returns no stop in the
        bounding box; -9999 when the group has no unmasked pixels or no
        positive population inside the boundary.

    Raises:
        ValueError: If `group` is not one of the supported names.
    """
    # Reject unknown groups up front; they used to be treated as 'female'.
    if group not in ('all', 'informal') and group not in _AGE_SEX_BANDS:
        raise ValueError(
            "group must be one of 'all', 'informal', 'elderly', 'children', "
            "'female'; got {0!r}".format(group)
        )

    boundary_geo_ee = geojson_to_ee(geojson)
    boundary_geom = boundary_geo_ee.geometry()
    bbox = get_bbox(boundary_geo_ee)

    if group in ('all', 'informal'):
        localpop = pop.filter(ee.Filter.equals('country', country_code)).select('population').first().clip(boundary_geo_ee)
        # 'informal' keeps population only where the informal mask is 1.
        pop_of_interest = localpop if group == 'all' else localpop.multiply(informal)
    else:
        # NOTE(review): the year is fixed at 2020 here, independently of
        # POP_YEAR - confirm the age/sex collection covers other years before
        # tying the two together.
        vulnpop_country = (
            ee.ImageCollection("WorldPop/GP/100m/pop_age_sex")
            .filter(ee.Filter.eq('country', country_code))
            .filter(ee.Filter.eq('year', 2020))
            .first()
        )
        pop_of_interest = _sum_bands(vulnpop_country, _AGE_SEX_BANDS[group])

    # No unmasked pixel inside the boundary means there is nothing to measure.
    unmasked_sum = pop_of_interest.mask().reduceRegion(reducer=ee.Reducer.sum(), geometry=boundary_geom).getInfo()['population']
    if unmasked_sum == 0:
        return _NO_DATA

    # Check the denominator before querying Overpass, so that no network
    # request is made for a boundary that cannot produce a fraction.
    total_pop = pop_of_interest.reduceRegion(reducer=ee.Reducer.sum(), geometry=boundary_geom, scale=100).getInfo()['population']
    if not total_pop or total_pop <= 0:
        return _NO_DATA

    # Get transit points
    api = overpy.Overpass()
    result = api.query(_transit_stop_query(bbox))
    if not result.nodes:
        # No stops at all: nobody lives near one. This also avoids buffering
        # an empty feature collection.
        return 0.0

    transit_features = [ee.Feature(ee.Geometry({"type": "Point", "coordinates": [float(i.lon), float(i.lat)]})) for i in result.nodes]
    transit_featurecollection = ee.FeatureCollection(transit_features)

    # The second buffer argument (1) is kept exactly as before.
    buffer_geom = transit_featurecollection.geometry().buffer(buffer_distance_m, 1).intersection(boundary_geom)

    nearpoptotal = pop_of_interest.reduceRegion(reducer=ee.Reducer.sum(), geometry=buffer_geom, scale=100).getInfo()['population']

    return nearpoptotal / total_pop

In [20]:
# Compute the transit-access indicators once per city name. When several
# boundaries share a city name, only the first one listed in `geos` is used.
# Disabled indicators, kept for reference:
#   'mean_dist_to_openspace_informal': mean_distance_to_openspace(geo_info[0], geo_info[3], informal_only=True),
#   'mean_dist_to_openspace_all': mean_distance_to_openspace(geo_info[0], geo_info[3], informal_only=False),
results = {}
dones = []
for geo_info in geos:
    if geo_info[1] in dones:
        continue
    dones.append(geo_info[1])
    print(geo_info[1])
    # Key is "<country_code>-<cityname>-<level>".
    result_key = '{0}-{1}-{2}'.format(geo_info[0], geo_info[1], geo_info[2])
    results[result_key] = {
        'popfraction_near_transit_' + group_name: fraction_near_transit(geo_info[0], geo_info[3], group=group_name)
        for group_name in ('informal', 'elderly', 'children', 'female', 'all')
    }
# One row per boundary, one column per indicator.
df = pd.DataFrame(results).transpose()

Belem
Campinas
Rio_de_Janerio
Salvador
Teresina
Beijing
Chengdu
Ningbo
Shenzhen
Kinshasa
Barranquilla
Bogota
BogotaUrban
Cali
Addis_Ababa
Dire_Dawa
Accra
Bangalore
Delhi
Hyderabad
Kochi
Mumbai
Nashik
Surat
Nairobi
Guadalajara
Leon
Merida
Mexico_City
Monterrey
Kigali
Musanze
Istanbul
Jakarta


In [21]:
# Display the indicators as a table: one row per boundary key of `results`,
# one column per indicator.
pd.DataFrame(results).transpose()

Unnamed: 0,popfraction_near_transit_informal,popfraction_near_transit_elderly,popfraction_near_transit_children,popfraction_near_transit_female,popfraction_near_transit_all
BRA-Belem-ADM3,0.0781,0.349961,0.306879,0.325191,0.321415
BRA-Campinas-city,0.020501,0.358124,0.3575,0.357763,0.357717
BRA-Rio_de_Janerio-ADM2,0.305549,0.487856,0.487854,0.48785,0.48785
BRA-Salvador-ADM4,0.459389,0.64196,0.641413,0.641603,0.641562
BRA-Teresina-ADM4,0.074286,0.464573,0.464123,0.465531,0.465264
CHN-Beijing-area,0.30534,0.658938,0.605137,0.624202,0.621573
CHN-Chengdu-ADM3,0.454764,0.18387,0.178658,0.197351,0.197681
CHN-Ningbo-ADM3,0.063216,0.116621,0.121827,0.125581,0.125046
CHN-Shenzhen-city,0.890137,0.829604,0.807375,0.813433,0.811541
COD-Kinshasa-city-matchingADM7,0.110604,0.10299,0.103031,0.103046,0.103045


In [22]:
# Write the same table (one row per boundary) to a CSV file; the relative
# path places it in the current working directory.
pd.DataFrame(results).transpose().to_csv('access_to_transport_v2.csv')