In [None]:
# import sys
# !{sys.executable} -m pip install pip earthengine-api
# !{sys.executable} -m pip install pip geemap
# !{sys.executable} -m pip install pip wget

In [None]:
import ee
# ee.Authenticate()

In [None]:
# Initialize the Earth Engine client for this kernel session. The explicit
# ee.Authenticate() call in the previous cell is commented out, so this
# presumably relies on credentials stored by an earlier authentication — confirm.
ee.Initialize()

In [None]:
import os, requests, json, geojson
import pandas as pd
import geopandas as gpd
import geemap
import fiona
import shapely
from shapely.validation import make_valid
from shapely.geometry import Polygon, MultiPolygon, box, shape
import boto3

In [None]:
# Notebook configuration: output file name, working directory and S3 endpoint.

# CSV file that accumulates the indicator value for every processed geography.
OUTPUT_FILENAME = 'UrbanShift_GRE_3_4_OpenSpaceHectaresper1000people.csv'

# Working directory of the kernel at the time this cell is executed.
out_dir = os.getcwd()

# S3 bucket holding the input layers and the indicator table, and the HTTPS
# root URL used to read its objects.
bucket_name = 'cities-urbanshift'
aws_s3_dir = f"https://{bucket_name}.s3.eu-west-3.amazonaws.com"

In [None]:
# Load the WorldPop 100 m population collection, keeping only the images whose
# 'year' property is 2020 and only their 'population' band.
pop = ee.ImageCollection('WorldPop/GP/100m/pop')
pop = pop.filter(ee.Filter.inList('year', [2020])).select('population')

# Collapse the filtered collection into a single image (per-pixel mean) and
# name the resulting band 'population'.
popImg = pop.mean().rename('population')

# Projection and nominal pixel size of the first image in the collection;
# popScale is the scale passed to reduceRegions when summing population.
popProj = pop.first().projection()
popScale = popProj.nominalScale()

In [None]:
# Read the table of cities from S3. Later cells use its 'geo_name',
# 'aoi_boundary_name' and 'units_boundary_name' columns to build file names.
boundary_georef_url = f"{aws_s3_dir}/data/boundaries/v_0/boundary_georef.csv"
boundary_georef = pd.read_csv(boundary_georef_url)
boundary_georef

In [None]:
# Convert geojsons to Shapely Polygons

def geojson_to_polygons(g):
    """Convert the areal features of a GeoJSON FeatureCollection to Shapely Polygons.

    Parameters
    ----------
    g : dict
        Parsed GeoJSON object with a ``'features'`` list.

    Returns
    -------
    list of shapely.geometry.Polygon
        One Polygon for every ``Polygon`` feature and one for every member of
        every ``MultiPolygon`` feature, in input order. Interior rings (holes)
        are preserved. Only the first two ordinates (x, y) of each position are
        used. Features whose geometry is missing or is not a Polygon /
        MultiPolygon are skipped.
    """
    def _ring(positions):
        # Keep x and y only, coerced to float (drops any z ordinate).
        return [(float(pt[0]), float(pt[1])) for pt in positions]

    result = []
    for feature in g['features']:
        geometry = feature.get('geometry')
        if not geometry:
            # GeoJSON allows a null geometry; there is nothing to convert.
            continue
        geom_type = geometry.get('type')
        if geom_type == 'Polygon':
            members = [geometry['coordinates']]
        elif geom_type == 'MultiPolygon':
            # Every member polygon counts, not just the first one.
            members = geometry['coordinates']
        else:
            # Points, lines, etc. have no area and cannot form a Polygon.
            continue
        for rings in members:
            if not rings:
                continue
            # rings[0] is the exterior ring; any further rings are holes.
            result.append(Polygon(_ring(rings[0]), [_ring(hole) for hole in rings[1:]]))
    return result

In [None]:
# Worked example for a single boundary: build the boundary and open-space
# geometries and clip the open space to the boundary.
boundary_id = "ARG-Mendoza-ADM3union"

# Boundary: download the GeoJSON and assemble its polygons into a MultiPolygon.
boundary_path = f"{aws_s3_dir}/data/boundaries/v_0/boundary-{boundary_id}.geojson"
boundary_geo = requests.get(boundary_path).json()
boundary_geo_shapely = MultiPolygon(geojson_to_polygons(boundary_geo))

# Open space (OpenStreetMap extract, 2022) for the same boundary id.
openspace_path = f"{aws_s3_dir}/data/open_space/openstreetmap/v_0/{boundary_id}-OSM-open_space-2022.geojson"
openspace_geo = requests.get(openspace_path).json()
openspace_geo_shapely = MultiPolygon(geojson_to_polygons(openspace_geo))

# Repair the open-space geometry before intersecting it with the boundary.
openspace_valid = make_valid(openspace_geo_shapely)
openspace_poly = openspace_valid.intersection(boundary_geo_shapely)

In [None]:
# Visual check: display the boundary MultiPolygon built in the previous cell.
boundary_geo_shapely

In [None]:
# Visual check: display the open-space MultiPolygon before it is repaired or clipped.
openspace_geo_shapely

In [None]:
# Visual check: display the open space after make_valid and intersection with the boundary.
openspace_poly

In [None]:
def do_one_geom(row):
    """Compute hectares of open space per 1000 people for one boundary row.

    Meant to be used with ``GeoDataFrame.apply(do_one_geom, axis=1)``.

    Besides ``row`` this function reads three module-level names that must be
    set before it is called: ``openspace_geo_shapely`` (open-space geometry),
    ``popImg`` (population image) and ``popScale`` (reduction scale).

    Parameters
    ----------
    row : pandas.Series
        One row of the boundary table. The first field must be the boundary
        geometry; the second field is only used as a label in the progress
        print.

    Returns
    -------
    float
        Open-space area in hectares divided by (population / 1000). NaN is
        returned when the population sum for the boundary is zero or
        unavailable, because the ratio is undefined in that case.
    """
    # Explicit positional access: integer keys on a label-indexed Series only
    # work through a deprecated positional fallback.
    bound_poly = row.iloc[0]
    bound_poly_ee = ee.FeatureCollection(gpd.GeoSeries([bound_poly]).__geo_interface__)
    openspace_poly = make_valid(openspace_geo_shapely).intersection(bound_poly)

    if openspace_poly.is_empty:
        # No open space inside this boundary: the area is zero, so there is no
        # need to build an Earth Engine collection or make a server round trip.
        openspace_area = 0.0
    else:
        openspace_poly_ee = ee.FeatureCollection(gpd.GeoSeries([openspace_poly]).__geo_interface__)

        def calcAreaHA(feat):
            # The 0.0001 factor converts the Earth Engine area to hectares.
            featarea = ee.Number(feat.area().multiply(0.0001))
            return feat.set({
                'AreaHA': featarea
            })

        openspace_area = openspace_poly_ee.map(calcAreaHA).aggregate_sum('AreaHA').getInfo()

    # Sum of the population image over the boundary.
    polyPop = popImg.reduceRegions(bound_poly_ee, ee.Reducer.sum(), popScale).first().get('sum').getInfo()
    print(openspace_area, polyPop)

    if not polyPop:
        # Zero or missing population would otherwise raise ZeroDivisionError /
        # TypeError and abort the whole run.
        result = float('nan')
    else:
        result = openspace_area / (polyPop * 0.001)  # hectares of open space per 1000 people
    print('{0}: {1}'.format(row.iloc[1], result))
    return result

In [None]:
# Compute the indicator for every city listed in boundary_georef, for both its
# area-of-interest (AOI) boundary and its sub-unit boundaries, and checkpoint
# the cumulative table to OUTPUT_FILENAME after each city so an interrupted run
# keeps what was already computed.
indicator_col = 'GRE_3_4_OpenSpaceHectaresper1000people'
output_cols = ['geo_id', 'geo_level', 'geo_name', 'geo_parent_name', indicator_col]


def _fetch_geojson(url):
    """Download ``url`` and return its parsed JSON, raising on an HTTP error status."""
    response = requests.get(url, timeout=120)
    # Fail with a clear HTTP error instead of a JSON decode error on a missing file.
    response.raise_for_status()
    return response.json()


for i in range(len(boundary_georef)):
    city_name = boundary_georef.loc[i, 'geo_name']
    aoi_name = boundary_georef.loc[i, 'aoi_boundary_name']
    if pd.isna(aoi_name):
        # The open-space file name is built from the AOI boundary name, so
        # nothing can be computed for this city without it.
        continue

    # The open-space layer is keyed by the AOI boundary and shared by the AOI
    # and unit passes, so download it once per city.
    boundary_id_aoi = city_name + '-' + aoi_name
    openspace_path = aws_s3_dir + '/data/open_space/openstreetmap/v_0/' + boundary_id_aoi + '-OSM-open_space-2022.geojson'
    openspace_geo = _fetch_geojson(openspace_path)
    # do_one_geom reads this module-level name.
    openspace_geo_shapely = MultiPolygon(geojson_to_polygons(openspace_geo))

    most_recent = []
    do_units = True
    for boundary_name in ['aoi_boundary_name', 'units_boundary_name']:
        boundary_suffix = boundary_georef.loc[i, boundary_name]
        if pd.isna(boundary_suffix):  # this boundary level is not defined for the city
            continue
        boundary_id = city_name + '-' + boundary_suffix
        # read boundaries
        boundary_path = aws_s3_dir + '/data/boundaries/v_0/boundary-' + boundary_id + '.geojson'
        boundary_geo = _fetch_geojson(boundary_path)
        temp_gdf = gpd.GeoDataFrame.from_features(boundary_geo)
        if temp_gdf.empty:
            continue
        if boundary_name == 'aoi_boundary_name' or do_units:
            temp_gdf[indicator_col] = temp_gdf.apply(do_one_geom, axis=1)
        else:
            # The AOI as a whole scored 0, so the units are set to 0 without
            # querying Earth Engine.
            temp_gdf[indicator_col] = 0
        if boundary_name == 'aoi_boundary_name':
            do_units = temp_gdf.iloc[-1][indicator_col] != 0
        most_recent.append(temp_gdf)

    if not most_recent:
        continue

    # Merge the new rows with the rows already on disk.
    frames = list(most_recent)
    if os.path.exists(OUTPUT_FILENAME):
        frames.insert(0, pd.read_csv(OUTPUT_FILENAME))
    result = pd.concat(frames, axis=0, ignore_index=True)[output_cols]
    # Re-running the notebook recomputes geographies that are already in the
    # file; keep only the most recent value for each geo_id.
    result = result.drop_duplicates(subset='geo_id', keep='last')
    # index=False keeps a spurious 'Unnamed: 0' column out of the file.
    result.to_csv(OUTPUT_FILENAME, index=False)

In [None]:
# Reload the indicator values written by the processing loop.
processedcities = pd.read_csv(OUTPUT_FILENAME)
# OUTPUT_FILENAME is a bare file name, so it is resolved against the kernel's
# current working directory.
processedcities

# Merge with indicator table

In [None]:
# Read the current indicator table from S3. The upload cell further down
# writes the merged table back to this same object key.
# (Previously used test table: '/indicators/cities_indicators_erictest.csv')
cities_indicators_url = f"{aws_s3_dir}/indicators/cities_indicators_ericV1.csv"
cities_indicators = pd.read_csv(cities_indicators_url)
cities_indicators

In [None]:
def merge_indicators(indicator_table, new_indicator_table, indicator_name):
    """Add or replace one indicator column in the indicator table.

    Parameters
    ----------
    indicator_table : pandas.DataFrame
        Existing indicator table with one row per ``geo_id``. It is not
        modified; a new DataFrame is returned.
    new_indicator_table : pandas.DataFrame
        Freshly computed values; must contain ``geo_id`` and ``indicator_name``.
        When a ``geo_id`` occurs more than once, the last occurrence is used.
    indicator_name : str
        Name of the indicator column to add or replace.

    Returns
    -------
    pandas.DataFrame
        ``indicator_table`` left-joined with the new values on ``geo_id``. It
        has exactly one row per row of ``indicator_table``; geographies without
        a new value get NaN.

    Raises
    ------
    pandas.errors.MergeError
        If ``geo_id`` is not unique in ``indicator_table``.
    """
    if indicator_name in indicator_table.columns:
        print("replace with new calculations")
        # drop() returns a copy, so the caller's DataFrame keeps its column.
        indicator_table = indicator_table.drop(columns=indicator_name)
    else:
        print("add new indicators")

    # De-duplicate on the join key itself. Comparing whole rows would let two
    # rows for the same geo_id through whenever any other column differs, and
    # the join would then multiply rows in the result.
    new_values = new_indicator_table[["geo_id", indicator_name]].drop_duplicates(
        subset="geo_id", keep="last"
    )
    cities_indicators_df = indicator_table.merge(new_values,
                                                 on='geo_id',
                                                 how='left',
                                                 validate='one_to_one')
    return cities_indicators_df

In [None]:
# Attach the newly computed open-space values to the indicator table.
indicator_column = 'GRE_3_4_OpenSpaceHectaresper1000people'
cities_indicators_merged = merge_indicators(cities_indicators, processedcities, indicator_column)

In [None]:
# Inspect the merged table before it is uploaded.
cities_indicators_merged

# Upload to AWS S3

In [None]:
# connect to s3
# The credentials CSV lives outside the notebook. Set the AWS_CREDENTIALS_CSV
# environment variable to point at it on other machines; when the variable is
# unset, the original default location is used.
aws_credentials_path = os.environ.get(
    'AWS_CREDENTIALS_CSV',
    '/home/jovyan/PlanetaryComputerExamples/aws_credentials.csv',
)
aws_credentials = pd.read_csv(aws_credentials_path)
# The first row of the CSV holds the key pair.
aws_key = aws_credentials.iloc[0]['Access key ID']
aws_secret = aws_credentials.iloc[0]['Secret access key']

# boto3 resource used by the later cell that sets the object's ACL.
s3 = boto3.resource(
    service_name='s3',
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret
)

In [None]:
# Write the merged indicator table to S3 as CSV.
# Object key inside the bucket (earlier test keys:
# 'data/indicators/cities_indicators_erictest.csv',
# 'indicators/cities_indicators_v2test.csv').
key_data = 'indicators/cities_indicators_ericV1.csv'

s3_target = f"s3://{bucket_name}/{key_data}"
s3_auth = {"key": aws_key, "secret": aws_secret}

# pandas writes straight to the s3:// URL using the given credentials.
cities_indicators_merged.to_csv(s3_target, index=False, storage_options=s3_auth)

In [None]:
# make it public
# Set the uploaded object's ACL to 'public-read'. The object is identified by
# bucket_name and key_data, the same values used for the upload above.
object_acl = s3.ObjectAcl(bucket_name,key_data)
response = object_acl.put(ACL='public-read')