In [8]:
import sys, os, time
import rasterio
import boto3
import certifi
import mercantile

import pandas as pd
import geopandas as gpd

from rasterio.session import AWSSession
from shapely.geometry import Point, box
from botocore import UNSIGNED
from botocore.config import Config

sys.path.append(r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.rasterMisc as rMisc

# Anonymous (unsigned) S3 client, for buckets that allow public reads.
# NOTE(review): verify=False turns off TLS certificate verification for every request made
# through this client. `certifi` is imported in this cell but never used; if the network
# permits, passing verify=certifi.where() would restore verification. Left unchanged here
# because the reason for disabling it (e.g. an intercepting proxy) is not visible - confirm.
s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED), verify=False)

# Unsigned session handed to rasterio.Env so rasterio can read public s3:// rasters without
# credentials. AWSSession is already imported with the other imports at the top of this cell.
rasterio_session = AWSSession(aws_unsigned=True)

def tPrint(s):
    """Print ``s`` prefixed by the current local time (HH:MM:SS) and a tab.

    Args:
        s: Message to log; it is converted with ``str()`` when printed.
    """
    stamp = time.strftime("%H:%M:%S")
    print(stamp, s, sep="\t")

In [18]:
# --- Input / output locations -------------------------------------------------------------
# GeoPackage holding the FUA polygons (read into `inFUA` below).
FUA_file = r"C:\WBG\Work\data\URBAN\GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"

# Bucket name and per-country key template ({ISO3} is a placeholder to be filled in with
# str.format). Neither is referenced by the cells visible in this notebook.
fb_aws_bucket = "dataforgood-fb-data"
fb_key_path = "csv/month=2019-06/country={ISO3}/type=total_population/{ISO3}_total_population.csv.gz"

# One RWI CSV per country; the processing loop takes the first three characters of each file
# name as the ISO3 code.
rwi_folder = r"C:\WBG\Work\data\RWI\relative-wealth-index-april-2021"
# os.listdir returns entries in arbitrary order, so sort to get the same processing order
# (and the same log) on every run and platform.
rwi_files = sorted(os.path.join(rwi_folder, f) for f in os.listdir(rwi_folder) if f.endswith(".csv"))

out_folder = r"C:\WBG\Work\MENA_Urban\RESULTS\RWI"
# Create the results folder up front; otherwise the first to_csv fails only after the
# expensive zonal statistics have already been computed.
os.makedirs(out_folder, exist_ok=True)

# Location of the HRSL raster mosaic (VRT) on S3; it is only opened later by the processing loop.
hrsl_file = "s3://dataforgood-fb-data/hrsl-cogs/hrsl_general/hrsl_general-latest.vrt"

# Load the FUA polygons and reproject them to EPSG:4326, the CRS the RWI points are built with.
inFUA = gpd.read_file(FUA_file)
inFUA = inFUA.to_crs(4326)

In [10]:
# using mercantile to generate the quadkey tiles
zoom_level = 14


def get_tiles_from_gdf(gdf, zoom_level):
    """Return the web-mercator tiles at ``zoom_level`` that intersect features of ``gdf``.

    Every tile covering the bounding box of ``gdf`` is generated, then only tiles that
    spatially join (geopandas default predicate) to at least one feature are kept.

    Args:
        gdf: GeoDataFrame whose ``total_bounds`` are passed to ``mercantile.tiles`` and whose
            features are joined against the tiles. The tiles are created with crs=4326, so
            ``gdf`` is expected to be in lon/lat (EPSG:4326) as well. It must not already
            contain an ``index_right`` column, which ``sjoin`` needs to create.
        zoom_level: Tile zoom level used for both tile generation and the quadkeys.

    Returns:
        GeoDataFrame (crs=4326) with columns ``pop_quadkey`` (tile quadkey string) and
        ``geometry`` (tile bounding box), one row per matching tile. The index is inherited
        from the full tile grid, so it is not contiguous.
    """
    all_vals = []
    for tile in mercantile.tiles(*gdf.total_bounds, zoom_level):
        quadkey = mercantile.quadkey(tile.x, tile.y, zoom_level)
        geometry = box(*mercantile.bounds(tile))
        all_vals.append([quadkey, geometry])
    ret_val = gpd.GeoDataFrame(pd.DataFrame(all_vals, columns=['pop_quadkey', 'geometry']),
                               geometry="geometry", crs=4326)
    # Join against the *argument*; the previous version read the notebook-global `inD`, so the
    # function silently ignored `gdf` and failed (or used stale data) on a fresh kernel.
    ret_val = gpd.sjoin(ret_val, gdf, how='inner').loc[:, ['pop_quadkey', 'geometry']]
    # sjoin emits one row per (tile, feature) pair; keep a single row per tile so callers that
    # merge on the quadkey do not multiply rows when several features share a tile.
    ret_val = ret_val.drop_duplicates(subset='pop_quadkey')
    return ret_val


In [19]:
# Loop through each RWI file and calculate the population-weighted RWI for each FUA.
# Only the FUA id and geometry are needed for the point-in-polygon join; slice once, not per file.
fua_zones = inFUA.loc[:, ['eFUA_ID', 'geometry']]

# The Env is entered before rasterio.open is evaluated, so the raster is opened inside the
# unsigned AWS session, and the `with` closes the dataset when the loop ends or raises.
with rasterio.Env(session=rasterio_session), rasterio.open(hrsl_file) as inHRSL:
    for rwi_file in rwi_files:
        iso3 = os.path.basename(rwi_file)[:3]
        out_file = os.path.join(out_folder, f"{iso3}_rwi_fua_aggregated.csv")
        if os.path.exists(out_file):
            # An existing output file means this country was already processed; skip it.
            continue

        tPrint(f"Processing file: {iso3}")
        # Read in RWI data, convert to geodata frame
        inD = pd.read_csv(rwi_file)
        inD = gpd.GeoDataFrame(inD, geometry=gpd.points_from_xy(inD.longitude, inD.latitude), crs=4326)
        # Quadkey of the tile containing each RWI point. A list comprehension yields the same
        # values as a row-wise DataFrame.apply but is faster and also works on an empty frame.
        inD['rwi_quadkey'] = [
            mercantile.quadkey(mercantile.tile(lon, lat, zoom_level))
            for lon, lat in zip(inD.longitude, inD.latitude)
        ]
        # Keep only RWI points that fall inside a FUA, tagging each with its eFUA_ID.
        inD = gpd.sjoin(inD, fua_zones, how='inner')
        # index_right must go: a later sjoin on this frame would fail if it already exists.
        inD = inD.drop(columns=['index_right'])
        if inD.shape[0] == 0:
            tPrint(f"No FUA data for {iso3}")
            continue

        # get a dataframe of intersecting quadkeys
        quadkey_tiles = get_tiles_from_gdf(inD, zoom_level)
        # reset_index gives a 0..n-1 index so the zonal results below line up on assignment.
        quadkey_tiles = quadkey_tiles.to_crs(inHRSL.crs).reset_index()
        # calculate total population in each quadkey
        res = rMisc.zonalStats(quadkey_tiles, inHRSL, minVal=0, verbose=False)
        res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"])
        quadkey_tiles['pop'] = res.SUM

        # Merge quadkey population back to the RWI values
        rwi_res = pd.merge(quadkey_tiles.loc[:, ['pop_quadkey', 'pop']], inD,
                           left_on='pop_quadkey', right_on='rwi_quadkey', how='inner')
        # Total population per FUA is the denominator of each point's weight.
        fua_pop = rwi_res.groupby('eFUA_ID').agg({'pop': 'sum'}).reset_index()
        fua_pop = fua_pop.rename(columns={'pop': 'FUA_pop'})
        rwi_res = pd.merge(rwi_res.loc[:, ['rwi', 'error', 'geometry', 'rwi_quadkey', 'eFUA_ID', 'pop']],
                           fua_pop, on='eFUA_ID', how='inner')
        rwi_res['pop_weight'] = rwi_res['pop'] / rwi_res['FUA_pop']
        rwi_res['rwi_weighted'] = rwi_res['rwi'] * rwi_res['pop_weight']
        # min_count=1: a FUA whose total population is 0 has only NaN weights (0/0). A plain
        # sum would skip them and report an RWI of 0.0; report NaN (no estimate) instead.
        rwi_fua_aggregated = (
            rwi_res.groupby('eFUA_ID')['rwi_weighted'].sum(min_count=1).reset_index()
        )
        rwi_fua_aggregated.to_csv(out_file, index=False)
            


09:41:48	Processing file: BTN
09:41:48	No FUA data for BTN
09:41:48	Processing file: DMA
09:41:48	No FUA data for DMA
09:41:48	Processing file: EGY
09:42:16	Processing file: GRD
09:42:16	No FUA data for GRD
09:42:16	Processing file: LCA
09:42:16	No FUA data for LCA
09:42:16	Processing file: VCT
09:42:16	No FUA data for VCT


In [16]:
# Scratch check of the RWI -> FUA join for a single file.
# NOTE(review): `rwi_file` is not defined in this cell; in the visible cells it is only set as
# the loop variable of the processing loop, so this inspects whichever file that loop left behind.
inD = pd.read_csv(rwi_file)
inD = gpd.GeoDataFrame(
    inD,
    geometry=gpd.points_from_xy(inD["longitude"], inD["latitude"]),
    crs=4326,
)
inD['rwi_quadkey'] = inD.apply(
    lambda row: mercantile.quadkey(mercantile.tile(row["longitude"], row["latitude"], zoom_level)),
    axis=1,
)
inD = gpd.sjoin(inD, inFUA[['eFUA_ID', 'geometry']], how='inner')
# index_right is intentionally kept here so the join result can be inspected in full.
inD.head()

Unnamed: 0,quadkey,latitude,longitude,rwi,error,geometry,rwi_quadkey,index_right,eFUA_ID
