In [1]:
import sys
import os
import rasterio

import geopandas as gpd
import pandas as pd

sys.path.append("/home/wb411133/Code/GOSTrocks/src")

import GOSTrocks.ntlMisc as ntlMisc
import GOSTrocks.rasterMisc as rMisc
from GOSTrocks.misc import tPrint



In [2]:
# Remote folder holding the urban-extent layers used in this notebook
data_folder = "s3://wbg-geography01/URBANIZATION/MENA/Extents/"
# The UCDB layer is read from a local checkout rather than from data_folder
ucdb_file = "/home/wb411133/Code/GOSTurban/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
# Both FUA layers live directly inside data_folder
fua_file, fua_peripheries = (
    os.path.join(data_folder, layer_name)
    for layer_name in (
        "GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg",
        "FUA_peripheries.gpkg",
    )
)

In [3]:
# Urban cores (UCDB layer); its CRS is the reference for the other layers
inU = gpd.read_file(ucdb_file)
# Functional urban areas, read and reprojected onto the cores' CRS in one step
inF = gpd.read_file(fua_file).to_crs(inU.crs)

# Disabled: loading of the FUA peripheries layer (if the peripheries exist they
# would be read in here; otherwise they need to be created). Kept as a bare
# string so it is not executed.
'''inP = gpd.read_file(fua_peripheries)
inP = inP.to_crs(inU.crs)
inP['geometry'] = inP.buffer(0)
'''

"inP = gpd.read_file(fua_peripheries)\ninP = inP.to_crs(inU.crs)\ninP['geometry'] = inP.buffer(0)\n"

In [None]:
# Local folder that holds the GHSL built-up rasters
ghsl_folder = "/home/public/Data/GLOBAL/GHSL/Built"
# Bare file names (not full paths) of every entry ending in ".tif"
ghsl_files = [
    entry
    for entry in os.listdir(ghsl_folder)
    if entry.endswith(".tif")
]

In [None]:
# Zonal statistics of every GHSL raster in ghsl_files, summarised per FUA and
# per urban core. One 'ghsl_<year>' column (the zonal SUM) is added per raster
# and both tables are written to CSV at the end.
out_folder = "s3://wbg-geography01/URBANIZATION/MENA/ZONAL_RES/GHSL"
# NOTE(review): the "ntl" in these file names looks like a leftover from another
# workflow; the names are kept unchanged so existing output paths still match.
fua_res_file = os.path.join(out_folder, "fua_ntl_zonal.csv")
core_res_file = os.path.join(out_folder, "core_ntl_zonal.csv")

# Column labels applied to the rows returned by rMisc.zonalStats
stat_columns = ['SUM', 'MIN', 'MAX', 'MEAN']

fua_zonal = inF.copy()
core_zonal = inU.copy()
for ghsl_file in ghsl_files:
    # Year label = fourth "_"-separated token of the file name minus its first character
    year = ghsl_file.split("_")[3][1:]
    # Context manager closes each raster once its statistics are computed
    with rasterio.open(os.path.join(ghsl_folder, ghsl_file)) as inR:
        # Reproject the vector layers once so they match the raster CRS
        if inF.crs != inR.crs:
            inF = inF.to_crs(inR.crs)
            tPrint("Reprojected FUAs")
        if inU.crs != inR.crs:
            inU = inU.to_crs(inR.crs)
            tPrint("Reprojected Cores")
        # Run zonal on FUA
        fua_res = rMisc.zonalStats(inF, inR, minVal=0, maxVal=10000)
        fua_res = pd.DataFrame(fua_res, columns=stat_columns)
        fua_zonal[f'ghsl_{year}'] = fua_res['SUM']
        # Run zonal on core
        core_res = rMisc.zonalStats(inU, inR, minVal=0, maxVal=10000)
        # Fixed: this frame was previously built from fua_res, so the core
        # table silently received the FUA sums instead of its own.
        core_res = pd.DataFrame(core_res, columns=stat_columns)
        core_zonal[f'ghsl_{year}'] = core_res['SUM']
    tPrint(f"Completed {year}")
core_zonal.to_csv(core_res_file)
fua_zonal.to_csv(fua_res_file)

# Population stats

In [4]:
# Local folder for the GHS_POP data; the cells below download into it and list rasters from it
ghs_pop_folder = "/home/public/Data/GLOBAL/GHSL/Pop"

In [None]:
# Disabled one-off download step, kept as a bare string so it does not run.
# The stored code was fixed (missing ":" after the `if`) so it is valid Python
# when the quotes are removed.
'''# Download and unzip GHS_POP data
import urllib.request
import zipfile
url_path_base = "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2023A/GHS_POP_E{year}_GLOBE_R2023A_54009_1000/V1-0/GHS_POP_E{year}_GLOBE_R2023A_54009_1000_V1_0.zip"
for year in range(1975, 2021, 5):
    url_path = url_path_base.format(year=year)
    out_file = os.path.join(ghs_pop_folder, os.path.basename(url_path))
    if not os.path.exists(out_file):
        urllib.request.urlretrieve(url_path, out_file)
    tPrint(out_file)
zip_files = [x for x in os.listdir(ghs_pop_folder) if x.endswith("0.zip")]
for zip_file in zip_files:
    with zipfile.ZipFile(os.path.join(ghs_pop_folder, zip_file), 'r') as zip_ref:
        zip_ref.extractall(ghs_pop_folder)
    tPrint(zip_file)
for zip_file in zip_files:
    os.remove(os.path.join(ghs_pop_folder, zip_file))

'''

In [5]:
# Zonal statistics of every GHS_POP raster, summarised per FUA and per urban
# core. One 'ghs_pop_<year>' column (the zonal SUM) is added per raster and
# both tables are written to CSV at the end.
out_folder = "s3://wbg-geography01/URBANIZATION/MENA/ZONAL_RES/GHSPop"
fua_res_file = os.path.join(out_folder, "fua_ghspop_zonal.csv")
core_res_file = os.path.join(out_folder, "core_ghspop_zonal.csv")
ghs_pop_files = [x for x in os.listdir(ghs_pop_folder) if x.endswith("1000_V1_0.tif")]

# Column labels applied to the rows returned by rMisc.zonalStats
stat_columns = ['SUM', 'MIN', 'MAX', 'MEAN']

fua_zonal = inF.copy()
core_zonal = inU.copy()
for ghs_pop_file in ghs_pop_files:
    # Year label = third "_"-separated token of the file name minus its first character
    year = ghs_pop_file.split("_")[2][1:]
    # Context manager closes each raster once its statistics are computed
    with rasterio.open(os.path.join(ghs_pop_folder, ghs_pop_file)) as inR:
        # Reproject the vector layers once so they match the raster CRS
        if inF.crs != inR.crs:
            inF = inF.to_crs(inR.crs)
            tPrint("Reprojected FUAs")
        if inU.crs != inR.crs:
            inU = inU.to_crs(inR.crs)
            tPrint("Reprojected Cores")
        # Run zonal on FUA
        fua_res = rMisc.zonalStats(inF, inR, minVal=0)
        fua_res = pd.DataFrame(fua_res, columns=stat_columns)
        fua_zonal[f'ghs_pop_{year}'] = fua_res['SUM']
        # Run zonal on core, using the same value filter as the FUA call.
        # NOTE(review): the original passed maxVal=10000 to the core call only,
        # which looks like a leftover from the GHSL built-up cell; confirm that
        # no upper bound is intended for population counts.
        core_res = rMisc.zonalStats(inU, inR, minVal=0)
        # Fixed: this frame was previously built from fua_res, so the core
        # table silently received the FUA sums instead of its own.
        core_res = pd.DataFrame(core_res, columns=stat_columns)
        core_zonal[f'ghs_pop_{year}'] = core_res['SUM']
    tPrint(f"Completed {year}")
core_zonal.to_csv(core_res_file)
fua_zonal.to_csv(fua_res_file)

10:55:26	Reprojected FUAs
10:55:26	Reprojected Cores
10:56:08	Completed 2030
10:56:50	Completed 1975
10:57:32	Completed 1980
10:58:14	Completed 1985
10:58:56	Completed 1990
10:59:38	Completed 1995
11:00:21	Completed 2000
11:01:03	Completed 2005
11:01:44	Completed 2010
11:02:23	Completed 2015
11:03:03	Completed 2020
