In [None]:
import sys
import os
import osmnx

import geopandas as gpd
import pandas as pd
import multiprocessing

from shapely.ops import transform
import pyproj

sys.path.append("/home/wb411133/Code/GOSTrocks/src")

from GOSTrocks.misc import tPrint

In [None]:
# Root S3 prefix under which this notebook reads its inputs and writes its output
data_folder = "s3://wbg-geography01/URBANIZATION/MENA/"
_extents_dir = os.path.join(data_folder, "Extents")

# Input layers stored under <data_folder>/Extents
ucdb_file = os.path.join(_extents_dir, "GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")
fua_file = os.path.join(
    _extents_dir, "GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"
)
fua_peripheries = os.path.join(_extents_dir, "FUA_peripheries.gpkg")

# Highway features are reclassified to 4 OSMLR classes for simplification and standardization
#   https://mapzen.com/blog/osmlr-2nd-technical-preview/
# Classes 1-3: every base highway value and its "<value>_link" variant share a class.
_OSMLR_BASE_CLASSES = {
    "OSMLR_1": ("motorway", "trunk", "primary"),
    "OSMLR_2": ("secondary", "tertiary"),
    "OSMLR_3": ("unclassified", "residential"),
}
OSMLR_Classes = {
    f"{highway}{suffix}": osmlr_class
    for osmlr_class, highways in _OSMLR_BASE_CLASSES.items()
    for highway in highways
    for suffix in ("", "_link")
}
# Class 4 values have no "_link" variant in the mapping.
OSMLR_Classes.update({"track": "OSMLR_4", "service": "OSMLR_4"})

In [None]:
# Functional urban areas, read and reprojected to EPSG:4326 in one step
inF = gpd.read_file(fua_file).to_crs(4326)

# Peripheries: reproject only when their CRS differs from the FUA layer
inP = gpd.read_file(fua_peripheries)
if inP.crs != inF.crs:
    inP = inP.to_crs(4326)

# Zero-width buffer replaces each periphery geometry
# (presumably to repair invalid polygons - confirm against the source data)
inP["geometry"] = inP.geometry.buffer(0)

In [None]:
# Source CRS: geographic WGS84.
wgs84 = pyproj.CRS("EPSG:4326")
# Target CRS: Web Mercator (EPSG:3857), as the `wm` name and the function docstring
# below intend. The previous code "EPSG:3278" is not Web Mercator, so projected
# lengths were not in the documented CRS.
# NOTE(review): Web Mercator stretches distances by roughly 1/cos(latitude); if true
# ground lengths are required, consider a geodesic length (pyproj.Geod) or a local
# equal-distance/UTM projection instead - confirm with the analysis owners.
wm = pyproj.CRS("EPSG:3857")

# always_xy=True keeps coordinates in (x, y) = (lon, lat) order on both sides.
project = pyproj.Transformer.from_crs(wgs84, wm, always_xy=True).transform


def summarize_road_length(curR, core_shp, fua_shp, transformer):
    """Measure the projected length of roads falling inside two polygons.

    curR - GeoDataFrame of roads
    core_shp - shapely polygon of core
    fua_shp - shapely polygon of the second area; the loop in this notebook
        passes the matching periphery geometry here
    transformer - coordinate transform function handed to shapely.ops.transform
        (documented intent: turn wgs84 into 3857)

    returns - list [length inside core_shp, length inside fua_shp], in the units
        of the transformer's target CRS

    NOTE: a later cell in this notebook defines another function with the same
    name and a different signature, which shadows this one once that cell runs.
    """
    # Dissolve the roads once; the union is reused for both intersections
    # instead of being recomputed for each polygon.
    all_rds = curR.unary_union
    core_rds = transform(transformer, all_rds.intersection(core_shp))
    fua_rds = transform(transformer, all_rds.intersection(fua_shp))
    return [core_rds.length, fua_rds.length]

In [None]:
# Serial pass: one result dict per city, collected in all_res.
# NOTE: this cell needs the 4-argument summarize_road_length defined above; the
# 2-argument definition later in the notebook shadows it once that cell has run.
all_res = []
for idx, cur_city in inF.iterrows():
    fua_id = cur_city["eFUA_ID"]

    # Download every OSM feature tagged "highway" inside the city polygon
    cur_roads = osmnx.geometries_from_polygon(cur_city["geometry"], {"highway": True})
    # Highway values missing from OSMLR_Classes map to NaN and are therefore
    # left out of the groupby below
    cur_roads["OSMLR"] = cur_roads["highway"].map(OSMLR_Classes)

    # First periphery row sharing this city's eFUA_ID
    cur_periphery = inP.loc[inP["eFUA_ID"] == fua_id].iloc[0]

    cur_res = {"eFUA_ID": fua_id}
    for lbl, rds in cur_roads.groupby("OSMLR"):
        core_len, per_len = summarize_road_length(
            rds, cur_city.geometry, cur_periphery.geometry, project
        )
        cur_res[f"core_{lbl}_m"] = core_len
        cur_res[f"per_{lbl}_m"] = per_len

    all_res.append(cur_res)
    tPrint(idx)

# Multiprocessing

In [None]:
def summarize_road_length(cur_city, cur_p):
    """Download OSM roads for one city and total their length per OSMLR class.

    cur_city - series of current city attributes; reads "eFUA_ID", "eFUA_name"
        and "geometry"
    cur_p - series of the matching periphery; reads "geometry"

    returns - dict holding "eFUA_ID" plus, for every OSMLR class found,
        "core_<class>_m" (roads intersecting the city geometry) and
        "per_<class>_m" (roads intersecting the periphery geometry), measured
        after projecting to EPSG:3857. When processing fails, the error is
        logged and the dict contains only the keys filled before the failure.
    """
    # The transformer is built inside the function so each call is
    # self-contained; the notebook dispatches this function through
    # multiprocessing.Pool.starmap.
    wgs84 = pyproj.CRS("EPSG:4326")
    # Web Mercator, as the original docstring states. The previous code
    # "EPSG:3278" is not Web Mercator.
    # NOTE(review): Web Mercator inflates lengths by roughly 1/cos(latitude);
    # consider geodesic lengths if true ground distance is needed.
    wm = pyproj.CRS("EPSG:3857")
    transformer = pyproj.Transformer.from_crs(wgs84, wm, always_xy=True).transform
    cur_res = {"eFUA_ID": cur_city["eFUA_ID"]}
    try:
        try:
            cur_roads = osmnx.geometries_from_polygon(
                cur_city["geometry"], {"highway": True}
            )
        except Exception:
            # Retry once with a zero-width buffer applied to the city polygon.
            # `except Exception` (not bare) lets KeyboardInterrupt/SystemExit
            # propagate.
            cur_roads = osmnx.geometries_from_polygon(
                cur_city["geometry"].buffer(0), {"highway": True}
            )
        # Highway values missing from OSMLR_Classes become NaN and are
        # therefore excluded by the groupby.
        cur_roads["OSMLR"] = cur_roads["highway"].map(OSMLR_Classes)
        for lbl, curR in cur_roads.groupby("OSMLR"):
            # Dissolve this class's roads once and reuse for both polygons.
            all_rds = curR.unary_union
            core_rds = transform(transformer, all_rds.intersection(cur_city.geometry))
            fua_rds = transform(transformer, all_rds.intersection(cur_p.geometry))
            cur_res[f"core_{lbl}_m"] = core_rds.length
            cur_res[f"per_{lbl}_m"] = fua_rds.length
        tPrint(f"Completed {cur_city['eFUA_name']}")
    except Exception as err:
        # Best-effort by design: log the cause and return the partial result so
        # one failing city does not abort the whole batch.
        tPrint(f"*****Error {cur_city['eFUA_name']}: {err!r}")
    return cur_res

In [None]:
# Build one [city, periphery] argument pair per FUA row for Pool.starmap
all_args = []
for idx, cur_city in inF.iterrows():
    # Periphery rows sharing the city's eFUA_ID; the first match is used
    periphery_matches = inP.loc[inP["eFUA_ID"] == cur_city["eFUA_ID"]]
    cur_periphery = periphery_matches.iloc[0]
    all_args.append([cur_city, cur_periphery])

In [None]:
# Smoke test: run the worker on the first argument pair before the parallel run
first_city, first_periphery = all_args[0]
summarize_road_length(first_city, first_periphery)

In [None]:
# Number of worker processes used for the parallel run
N_POOL_WORKERS = 70

# One summarize_road_length call per [city, periphery] pair; starmap returns the
# results in the same order as all_args
with multiprocessing.Pool(N_POOL_WORKERS) as pool:
    res = pool.starmap(summarize_road_length, all_args)

In [None]:
osm_res = pd.DataFrame(res)

# A city is treated as failed when both its class-2 and class-3 core lengths are
# missing. `&` is the explicit boolean AND (the original used `*` on the masks).
missing_core = osm_res["core_OSMLR_2_m"].isna() & osm_res["core_OSMLR_3_m"].isna()
bad_fua = osm_res.loc[missing_core, "eFUA_ID"].values

# The original followed this with `updatedF.set_index("eFUA_ID")`, whose result
# was discarded (set_index is not in-place), so that call was a no-op and is dropped.
# NOTE(review): updatedF is not read by any visible cell - confirm it can be removed.
updatedF = inF.copy()

# Retry each failed city serially after applying a zero-width buffer to its geometries
all_res = []
for cur_fua in bad_fua:
    # .copy() so the geometry assignment below edits an independent row rather
    # than a possible view of inF / inP
    cur_city = inF.loc[inF["eFUA_ID"] == cur_fua].iloc[0].copy()
    cur_periphery = inP.loc[inP["eFUA_ID"] == cur_city["eFUA_ID"]].iloc[0].copy()
    try:
        cur_city["geometry"] = cur_city["geometry"].buffer(0)
        cur_periphery["geometry"] = cur_periphery["geometry"].buffer(0)
        cur_res = summarize_road_length(cur_city, cur_periphery)
        all_res.append(cur_res)
    except Exception as err:
        # Keep going on failure, but say which city failed and why instead of
        # logging an empty message
        tPrint(f"*****Retry failed for eFUA_ID {cur_fua}: {err!r}")

In [None]:
# Merge the serial retry results back into the parallel results, keyed by eFUA_ID
final_dataset = osm_res.set_index("eFUA_ID")

fixed_roads = pd.DataFrame(all_res)
# When no city needed a retry, all_res is empty and the frame has no "eFUA_ID"
# column; set_index would raise KeyError, so the merge is skipped in that case.
if "eFUA_ID" in fixed_roads.columns:
    fixed_roads = fixed_roads.set_index("eFUA_ID")
    # DataFrame.update modifies final_dataset in place, overwriting with the
    # non-NA values from fixed_roads on matching index/column labels
    final_dataset.update(fixed_roads)
final_dataset

In [None]:
# Write the per-FUA road lengths to <data_folder>/ZONAL_RES/OSM_ROAD_LENGTH
out_csv = os.path.join(
    data_folder, "ZONAL_RES", "OSM_ROAD_LENGTH", "OSMLR_Classes_length_m.csv"
)
final_dataset.to_csv(out_csv)

In [None]:
# Echo the root data path as the cell output.
# NOTE(review): looks like a leftover inspection cell - confirm it can be deleted.
data_folder

In [None]:
# Echo the root data path as the cell output (the preceding cell does the same).
# NOTE(review): looks like a duplicated leftover inspection cell - confirm it can be deleted.
data_folder