# Calculate Urban Metrics: Sprawl
Normalized difference between the share of areas with population density below the regional average density and the share of areas with population density above the regional average density (Fallah et al., 2011).

$$\mathrm{Sprawl}_{LH} = \big((L\% - H\%) + 1\big) \times 0.5$$

Where L% is the share of metropolitan population living in a grid cell with density below the overall grid cell group median and H% is the share of metropolitan population living in a grid cell with density above the overall grid cell group median. The sprawl measure in Equation (8) is an index that ranges between 0 and 1; values closer to 1 represent greater sprawl.

To account for ‘rural clusters’ in metropolitan areas, grid cells with density below 200 persons per square mile (about 77 persons per square km) are excluded.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import time

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [None]:
from shapely.geometry import mapping
from shapely.geometry import Point

In [None]:
# Add the local INFRA_SAP repository to the import path so that `infrasap` can be imported
sys.path.append(r"C:\repos\INFRA_SAP")
from infrasap.urban_metrics import *

In [None]:
import rasterio
from rasterio.mask import mask
from rasterio import Affine  # or from affine import Affine

In [None]:
# Record the wall-clock start time; the last cell subtracts it to report the total run time.
start_time = time.time()

## Inputs are GHS pop and the urban extents
The Mollweide projection should work well because it is an equal-area projection.

In [None]:
# Path to the population raster opened with rasterio in the cells below.
# Judging by the file name this is the GHS-POP 2015 global grid at 1 km in Mollweide (54009) -- confirm.
# The commented-out assignments are alternative rasters used for other runs.
GHS_pop = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0\GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif"
# GHS_pop = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\clipped_eca_no_russia_1km.tif"
# GHS_pop = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\2015_1km_GHS_Pop\GHS_POP_2015_UZB_merged.tif"

In [None]:
# Path to the shapefile of urban extents; each feature's geometry is used to mask the population raster.
# Only the last, uncommented assignment is active; the commented-out paths are inputs used for other runs.
# NOTE(review): the file names suggest these layers are already in Mollweide, matching the raster -- confirm,
# because the cells below do not reproject the shapes before masking.
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\Final_urban_extent_metrics\ECA_all_urban_extents_100k_mollweide.shp"
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\russia_urban_extents_merged_mollweide.shp"
# shpName = r"C:\repos\GOST_Urban\Notebooks\Implementations\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_mollweide2.shp"
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_only_FUAs_Project_Mollweide.shp"
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_ghs_built_up_extents_4326\UZB_only_ghs_built_up_extents_mollweide_geom_fixed_greater_50k.shp"
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_ghs_built_up_extents_4326\UZB_ghs_built_up_extents_mollweide_geom_fixed.shp"
# shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\ECA_wo_rus_urban_extents\eca_wo_rus_built_up_extents_molleweide.shp"
shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\all_urban_clusters_5k_up_molleweide.shp"

## First find overall grid cell group median

In [None]:
# Pseudocode

# pop_values = []
# For each Shape/FUA:
# Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)
# For each pixel:
# pop_values.append(pixel value)


# cell_group_median = median of pop_values

In [None]:
%%time

# Collect, across every input shape, the raster values of all pixels that fall
# inside the shape and exceed the rural-cluster threshold. The pooled list
# (`pop_values`) is what the next cell takes the median of.
#
# Compared with the previous version of this cell:
#   * the per-pixel x/y/Point columns (three row-wise DataFrame.apply calls per
#     shape) were never used afterwards and have been removed;
#   * the filter is done with NumPy boolean indexing instead of a GeoDataFrame;
#   * the full list of values is no longer printed for every shape (only counts).
# The values collected, and their order, are unchanged.
with rasterio.open(GHS_pop) as src:
    pixelSizeX, pixelSizeY = src.res
    print(pixelSizeX, pixelSizeY)

    input_shapes_gpd = gpd.read_file(shpName)

    # Sentinel written by rasterio.mask.mask into pixels outside the polygon
    outside_shape_value = -9999.0
    # Pixels at or below this value are treated as rural and dropped
    # (presumably persons per km^2 on a 1 km grid -- confirm against the raster)
    min_density = 77

    pop_values = []
    for shape_idx, shape_row in input_shapes_gpd.iterrows():
        print(shape_idx)

        # rasterio expects GeoJSON-like mappings, one per geometry
        geoms = [mapping(shape_row["geometry"])]

        # Crop the raster to the shape; pixels outside it become the sentinel
        out_image, out_transform = mask(
            src, geoms, crop=True, nodata=outside_shape_value
        )
        data = out_image[0, :, :]

        # Row-major boolean indexing yields the same values, in the same order,
        # as np.extract(data != sentinel, data)
        inside_values = data[data != outside_shape_value]
        print(len(inside_values))

        # Exclude rural pixels
        dense_values = inside_values[inside_values > min_density]
        print(len(dense_values))

        # .tolist() converts to plain Python floats, as Series.to_list() did
        pop_values.extend(dense_values.tolist())

In [None]:
import statistics

# Median of the pixel values pooled from every shape in the input shapefile
# (the "overall grid cell group median" used as the low/high density cut-off).
# statistics.median raises StatisticsError if pop_values is empty.
UZB_pop_median = statistics.median(pop_values)

In [None]:
# Show the pooled median in the notebook output
UZB_pop_median

## Second calculate the Sprawl metric for each shape

In [None]:
# Pseudocode

# for each Shape/FUA:
# pixel_count_below_median = 0
# pixel_count_above_median = 0

# Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)
# calculate pixel_share_below_median and pixel_share_above_median

# Sprawl = ((L%−H%)+1)*0.5
# Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.5

In [None]:
%%time

# Compute the sprawl index for every input shape and attach it to the shapes
# as a new "sprawl_index" column in `output_new_temp_gdf`.
#
# For each shape:
#   1. keep the raster pixels inside the shape that exceed the rural threshold;
#   2. compute the share of those pixels below / above the pooled median
#      (UZB_pop_median, from the previous cell);
#   3. Sprawl = ((share_below - share_above) + 1) * 0.5, which lies in [0, 1].
#
# Changes from the previous version of this cell:
#   * a shape with no pixel above the threshold no longer raises
#     ZeroDivisionError; its sprawl_index is NaN;
#   * results are gathered in a list and attached once, instead of calling
#     DataFrame.append in the loop (removed in pandas 2.0, and quadratic);
#   * no reliance on the index label being 0 / equal to the row position;
#   * the unused pixel_count_* counters were removed.
#
# NOTE(review): the introduction defines L% and H% as shares of *population*,
# whereas this cell (as before) uses shares of *pixels*. Confirm which is intended.
with rasterio.open(GHS_pop) as src:
    pixelSizeX, pixelSizeY = src.res
    print(pixelSizeX, pixelSizeY)

    input_shapes_gpd = gpd.read_file(shpName)

    # Sentinel written by rasterio.mask.mask into pixels outside the polygon
    outside_shape_value = -9999.0
    # Pixels at or below this value are treated as rural and dropped
    min_density = 77

    # One sprawl value per shape, in the same order as input_shapes_gpd
    sprawl_values = []

    for shape_idx, shape_row in input_shapes_gpd.iterrows():
        print(shape_idx)

        # rasterio expects GeoJSON-like mappings, one per geometry
        geoms = [mapping(shape_row["geometry"])]

        # Crop the raster to the shape; pixels outside it become the sentinel
        out_image, out_transform = mask(
            src, geoms, crop=True, nodata=outside_shape_value
        )
        data = out_image[0, :, :]

        # Pixels inside the shape, then exclude rural pixels
        val = data[data != outside_shape_value]
        val = val[val > min_density]
        d_count = len(val)

        if d_count == 0:
            # Nothing left to take shares of; record a missing value
            print("no pixels above the density threshold; sprawl index is NaN")
            sprawl_values.append(float("nan"))
            continue

        # Pixels exactly equal to the median count towards neither share
        pixel_share_below_median = int(np.count_nonzero(val < UZB_pop_median)) / d_count
        print(f"pixel_share_below_median is: {pixel_share_below_median}")

        pixel_share_above_median = int(np.count_nonzero(val > UZB_pop_median)) / d_count
        print(f"pixel_share_above_median is: {pixel_share_above_median}")

        # Sprawl = ((L% - H%) + 1) * 0.5
        Sprawl = ((pixel_share_below_median - pixel_share_above_median) + 1) * 0.5
        print(f"Sprawl index is: {Sprawl}")

        sprawl_values.append(Sprawl)

# reset_index returns a new GeoDataFrame (geometry and CRS preserved) with a
# 0..n-1 index, so the input shapes themselves are left untouched.
output_new_temp_gdf = input_shapes_gpd.reset_index(drop=True)
output_new_temp_gdf["sprawl_index"] = sprawl_values

In [None]:
# Show the per-shape results (input attributes plus the sprawl_index column)
output_new_temp_gdf

In [None]:
# Reproject the results to geographic coordinates (EPSG:4326) before export
output_new_temp_gdf = output_new_temp_gdf.to_crs("epsg:4326")

# Destination folder for this run; folders used by earlier runs are kept for reference
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\eca_metrics_results_russia"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\eca_urban_metrics_results_wo_rus"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_only_GHS_FUAs_results"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_only_GHS_urban_extents_results"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_only_GHS_urban_extents_results_all"
# output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\eca_urban_metrics_results_wo_rus_all"
output = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\all_urban_extents_results_5k_up"

# Write the table as CSV; file names used by earlier runs are kept for reference

# output_new_temp_gdf.to_csv(output + r"\ECA_all_urban_metrics_100k_sprawl.csv")
# output_new_temp_gdf.to_csv(output + r"\UZB_only_urban_metrics_FUAs_sprawl.csv")
# output_new_temp_gdf.to_csv(output + r"\UZB_only_urban_metrics_urban_extents_sprawl.csv")
# output_new_temp_gdf.to_csv(output + r"\UZB_only_urban_metrics_urban_extents_all_sprawl.csv")
# output_new_temp_gdf.to_csv(output + r"\ECA_wo_rus_urban_metrics_urban_extents_all_sprawl.csv")
sprawl_csv_path = output + r"\all_urban_metrics_5k_up_sprawl.csv"
output_new_temp_gdf.to_csv(sprawl_csv_path)

In [None]:
# Report the seconds elapsed since start_time was recorded near the top of the notebook
print(f"total time to process: {time.time()-start_time}")