## Calculate Urban Metrics: Sprawl
Normalized difference between the share of areas with population density below the regional average density and the share of areas with population density above the regional average density (Fallah et al., 2011).

Sprawl L H = ((L%−H%)+1)*0.5

Where L% is the share of metropolitan population living in a grid cell with density below the overall grid cell group median and H% is the share of metropolitan population living in a grid cell with density above the overall grid cell group median. The sprawl measure in Equation (8) is an index that ranges between 0 and 1; values closer to 1 represent greater sprawl.

To account for ‘rural clusters’ in metropolitan areas, grid cells with density below 200 persons per square mile are excluded (or 77 per square km).

In [49]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
import sys, os, inspect, logging, importlib, time

In [51]:
import pandas as pd
import geopandas as gpd
import numpy as np
import math, random

In [52]:
import shapely
from shapely.geometry import mapping
from shapely.geometry import Point

In [53]:
# Get reference to GOSTNets
sys.path.append(r'C:\repos\INFRA_SAP')
from infrasap.urban_metrics import *

In [54]:
import rasterio
from rasterio.mask import mask
from rasterio import Affine # or from affine import Affine

In [55]:
GHS_pop = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\2015_1km_GHS_Pop\GHS_POP_2015_UZB_merged.tif"

In [56]:
shpName = r"C:\Users\war-machine\Documents\world_bank_work\UZB_project\metrics_shape_tool\UZB_only_FUAs_Project_Mollweide.shp"

In [57]:
# Psuedocode

# First find overall grid cell group median 

#pop_values = []
# For each Shape/FUA:
    # Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)
    #For each pixel:
        # pop_values.append(pixel value)
    
    
#cell_group_median = median of pop_values

In [59]:
%%time

with rasterio.open(GHS_pop) as src:
    pixelSizeX, pixelSizeY = src.res
    print(pixelSizeX, pixelSizeY)
    
    input_shapes_gpd = gpd.read_file(shpName)

    #pop_values = []
    pop_values = []
    #for entry in input_shapes_gpd.head(2).iterrows():
    for entry in input_shapes_gpd.iterrows():
        print(entry[0])
        
        # extract the geometry in GeoJSON format
        geometry = entry[1]['geometry'] # list of shapely geometries
        #geometry = geoms[0] # shapely geometry
        geoms = [mapping(geometry)]

        # extract the raster values values within the polygon 
        out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0)
        data = out_image[0,:,:]
        
        row, col = np.where(data != -9999.0) 
        val = np.extract(data != -9999.0, data)

        T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre
        rc2xy = lambda r, c: (c, r) * T1  
        
        d = gpd.GeoDataFrame({'col':col,'row':row,'val':val})
        
        # coordinate transformation
        d['x'] = d.apply(lambda row: rc2xy(row.row,row.col)[0], axis=1)
        d['y'] = d.apply(lambda row: rc2xy(row.row,row.col)[1], axis=1)
        
        # geometry
        d['geometry'] = d.apply(lambda row: Point(row['x'], row['y']), axis=1)
        
        # exlude pixels with value less than 77
        print(len(d))
        
        #print(d)
        print(d.val[d.val > 77].to_list())
        
        print(len(d[d.val > 77]))
        
        #extend values to pop_values
        pop_values.extend(d.val[d.val > 77].to_list())
        

1000.0 1000.0
0
46
[418.4430236816406, 974.1602783203125, 1354.2064208984375, 1691.033447265625, 713.865478515625, 1026.6883544921875, 81.10102081298828, 329.8472900390625, 127.940673828125, 329.24249267578125, 382.4326171875, 2771.860107421875, 4697.90087890625, 4930.45654296875, 5607.87841796875, 1275.115966796875, 3042.232177734375, 3331.2060546875, 1673.9600830078125, 2063.9853515625, 176.19427490234375, 2796.4853515625, 4974.615234375, 7074.13720703125, 6227.0185546875, 3712.502685546875, 2196.636962890625, 289.7667541503906, 272.12115478515625, 916.7120361328125, 1852.1240234375, 4720.5322265625, 5072.69970703125, 150.0244140625, 452.393798828125, 2073.122802734375, 257.56500244140625, 650.0831298828125, 386.0696105957031, 1636.75634765625, 2855.748046875, 98.85284423828125]
42
1
18
[3403.17919921875, 3886.03125, 3984.185546875, 790.697509765625, 2243.70947265625, 220.6392059326172, 1734.974853515625, 5438.29736328125, 3080.59814453125, 283.42388916015625, 10661.3486328125, 11454




[880.6145629882812, 356.12030029296875, 325.0610046386719, 801.467529296875, 657.0778198242188, 579.1875610351562, 96.69558715820312, 275.2107849121094, 440.1971435546875, 264.2425231933594, 187.3129425048828, 349.6303405761719, 688.94970703125, 328.7122802734375, 445.2207946777344, 293.0735168457031, 946.6444702148438, 1632.2572021484375, 1216.3837890625, 1047.5006103515625, 1849.1162109375, 1333.7916259765625, 1685.0941162109375, 2368.53271484375, 791.1348876953125, 196.84019470214844, 530.9300537109375, 1134.4775390625, 140.6986846923828, 111.37653350830078, 762.5675048828125, 2095.29736328125, 2465.11279296875, 1658.7261962890625, 2562.28857421875, 2319.827392578125, 3024.81591796875, 3436.904541015625, 2894.4931640625, 2570.035400390625, 2322.34814453125, 1368.3397216796875, 779.1047973632812, 633.0827026367188, 1729.9315185546875, 2508.07568359375, 1142.076171875, 1715.5069580078125, 1832.4307861328125, 2890.314697265625, 1531.830810546875, 1348.53759765625, 2168.46728515625, 34



1259
[501.2041015625, 208.30746459960938, 407.37786865234375, 827.15576171875, 1244.8450927734375, 1814.274169921875, 1896.3724365234375, 1660.9600830078125, 1327.3404541015625, 779.96337890625, 1155.7720947265625, 2296.073486328125, 303.3127746582031, 334.12664794921875, 530.1795043945312, 424.6109619140625, 1382.16015625, 1982.427978515625, 1027.64453125, 1480.6156005859375, 501.6917419433594, 457.58074951171875, 1422.7464599609375, 1755.7987060546875, 1774.2232666015625, 286.7215270996094, 118.4649887084961, 213.28590393066406, 436.32305908203125, 1118.352294921875, 629.840576171875, 97.22150421142578, 461.7117004394531, 1005.5535888671875, 622.9615478515625, 925.533203125, 2299.821533203125, 1652.435791015625, 370.8089904785156, 1678.746337890625, 2693.78515625, 2932.785888671875, 677.4301147460938, 99.31713104248047, 673.6253662109375, 1185.454833984375, 1156.4564208984375, 2139.745361328125, 359.1146240234375, 557.312744140625, 611.8912353515625, 895.751220703125, 232.52952575683



97
[1227.7379150390625, 2105.47021484375, 326.396484375, 1134.0098876953125, 544.7688598632812, 1775.79296875, 1680.5894775390625, 197.1739959716797, 1355.771240234375, 1256.91943359375, 82.78280639648438, 2115.145263671875, 1085.6029052734375, 3438.059326171875, 483.1141357421875, 750.2340698242188, 1390.2496337890625, 1479.334716796875, 911.5027465820312, 1876.70947265625, 2150.358642578125, 666.2351684570312, 2639.23974609375, 3025.388671875, 854.4801025390625, 663.9542236328125, 524.0569458007812, 1118.600341796875, 2194.3876953125, 3546.61865234375, 2943.82177734375, 1207.0367431640625, 315.37567138671875, 1724.142822265625, 2726.476318359375, 1740.3848876953125, 1749.6328125, 1298.548583984375, 1496.9224853515625, 1003.1173095703125, 390.6617736816406, 161.10650634765625, 713.7503662109375, 3517.610107421875, 3231.3564453125, 744.6939697265625, 1273.0965576171875, 2410.92236328125, 636.21044921875, 768.7245483398438, 862.911865234375, 2249.11328125, 3553.54248046875, 3636.1848144



In [60]:
import statistics
UZB_pop_median = statistics.median(pop_values)

In [61]:
# Psuedocode

# for each Shape/FUA:
    # pixel_count_below_median = 0
    # pixel_count_above_median = 0
    
    # Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)
    # calculate pixel_share_below_median and pixel_share_above_median
    
    # Sprawl = ((L%−H%)+1)*0.5
    # Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.05

In [62]:
%%time

with rasterio.open(GHS_pop) as src:
    pixelSizeX, pixelSizeY = src.res
    print(pixelSizeX, pixelSizeY)
    
    input_shapes_gpd = gpd.read_file(shpName)

    # pixel_count_below_median = 0
    pixel_count_below_median = 0
    # pixel_count_above_median = 0
    pixel_count_above_median = 0
    
    #for entry in input_shapes_gpd.head(3).iterrows():
    for entry in input_shapes_gpd.iterrows():
        print(entry[0])
        
        # extract the geometry in GeoJSON format
        geometry = entry[1]['geometry'] # list of shapely geometries
        #geometry = geoms[0] # shapely geometry
        geoms = [mapping(geometry)]

        # extract the raster values values within the polygon 
        out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0)
        data = out_image[0,:,:]
        
        row, col = np.where(data != -9999.0) 
        val = np.extract(data != -9999.0, data)

        T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre
        rc2xy = lambda r, c: (c, r) * T1  
        
        d = gpd.GeoDataFrame({'col':col,'row':row,'val':val})
        
        # coordinate transformation
        d['x'] = d.apply(lambda row: rc2xy(row.row,row.col)[0], axis=1)
        d['y'] = d.apply(lambda row: rc2xy(row.row,row.col)[1], axis=1)
        
        # geometry
        d['geometry'] = d.apply(lambda row: Point(row['x'], row['y']), axis=1)
        
        # exlude pixels with value less than 77
        d = d[d.val > 77]
        d_count = len(d)
        print(f"d_count is {d_count}")
        
        #print(d.val[d.val < UZB_pop_median])
        print(len(d.val[d.val < UZB_pop_median]))
        pixel_share_below_median = len(d.val[d.val < UZB_pop_median]) / d_count
        
        #print(d.val[d.val > UZB_pop_median])
        print(len(d.val[d.val > UZB_pop_median]))
        pixel_share_above_median = len(d.val[d.val > UZB_pop_median]) / d_count
        
        # Sprawl = ((L%−H%)+1)*0.5
        # Sprawl = ((pixel_count_below_median-pixel_count_above_median)+1)*.05
        Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.05
        print(f"Sprawl index is: {Sprawl}")
                       
        # creates a temporary GDF for just the row's shape
        temp_gdf = input_shapes_gpd.iloc[[entry[0]]]
        
        #print("print temp_gdf")
        #print(temp_gdf)
        
        # Put all metrics in a DataFrame
        metrics_scalar = {}
        metrics_scalar['sprawl_index'] = [Sprawl]
        metrics_df = pd.DataFrame(metrics_scalar)
        
        #print("print metrics_scalar")
        #print(metrics_scalar)
        
        # and concatinate it with the row's shape
        new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)
        
        #print("print new_temp_gdf")
        #print(new_temp_gdf)
        #print(entry[0])
        # put the results of each row into a new DataFrame
        if entry[0] == 0:
            print("new_temp_gdf")
            output_new_temp_gdf = new_temp_gdf
        else:
            output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True)     

1000.0 1000.0
0
d_count is 42
21
21
Sprawl index is: 0.05
new_temp_gdf
1
d_count is 18
6
12
Sprawl index is: 0.03333333333333334
2
d_count is 142
70
72
Sprawl index is: 0.04929577464788733
3
d_count is 31
14
17
Sprawl index is: 0.04516129032258065
4
d_count is 89
30
59
Sprawl index is: 0.03370786516853933
5
d_count is 127
64
63
Sprawl index is: 0.050393700787401574
6
d_count is 151
84
67
Sprawl index is: 0.05562913907284768
7
d_count is 56
31
25
Sprawl index is: 0.05535714285714286
8
d_count is 47
21
26
Sprawl index is: 0.04468085106382979
9
d_count is 147
64
83
Sprawl index is: 0.0435374149659864
10




d_count is 85
43
42
Sprawl index is: 0.05058823529411765
11
d_count is 179
107
72
Sprawl index is: 0.05977653631284916
12
d_count is 151
83
68
Sprawl index is: 0.05496688741721855
13
d_count is 127
78
49
Sprawl index is: 0.061417322834645675
14
d_count is 44
21
23
Sprawl index is: 0.04772727272727273
15
d_count is 162
76
86
Sprawl index is: 0.04691358024691358
16
d_count is 249
94
155
Sprawl index is: 0.037751004016064266
17
d_count is 506
256
250
Sprawl index is: 0.05059288537549407
18




d_count is 50
25
25
Sprawl index is: 0.05
19
d_count is 158
72
86
Sprawl index is: 0.04556962025316456
20
d_count is 39
17
22
Sprawl index is: 0.043589743589743594
21
d_count is 39
15
24
Sprawl index is: 0.038461538461538464
22
d_count is 155
58
97
Sprawl index is: 0.03741935483870967
23
d_count is 28
10
18
Sprawl index is: 0.03571428571428571
24
d_count is 1021
465
556
Sprawl index is: 0.045543584720861896
25




d_count is 40
22
18
Sprawl index is: 0.05500000000000001
26
d_count is 62
33
28
Sprawl index is: 0.05403225806451613
27
d_count is 116
80
36
Sprawl index is: 0.06896551724137932
28
d_count is 59
34
25
Sprawl index is: 0.057627118644067804
29
d_count is 106
47
59
Sprawl index is: 0.04433962264150944
30
d_count is 127
72
55
Sprawl index is: 0.05669291338582677
31
d_count is 66
34
32
Sprawl index is: 0.051515151515151514
32
d_count is 252
133
119
Sprawl index is: 0.052777777777777785
33
d_count is 60
30
30
Sprawl index is: 0.05
34
d_count is 40
18
22
Sprawl index is: 0.045
35
d_count is 98




59
39
Sprawl index is: 0.06020408163265306
36
d_count is 109
62
47
Sprawl index is: 0.05688073394495413
37
d_count is 512
242
270
Sprawl index is: 0.047265625000000006
38
d_count is 85
45
40
Sprawl index is: 0.05294117647058824
39
d_count is 81
38
43
Sprawl index is: 0.04691358024691358
40
d_count is 45
24
21
Sprawl index is: 0.05333333333333334
41
d_count is 387
216
171
Sprawl index is: 0.05581395348837209
42
d_count is 43
22
21
Sprawl index is: 0.051162790697674425
43
d_count is 85
45
40
Sprawl index is: 0.05294117647058824




44
d_count is 40
20
20
Sprawl index is: 0.05
45
d_count is 127
80
47
Sprawl index is: 0.06299212598425197
46
d_count is 393
210
183
Sprawl index is: 0.0534351145038168
47
d_count is 41
15
26
Sprawl index is: 0.036585365853658534
48
d_count is 44
24
20
Sprawl index is: 0.05454545454545454
Wall time: 1.06 s




In [63]:
output_new_temp_gdf

Unnamed: 0,Name,descriptio,timestamp,begin,end,altitudeMo,tessellate,extrude,visibility,drawOrder,...,UC_area,FUA_p_2015,UC_p_2015,Com_p_2015,_count,_sum,_mean,sum_sq_km,geometry,sprawl_index
0,,,,,,,-1,0,-1,0,...,19.0,85652.43,69508.58,16143.844847,275.0,1021.848162,3.715811,2.221,"POLYGON ((5800000.000 4952000.000, 5798000.000...",0.05
1,,,,,,,-1,0,-1,0,...,13.0,65461.34,62013.08,3448.26825,112.0,482.444814,4.307543,2.68,"POLYGON ((6007000.000 4959000.000, 6008000.000...",0.033333
2,,,,,,,-1,0,-1,0,...,73.0,458838.5,401135.2,57703.29232,1086.0,7815.237149,7.196351,4.39,"POLYGON ((6155000.000 4854000.000, 6159000.000...",0.049296
3,,,,,,,-1,0,-1,0,...,16.0,71340.75,60618.55,10722.196915,215.0,236.673061,1.100805,7.171,"POLYGON ((4937000.000 5055000.000, 4936000.000...",0.045161
4,,,,,,,-1,0,-1,0,...,58.0,318517.0,288239.8,30277.208678,839.0,1557.179394,1.855995,1.153,"POLYGON ((4943000.000 5072000.000, 4944000.000...",0.033708
5,,,,,,,-1,0,-1,0,...,55.0,213269.7,136678.8,76590.945883,903.0,448.739338,0.496943,3.094,"POLYGON ((5058000.000 4953000.000, 5059000.000...",0.050394
6,,,,,,,-1,0,-1,0,...,59.0,294797.7,215565.9,79231.752805,1197.0,2535.01963,2.117811,1.313,"POLYGON ((5069000.000 4970000.000, 5072000.000...",0.055629
7,,,,,,,-1,0,-1,0,...,21.0,126482.3,96651.27,29831.047428,448.0,344.660887,0.769332,4.923,"POLYGON ((5076000.000 4980000.000, 5080000.000...",0.055357
8,,,,,,,-1,0,-1,0,...,27.0,93583.63,76794.64,16788.997039,347.0,317.778349,0.915788,5.674,"POLYGON ((5089000.000 4958000.000, 5090000.000...",0.044681
9,,,,,,,-1,0,-1,0,...,77.0,326631.5,252613.7,74017.854774,973.0,451.59969,0.464131,2.913,"POLYGON ((5100000.000 4968000.000, 5105000.000...",0.043537


In [64]:
# make the GeoDataFrame unprojected
output_new_temp_gdf = output_new_temp_gdf.to_crs('epsg:4326')

# save as shapefile
output_new_temp_gdf.to_file(r"C:\repos\INFRA_SAP\Notebooks\UZB_sprawl_index_4326.shp")