# calculate urban clusters

### Classifies urban clusters using the Degree of urbanisation methodology (https://ghsl.jrc.ec.europa.eu/degurbaDefinitions.php)

This methodology uses consistent 1X1 kilometer grid cells to classify cells based on population density, contiguity, and population size.

In [3]:
# import libraries
import sys, os, inspect, logging, importlib
import rasterio
import pandas as pd
import geopandas as gpd

In [4]:
# https://stackoverflow.com/questions/34478398/import-local-function-from-a-module-housed-in-another-directory-with-relative-im
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [5]:
module_path

'C:\\Users\\wb546254\\Documents\\GitHub\\INFRA_SAP'

In [11]:
from infrasap.UrbanRaster import *

## Inputs include a population grid. In this analysis we will calculate urban clusters using  WorldPop

In [12]:
# bring in WorldPop
worldpop_ubz_file = r"C:\Users\wb546254\OneDrive - WBG\Documents\ubz_urban_analysis\kazakstan_data\kaz_ppp_2020_1km_Aggregated.tif"

WorldPop is an unprojected WGS84 raster. The strategy we will use is to calculate the area in sq km of each pixel in the  raster. This will enable us to create a normalized version of the raster that contains population count per sq km for the pixel values. 

In [13]:
# The WorldPoP file is unprojected in WGS84
# Each pixel is 0.00833333 by 0.00833333 decimal degrees, which is approximately 1X1 kilometer at the equator
# The problem with unprojected WGS84 data is that the pixel area changes depending on the latitude.
worldpop_ubz = rasterio.open(worldpop_ubz_file)
worldpop_ubz.crs

CRS.from_epsg(4326)

In [14]:
# calc_areagrid calculates the area in sq km of each pixel
worldpop_UBZ_areagrid = calc_areagrid(worldpop_ubz_file)
worldpop_UBZ_areagrid

array([[0.48936868, 0.48936868, 0.48936868, ..., 0.48936868, 0.48936868,
        0.48936868],
       [0.48947109, 0.48947109, 0.48947109, ..., 0.48947109, 0.48947109,
        0.48947109],
       [0.4895735 , 0.4895735 , 0.4895735 , ..., 0.4895735 , 0.4895735 ,
        0.4895735 ],
       ...,
       [0.65292263, 0.65292263, 0.65292263, ..., 0.65292263, 0.65292263,
        0.65292263],
       [0.6530027 , 0.6530027 , 0.6530027 , ..., 0.6530027 , 0.6530027 ,
        0.6530027 ],
       [0.65308275, 0.65308275, 0.65308275, ..., 0.65308275, 0.65308275,
        0.65308275]])

In [15]:
# convert to float32
print(worldpop_UBZ_areagrid.dtype)
worldpop_UBZ_areagrid = np.float32(worldpop_UBZ_areagrid)
print(worldpop_UBZ_areagrid.dtype)

float64
float32


In [16]:
# create numpy ndarray of worldpop_ubz
# pop_values
with rasterio.open(worldpop_ubz_file, 'r') as ds:
    worldpop_ubz_ndarray = ds.read()  # read all raster values

In [17]:
ds.meta

{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': -99999.0,
 'width': 4899,
 'height': 1786,
 'count': 1,
 'crs': CRS.from_epsg(4326),
 'transform': Affine(0.0083333333, 0.0, 46.49041648586578,
        0.0, -0.0083333333, 55.44124996151709)}

In [18]:
type(worldpop_ubz_ndarray)

numpy.ndarray

In [19]:
worldpop_ubz_ndarray

array([[[-99999., -99999., -99999., ..., -99999., -99999., -99999.],
        [-99999., -99999., -99999., ..., -99999., -99999., -99999.],
        [-99999., -99999., -99999., ..., -99999., -99999., -99999.],
        ...,
        [-99999., -99999., -99999., ..., -99999., -99999., -99999.],
        [-99999., -99999., -99999., ..., -99999., -99999., -99999.],
        [-99999., -99999., -99999., ..., -99999., -99999., -99999.]]],
      dtype=float32)

In [20]:
worldpop_ubz_ndarray.shape

(1, 1786, 4899)

### flatten array to 2 dimensions

In [52]:
# One shape dimension can be -1. 
# In this case, the value is inferred from 
# the length of the array and remaining dimensions.
worldpop_ubz_ndarray_2d = worldpop_ubz_ndarray.reshape(-1, worldpop_ubz_ndarray.shape[-1])

In [53]:
worldpop_ubz_ndarray_2d.shape

(1786, 4899)

In [54]:
# create a new array where values are pop_values / sq km
# pop_values may contain -99999 values for nodata, in this case the new values will still be large negative values.
# This should be OK
# new_array = pop_values / areagrid
pop_per_sq_km = np.divide(worldpop_ubz_ndarray_2d, worldpop_UBZ_areagrid)

In [55]:
pop_per_sq_km

array([[-204342.88, -204342.88, -204342.88, ..., -204342.88, -204342.88,
        -204342.88],
       [-204300.11, -204300.11, -204300.11, ..., -204300.11, -204300.11,
        -204300.11],
       [-204257.38, -204257.38, -204257.38, ..., -204257.38, -204257.38,
        -204257.38],
       ...,
       [-153155.97, -153155.97, -153155.97, ..., -153155.97, -153155.97,
        -153155.97],
       [-153137.2 , -153137.2 , -153137.2 , ..., -153137.2 , -153137.2 ,
        -153137.2 ],
       [-153118.42, -153118.42, -153118.42, ..., -153118.42, -153118.42,
        -153118.42]], dtype=float32)

### set all negative values to -99999.0, which is the NoData value

In [56]:
pop_per_sq_km[pop_per_sq_km < 0] = -99999.0

In [57]:
with rasterio.open(worldpop_ubz_file, 'r') as ds:
    print(ds.meta)
    ulX = ds.meta['transform'][2]
    ulY = ds.meta['transform'][5]

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -99999.0, 'width': 4899, 'height': 1786, 'count': 1, 'crs': CRS.from_epsg(4326), 'transform': Affine(0.0083333333, 0.0, 46.49041648586578,
       0.0, -0.0083333333, 55.44124996151709)}


## Save ndarray as raster

In [58]:
# https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster

from rasterio.transform import from_origin

#arr = np.random.randint(5, size=(100,100)).astype(np.float)

# rasterio.transform.from_origin(west, north, xsize, ysize)
# Return an Affine transformation given upper left and pixel sizes.
transform = from_origin(ulX,ulY,0.00833333,0.00833333)

new_dataset = rasterio.open('kazakhstan_normalized_worldpop_per_km_sq.tif', 'w', driver='GTiff',
                            height = pop_per_sq_km.shape[0], width = pop_per_sq_km.shape[1],
                            count=1, dtype=str(pop_per_sq_km.dtype),
                            crs='+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs',
                            transform=transform)

new_dataset.nodata = -99999.0
new_dataset.write(pop_per_sq_km, 1)
new_dataset.close()

In [59]:
# inspect
with rasterio.open('kazakhstan_normalized_worldpop_per_km_sq.tif', 'r') as ds:
    print(ds.meta)

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -99999.0, 'width': 4899, 'height': 1786, 'count': 1, 'crs': CRS.from_epsg(4326), 'transform': Affine(0.00833333, 0.0, 46.49041648586578,
       0.0, -0.00833333, 55.44124996151709)}


## Calculate Urban Extents

### run normalized_world_pop urban centres

In [65]:
# import normalized WorldPoP file
normalized_world_pop = r"C:\Users\wb546254\Documents\GitHub\INFRA_SAP\Notebooks\Implementations\kazakhstan_normalized_worldpop_per_km_sq.tif"

In [66]:
urban_raster = urbanGriddedPop(normalized_world_pop)

In [67]:
urban_extents = urban_raster.calculateUrban(densVal=1500, totalPopThresh=50000, smooth=True)

In [68]:
wp_urbanExtents = r"kazakstan_urban_centres_world_pop_normalized_smooth.shp"

In [69]:
#urban_extents.to_csv(wp_urbanExtents)
urban_extents.to_file(wp_urbanExtents)

### run normalized_world_pop urban clusters

In [70]:
urban_raster = urbanGriddedPop(normalized_world_pop)
urban_extents = urban_raster.calculateUrban(densVal=300, totalPopThresh=5000, smooth=True)
wp_urbanExtents = r"kazakstan_urban_clusters_world_pop_normalized_smooth.shp"
urban_extents.to_file(wp_urbanExtents)