In [195]:
import sys, os, json
import rasterio, geopy

import pandas as pd
import geopandas as gpd

sys.path.insert(0, "../../../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
from GOSTRocks.misc import tPrint

sys.path.append("../../../src")

import GOST_Urban.UrbanRaster as urban
import GOST_Urban.urban_helper as clippy

%load_ext autoreload
%autoreload 2

# Load the local parameter file into `important_vars`.
# The path is specific to the machine this notebook was run on.
local_json = "/home/wb411133/Code/urbanParameters.json"
with open(local_json, mode="r") as inJ:
    important_vars = json.load(inJ)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [204]:
# --- Run configuration --------------------------------------------------
# Everything below is derived from the country code, so switching country
# only requires changing `iso3` (assuming the input files follow the same
# naming pattern for that country).
iso3 = "UKR"
output_dir = f"/home/wb411133/data/Projects/{iso3}_Urbanization"
# exist_ok=True keeps the cell safe to re-run when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

# Input datasets
population_file = f"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_{iso3}.tif"
# Previously hard-coded to UKR; now follows `iso3` like the population file.
admin_bounds = f"/home/public/Data/COUNTRY/{iso3}/ADMIN/geoBoundaries-{iso3}-ADM3.geojson"
GHSL_file = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt"

# Define output files
urban_extents_file = os.path.join(output_dir, f"{iso3}_urban_extents.geojson")
urban_extents_raster_file = os.path.join(output_dir, f"{iso3}_urban_extents.tif")
urban_extents_hd_file = os.path.join(output_dir, f"{iso3}_urban_extents_hd.geojson")
urban_extents_hd_raster_file = os.path.join(output_dir, f"{iso3}_urban_extents_hd.tif")
admin_urban_summary = os.path.join(output_dir, "adm3_urban_summary.shp")
urban_admin_summary = os.path.join(output_dir, f"{iso3}_ADM3_urban_summary.csv")

# Final, map-ready layers are collected in their own sub-folder.
final_folder = os.path.join(output_dir, "Mapping_Data")
os.makedirs(final_folder, exist_ok=True)

admin_final = os.path.join(final_folder, "admin_summarized.shp")
urban_final = os.path.join(final_folder, "urban_summarized.shp")
urban_hd_final = os.path.join(final_folder, "urban_hd_summarized.shp")
# Read (not written) by this notebook: polygons of the focal cities.
focal_cities = os.path.join(final_folder, "FOCAL_AOIs.shp")

In [64]:
# Open the raster inputs; these handles are reused by later cells
# (population for the urban extents, GHSL for zonal stats and clipping).
inG = rasterio.open(GHSL_file)
inP = rasterio.open(population_file)
# Administrative polygons used as summary zones throughout the notebook.
inAdmin = gpd.read_file(admin_bounds)

# Run urbanization analysis
1. Create urban extents  
2. Calculate urban population in admin bounds  
3. Summarize nighttime lights in extents and admin
4. Summarize GHSL in extents and admin

In [None]:
# 1. Create urban extents
# Two sets of extents are derived from the population raster: a standard set
# and a high-density ("hd") set with stricter thresholds and smoothing.
# The work is skipped when the standard extents file already exists.
if not os.path.exists(urban_extents_file):
    urban_calculator = urban.urbanGriddedPop(inP)
    urban_extents = urban_calculator.calculateUrban(
        densVal=3, totalPopThresh=5000,
        smooth=False, queen=False,
        verbose=True, raster=urban_extents_raster_file,
    )
    # The high-density run must write to its own raster: the population
    # summary step reads urban_extents_hd_raster_file, and reusing the
    # standard path would overwrite the standard extents raster.
    urban_extents_hd = urban_calculator.calculateUrban(
        densVal=15, totalPopThresh=50000,
        smooth=True, queen=False,
        verbose=True, raster=urban_extents_hd_raster_file,
    )
    urban_extents.to_file(urban_extents_file, driver="GeoJSON")
    urban_extents_hd.to_file(urban_extents_hd_file, driver="GeoJSON")


In [76]:
# 2. Calculate urban population in admin areas
# Zonal population totals per admin unit: overall, inside the urban extents
# raster and inside the high-density extents raster.
pop_worker = clippy.summarize_population(
    population_file,
    gpd.read_file(admin_bounds),
    urban_extents_raster_file,
    urban_extents_hd_raster_file,
)
summarized_urban = pop_worker.calculate_zonal()

# Keep only the SUM columns. The explicit .copy() gives an independent frame,
# so renaming and adding columns below never touches (or warns about)
# `summarized_urban`.
sum_cols = [x for x in summarized_urban.columns if "SUM" in x]
urban_res = summarized_urban.loc[:, sum_cols].copy()
# Assumes exactly three SUM columns in this order — TODO confirm against
# summarize_population's output naming.
urban_res.columns = ['TOTAL_POP', "URBAN_POP", "URBAN_HD_POP"]
# Attach admin identifiers (aligned on the row index).
urban_res['shapeID'] = inAdmin['shapeID']
urban_res['shapeName'] = inAdmin['shapeName']
urban_res.to_csv(urban_admin_summary)

In [24]:
# 3. Summarize nighttime lights in admin bounds and urban extents
# Load the two sets of urban extents written in step 1 ...
urbanHD = gpd.read_file(urban_extents_hd_file)
urbanD = gpd.read_file(urban_extents_file)

# ... and collect the nighttime-lights rasters to summarize.
ntl_files = ntl.find_monthly_ntl()

Completed loop: 0


In [53]:
# Per-raster zonal statistics are cached as one CSV per (zone set, period)
# so an interrupted run can resume without recomputing finished periods.
viirs_folder = os.path.join(output_dir, "NTL_ZONAL_RES")
os.makedirs(viirs_folder, exist_ok=True)

# Column suffixes applied to the zonalStats output.
ntl_stats = ['SUM', 'MIN', 'MAX', 'MEAN']

for ntl_file in ntl_files:
    # The period label is the 4th underscore-separated token of the file name.
    name = os.path.basename(ntl_file).split("_")[3]
    tPrint("Processing %s" % name)

    # (zones, output-file prefix, column prefix) for each summary.
    zonal_jobs = [
        (urbanD, "URBAN", "URBAN"),          # Urban Summary
        (urbanHD, "HD_URBAN", "HD_URBAN"),   # HD Urban Summary
        (inAdmin, "ADMIN", "ADM_URBAN"),     # admin Summary
    ]
    pending = []
    for zones, file_prefix, col_prefix in zonal_jobs:
        out_csv = os.path.join(viirs_folder, f"{file_prefix}_{name}.csv")
        if not os.path.exists(out_csv):
            pending.append((zones, out_csv, col_prefix))

    # Nothing left for this period: skip opening the raster at all.
    if not pending:
        continue

    # The context manager closes the raster handle after each period instead
    # of leaving one open dataset per loop iteration.
    with rasterio.open(ntl_file) as inR:
        for zones, out_csv, col_prefix in pending:
            zonal_res = rMisc.zonalStats(zones, inR, minVal=0.1)
            col_names = [f'{col_prefix}_{name}_{x}' for x in ntl_stats]
            pd.DataFrame(zonal_res, columns=col_names).to_csv(out_csv)
    

11:55:56	Processing 201204-201303
11:56:14	Processing 2013
11:56:31	Processing 2014
11:56:48	Processing 2015
11:57:04	Processing 2016
11:57:20	Processing 2017
11:57:36	Processing 2018
11:57:53	Processing 2019
11:58:10	Processing 2020


In [120]:
# Compile VIIRS results
# Destination tables for the compiled summaries. These names are used at the
# end of this cell and by the final compile step, so they are defined here.
# NOTE(review): file names mirror the *_GHSL_summary.csv naming — confirm
# against any outputs produced by earlier runs.
admin_viirs_summary = os.path.join(output_dir, "admin_VIIRS_summary.csv")
urban_viirs_summary = os.path.join(output_dir, "urban_VIIRS_summary.csv")
urban_hd_viirs_summary = os.path.join(output_dir, "urbanhd_VIIRS_summary.csv")

# os.listdir returns entries in arbitrary order; sorting makes the column
# order deterministic. For the period labels produced above
# (201204-201303, 2013, ..., 2020) lexical order is also chronological.
urb_files = sorted(x for x in os.listdir(viirs_folder) if x.startswith("URBAN"))
hd_urb_files = sorted(x for x in os.listdir(viirs_folder) if x.startswith("HD_URBAN"))
admin_urb_files = sorted(x for x in os.listdir(viirs_folder) if x.startswith("ADMIN"))

for target, csv_names in [
    (urbanD, urb_files),
    (urbanHD, hd_urb_files),
    (inAdmin, admin_urb_files),
]:
    for x in csv_names:
        tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)
        # First data column is the SUM statistic; the new column is named
        # after the file without its ".csv" extension (e.g. "URBAN_2013").
        target[x[:-4]] = tempD.iloc[:, 0]

urbanD.to_csv(urban_viirs_summary)
urbanHD.to_csv(urban_hd_viirs_summary)
inAdmin.to_csv(admin_viirs_summary)

In [134]:
# 4. Summarize GHSL in extents and admin
# One cached CSV per zone set, holding a count column per GHSL class value
# plus the zone identifier in `gID`.
ghsl_cols = [f'c_{x}' for x in [1,2,3,4,5,6]]
admin_ghsl_summary = os.path.join(output_dir, "admin_GHSL_summary.csv")
urban_ghsl_summary = os.path.join(output_dir, "urban_GHSL_summary.csv")
urbanHD_ghsl_summary = os.path.join(output_dir, "urbanhd_GHSL_summary.csv")

# (zones, identifier column, cache file) — processed in this order.
for ghsl_zones, ghsl_id_col, ghsl_summary_path in (
    (inAdmin, 'shapeID', admin_ghsl_summary),
    (urbanD, 'ID', urban_ghsl_summary),
    (urbanHD, 'ID', urbanHD_ghsl_summary),
):
    # Already summarized on a previous run: leave the cached file alone.
    if os.path.exists(ghsl_summary_path):
        continue
    res = rMisc.zonalStats(ghsl_zones, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)
    res = pd.DataFrame(res, columns = ghsl_cols)
    res['gID'] = ghsl_zones[ghsl_id_col]
    res.to_csv(ghsl_summary_path)
    
def get_built(x):
    """Relative growth in built-up area between the baseline and 2014.

    Parameters
    ----------
    x : row-like with 'b2014', 'b2000' and 'b1990' entries.

    Returns
    -------
    (b2014 - base) / base, where base is b1990, or b2000 when b1990 is 0.
    Returns -1 as a sentinel when both baselines are 0.
    """
    cur_built = x['b2014']
    base_built = x['b1990']
    if base_built == 0:
        base_built = x['b2000']
    # Explicit zero check instead of a bare `except`: numpy scalars do not
    # raise on division by zero (they yield inf/NaN), so relying on the
    # exception only produced the -1 sentinel for some input dtypes.
    if base_built == 0:
        return -1
    return (cur_built - base_built) / base_built


for ghsl_file in [admin_ghsl_summary, urban_ghsl_summary, urbanHD_ghsl_summary]:
    adm_ghsl = pd.read_csv(ghsl_file, index_col=0)
    # Cumulative built-up totals per epoch, summed from the class columns.
    # NOTE(review): the class-to-epoch mapping (c_3..c_6 -> built by 2014,
    # c_4..c_6 -> by 2000, c_5..c_6 -> by 1990) is taken from the original
    # column arithmetic — confirm against the GHSL product legend.
    adm_ghsl['b2014'] = adm_ghsl['c_3'] + adm_ghsl['c_4'] + adm_ghsl['c_5'] + adm_ghsl['c_6']
    adm_ghsl['b2000'] = adm_ghsl['c_4'] + adm_ghsl['c_5'] + adm_ghsl['c_6']
    adm_ghsl['b1990'] = adm_ghsl['c_5'] + adm_ghsl['c_6']

    adm_ghsl['g_14_90'] = adm_ghsl.apply(get_built, axis=1)
    # Written back in place; re-running simply recomputes the derived columns.
    adm_ghsl.to_csv(ghsl_file)

# Compile all results for admin divisions and urban extents

In [142]:
# Compile data
def _ntl_sum_for_year(viirs_table, year):
    """Return the single column of `viirs_table` whose name ends in `_<year>`.

    Looking the column up by name avoids depending on the order in which the
    per-period columns were appended. Raises KeyError when there is not
    exactly one match.
    """
    matches = [c for c in viirs_table.columns if str(c).endswith(f"_{year}")]
    if len(matches) != 1:
        raise KeyError(f"Expected exactly one column ending in _{year}, found {matches}")
    return viirs_table[matches[0]]


# (shapefile, population_summary, viirs_summary, ghsl_summary, out_file)
# An empty population_summary means "no population table for this layer".
compile_defs = [
    (admin_bounds, urban_admin_summary, admin_viirs_summary, admin_ghsl_summary, admin_final),
    (urban_extents_file, '', urban_viirs_summary, urban_ghsl_summary, urban_final),
    (urban_extents_hd_file, '', urban_hd_viirs_summary, urbanHD_ghsl_summary, urban_hd_final),
]

for zones_file, pop_csv, viirs_csv, ghsl_csv, out_file in compile_defs:
    curD = gpd.read_file(zones_file)

    if pop_csv != '':
        curPop = pd.read_csv(pop_csv, index_col=0)
        curD['Pop'] = curPop['TOTAL_POP']
        # Urban shares of total population (column-wise division; a zero
        # total yields inf/NaN rather than raising).
        curD['urbanPop'] = curPop['URBAN_POP'] / curPop['TOTAL_POP']
        curD['urbanPopHD'] = curPop['URBAN_HD_POP'] / curPop['TOTAL_POP']

    viirsD = pd.read_csv(viirs_csv, index_col=0)
    curD['NTL2013'] = _ntl_sum_for_year(viirsD, 2013)
    curD['NTL2020'] = _ntl_sum_for_year(viirsD, 2020)
    # Relative change in summed nighttime lights, 2013 -> 2020.
    curD['NTL_g'] = (curD['NTL2020'] - curD['NTL2013']) / curD['NTL2013']

    ghslD = pd.read_csv(ghsl_csv, index_col=0)
    curD['b2014'] = ghslD['b2014']
    curD['g_14_90'] = ghslD['g_14_90']

    curD.to_file(out_file)
    
    

In [144]:
# Preview the compiled admin-level layer written by the compile step.
gpd.read_file(admin_final).head()

Unnamed: 0,shapeName,Level,shapeID,shapeGroup,shapeType,Pop,urbanPop,urbanPopHD,NTL2013,NTL2020,NTL_g,b2014,g_14_90,geometry
0,Tinystivska,ADM3,UKR-ADM3-10664576B65180807,UKR,ADM3,3164.477051,0.041801,0.0,67.887924,216.104889,2.18326,3487,0.044638,"POLYGON ((33.72008 44.71634, 33.71926 44.71593..."
1,Uiutnenska,ADM3,UKR-ADM3-10664576B20681754,UKR,ADM3,4659.81543,0.174562,0.0,97.932007,354.656372,2.621455,16472,0.317549,"POLYGON ((33.32283 45.22140, 33.32233 45.22139..."
2,Marfivska,ADM3,UKR-ADM3-10664576B29349824,UKR,ADM3,2376.817139,0.0,0.0,39.036156,414.872192,9.627896,636,0.177778,"POLYGON ((36.16669 45.18376, 36.12935 45.18249..."
3,Medvedivska,ADM3,UKR-ADM3-10664576B32425034,UKR,ADM3,3065.207031,0.0,0.0,45.49316,267.982178,4.890604,4206,0.342483,"MULTIPOLYGON (((34.55497 45.84983, 34.54492 45..."
4,Oleksiivska,ADM3,UKR-ADM3-10664576B19447239,UKR,ADM3,1716.462158,0.0,0.0,10.310785,329.533112,30.96004,1291,0.778237,"POLYGON ((33.75281 45.58381, 33.71871 45.58878..."


In [145]:
# Preview the compiled urban-extents layer written by the compile step.
gpd.read_file(urban_final).head()

Unnamed: 0,ID,Pop,NTL2013,NTL2020,NTL_g,b2014,g_14_90,geometry
0,9258,73396.0,467.266479,527.870972,0.1297,39097.0,0.150352,"POLYGON ((33.47958 51.88625, 33.48208 51.88625..."
1,9263,5969.456055,40.433155,57.875954,0.431398,7113.0,0.262962,"POLYGON ((24.54208 51.68292, 24.54708 51.68292..."
2,9264,32302.546875,122.652893,239.193436,0.950165,22658.0,0.202016,"POLYGON ((33.88208 51.69208, 33.88625 51.69208..."
3,9269,9637.308594,61.957565,119.894577,0.935108,14085.0,0.333302,"POLYGON ((24.96375 51.64042, 24.96458 51.64042..."
4,9278,25341.306641,5.466973,7.418443,0.356956,369.0,3.855263,"POLYGON ((30.59542 51.52375, 30.60042 51.52375..."


In [146]:
# Preview the compiled high-density urban-extents layer written by the compile step.
gpd.read_file(urban_hd_final).head()

Unnamed: 0,ID,Pop,NTL2013,NTL2020,NTL_g,b2014,g_14_90,geometry
0,2657,52206.855469,364.444336,419.953583,0.152312,32926.0,0.14929,"POLYGON ((33.45458 51.88125, 33.46125 51.88125..."
1,2658,221214.5,1054.136108,1941.63916,0.841925,102841.0,0.184818,"POLYGON ((31.25458 51.55042, 31.25875 51.55042..."
2,2659,61731.027344,291.76947,862.169556,1.954968,52527.0,0.15632,"POLYGON ((33.23958 51.25625, 33.24958 51.25625..."
3,2662,113739.703125,5424.758789,4393.543945,-0.190094,113650.0,0.065705,"POLYGON ((34.76625 50.95958, 34.77292 50.95958..."
4,2663,148510.109375,2359.865234,2369.23877,0.003972,90305.0,0.23865,"POLYGON ((25.36708 50.78792, 25.36792 50.78792..."


# Extract sample data for mapping

In [181]:
# Destination rasters for the clipped nighttime-lights samples, one per year.
out_ntl_2013, out_ntl_2014, out_ntl_2020 = (
    os.path.join(final_folder, f"VIIRS_{ntl_year}.tif")
    for ntl_year in (2013, 2014, 2020)
)

In [155]:
# Extract nighttime lights for 2013, 2014 and 2020.
# Re-list the available rasters; the bare name on the last line displays the
# list so the positions/periods can be checked before clipping.
ntl_files = ntl.find_monthly_ntl()
ntl_files

Completed loop: 0


['s3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_201204-201303_global_vcmcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2013_global_vcmcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2014_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2015_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2016_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2017_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2018_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2019_global_vcmslcfg_c202101211500.average.tif',
 's3://wbgdecinternal-ntl/NTL/VIIRS

In [182]:
# Index the rasters by the period token in their file name (4th
# underscore-separated token, e.g. "2013") so each year is selected by name
# rather than by its position in the list.
ntl_by_period = {os.path.basename(p).split("_")[3]: p for p in ntl_files}

for ntl_period, out_ntl in [
    ("2013", out_ntl_2013),
    ("2014", out_ntl_2014),
    ("2020", out_ntl_2020),
]:
    # Skip years that were already clipped on a previous run.
    if os.path.exists(out_ntl):
        continue
    # A missing period raises KeyError here instead of silently clipping the
    # wrong year; the context manager closes the raster handle afterwards.
    with rasterio.open(ntl_by_period[ntl_period]) as ntl_raster:
        rMisc.clipRaster(ntl_raster, inAdmin, out_ntl)

In [205]:
# Extract ghsl for the 9 focal cities
# Read the focal-city polygons; the last line previews the first rows.
in_cities = gpd.read_file(focal_cities)
in_cities.head()

Unnamed: 0,id,Name,geometry
0,1,Odessa,"POLYGON ((30.25058 46.14250, 30.25996 46.65254..."
1,2,Zaporizzia,"POLYGON ((34.94663 47.92600, 35.38985 47.93029..."
2,3,Sumy,"POLYGON ((34.68477 50.98500, 34.96624 50.98270..."
3,4,Poltava,"POLYGON ((34.36383 49.64435, 34.70744 49.64554..."
4,5,Harkiv,"POLYGON ((35.97394 50.11743, 36.51403 50.11625..."


In [207]:
# Upper bound on the number of cities processed; lower it for a quick test run.
max_cnt = 100

# One clipped GHSL raster per focal city, named after the city.
for idx, row in in_cities.head(max_cnt).iterrows():
    out_file = os.path.join(final_folder, f"ghsl_{row['Name']}.tif")
    if not os.path.exists(out_file):
        # .loc with a list indexer returns a one-row GeoDataFrame that keeps
        # the CRS and column dtypes, so no manual reconstruction is needed.
        rMisc.clipRaster(inG, in_cities.loc[[idx]], out_file)