# ECA Urban extents

Calculate urban extents using GHS-Pop for all of ECA; attribute with nighttime lights values

In [1]:
import sys
import os
import json
import boto3
import multiprocessing
import rasterio

import pandas as pd
import geopandas as gpd

from botocore.config import Config
from botocore import UNSIGNED
from shapely.geometry import Point

sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
from GOSTRocks.misc import tPrint

sys.path.append("../../../src")
import GOST_Urban.country_helper as country_helper

%load_ext autoreload
%autoreload 2

# read in local important parameters
local_json = "/home/wb411133/Code/urbanParameters.json"
with open(local_json, "r") as inJ:
    important_vars = json.load(inJ)

s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))

In [2]:
global_population_ghs_file = "/home/public/Data/GLOBAL/Population/GHS/2022_1km/GHS_POP_E2020_GLOBE_R2022A_54009_1000_V1_0.tif"
global_admin = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_ghsl_folder = "/home/public/Data/GLOBAL/GHSL/v2022/"

output_folder = "/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

ntl_files = ntl.aws_search_ntl()
ghsl_files = [
    os.path.join(global_ghsl_folder, x)
    for x in os.listdir(global_ghsl_folder)
    if x.endswith(".tif")
]
ghsl_files.sort()

In [8]:
inR = rasterio.open(global_population_ghs_file)

allAdmin = gpd.read_file(global_admin)
inAdmin = allAdmin.loc[
    (allAdmin["Region"] == "Europe & Central Asia")
    | (allAdmin["ISO3"].isin(["RUS", "ROU", "HRV"]))
]
inAdmin = inAdmin.to_crs(inR.crs)

# Calculate urban with helper object

In [9]:
all_args = []
for idx, row in inAdmin.iterrows():
    iso3 = row["ISO3"]
    tPrint(f"*********STARTING {iso3}")
    sel_country = gpd.GeoDataFrame(inAdmin.loc[inAdmin["ISO3"] == iso3], crs=inR.crs)
    sel_country["geometry"] = sel_country["geometry"].apply(lambda x: x.buffer(0))
    cur_folder = os.path.join(output_folder, iso3)
    if not os.path.exists(cur_folder):
        os.makedirs(cur_folder)
    pop_file = os.path.join(cur_folder, f"{iso3}_ghs_pop_2020.tif")
    if not os.path.exists(pop_file):
        rMisc.clipRaster(inR, sel_country, pop_file)
    inP = rasterio.open(pop_file)
    if iso3 == "HRV":
        all_args.append(
            [iso3, sel_country, cur_folder, pop_file, ntl_files, ghsl_files]
        )

15:35:29	*********STARTING ALB
15:35:29	*********STARTING ARM
15:35:29	*********STARTING AZE
15:35:29	*********STARTING BLR
15:35:29	*********STARTING BIH
15:35:29	*********STARTING BGR
15:35:29	*********STARTING HRV
15:35:29	*********STARTING GEO
15:35:29	*********STARTING HUN
15:35:29	*********STARTING KAZ
15:35:29	*********STARTING KGZ
15:35:30	*********STARTING MDA
15:35:30	*********STARTING ROU
15:35:30	*********STARTING RUS
15:35:30	*********STARTING TJK
15:35:30	*********STARTING MKD
15:35:30	*********STARTING TUR
15:35:30	*********STARTING TKM
15:35:30	*********STARTING UKR
15:35:30	*********STARTING UZB
15:35:30	*********STARTING MNE
15:35:30	*********STARTING SRB
15:35:30	*********STARTING KSV


In [10]:
def run_extractor(iso3, sel_country, cur_folder, inP, ntl_files, ghsl_files):
    extractor = country_helper.urban_country(iso3, sel_country, cur_folder, inP)
    # extractor.delete_urban_data()
    extractor.calculate_urban_extents()
    extractor.summarize_ntl(ntl_files=ntl_files)
    extractor.summarize_ghsl(ghsl_files, clip_raster=True, binary_calc=True)

In [11]:
# run a single country
run_extractor(*all_args[0])

15:35:38	Running urbanization for HRV
15:35:38	: Read in urban data
15:35:38	: Creating Shape 0
15:36:04	: Read in urban data
15:36:04	: Creating Shape 0
15:41:07	E1975
15:41:08	E1980
15:41:09	E1985
15:41:10	E1990
15:41:11	E1995
15:41:12	E2000
15:41:13	E2005
15:41:14	E2010
15:41:15	E2015
15:41:17	E2020
15:41:18	P2025LIN
15:41:19	P2030LIN


In [106]:
with multiprocessing.Pool(len(all_args)) as pool:
    pool.starmap(run_extractor, all_args)

09:54:20	Running urbanization for ROU
09:54:20	: Read in urban data
09:54:20	Running urbanization for RUS
09:54:20	: Creating Shape 0
09:54:22	: Read in urban data
09:54:23	: Creating Shape 1000
09:54:23	: Creating Shape 0
09:54:25	: Creating Shape 2000
09:56:00	: Read in urban data
09:56:00	: Creating Shape 0
10:03:26	E1975
10:03:28	E1980
10:03:30	E1985
10:03:33	E1990
10:03:35	E1995
10:03:37	E2000
10:03:39	E2005
10:03:41	E2010
10:03:44	E2015
10:03:46	E2020
10:03:48	P2025LIN
10:03:50	P2030LIN
10:23:18	: Creating Shape 1000
10:49:09	: Creating Shape 2000
11:15:11	: Creating Shape 3000
11:40:36	: Creating Shape 4000
12:06:19	: Creating Shape 5000
12:31:27	: Creating Shape 6000
12:54:56	: Creating Shape 7000
13:20:46	: Creating Shape 8000
13:45:43	: Creating Shape 9000
14:11:56	: Creating Shape 10000
14:37:39	: Creating Shape 11000
15:03:18	: Creating Shape 12000
15:29:21	: Creating Shape 13000
15:55:06	: Creating Shape 14000
16:21:06	: Creating Shape 15000
16:47:03	: Creating Shape 16000

# Merge data

In [35]:
hd_files = []
center_files = []
for root, folders, files in os.walk(output_folder):
    for f in files:
        if f.endswith("extents.geojson"):
            center_files.append(os.path.join(root, f))
        if f.endswith("extents_hd.geojson"):
            hd_files.append(os.path.join(root, f))

In [44]:
all_res = []

for cFile in center_files:
    curD = gpd.read_file(cFile)
    iso3 = os.path.basename(cFile)[:3]
    curD["ISO3"] = iso3
    all_res.append(curD)

final_center = pd.concat(all_res)

In [46]:
all_res = []

for cFile in hd_files:
    curD = gpd.read_file(cFile)
    iso3 = os.path.basename(cFile)[:3]
    curD["ISO3"] = iso3
    all_res.append(curD)

hd_center = pd.concat(all_res)

In [54]:
# match cities to centers
inCities = pd.read_csv(
    "/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.csv"
)
geoms = [Point(x) for x in zip(inCities["lng"], inCities["lat"])]
inCities = gpd.GeoDataFrame(inCities, geometry=geoms, crs=4326)
inCities.to_file(
    "/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.geojson",
    driver="GeoJSON",
)
inCities.head()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,geometry
0,Tokyo,Tokyo,35.6839,139.7744,Japan,JP,JPN,Tōkyō,primary,39105000.0,1392685764,POINT (139.77440 35.68390)
1,Jakarta,Jakarta,-6.2146,106.8451,Indonesia,ID,IDN,Jakarta,primary,35362000.0,1360771077,POINT (106.84510 -6.21460)
2,Delhi,Delhi,28.6667,77.2167,India,IN,IND,Delhi,admin,31870000.0,1356872604,POINT (77.21670 28.66670)
3,Manila,Manila,14.6,120.9833,Philippines,PH,PHL,Manila,primary,23971000.0,1608618140,POINT (120.98330 14.60000)
4,São Paulo,Sao Paulo,-23.5504,-46.6339,Brazil,BR,BRA,São Paulo,admin,22495000.0,1076532519,POINT (-46.63390 -23.55040)


In [55]:
final_center.head()

Unnamed: 0,ID,Pop,area_km,City,State,Country,geometry,ISO3
0,382,5140.474613,14.580036,,,,"POLYGON ((19.42322 42.22001, 19.42147 42.21095...",ALB
1,383,101286.859611,105.284091,,,,"POLYGON ((19.50192 42.12943, 19.49319 42.08416...",ALB
2,384,18543.250703,29.028823,,,,"POLYGON ((20.41983 42.09321, 20.41801 42.08416...",ALB
3,385,24087.070476,30.569495,,,,"POLYGON ((19.65514 41.80375, 19.65341 41.79471...",ALB
4,386,10523.733877,34.099436,,,,"POLYGON ((19.64473 41.74953, 19.64300 41.74049...",ALB


In [77]:
final_center["wCity"] = ""
final_center.reset_index(inplace=True)

for idx, row in final_center.iterrows():
    try:
        sel_city = inCities.loc[inCities.intersects(row["geometry"])]
    except:
        sel_city = inCities.loc[inCities.intersects(row["geometry"].buffer(0))]
    if sel_city.shape[0] > 0:
        final_center.loc[idx, "wCity"] = sel_city["city"].iloc[0]

TopologyException: side location conflict at 49.958401276489617 40.507087222959193
TopologyException: side location conflict at 68.701023661527557 37.755427777143758
TopologyException: side location conflict at 32.713419042156112 39.85334402309207
TopologyException: side location conflict at 27.001225643823744 39.576433690873856
TopologyException: side location conflict at 70.856244995412752 40.516059059789249


In [78]:
final_center.to_file(
    os.path.join(output_folder, "all_urban_centers.geojson"), driver="GeoJSON"
)

In [79]:
hd_center["wCity"] = ""
hd_center.reset_index(inplace=True)
for idx, row in hd_center.iterrows():
    try:
        sel_city = inCities.loc[inCities.intersects(row["geometry"])]
    except:
        sel_city = inCities.loc[inCities.intersects(row["geometry"].buffer(0))]
    if sel_city.shape[0] > 0:
        hd_center.loc[idx, "wCity"] = sel_city["city"].iloc[0]
        break

In [80]:
hd_center

Unnamed: 0,level_0,index,ID,Pop,area_km,City,State,Country,geometry,ISO3,wCity
0,0,0,51,73411.448531,30.844770,Shkoder,Northern Albania,Albania,"POLYGON ((19.49319 42.08416, 19.48449 42.03890...",ALB,Shkodër
1,1,1,52,849847.976494,220.002916,Tirana Municipality,Central Albania,Albania,"POLYGON ((19.73288 41.39751, 19.72945 41.37947...",ALB,
2,2,2,53,120825.867993,51.416028,Bashkia Durrës,Northern Albania,Albania,"POLYGON ((19.43811 41.35243, 19.42969 41.30736...",ALB,
3,3,3,54,70617.849402,24.664147,Elbasan,Central Albania,Albania,"POLYGON ((20.08810 41.13621, 20.08638 41.12721...",ALB,
4,4,4,55,58537.557705,17.415388,Fier,Southern Albania,Albania,"POLYGON ((19.55382 40.75847, 19.55051 40.74050...",ALB,
...,...,...,...,...,...,...,...,...,...,...,...
333,333,0,72,67854.357322,29.795604,,,,"POLYGON ((20.85225 42.90091, 20.84647 42.87362...",KSV,
334,334,1,73,50956.758158,24.041732,,,,"POLYGON ((20.31269 42.69181, 20.30896 42.67364...",KSV,
335,335,2,74,225043.041450,101.665246,,,,"POLYGON ((21.16049 42.70089, 21.15854 42.69181...",KSV,
336,336,3,75,57571.206100,25.651477,,,,"POLYGON ((21.14319 42.39224, 21.13936 42.37411...",KSV,


In [81]:
hd_center.to_file(
    os.path.join(output_folder, "all_hd_urban_centers.geojson"), driver="GeoJSON"
)

# Delete GHSL files

In [85]:
bad_files = []
for root, dirs, files in os.walk(output_folder):
    for f in files:
        if f.endswith("100_V1_0.tif"):
            bad_files.append(os.path.join(root, f))

bad_files

['/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E1975_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E1980_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E1985_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E1990_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E1995_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E2000_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/ALB/GHSL_Rasters/GHS_BUILT_S_E2005_GLOBE_R2022A_54009_100_V1_0.tif',
 '/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/E

In [86]:
for f in bad_files:
    os.remove(f)