# Extract data for urban calculations

Test input for Tanzania

0. Select focal ADM, buffer by 1km, rasterize as [0, 1]
1. Download DEM data from ASTER, mosaick
2. Calculate slope of DEM
3. Extract water layer from Globcover
4. Rasterize building footprints
5. Select population layer
6. Standardize all rasters to population layer  
   a. Set area outside of focal admin to NoData  
   b. Set everything to 16bit  
   
   


In [1]:
import sys
import os
import importlib
import rasterio
import rasterio.warp

import geopandas as gpd


# Import raster helpers
sys.path.append("../../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint

# Import GOST urban functions
sys.path.append("../../")
import src.UrbanRaster as urban
import src.urban_helper as helper

In [2]:
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"

inG = gpd.read_file(global_bounds)
inG2 = gpd.read_file(global_bounds_adm2)

runSmall = True
runLarge = True

In [3]:
importlib.reload(helper)
importlib.reload(rMisc)


def calculate_urban(iso3, inG, inG2, pop_files, ea_file, km=True, small=True):
    global_landcover = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif"
    global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif"
    global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif"
    global_ghbuilt = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif"
    global_dem_1k = "/home/public/Data/GLOBAL/ELEV/noaa_1km.tif"
    ghs_smod = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif"
    ghsl_vrt = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt"

    output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
    inD = inG.loc[inG["ISO3"] == iso3]
    inD["geometry"] = inD["geometry"].apply(lambda x: x.buffer(500))
    inD = inD.to_crs({"init": "epsg:4326"})

    inD2 = inG2.loc[inG2["ISO3"] == iso3]
    inD2 = inD2.to_crs({"init": "epsg:4326"})

    ### Process 1km data
    if km:
        xx = helper.urban_country(
            iso3,
            output_folder,
            inD,
            pop_files,
            final_folder="FINAL_STANDARD_1KM",
            ghspop_suffix="1k",
        )
        adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv")
        ea_res = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
        tPrint("***** Extracting Global Layers %s" % iso3)
        xx.extract_layers(
            global_landcover,
            global_ghspop,
            global_ghspop_1k,
            global_ghbuilt,
            ghsl_vrt,
            ghs_smod,
        )
        tPrint("***** Downloading and processing elevation %s" % iso3)
        xx.process_dem(global_dem=global_dem_1k)
        tPrint("***** Standardizing rasters")
        xx.standardize_rasters()
        tPrint("***** Calculating Urban")
        xx.calculate_urban()
        tPrint("***** Evaluating Data")
        xx.evaluateOutput()

    ### Process 250m data
    if small:
        xx = helper.urban_country(iso3, output_folder, inD, pop_files)
        tPrint("***** Extracting Global Layers %s" % iso3)
        xx.extract_layers(
            global_landcover,
            global_ghspop,
            global_ghspop_1k,
            global_ghbuilt,
            ghsl_vrt,
            ghs_smod,
        )
        tPrint("***** Downloading and processing elevation %s" % iso3)
        xx.process_dem(global_dem=global_dem_1k)
        tPrint("***** Standardizing rasters")
        xx.standardize_rasters()
        tPrint("***** Calculating Urban")
        xx.calculate_urban()
        xx.evaluateOutput()
        tPrint("***** Calculating Zonal admin2")
        if os.path.exists(ea_file):
            if not os.path.exists(
                os.path.join(output_folder, "URBAN_ADMIN2_STATS_COMPILED.csv")
            ):
                zonal_adm2 = xx.pop_zonal_admin(inD2)
                zonal_adm2.to_csv(
                    os.path.join(output_folder, "URBAN_ADMIN2_STATS_COMPILED.csv")
                )
            tPrint("***** Calculating Zonal communes")
            if not os.path.exists(
                os.path.join(output_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
            ):
                inEA = gpd.read_file(ea_file)
                zonal_ea = xx.pop_zonal_admin(inEA)
                zonal_ea.to_csv(
                    os.path.join(output_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
                )

In [None]:
# Process ETH
iso3 = "ETH"
local_path = "/home/public/Data/COUNTRY/{country}/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2016_UNadj.tif" % iso3.lower())
pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo16.tif" % iso3.lower()],
]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = ""

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

In [9]:
importlib.reload(helper)
# Process EGY
iso3 = "EGY"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(
    country=iso3
)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2013_UNadj.tif" % iso3.lower())
pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo16.tif" % iso3.lower()],
]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = ""

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


08:29:05	***** Extracting Global Layers EGY
08:29:05	***** Downloading and processing elevation EGY
08:29:05	***** Standardizing rasters
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_adm.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gpo.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat_lc.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_slo.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_ele.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gsmod.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gbu.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo15.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo16.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy1k_gpo.tif
08:29:05	***** Calculating Urban
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_upo15.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_upo16.tif
/home/wb411133/temp

In [None]:
# Process COL
iso3 = "COL"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(
    country=iso3
)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = ""

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


11:50:15	***** Extracting Global Layers COL
11:50:15	Extracting water


In [None]:
importlib.reload(helper)
# Process GHA
iso3 = "GHA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(
    country=iso3
)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2017_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2017_%s_UNadj.tif" % iso3)

pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo17.tif" % iso3.lower()],
    [pop_2015_con, "%s_cpo15.tif" % iso3.lower()],
    [pop_2018_con, "%s_cpo17.tif" % iso3.lower()],
]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "FINAL_EAS.shp")

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runsmall, km=runLarge)

In [None]:
importlib.reload(helper)
# Process BGD
iso3 = "BGD"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(
    country=iso3
)
ea_file = os.path.join(output_folder, "mauza11_reprojected.shp")
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())

pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo18.tif" % iso3.lower()],
]

output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

In [None]:
# Process TZA
iso3 = "TZA"
local_path = (
    "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018".format(
        country=iso3
    )
)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2018_%s_UNadj.tif" % iso3)

pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo18.tif" % iso3.lower()],
    [pop_2015_con, "%s_cpo15.tif" % iso3.lower()],
    [pop_2018_con, "%s_cpo18.tif" % iso3.lower()],
]

output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "FINAL_EAS.shp")

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

In [None]:
# Process VNM
iso3 = "VNM"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(
    country=iso3
)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3.lower()],
    [pop_2018_un, "%s_upo18.tif" % iso3.lower()],
    [pop_2015_con, "%s_cpo15.tif" % iso3.lower()],
    [pop_2018_con, "%s_cpo18.tif" % iso3.lower()],
]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "Commune shapefiles", "VN_communes2008.shp")

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

# Debugging

In [None]:
importlib.reload(urban)
pop_file = (
    "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k1k_gpo.tif"
)
urban_shp = urban.urbanGriddedPop(pop_file)
shps = urban_shp.calculateUrban(
    raster=out_file, densVal=1500, totalPopThresh=50000, smooth=True, queen=True
)

In [None]:
# Comparing complete GHS-SMOD classification with
ghs_smod = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif"
in_bounds = (
    "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd_adm.shp"
)
out_file = (
    "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_smod.tif"
)

inR = rasterio.open(ghs_smod)
inB = gpd.read_file(in_bounds)
rMisc.clipRaster(inR, inB, out_file)

In [None]:
inS = rasterio.open(out_file)
inP = rasterio.open(pop_file)
smod = inS.read()
pop = inP.read()
pop[pop < 0] = 0

In [None]:
smod_vals = [10, 11, 12, 13, 21, 22, 23, 30]
total_pop = pop.sum()
for val in smod_vals:
    cur_smod = (smod == val).astype(int)
    cur_pop = pop * cur_smod
    total_curpop = cur_pop.sum()
    print(f"{val}: {(total_curpop.sum()/total_pop*100)}")

In [None]:
54.1 + 17.6 + 0.3 + 17