# Extract data for urban calculations

Test input for Tanzania

0. Select focal ADM, buffer by 1km, rasterize as [0, 1]
1. Download DEM data from ASTER and mosaic the tiles
2. Calculate slope of DEM
3. Extract water layer from Globcover
4. Rasterize building footprints
5. Select population layer
6. Standardize all rasters to population layer  
   a. Set area outside of focal admin to NoData  
   b. Set everything to 16bit  
   
   


In [1]:
import sys, os, importlib, shutil
import requests
import rasterio, elevation, richdem
import rasterio.warp
from rasterio import features

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import MultiPolygon, Polygon, box

#Import GOST urban functions
sys.path.append("../")
import src.UrbanRaster as urban
import src.urban_helper as helper

#Import raster helpers
sys.path.append("../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint

In [2]:
# Global administrative boundary shapefiles (Admin0 and Admin2 layers)
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"

# Read both layers once, Admin0 first; each country run filters them by ISO3 code
inG, inG2 = (gpd.read_file(shp_path) for shp_path in (global_bounds, global_bounds_adm2))

In [3]:
# Re-import the helper modules (same order as before) so edits made on disk take effect
for edited_module in (helper, rMisc):
    importlib.reload(edited_module)

def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder=None):
    """Run the urban workflow for one country, first with 1km inputs, then with 250m inputs.

    For each resolution a ``helper.urban_country`` object is created and driven
    through: extract_layers -> process_dem -> standardize_rasters ->
    calculate_urban (once for ``pop_files``, once for the GHS population file).
    After the 250m run, zonal statistics are written as CSV files into
    ``output_folder`` unless those files already exist.

    Parameters
    ----------
    iso3 : str
        Country code; used to filter ``inG``/``inG2`` on their 'ISO3' column and
        to build the default output folder name.
    inG : GeoDataFrame
        Country-level boundaries with 'ISO3' and 'geometry' columns.
    inG2 : GeoDataFrame
        Admin-2 boundaries with an 'ISO3' column; used for the admin-2 zonal step.
    pop_files : list of str
        Paths of the population rasters to standardize and classify.
    ea_file : str
        Path of a vector file of enumeration areas / communes. The zonal
        statistics are only calculated when this path exists.
    output_folder : str, optional
        Folder holding the country's outputs. Defaults to
        "/home/wb411133/temp/<iso3>_URBAN_DATA", the previously hard-coded value.

    Returns
    -------
    None
    """
    global_landcover  = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif"
    global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif"
    global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif"
    global_ghbuilt = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif"
    global_dem_1k = "/home/public/Data/GLOBAL/ELEV/noaa_1km.tif"

    if output_folder is None:
        output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3

    # Explicit copy: assigning a column onto the .loc selection directly is what
    # raised pandas' SettingWithCopyWarning on every run.
    inD = inG.loc[inG['ISO3'] == iso3].copy()
    # Buffer distance is expressed in the coordinate units of inG's CRS
    # (presumably metres - TODO confirm), so it is applied before reprojecting.
    inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500))
    inD = inD.to_crs({'init':'epsg:4326'})

    inD2 = inG2.loc[inG2['ISO3'] == iso3]
    inD2 = inD2.to_crs({'init':'epsg:4326'})

    ### Process 1km data
    xx = helper.urban_country(iso3, output_folder, inD, final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1K")
    tPrint("***** Extracting Global Layers %s" % iso3)
    xx.extract_layers(global_landcover, global_ghspop_1k, global_ghbuilt)
    tPrint("***** Downloading and processing elevation %s" % iso3)
    xx.process_dem(global_dem=global_dem_1k)
    tPrint("***** Standardizing rasters")
    xx.standardize_rasters(pop_files=pop_files)
    tPrint("***** Calculating Urban")
    xx.calculate_urban(pop_files)
    xx.calculate_urban([xx.ghspop_file])

    ### Process 250m data
    xx = helper.urban_country(iso3, output_folder, inD)
    tPrint("***** Extracting Global Layers %s" % iso3)
    xx.extract_layers(global_landcover, global_ghspop, global_ghbuilt)
    tPrint("***** Downloading and processing elevation %s" % iso3)
    xx.process_dem(global_dem=global_dem_1k)
    tPrint("***** Standardizing rasters")
    xx.standardize_rasters(pop_files=pop_files)
    tPrint("***** Calculating Urban")
    # The 250m run classifies the copies of the population rasters located in the final folder
    final_pop_files = [os.path.join(xx.final_folder, os.path.basename(x)) for x in pop_files]
    xx.calculate_urban(final_pop_files)
    xx.calculate_urban([xx.ghspop_file])

    tPrint("***** Calculating Zonal admin2")
    # NOTE(review): the admin-2 statistics do not use ea_file, yet they are also
    # skipped when ea_file is missing. Kept as-is because callers pass '' to skip
    # the whole zonal step - confirm whether that is intended.
    if os.path.exists(ea_file):
        adm2_csv = os.path.join(output_folder, "URBAN_ADMIN2_STATS_COMPILED.csv")
        ea_csv = os.path.join(output_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
        # Existing result files are never overwritten
        if not os.path.exists(adm2_csv):
            zonal_adm2 = xx.pop_zonal_admin(inD2)
            zonal_adm2.to_csv(adm2_csv)
        tPrint("***** Calculating Zonal communes")
        if not os.path.exists(ea_csv):
            inEA = gpd.read_file(ea_file)
            zonal_ea = xx.pop_zonal_admin(inEA)
            zonal_ea.to_csv(ea_csv)

In [6]:
# Process TZA
iso3 = "TZA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018".format(country=iso3)

# Four population rasters: two named with the lower-case ISO3 prefix,
# two "ppp_prj" files named with the upper-case ISO3 code
pop_files = [
    os.path.join(local_path, name_template % iso_code)
    for name_template, iso_code in (
        ("%s_ppp_2015_UNadj.tif", iso3.lower()),
        ("%s_ppp_2018_UNadj.tif", iso3.lower()),
        ("ppp_prj_2015_%s_UNadj.tif", iso3),
        ("ppp_prj_2018_%s_UNadj.tif", iso3),
    )
]
pop_2015_un, pop_2018_un, pop_2015_con, pop_2018_con = pop_files

output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3
ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')

# An empty string is passed instead of ea_file, so the os.path.exists(ea_file)
# check inside calculate_urban fails and no zonal statistics are produced
calculate_urban(iso3, inG, inG2, pop_files, '')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


10:33:21	***** Extracting Global Layers TZA
10:33:21	***** Downloading and processing elevation TZA
10:33:21	***** Standardizing rasters
/home/wb411133/temp/TZA_URBAN_DATA/TZA_ADMIN.tif
/home/wb411133/temp/TZA_URBAN_DATA/TZA_GHS1K.tif
/home/wb411133/temp/TZA_URBAN_DATA/TZA_LC_H20.tif
/home/wb411133/temp/TZA_URBAN_DATA/TZA_SLOPE.tif
/home/wb411133/temp/TZA_URBAN_DATA/TZA_DEM.tif
/home/wb411133/temp/TZA_URBAN_DATA/TZA_GHSBUILT.tif
/home/public/Data/COUNTRY/TZA/POPULATION/WORLDPOP/TZA_2015_2018/tza_ppp_2015_UNadj.tif
/home/public/Data/COUNTRY/TZA/POPULATION/WORLDPOP/TZA_2015_2018/tza_ppp_2018_UNadj.tif
/home/public/Data/COUNTRY/TZA/POPULATION/WORLDPOP/TZA_2015_2018/ppp_prj_2015_TZA_UNadj.tif
/home/public/Data/COUNTRY/TZA/POPULATION/WORLDPOP/TZA_2015_2018/ppp_prj_2018_TZA_UNadj.tif
10:33:54	***** Calculating Urban
10:33:54	***Starting 
10:33:54	/home/wb411133/temp/TZA_URBAN_DATA/FINAL_STANDARD_1KM/tza_ppp_2015_UNadj_urban.tif
10:34:14	/home/wb411133/temp/TZA_URBAN_DATA/FINAL_STANDARD_1KM/t

In [7]:
# Process GHA
# NOTE(review): the last recorded run of this cell ended with
# "AttributeError: 'NoneType' object has no attribute 'is_empty'" - possibly a
# null geometry in one of the vector inputs; confirm before relying on the outputs.
iso3 = "GHA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
# The later rasters for GHA are the 2017 files (the other country cells use 2018),
# so the variables are named after the year they actually hold
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2017_un = os.path.join(local_path, "%s_ppp_2017_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2017_con = os.path.join(local_path, "ppp_prj_2017_%s_UNadj.tif" % iso3)

pop_files = [pop_2015_un, pop_2017_un, pop_2015_con, pop_2017_con]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3
ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


11:24:47	***** Extracting Global Layers GHA
11:24:47	***** Downloading and processing elevation GHA
11:24:47	Downloading DEM
11:24:47	Calculating slope
11:24:47	***** Standardizing rasters
/home/wb411133/temp/GHA_URBAN_DATA/GHA_ADMIN.tif
/home/wb411133/temp/GHA_URBAN_DATA/GHA_GHS1K.tif
/home/wb411133/temp/GHA_URBAN_DATA/GHA_LC_H20.tif
/home/wb411133/temp/GHA_URBAN_DATA/GHA_SLOPE.tif
/home/wb411133/temp/GHA_URBAN_DATA/GHA_DEM.tif
/home/wb411133/temp/GHA_URBAN_DATA/GHA_GHSBUILT.tif
/home/public/Data/COUNTRY/GHA/POPULATION/WORLDPOP/gha_ppp_2015_UNadj.tif
/home/public/Data/COUNTRY/GHA/POPULATION/WORLDPOP/gha_ppp_2017_UNadj.tif
/home/public/Data/COUNTRY/GHA/POPULATION/WORLDPOP/ppp_prj_2015_GHA_UNadj.tif
/home/public/Data/COUNTRY/GHA/POPULATION/WORLDPOP/ppp_prj_2017_GHA_UNadj.tif
11:24:47	***** Calculating Urban
11:24:47	***Starting 
11:24:47	/home/wb411133/temp/GHA_URBAN_DATA/FINAL_STANDARD_1KM/gha_ppp_2015_UNadj_urban.tif
11:24:47	/home/wb411133/temp/GHA_URBAN_DATA/FINAL_STANDARD_1KM/gha_p

AttributeError: 'NoneType' object has no attribute 'is_empty'

In [None]:
# Process BGD
iso3 = "BGD"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)

# Two population rasters (2015 and 2018), both named with the lower-case ISO3 prefix
pop_files = [
    os.path.join(local_path, name_template % iso3.lower())
    for name_template in ("%s_ppp_2015_UNadj.tif", "%s_ppp_2018_UNadj.tif")
]
pop_2015_un, pop_2018_un = pop_files

output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3
ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file)

In [None]:
# Process VNM
iso3 = "VNM"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)

# Two population rasters (2015 and 2018), both named with the lower-case ISO3 prefix
pop_files = [
    os.path.join(local_path, name_template % iso3.lower())
    for name_template in ("%s_ppp_2015_UNadj.tif", "%s_ppp_2018_UNadj.tif")
]
pop_2015_un, pop_2018_un = pop_files

output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3
# The commune shapefile sits in a sub-folder of the country's output folder
ea_file = os.path.join(output_folder, 'Commune shapefiles', 'VN_communes2008.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file)

In [None]:
### Reload the helper modules so that later cells pick up any edits made on disk.
### (No 250m processing happens in this cell; that is done inside calculate_urban.)
importlib.reload(helper)
importlib.reload(rMisc)



# Debugging

In [None]:
importlib.reload(helper)

#Evaluate the population variations in the final products
for iso3 in ["VNM","TZA","GHA","BGD"]:
    output_folder = "/home/wb411133/temp/%s_URBAN_DATA" % iso3
    # Build the boundary for this country the same way calculate_urban does
    # (select by ISO3, buffer by 500, reproject to EPSG:4326). The loop used to
    # read a global `inD`, which no earlier cell defines, so it failed on a
    # fresh kernel or silently reused whatever object last carried that name.
    country_bounds = inG.loc[inG['ISO3'] == iso3].copy()
    country_bounds['geometry'] = country_bounds['geometry'].apply(lambda x: x.buffer(500))
    country_bounds = country_bounds.to_crs({'init':'epsg:4326'})
    xx = helper.urban_country(iso3, output_folder, country_bounds)
    xx.compare_pop_rasters()

In [None]:
#Summarize zonal results
# output_folder is whichever country folder was assigned most recently in this session
adm2_zonal, ea_zonal = (
    os.path.join(output_folder, csv_name)
    for csv_name in ("URBAN_ADMIN2_STATS_COMPILED.csv", "URBAN_COMMUNE_STATS_COMPILED.csv")
)

In [None]:
# Print the column total of the first three columns of the commune-level zonal table.
# Stored as `ea_stats` rather than `inD`, because the debugging loop passes a
# global `inD` to helper.urban_country as the admin boundary and must not
# receive this statistics table instead.
ea_stats = pd.read_csv(ea_zonal, index_col=0)
for col in ea_stats.columns[:3]:
    print(f'{col}: {ea_stats[col].sum()}')