# Extract data for urban calculations

Test input for Tanzania

0. Select focal ADM, buffer by 1km, rasterize as [0, 1]
1. Download DEM data from ASTER, mosaick
2. Calculate slope of DEM
3. Extract water layer from Globcover
4. Rasterize building footprints
5. Select population layer
6. Standardize all rasters to population layer  
   a. Set area outside of focal admin to NoData  
   b. Set everything to 16bit  
   
   


In [1]:
import sys, os, importlib, shutil
import requests
import rasterio, elevation, richdem
import rasterio.warp
from rasterio import features

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import MultiPolygon, Polygon, box, Point

#Import raster helpers
sys.path.append("../../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint

#Import GOST urban functions
sys.path.append("../../")
import src.UrbanRaster as urban
import src.urban_helper as helper



  shapely_geos_version, geos_capi_version_string


In [2]:
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"

inG = gpd.read_file(global_bounds)
inG2 = gpd.read_file(global_bounds_adm2)

runSmall = True
runLarge = True

In [3]:
importlib.reload(helper)
importlib.reload(rMisc)

def calculate_urban(iso3, inG, inG2, pop_files, ea_file, km=True, small=True):
    global_landcover  = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif"
    global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif"
    global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif"
    global_ghbuilt = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif"
    global_dem_1k = "/home/public/Data/GLOBAL/ELEV/noaa_1km.tif"
    ghs_smod = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif"
    ghsl_vrt = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt"

    output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
    admin2_250_stats = os.path.join(output_folder, "URBAN_ADMIN2_STATS_COMPILED.csv")
    commune_250_stats = os.path.join(output_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
    admin2_1k_stats = os.path.join(output_folder, "URBAN_ADMIN2_STATS_COMPILED_1k.csv")
    commune_1k_stats = os.path.join(output_folder, "URBAN_COMMUNE_STATS_COMPILED_1k.csv")
    
    inD = inG.loc[inG['ISO3'] == iso3]
    inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500))
    inD = inD.to_crs({'init':'epsg:4326'})
    
    inD2 = inG2.loc[inG2['ISO3'] == iso3]
    inD2 = inD2.to_crs({'init':'epsg:4326'})  
    
    ### Process 1km data
    if km:
        xx = helper.urban_country(iso3, output_folder, inD, pop_files,
                                final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k")
        adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") 
        ea_res   = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv")
        tPrint("***** Extracting Global Layers %s" % iso3)
        xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)
        tPrint("***** Downloading and processing elevation %s" % iso3)
        xx.process_dem(global_dem=global_dem_1k)
        tPrint("***** Standardizing rasters")
        xx.standardize_rasters()
        tPrint("***** Calculating Urban")
        xx.calculate_urban()
        tPrint("***** Calculating Zonal admin2")
        if os.path.exists(ea_file):
            if not os.path.exists(admin2_1k_stats):
                zonal_adm2 = xx.pop_zonal_admin(inD2)
                zonal_adm2.to_csv(admin2_1k_stats)
            tPrint("***** Calculating Zonal communes")
            if not os.path.exists(commune_1k_stats):
                inEA = gpd.read_file(ea_file)
                zonal_ea = xx.pop_zonal_admin(inEA)
                zonal_ea.to_csv(commune_1k_stats)
        tPrint("***** Evaluating Data")
        xx.evaluateOutput(admin2_1k_stats, commune_1k_stats)        
    
    ### Process 250m data 
    if small:
        xx = helper.urban_country(iso3, output_folder, inD, pop_files)
        tPrint("***** Extracting Global Layers %s" % iso3)
        xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)
        tPrint("***** Downloading and processing elevation %s" % iso3)
        xx.process_dem(global_dem=global_dem_1k)
        tPrint("***** Standardizing rasters")
        xx.standardize_rasters()
        tPrint("***** Calculating Urban")
        xx.calculate_urban()
        tPrint("***** Calculating Zonal admin2")
        if os.path.exists(ea_file):
            if not os.path.exists(admin2_250_stats):
                zonal_adm2 = xx.pop_zonal_admin(inD2)
                zonal_adm2.to_csv(admin2_250_stats)
            tPrint("***** Calculating Zonal communes")
            if not os.path.exists(commune_250_stats):
                inEA = gpd.read_file(ea_file)
                zonal_ea = xx.pop_zonal_admin(inEA)
                zonal_ea.to_csv(commune_250_stats)
        tPrint("***** Evaluating Data")
        xx.evaluateOutput(admin2_250_stats, commune_250_stats)
        
                
# Summarize Pierre's urbanization numbers
def calc_pp_urban(in_folder, default_pop_file, admin_layer):
    urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == ".tif"]
    cur_layer = urban_layers[0]
    inD = gpd.read_file(admin_layer)
    default_pop_1k = default_pop_file.replace(default_pop_file[:3], "%s1k" % default_pop_file[:3])
    for cur_layer in urban_layers:
        tPrint(cur_layer)
        #Open and read in urban data
        urban_r = rasterio.open(cur_layer)
        urban_data = urban_r.read()
        urban_data = (urban_data > 0).astype(urban_r.meta['dtype'])
        #Extract population data
        urban_layer = os.path.basename(cur_layer)  
        default_pop = default_pop_file
        if "1k" in urban_layer:
            default_pop = default_pop_1k
            pop_layer = os.path.basename(cur_layer)[:11]
            pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM")
        else:
            pop_layer = os.path.basename(cur_layer)[:9]
            pop_folder = os.path.join(output_folder, "FINAL_STANDARD")
        pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer)
        if not os.path.exists(pop_file):            
            pop_file = os.path.join(pop_folder, default_pop)
            
        pop_data = rasterio.open(pop_file).read()
        pop_data = pop_data * urban_data
        meta = urban_r.meta.copy()
        meta.update(dtype = pop_data.dtype)
        
        with rMisc.create_rasterio_inmemory(meta, pop_data) as pop_r:
            res = rMisc.zonalStats(inD, pop_r, reProj=True)
            res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
            
        inD[os.path.basename(cur_layer).replace(".tif","")] = res['SUM']
    return(inD)

In [4]:
importlib.reload(helper)
# Process GHA
iso3 = "GHA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2017_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2017_%s_UNadj.tif" % iso3)

pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo17.tif" % iso3.lower()], 
             [pop_2015_con, "%s_cpo15.tif" % iso3.lower()], 
             [pop_2018_con, "%s_cpo17.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
pp_urban = calc_pp_urban(os.path.join(output_folder, "ghana"), "%s_cpo17.tif" % iso3.lower(), ea_file)
pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(".shp", "_PP_Urban.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:44:01	***** Extracting Global Layers GHA
19:44:01	***** Downloading and processing elevation GHA
19:44:01	***** Standardizing rasters
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_adm.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat_lc.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_slo.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_ele.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gsmod.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gbu.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo15.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo17.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo15.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo17.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha1k_gpo.tif
19:44:01	***** Calculating Urban
/ho

  exec(code_obj, self.user_global_ns, self.user_ns)


19:44:05	***** Extracting Global Layers GHA
19:44:05	***** Downloading and processing elevation GHA
19:44:05	***** Standardizing rasters
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_adm.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat_lc.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_slo.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_ele.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gsmod.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gbu.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo15.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo17.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo15.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo17.tif
/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif
19:44:06	***** Calculating Urban
/home

In [5]:
# Process ETH
iso3 = "ETH"
local_path = "/home/public/Data/COUNTRY/{country}/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2016_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo16.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "Kebeles", "all_kebeles.shp")

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:56:54	***** Extracting Global Layers ETH
19:56:54	***** Downloading and processing elevation ETH
19:56:54	***** Standardizing rasters
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_adm.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gpo.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat_lc.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_slo.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_ele.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gsmod.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gbu.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo15.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo16.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth1k_gpo.tif
19:56:55	***** Calculating Urban
/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/eth1k_upo15.tif
/home/wb411133/temp/ETH_URBAN_DATA_new_nam

In [6]:
importlib.reload(helper)
# Process COL
iso3 = "COL"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = ''

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:57:33	***** Extracting Global Layers COL
19:57:33	***** Downloading and processing elevation COL
19:57:33	***** Standardizing rasters
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_adm.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gpo.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat_lc.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_slo.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_ele.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gsmod.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gbu.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_upo15.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/col1k_gpo.tif
19:57:33	***** Calculating Urban
/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/col1k_upo15.tif
/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/col1k_gpo.tif
19:57:33	***** Calculat

In [None]:
importlib.reload(helper)
# Process EGY
iso3 = "EGY"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2013_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo16.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3

ea_file = os.path.join(output_folder, "EGY_adm3.shp")

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:58:10	***** Extracting Global Layers EGY
19:58:10	***** Downloading and processing elevation EGY
19:58:10	***** Standardizing rasters
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_adm.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gpo.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat_lc.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_slo.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_ele.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gsmod.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gbu.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo15.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo16.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy1k_gpo.tif
19:58:10	***** Calculating Urban
/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_upo15.tif
/home/wb411133/temp/EGY_URBAN_DATA_new_nam

In [None]:
importlib.reload(helper)
# Process AGO
iso3 = "AGO"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'admin', 'bairros.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:58:33	***** Extracting Global Layers AGO
19:58:33	***** Downloading and processing elevation AGO
19:58:33	***** Standardizing rasters
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_adm.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gpo.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat_lc.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_slo.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_ele.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gsmod.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gbu.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo15.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo18.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago1k_gpo.tif
19:58:33	***** Calculating Urban
/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/ago1k_upo15.tif
/home/wb411133/temp/AGO_URBAN_DATA_new_nam

In [None]:
importlib.reload(helper)
# Process BGD
iso3 = "BGD"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())

pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]

output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')
calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
#pp_urban = calc_pp_urban(os.path.join(output_folder, "bangladesh"), "%s_upo18.tif" % iso3.lower(), ea_file)
#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(".shp", "_PP_Urban.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:59:13	***** Extracting Global Layers BGD
19:59:13	***** Downloading and processing elevation BGD
19:59:13	***** Standardizing rasters
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_adm.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat_lc.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_slo.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_ele.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gsmod.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gbu.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo15.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo18.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd1k_gpo.tif
19:59:13	***** Calculating Urban
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_upo15.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_nam

  exec(code_obj, self.user_global_ns, self.user_ns)


19:59:15	***** Extracting Global Layers BGD
19:59:15	***** Downloading and processing elevation BGD
19:59:15	***** Standardizing rasters
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_adm.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat_lc.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_slo.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_ele.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gsmod.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gbu.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo15.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo18.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif
19:59:16	***** Calculating Urban
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD/bgd_upo15.tif
/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINA

In [None]:
importlib.reload(helper)
# Process VNM
iso3 = "VNM"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()]]
output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, 'VN_communes2008.shp')

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)
#pp_urban = calc_pp_urban(os.path.join(output_folder, "vietnam"), "%s_upo18.tif" % iso3.lower(), ea_file)
#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(".shp", "_PP_Urban.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


19:59:20	***** Extracting Global Layers VNM
19:59:20	***** Downloading and processing elevation VNM
19:59:20	***** Standardizing rasters
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_adm.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gpo.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat_lc.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_slo.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_ele.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gsmod.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gbu.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo15.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo18.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm1k_gpo.tif
19:59:20	***** Calculating Urban
/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/vnm1k_upo15.tif
/home/wb411133/temp/VNM_URBAN_DATA_new_nam

In [None]:
importlib.reload(helper)
# Process TZA
iso3 = "TZA"
local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3.lower())
pop_2018_un = os.path.join(local_path, "%s_ppp_2018_UNadj.tif" % iso3.lower())
pop_2015_con = os.path.join(local_path, "ppp_prj_2015_%s_UNadj.tif" % iso3)
pop_2018_con = os.path.join(local_path, "ppp_prj_2018_%s_UNadj.tif" % iso3)

pop_files = [[pop_2015_un, "%s_upo15.tif" % iso3.lower()], 
             [pop_2018_un, "%s_upo18.tif" % iso3.lower()], 
             [pop_2015_con, "%s_cpo15.tif" % iso3.lower()], 
             [pop_2018_con, "%s_cpo18.tif" % iso3.lower()]]

output_folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3
ea_file = ''

calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  super(GeoDataFrame, self).__setitem__(key, value)
  return _prepare_from_string(" ".join(pjargs))


06:29:09	***** Extracting Global Layers TZA
06:29:09	***** Downloading and processing elevation TZA
06:29:09	***** Standardizing rasters
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_adm.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gpo.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat_lc.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_slo.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_ele.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gsmod.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gbu.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo15.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo18.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo15.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo18.tif
/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza1k_gpo.tif
06:32:41	***** Calculating Urban
/ho

In [None]:
# Process point location analysis
input_file = os.path.join(output_folder, "sample_imp.csv")
inD = pd.read_csv(input_file)
geoms = [Point(row['gps_imp_lo'], row['gps_imp_la']) for idx, row in inD.iterrows()]
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})
if inD.crs != urban_r.crs:
    inD = inD.to_crs(urban_r.crs)
geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()]

In [None]:
# Summarize Pierre's urbanization numbers
in_folder = os.path.join(output_folder, "tanzania")
urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]

cur_layer = urban_layers[0]
for cur_layer in urban_layers:
    tPrint(cur_layer)
    urban_r = rasterio.open(cur_layer)
    urban_data = urban_r.read()
    urban_data = (urban_data > 0).astype(urban_r.meta['dtype'])
    '''if "1k" in cur_layer:
        pop_layer = os.path.basename(cur_layer)[:11]
        pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM")
    else:
        pop_layer = os.path.basename(cur_layer)[:9]
        pop_folder = os.path.join(output_folder, "FINAL_STANDARD")
    pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer)

    pop_data = rasterio.open(pop_file).read()
    '''
    #pop_data = pop_data * urban_data
    with rMisc.create_rasterio_inmemory(urban_r.profile, urban_data) as pop_r:
        res = pop_r.sample(geoms)
        res = [x[0] for x in list(res)]
    inD[os.path.basename(cur_layer).replace(".tif","")] = res

In [None]:
pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(input_file.replace(".csv", "_urban_PP.csv"))

In [None]:
pop_tiffs = ['/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo15.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo18.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo15.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo18.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_gpo.tif']

for pFile in pop_tiffs:
    urb_file = pFile.replace(".tif", "_urban.tif")
    hd_file = pFile.replace(".tif", "_urban_hd.tif")
    for curFile in [urb_file, hd_file]:
        inUrb = rasterio.open(curFile)
        if inD.crs!= inUrb.crs:
            inD = inD.to_crs(inUrb.crs)
        geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()]
        urb_res = inUrb.sample(geoms)
        inD[os.path.basename(curFile).replace(".tif","")] = [x[0] for x in list(urb_res)]
pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(input_file.replace(".csv", "_urban.csv"))

In [None]:
# Calculate national summaries of urbanization
pop_tiffs = ['/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo15.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo18.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo15.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo18.tif',
'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_gpo.tif']

for pFile in pop_tiffs:
    urb_file = pFile.replace(".tif", "_urban.tif")
    hd_file = pFile.replace(".tif", "_urban_hd.tif")
    popR = rasterio.open(pFile)
    popD = popR.read()
    popD[popD < 0] = 0
    
    urbR = rasterio.open(urb_file)
    urbD = urbR.read()

    hdR = rasterio.open(hd_file)
    hdD = hdR.read()
    
    print(f"{os.path.basename(pFile)}: {round(popD.sum())}, {round((popD * urbD).sum())},  \
                     hd pop: {round((popD * hdD).sum())}")
    

# Debugging

In [None]:
# combine Ethiopian Kebeles
folder = "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/Kebeles"
files = [x for x in os.listdir(folder) if x[-4:] == ".shp"]
for f in files:
    xx = gpd.read_file(os.path.join(folder, f))
    try:
        final = final.append(xx)
    except:
        final = xx

In [None]:
final.reset_index(inplace=True)

In [None]:
final.to_file(os.path.join(folder, "all_kebeles.shp"))

In [None]:
iso3 = 'tza'
folder = "/home/wb411133/temp/%s_URBAN_DATA_new_naming" % iso3.upper()

pop_250_base = os.path.join(folder, "%s_gpo.tif" % iso3)
pop_1k_base = os.path.join(folder, "%s1k_gpo.tif" % iso3)

pop_250_scaled = os.path.join(folder, 'FINAL_STANDARD', "%s_gpo.tif" % iso3)
pop_1k_scaled = os.path.join(folder, 'FINAL_STANDARD_1KM', "%s1k_gpo.tif" % iso3)

In [None]:
for rFile in [pop_250_base, pop_250_scaled, pop_1k_base, pop_1k_scaled]:
    inR = rasterio.open(rFile)
    inD = inR.read()
    inD[inD < 0] = 0
    tPrint(f"{round(inD.sum())}")

In [None]:
final_pop = pop_1k_base
final_urban    = final_pop.replace(".tif", "_urban.tif")
final_urban_hd = final_pop.replace(".tif", "_urban_hd.tif")
urbanR = urban.urbanGriddedPop(final_pop)
in_raster = rasterio.open(final_pop)
total_ratio = (in_raster.res[0] * in_raster.res[1]) / 1000000
total_ratio 

In [None]:
# calculate urban from 1k population layer with various warping methods
urban_shp   = urbanR.calculateUrban(densVal = 300,  totalPopThresh=5000,  raster=final_urban)
cluster_shp = urbanR.calculateUrban(densVal = 1500, totalPopThresh=50000, raster=final_urban_hd, smooth=True, queen=True)
    

In [None]:
#Summarize population
def summarize_pop(final_pop):
    final_pop = pop_1k_base
    final_urban    = final_pop.replace(".tif", "_urban.tif")
    final_urban_hd = final_pop.replace(".tif", "_urban_hd.tif")

    urbanR = rasterio.open(final_urban)
    urbanHDR = rasterio.open(final_urban_hd)
    urbanD = urbanR.read()
    hdD = urbanHDR.read()
    popD = in_raster.read()
    popD[popD == in_raster.meta['nodata']] = 0

    totalPop = popD.sum()
    urbanPop = (popD * urbanD).sum()
    hdPop = (popD * hdD).sum()
    print(f'Per urban: {(urbanPop/totalPop*100)}')
    print(f'Per HD urban: {(hdPop/totalPop*100)}')

In [None]:
summarize_pop(final_pop)

In [None]:
summarize_pop("/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1K/tza1k_gpo.tif")

In [None]:
global_ghsl = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt"
pop1k = '/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm1k_gpo.tif'
iso3 = 'VNM'
in_folder = '/home/wb411133/temp/TESTING_WATER_GHSL'
inD = gpd.read_file(os.path.join(in_folder, 'aoi.geojson'))
out_raw = os.path.join(in_folder, "ghsl_water.tif")
out_pop = os.path.join(in_folder, "ghs_pop_1k.tif")

In [None]:
# Why aren't water masks extracting
inR = rasterio.open(pop1k)
if inD.crs != inR.crs:
    inD = inD.to_crs(inR.crs)
ul = inR.index(*inD.total_bounds[0:2])
lr = inR.index(*inD.total_bounds[2:4])
# read the subset of the data into a numpy array
window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1)))
data = inR.read(1, window=window, masked = False)

In [None]:
b = inD.total_bounds
new_transform = rasterio.transform.from_bounds(b[0], b[1], b[2], b[3], data.shape[1], data.shape[0])
meta = inR.meta.copy()
meta.update(driver='GTiff',width=data.shape[1], height=data.shape[0], transform=new_transform)
data = data.astype(meta['dtype'])
with rasterio.open(out_pop, 'w', **meta) as outR:
    outR.write_band(1, data)

In [None]:
#test some resampling
template_r = out_pop
in_raster = out_raw

tempR = rasterio.open(template_r)
inR = rasterio.open(in_raster)
inD = inR.read()[0,:,:]

for rDef in [
                [rasterio.warp.Resampling.nearest, "nearest"],
                [rasterio.warp.Resampling.bilinear, "bil"],
                [rasterio.warp.Resampling.max, "max"],
                [rasterio.warp.Resampling.cubic, "cubic"]
            ]:
    out_array = np.zeros(tempR.shape)
    rasterio.warp.reproject(inD, out_array, 
                        src_transform=inR.meta['transform'], dst_transform=tempR.meta['transform'],
                        src_crs = inR.crs, dst_crs = tempR.crs,
                        src_nodata = inR.meta['nodata'], dst_nodata = tempR.meta['nodata'],
                        resampling = rDef[0])
    print(rDef[0])
    print(out_array.sum())
    out_file = os.path.join(in_folder, "ghsl_wat_%s.tif" % rDef[1])
    meta = tempR.meta.copy()
    meta.update(dtype=out_array.dtype)
    with rasterio.open(out_file, 'w', **meta) as outR:
        outR.write_band(1, out_array)

In [None]:
import os, shutil

In [None]:
# Delete existing files
in_folder = "/home/wb411133/temp"
for root, dirs, files in os.walk(in_folder):
    for d in dirs:
        if (d == "FINAL_STANDARD") or (d == "FINAL_STANDARD_1KM"):
            cur_dir = os.path.join(root, d)
            if os.path.isdir(cur_dir):
                shutil.rmtree(cur_dir)