# Extract data for urban calculations

Test input for Tanzania

0. Select focal ADM, buffer by 1km, rasterize as [0, 1]
1. Download DEM data from ASTER and mosaic the tiles
2. Calculate slope of DEM
3. Extract water layer from Globcover
4. Rasterize building footprints
5. Select population layer
6. Standardize all rasters to population layer  
   a. Set area outside of focal admin to NoData  
   b. Set everything to 16bit  
   
   


In [52]:
import sys, os, importlib, shutil, pathlib, datetime
import requests
import rasterio, elevation, richdem
import rasterio.warp
from rasterio import features
from datetime import datetime

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import MultiPolygon, Polygon, box, Point

#Import raster helpers
sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.metadataMisc as meta
from GOSTRocks.misc import tPrint

#Import GOST urban functions
sys.path.append("../../../src")
import GOST_Urban.UrbanRaster as urban
import GOST_Urban.urban_helper as helper

#Import local functions
import novelUrbanization as nu
from novelUrbanization import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
# Global administrative boundary shapefiles (Admin0 and Admin2 polygon layers)
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"

# Load both boundary layers into GeoDataFrames
inG = gpd.read_file(global_bounds)
inG2 = gpd.read_file(global_bounds_adm2)

# Flags handed to nu.calculate_urban as small= / km= (that call is currently commented out)
runSmall = True
runLarge = True

# Convert EA csv files to geometry

In [None]:
# Gather every csv below the EA folder, leaving out files whose name contains "URBAN"
in_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/"
ea_files = [
    os.path.join(cur_root, file_name)
    for cur_root, _, file_names in os.walk(in_folder)
    for file_name in file_names
    if file_name.endswith(".csv") and "URBAN" not in file_name
]

ea_files

In [None]:
# Preview the first rows of the last EA csv in ea_files
pd.read_csv(ea_files[-1]).head()

In [None]:
def try_float(x):
    """Convert ``x`` to a float, or return ``None`` if it cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert (string, number, ``None``, ...).

    Returns
    -------
    float or None
        ``float(x)`` when the conversion succeeds, otherwise ``None``.
    """
    try:
        return float(x)
    # Only conversion failures mean "not a number"; a bare except would also
    # swallow KeyboardInterrupt / SystemExit while a long column is processed.
    except (TypeError, ValueError, OverflowError):
        return None

def read_geog(file, lat_column, lon_column, crs='epsg:4326', write_out=True):
    """Read a csv of point locations and return it as a point GeoDataFrame.

    Rows whose latitude or longitude cannot be converted to a float are dropped.
    The table shape is printed before and after that filter.

    Parameters
    ----------
    file : str
        Path to the input csv.
    lat_column, lon_column : str
        Names of the columns holding latitude and longitude.
    crs : str, optional
        CRS assigned to the output geometries, by default 'epsg:4326'.
    write_out : bool, optional
        When True, also write the result as GeoJSON next to the input, using
        the input path with its extension swapped for ``.geojson``.

    Returns
    -------
    geopandas.GeoDataFrame
        The cleaned table with one Point geometry per row.
    """
    print(os.path.basename(file))
    # Swap only the extension: str.replace would also rewrite ".csv" inside a
    # directory name, and for a path without ".csv" it would return the input
    # path itself, so the GeoJSON would overwrite the source file.
    out_file = os.path.splitext(file)[0] + ".geojson"
    inD = pd.read_csv(file)

    print(inD.shape)
    inD[lat_column] = inD[lat_column].apply(try_float)
    inD[lon_column] = inD[lon_column].apply(try_float)
    inD = inD.loc[~(inD[lat_column].isna() | inD[lon_column].isna())].copy()
    print(inD.shape)

    # Build the points from the two columns directly; a row-wise apply returns
    # a DataFrame (not a Series of Points) when no rows survive the filter.
    inD_geom = [Point(lon, lat) for lon, lat in zip(inD[lon_column], inD[lat_column])]
    inD = gpd.GeoDataFrame(inD, geometry=inD_geom, crs=crs)

    if write_out:
        inD.to_file(out_file, driver="GeoJSON")
    return inD

# Each EA file names its coordinate columns differently, so every call spells them out.
# NOTE(review): the numeric indices depend on the os.walk ordering of ea_files - confirm
# which file sits at each index before re-enabling one of the commented calls.
#res = read_geog(ea_files[0], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[1], "lat", "lon")
#res = read_geog(ea_files[2], "latitude", "longitude")
#res = read_geog(ea_files[3], "latitude", "longitude")
#res = read_geog(ea_files[4], "lat_mean", "long_mean")
#res = read_geog(ea_files[5], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[6], "latdd_corrected", "londd_corrected")
#res = read_geog(ea_files[7], "lat_modified","lon_modified")
#res = read_geog(ea_files[8], "lat_corrected", "lon_corrected")
#res = read_geog(ea_files[9], "lat_corrected", "lon_corrected")
res = read_geog(ea_files[-1], "latDD_corrected", "lonDD_corrected")

# Run individual countries

In [None]:
# Process ETH: set up the WorldPop inputs, then tabulate nu.calc_pp_urban results for the admin3 file
iso3 = "ETH"
iso3_lc = iso3.lower()
local_path = "/home/public/Data/COUNTRY/{country}/WORLDPOP/".format(country=iso3)
pop_2015_un = os.path.join(local_path, "%s_ppp_2015_UNadj.tif" % iso3_lc)
# NOTE(review): the name says 2018, but the raster (and its upo16 output name) is for 2016
pop_2018_un = os.path.join(local_path, "%s_ppp_2016_UNadj.tif" % iso3_lc)
pop_files = [
    [pop_2015_un, "%s_upo15.tif" % iso3_lc],
    [pop_2018_un, "%s_upo16.tif" % iso3_lc],
]
output_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming" % iso3
ea_file = os.path.join(output_folder, "admin3", "Ethiopia_pti_admin3.shp")
#ea_file = os.path.join(output_folder, "Kebeles", "all_kebeles.shp")
#ea_file = os.path.join(output_folder, "gadm36_ETH_2.shp")

#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)
pp_urban = nu.calc_pp_urban(
    os.path.join(output_folder, "ethiopia"),
    "%s_gpo.tif" % iso3_lc,
    ea_file,
    output_folder,
)
# Drop the geometry column so the result can be written as a plain csv table
pp_urban_table = pd.DataFrame(pp_urban.drop(['geometry'], axis=1))
pp_urban_table.to_csv(os.path.join(output_folder, f"{iso3}_DB_UrbanPopulation_admin3.csv"))

In [None]:
input_file = os.path.join(output_folder, "HBS_GPS.csv")
pop_tiffs = ["eth_gpo.tif", "eth_upo15.tif", 'eth_upo16.tif']
all_tiffs = []
base_folder = os.path.join(output_folder, "FINAL_STANDARD")
base_folder_1km = os.path.join(output_folder, "FINAL_STANDARD_1KM")
# Queue each raster from FINAL_STANDARD followed by its "eth1k" counterpart in FINAL_STANDARD_1KM
for pFile in pop_tiffs:
    all_tiffs.append(os.path.join(base_folder, pFile))
    all_tiffs.append(os.path.join(base_folder_1km, pFile.replace("eth", "eth1k")))

# Read in ETH HH locations, drop rows missing either coordinate
inD = pd.read_csv(input_file)
inD = inD.dropna(subset=['latDD_corrected', 'lonDD_corrected'])
geoms = [Point(lon, lat) for lon, lat in zip(inD['lonDD_corrected'], inD['latDD_corrected'])]
# Use the plain authority string (as read_geog does); the {'init': ...} form is deprecated
inD = gpd.GeoDataFrame(inD, geometry=geoms, crs='epsg:4326')
# Calculate point urbanization for degree of urbanization
out_file = os.path.join(output_folder, f"{iso3}_DoU_Urban.csv")
nu.point_urban_summaries(inD, all_tiffs, out_file)
# Calculate point urbanization for PP urban
out_file = os.path.join(output_folder, f"{iso3}_DB_Urban.csv")
in_folder = os.path.join(output_folder, "ethiopia")
# NOTE(review): every entry of the folder is passed on, not only .tif files - confirm it holds rasters only
pop_tiffs = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]
nu.pp_point_urban_summaries(inD, pop_tiffs, out_file)

# Compile and copy mapping data

In [None]:
# Copy the urban extent rasters of every country into one shared mapping folder
countries = {'AGO':'angola','BGD':'bangladesh','EGY':'egypt','ETH':'ethiopia',
             'GHA':'ghana','TZA':'tanzania','VNM':'vietnam'}
# The destination is the same for all countries, so it is defined once
out_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_Data"
for iso3, country_folder in countries.items():
    data_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming/" % iso3
    dou_folder = os.path.join(data_folder, "FINAL_STANDARD")
    db_folder  = os.path.join(data_folder, country_folder)

    dou_urban = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban.tif')
    dou_urban_hd = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban_hd.tif')

    db_urban_cc = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_cc.tif")
    db_urban_co = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_co.tif")
    db_urban_ur = os.path.join(db_folder, f"{iso3.lower()}_upo15d20b2000_ur.tif")

    for uFile in [dou_urban, dou_urban_hd, db_urban_cc, db_urban_co, db_urban_ur]:
        file_exists = os.path.exists(uFile)
        print(f'{iso3}: {file_exists}')
        if not file_exists:
            # shutil.copy raises on a missing source; skipping keeps one absent
            # raster from aborting the copy for all remaining countries.
            continue
        os.makedirs(out_folder, exist_ok=True)
        out_file = os.path.join(out_folder, os.path.basename(uFile))
        shutil.copy(uFile, out_file)
        

# Compile zonal results

In [60]:
# Copy the per-country zonal result csvs into one folder, flagging files older than the cutoff
in_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/"
out_folder = os.path.join(in_folder, "URBAN_ZONAL_RESULTS")
if not os.path.exists(out_folder):
    os.makedirs(out_folder)

cutoff_date = datetime(2021,6,1)
result_tags = ("EA_PP_URBAN_Updated", "EA_WB_URBAN_", "HH_GPS")
for cur_root, _, cur_files in os.walk(in_folder):
    if "URBAN_DATA_new_naming" not in cur_root:
        continue
    # The folder name starts with the country code, e.g. AGO_URBAN_DATA_new_naming
    country = os.path.basename(cur_root).split("_")[0]
    if country not in nu.EA_DEFS.keys():
        continue
    for f in cur_files:
        if not any(tag in f for tag in result_tags):
            continue
        src_path = os.path.join(cur_root, f)
        date = datetime.fromtimestamp(pathlib.Path(src_path).stat().st_mtime)
        if cutoff_date < date:
            print(f'{country}: {f} - {date}')
        else:
            print(f'***OLD: {country}: {f} - {date}')
        # Old files are reported above but still copied
        shutil.copy(src_path, os.path.join(out_folder, f))

AGO: AGO_HH_GPS_WB_URBAN.csv - 2022-06-07 11:41:21.951099
AGO: AGO_HH_GPS_PP_URBAN.csv - 2022-06-07 11:41:32.765155
AGO: AGO_EA_WB_URBAN_1K.csv - 2022-08-18 08:45:53.924261
AGO: AGO_EA_WB_URBAN_250.csv - 2022-08-18 08:56:40.919611
AGO: AGO_EA_PP_URBAN_Updated.csv - 2022-08-25 15:20:38.479666
ETH: ETH_HH_GPS_WB_URBAN.csv - 2022-06-06 14:21:00.484850
ETH: ETH_HH_GPS_PP_URBAN.csv - 2022-06-06 14:24:16.564839
ETH: ETH_EA_WB_URBAN_1K.csv - 2022-08-18 08:35:50.220135
ETH: ETH_EA_WB_URBAN_250.csv - 2022-08-18 08:36:23.795309
ETH: ETH_EA_PP_URBAN_Updated.csv - 2022-08-25 14:31:45.735654
BFA: BFA_HH_GPS_WB_URBAN.csv - 2022-06-06 14:20:44.531770
BFA: BFA_HH_GPS_PP_URBAN.csv - 2022-06-06 14:20:51.765806
BFA: BFA_EA_WB_URBAN_1K.csv - 2022-08-18 08:35:50.867139
BFA: BFA_EA_WB_URBAN_250.csv - 2022-08-18 08:36:23.564308
BFA: BFA_EA_PP_URBAN_Updated.csv - 2022-08-25 14:32:27.461868
CIV: CIV_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:55.492918
CIV: CIV_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:07.917980
CIV: 

In [57]:
# Display the cutoff date that file modification times are compared against when compiling zonal results
datetime(2021,6,1)

datetime.datetime(2021, 6, 1, 0, 0)

In [None]:
# Delete all zonal stats
# NOTE(review): in_folder is reassigned by several other cells - confirm it points at the Data folder before running
stats_tags = ("URBAN_COMMUNE_STATS", "URBAN_ADMIN2")
for cur_root, _, cur_files in os.walk(in_folder):
    if "URBAN_DATA_new_naming" not in cur_root:
        continue
    country = os.path.basename(cur_root).split("_")[0]
    if country not in nu.EA_DEFS.keys():
        continue
    for f in cur_files:
        if any(tag in f for tag in stats_tags):
            print(f'{country}: {f}')
            os.remove(os.path.join(cur_root, f))

# Generate Metadata

In [34]:
# Metadata workbook from the LSO folder, read here as the template
template_metadata = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/METADATA/metadata.xlsx"
# First sheet: dataset-level entries (accessed later through dataset_info.Definition)
dataset_info = pd.read_excel(template_metadata, sheet_name=0)
# Second sheet: per-layer descriptions, with the sheet's first column used as the index
layer_info = pd.read_excel(template_metadata, sheet_name=1, index_col=0)

In [43]:
# Per-country project folder; the {ISO3} placeholder is filled in below
base_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/{ISO3}_URBAN_DATA_new_naming"
country_name = "Angola"
iso3 = 'AGO'
in_folder = base_folder.format(ISO3=iso3)
out_dir = os.path.join(in_folder, 'metadata')

# Build the metadata for in_folder with the GOSTRocks metadata helper
make_meta = meta.metadata_gost(in_folder, out_dir)
# NOTE(review): `layers` is not read again in the visible cells - confirm whether
# get_layers() has to run before generate_metadata()
layers = make_meta.get_layers()
# The result is indexed later with the keys 'metadata' and 'fields'
metadata = make_meta.generate_metadata()


In [44]:
# Swap the "lso" text in the template layer names for the current country's lower-case ISO3 code.
# NOTE(review): layer_info is overwritten in place, so after one run no "lso" is left to replace -
# re-read the template before switching to another country.
layer_info['layer_name'] = [p.replace("lso", iso3.lower()) for p in layer_info['layer_name']]

In [45]:
# Keep the layer name (the merge key used later) plus the descriptive columns taken from the template
sel_info = layer_info.loc[:,['layer_name', 'layer_label','description','source_name','source_url','data_process_summary']]
sel_info

Unnamed: 0,layer_name,layer_label,description,source_name,source_url,data_process_summary
83,ago_adm,ADM0 raster,Official World Bank boundaries rasterized to o...,World Bank,,Rasterized to identify area covered by country
75,ago_cpo15,Constrained WorldPop 2015,"WorldPop population for 2015, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
79,ago_cpo15_urban,Constrained WorldPop 2015 Urban Extent,Urban extents calculated from constrained Worl...,WorldPop/EC urban extents,EC Method (https://ghsl.jrc.ec.europa.eu/degur...,WB code (https://github.com/worldbank/GOST_Urban)
80,ago_cpo15_urban_hd,Constrained WorldPop 2015 High Density Urban E...,Urban extents calculated from constrained Worl...,WorldPop/EC urban extents,EC Method (https://ghsl.jrc.ec.europa.eu/degur...,WB code (https://github.com/worldbank/GOST_Urban)
77,ago_cpo20,Constrained WorldPop 2020,"WorldPop population for 2020, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
81,ago_cpo20_urban,Constrained WorldPop 2020 Urban Extent,Urban extents calculated from constrained Worl...,WorldPop/EC urban extents,EC Method (https://ghsl.jrc.ec.europa.eu/degur...,WB code (https://github.com/worldbank/GOST_Urban)
82,ago_cpo20_urban_hd,Constrained WorldPop 2020 High Density Urban E...,Urban extents calculated from constrained Worl...,WorldPop/EC urban extents,EC Method (https://ghsl.jrc.ec.europa.eu/degur...,WB code (https://github.com/worldbank/GOST_Urban)
85,ago_des,Globcover 2015 Desert,Bare areas (value 200) extracted from Globcove...,Globcover 2015,https://www.esa.int/ESA_Multimedia/Images/2018...,Downloaded and single value extracted
61,ago_ele,SRTM Elevation,1km SRTM elevation product downloaded from NOAA,SRTM 1-arc second,https://www.usgs.gov/centers/eros/science/usgs...,Downloaded and scaled to final resolution usin...
65,ago_gbu,GHS Built-up 2015,Percent of pixel that is built-up in 2015 base...,GHS-BUILT R2015B,https://ghsl.jrc.ec.europa.eu/ghs_bu.php,Downloaded and scaled to final resolution usin...


In [46]:
# Start from the generated layer metadata and drop its descriptive columns,
# which are taken from the template table (sel_info) instead
final_meta = metadata['metadata']
final_meta = final_meta.loc[:,~final_meta.columns.isin(['layer_label','description','source_name','source_url','data_process_summary'])]
# Assign the merge result: previously it was only displayed, so the final_meta
# written out later did not carry the template descriptions.
final_meta = final_meta.merge(sel_info, on='layer_name')
final_meta

Unnamed: 0,layer_name,data_type,crs_name,crs_code,num_dimensions,min_lon,max_lon,min_lat,max_lat,vector_shape_type,vector_object_count,folder,raster_width,raster_height,raster_res,layer_label,description,source_name,source_url,data_process_summary
0,ago_adm,Raster,World_Mollweide,,1,1139750.0,2385000.0,-2217000.0,-541750.0,,,,6701.0,4981.0,250.0,ADM0 raster,Official World Bank boundaries rasterized to o...,World Bank,,Rasterized to identify area covered by country
1,ago_adm,Raster,World_Mollweide,,1,1139750.0,2385000.0,-2217000.0,-541750.0,,,/FINAL_STANDARD,6701.0,4981.0,250.0,ADM0 raster,Official World Bank boundaries rasterized to o...,World Bank,,Rasterized to identify area covered by country
2,ago_adm,Vector,WGS 84,4326.0,13,11.67473,24.09038,-18.04311,-4.383905,MultiPolygon,1.0,,,,,ADM0 raster,Official World Bank boundaries rasterized to o...,World Bank,,Rasterized to identify area covered by country
3,ago_adm,Vector,WGS 84,4326.0,13,11.67473,24.09038,-18.04311,-4.383905,MultiPolygon,1.0,,,,,ADM0 raster,Official World Bank boundaries rasterized to o...,World Bank,,Rasterized to identify area covered by country
4,ago_cpo15,Raster,WGS 84,4326.0,1,11.67458,24.08208,-18.04208,-4.38375,,,,16390.0,14889.0,0.000833,Constrained WorldPop 2015,"WorldPop population for 2015, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
5,ago_cpo15,Raster,World_Mollweide,,1,1139750.0,2385000.0,-2217000.0,-541750.0,,,/FINAL_STANDARD,6701.0,4981.0,250.0,Constrained WorldPop 2015,"WorldPop population for 2015, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
6,ago_cpo20,Raster,WGS 84,4326.0,1,11.67458,24.08208,-18.04208,-4.38375,,,,16390.0,14889.0,0.000833,Constrained WorldPop 2020,"WorldPop population for 2020, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
7,ago_cpo20,Raster,World_Mollweide,,1,1139750.0,2385000.0,-2217000.0,-541750.0,,,/FINAL_STANDARD,6701.0,4981.0,250.0,Constrained WorldPop 2020,"WorldPop population for 2020, constrained by l...",WorldPop,https://www.worldpop.org/methods/,Re-sampled to final resolution using cubic con...
8,ago_des,Raster,WGS 84,4326.0,1,11.67222,24.09167,-18.04444,-4.383333,,,,4918.0,4471.0,0.002778,Globcover 2015 Desert,Bare areas (value 200) extracted from Globcove...,Globcover 2015,https://www.esa.int/ESA_Multimedia/Images/2018...,Downloaded and single value extracted
9,ago_des,Raster,World_Mollweide,,1,1139750.0,2385000.0,-2217000.0,-541750.0,,,/FINAL_STANDARD,6701.0,4981.0,250.0,Globcover 2015 Desert,Bare areas (value 200) extracted from Globcove...,Globcover 2015,https://www.esa.int/ESA_Multimedia/Images/2018...,Downloaded and single value extracted


In [50]:
# Write the metadata workbook; the template definitions are filled with the country code and name
template_values = dict(ISO3=iso3, Country=country_name)
definitions = dataset_info.Definition
metadata_file = os.path.join(out_dir, f"{iso3}_novel_urbanization_metadata.xlsx")
make_meta.write_metadata(
    metadata_file,
    layer_metadata=final_meta,
    field_metadata=metadata['fields'],
    dataset_id=definitions[0].format(**template_values),
    dataset_title=definitions[1].format(**template_values),
    country=definitions[2].format(**template_values),
    abstract=definitions[3].format(**template_values),
    purpose=definitions[4].format(**template_values),
    # Both dates are stamped with the day the cell is run
    creation_date=datetime.today().strftime('%Y-%m-%d'),
    release_date=datetime.today().strftime('%Y-%m-%d'),
    owner=definitions[7].format(**template_values),
    email=definitions[8].format(**template_values),
)

In [49]:
# Show the folder the metadata workbook is written to
out_dir

'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AGO_URBAN_DATA_new_naming/metadata'

# Generating zip commands

In [None]:
# Print (without running) one zip command for each FINAL_STANDARD / FINAL_STANDARD_1KM folder
in_folder = "/home/wb411133/temp"
for cur_root, sub_dirs, _ in os.walk(in_folder):
    for d in sub_dirs:
        if d not in ("FINAL_STANDARD", "FINAL_STANDARD_1KM"):
            continue
        cur_dir = os.path.join(cur_root, d)
        # Archive name: first "_" token of the parent folder plus the last "_" token of the path
        zip_prefix = cur_dir.split("/")[-2].split("_")[0]
        zip_suffix = cur_dir.split("_")[-1]
        # Zip a path relative to the parent's parent: <parent folder>/<FINAL_* folder>
        rel_folder = os.path.join(os.path.basename(os.path.dirname(cur_dir)), os.path.basename(cur_dir))
        print("zip -r {out_file} {infolder}".format(
            out_file="%s_%s.zip" % (zip_prefix, zip_suffix),
            infolder=rel_folder))

# Debugging

In [None]:
# Two per-raster csv outputs from the LSO folder, loaded for side-by-side comparison
file1 = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_cpo20.tif.csv"
file2 = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_gpo.tif.csv"

# The first csv column is used as the index of each table
inD1 = pd.read_csv(file1, index_col=0)
inD2 = pd.read_csv(file2, index_col=0)

In [None]:
# Preview the first rows of the cpo20 table
inD1.head()

In [None]:
# Preview the first rows of the gpo table
inD2.head()

In [None]:
# Join the two tables on their index.
# NOTE(review): DataFrame.join raises ValueError when the frames share column names and no
# lsuffix/rsuffix is given - confirm whether that is the failure being debugged here.
inD1.join(inD2)

In [None]:
# Flag, per column of inD2, whether the same name also exists in inD1.
# (`Index in Index` raises TypeError because an Index is unhashable.)
inD2.columns.isin(inD1.columns)

In [None]:
# List the column names of the cpo20 table
inD1.columns

In [None]:
# List the column names of the gpo table
inD2.columns

In [None]:
# Sample list of LSO population rasters; lso_upo15.tif and lso_gpo.tif each appear twice
pop_files = ['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo15.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo20.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',
'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif']

In [None]:
# Drop duplicate paths while keeping first-seen order; a set would return
# the paths in an arbitrary order that can differ between sessions.
list(dict.fromkeys(pop_files))