In [38]:
import os, sys, importlib, subprocess, copy, zipfile
import rasterio, geohash

import geopandas as gpd
import pandas as pd
import numpy as np

from shapely.geometry import Point, box
from shapely.wkt import loads
from rasterio import features
from collections import Counter

sys.path.append("../")

import infrasap.vulnerability_mapping as vulmap
import infrasap.covid_data_extraction as cov
import infrasap.rasterMisc as rMisc
import infrasap.misc as misc
import infrasap.osmMisc as osm
import infrasap.UrbanRaster as urban


In [4]:
# Input datasets: global admin boundaries, population / land-cover rasters and the DHS shapefiles
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_adm1 = "/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp"
global_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"
pop_folder = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/GLOBAL_1km_Demographics"
output_folder = "/home/wb411133/data/Projects/CoVID"
population_raster = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
lcRaster = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif"
dhs_folder = '/home/public/Data/PROJECTS/CoVID/DHS'

# Load every shapefile found anywhere below dhs_folder into a dict of GeoDataFrames,
# keyed by the file name with ".shp" stripped out.
dhs_files = {}
for cur_dir, _sub_dirs, file_names in os.walk(dhs_folder):
    for file_name in file_names:
        if file_name.endswith(".shp"):
            dhs_key = file_name.replace(".shp", "")
            dhs_files[dhs_key] = gpd.read_file(os.path.join(cur_dir, file_name))


# Read the global vector layers fully into memory; the two rasters are only opened
# (handles stay open so later cells can read from them).
pop_files = os.listdir(pop_folder)
inG  = gpd.read_file(global_bounds)
inG1 = gpd.read_file(global_adm1)
inG2 = gpd.read_file(global_adm2)
inR = rasterio.open(population_raster)
inL = rasterio.open(lcRaster)

In [3]:
# ISO3 code of the country to process; later cells use it to filter the global
# admin layers (on their 'ISO3' column) and to name the per-country output folder.
iso3 = "IDN"

In [4]:
# Extract the national, admin-1 and admin-2 boundaries for the selected country
# and clip the global land-cover raster to the national boundary.
country_folder = os.path.join(output_folder, iso3)
adm0_file = os.path.join(country_folder, "adm0.shp")
adm1_file = os.path.join(country_folder, "adm1.shp")
adm2_file = os.path.join(country_folder, "adm2.shp")
lc_file = os.path.join(country_folder, "LC.tif")

# Select this country's features and reproject them to WGS84 (EPSG:4326).
# to_crs(epsg=4326) replaces the deprecated {'init': 'epsg:4326'} proj4-style dict.
country_bounds = inG.loc[inG['ISO3'] == iso3].to_crs(epsg=4326)
country_adm1 = inG1.loc[inG1['ISO3'] == iso3].to_crs(epsg=4326)
country_adm2 = inG2.loc[inG2['ISO3'] == iso3].to_crs(epsg=4326)

# Fail early with a clear message instead of writing empty layers for a bad code.
if country_bounds.empty:
    raise ValueError(f"No national boundary found for ISO3 code {iso3!r} in {global_bounds}")

os.makedirs(country_folder, exist_ok=True)

# Each output is written only if it does not exist yet, so re-running the cell
# never overwrites files produced by an earlier run.
for layer, layer_file in (
    (country_bounds, adm0_file),
    (country_adm1, adm1_file),
    (country_adm2, adm2_file),
):
    if not os.path.exists(layer_file):
        layer.to_file(layer_file)
if not os.path.exists(lc_file):
    # The clip geometry is read back from the adm0 file on disk, not taken from country_bounds.
    rMisc.clipRaster(inL, gpd.read_file(adm0_file), lc_file)

# country_bounds is already in EPSG:4326 at this point; no second reprojection is needed.

In [None]:
# Run the vulnerability calculation from infrasap.covid_data_extraction for the
# selected country, using the national boundary and the list of demographic
# population files found in pop_folder.
# NOTE(review): presumably writes its rasters into country_folder — confirm in the module.
cov.calculate_vulnerability(iso3, country_folder, country_bounds, pop_folder, pop_files)
misc.tPrint("***Calculated Vulnerability")

In [None]:
# Build the urban data for this country from the global population raster (inR).
# NOTE(review): calc_urban=False is passed explicitly; its exact effect is defined
# in infrasap.covid_data_extraction.create_urban_data — confirm there.
cov.create_urban_data(iso3, country_folder, country_bounds, inR, calc_urban=False)
misc.tPrint("***Calculated Urban Extents")

# NOTE(review): the lines below used to live inside a bare ''' string literal.
# A string literal is an expression, so it was evaluated (and echoed as the cell's
# result) rather than ignored. It is the orphaned tail of a removed try/except
# fallback and is kept here, as real comments, for reference only.
#
# except:
#     misc.tPrint("%s errored on HD clusters" % iso3)
#     try:
#         create_urban_data(iso3, country_folder, country_bounds, inR, calc_urban=True, calc_hd_urban=False)
#     except:
#         misc.tPrint("%s errored on all clusters" % iso3)
# #extract_osm(country_bounds, country_folder)
#
# misc.tPrint("***Extracted OSM")

In [None]:
# Dissolve all features of the national boundary layer into one shapely geometry.
# NOTE(review): country_shape is not referenced by any other cell visible in this notebook.
country_shape = country_bounds.unary_union

In [16]:
def _nodata_mask(values, nodata):
    ''' Return a boolean array flagging the cells of `values` that equal `nodata`.
        A `nodata` of None flags nothing; a NaN `nodata` is matched with np.isnan.
    '''
    if nodata is None:
        return np.zeros(values.shape, dtype=bool)
    if isinstance(nodata, float) and np.isnan(nodata):
        return np.isnan(values)
    return values == nodata


def combine_dhs_pop(popRaster, dhs_raster, out_file, factor=100):
    ''' Multiply a population raster by a rasterized DHS indicator and write the result.

    INPUT
        popRaster [rasterio] - open population dataset; its metadata (incl. nodata) is
                               reused for the output file
        dhs_raster [rasterio] - open dataset with the DHS indicator, on the same grid
        out_file [string] - path of the raster to create
        [optional] factor [int] - number to divide dhs_raster (converts percentage to fraction)

    Cells that are NoData in either input are written as the population raster's
    NoData value (when it defines one) instead of the product of the two values,
    so they are not mistaken for real population counts downstream.

    RAISES
        ValueError - if factor is 0
    '''
    if factor == 0:
        raise ValueError("factor must be non-zero")

    pop_values = popRaster.read()
    dhs_values = dhs_raster.read()

    # Flag NoData on the raw values, before the DHS layer is rescaled.
    pop_nodata = popRaster.nodata
    invalid = _nodata_mask(pop_values, pop_nodata) | _nodata_mask(dhs_values, dhs_raster.nodata)

    if factor != 1:
        dhs_values = dhs_values / factor
    dhs_pop = pop_values * dhs_values

    # Only re-label cells when the output actually has a NoData value to use.
    if pop_nodata is not None:
        dhs_pop = np.where(invalid, pop_nodata, dhs_pop).astype(dhs_pop.dtype)

    with rasterio.open(out_file, 'w', **popRaster.meta) as outR:
        outR.write(dhs_pop)

        
# Country-level population raster used as the grid template for the DHS rasters.
# The handle is left open on purpose: the DHS rasterization cell passes inP to
# combine_dhs_pop.
# NOTE(review): WP_2020_1km.tif is presumably produced by an earlier step
# (cov.calculate_vulnerability?) — confirm.
template = os.path.join(country_folder, "WP_2020_1km.tif")
inP = rasterio.open(template)
# Process DHS data

total_columns = 0  # NOTE(review): never read in the cells visible here

# Collect the column names of every DHS layer into one flat list. Starting from an
# empty list (instead of a try/del/except dance) keeps the cell re-runnable and
# well-defined even when dhs_files is empty.
all_columns = []
for inD in dhs_files.values():
    all_columns.extend(list(inD.columns.values))

# Keep only the column names that occur in exactly one DHS layer; names shared by
# several layers are dropped.
col_count = Counter(all_columns)
unq_columns = [key for key, value in col_count.items() if value == 1]

In [None]:
# Private deep copy of the raster definitions in cov.hnp_categories, so edits made
# in the notebook do not touch the module's own dict.
# NOTE(review): the "Run zonal stats" cell repeats this exact assignment, which
# makes this cell redundant.
cur_rasters = copy.deepcopy(cov.hnp_categories)

In [15]:
# For every DHS layer that has features for the selected country, rasterize each
# of its uniquely-named columns onto the population grid, weight it by population,
# and register the resulting "<key>_<field>_pop.tif" for the zonal statistics.
dhs_rasters = {}
for key, inD in dhs_files.items():
    sel_dhs = inD.loc[inD['ISO3'] == iso3]
    if sel_dhs.shape[0] == 0:
        continue  # this DHS layer has no features for the selected country
    for field in inD.columns:
        if field not in unq_columns:
            continue
        out_file = os.path.join(country_folder, f'{key}_{field}.tif')
        out_file_pop = os.path.join(country_folder, f'{key}_{field}_pop.tif')
        try:
            # Rasterize the desired field of the input DHS data.
            # NOTE(review): the whole layer (inD) is rasterized, not just sel_dhs;
            # the template raster limits the extent — confirm this is intended.
            if not os.path.exists(out_file) and not os.path.exists(out_file_pop):
                rMisc.rasterizeDataFrame(inD, out_file, idField=field, templateRaster = template)

            # Multiply the rasterized data frame by the population layer. The
            # intermediate raster is opened in a context manager so its handle is
            # closed before the file is deleted below.
            if not os.path.exists(out_file_pop):
                with rasterio.open(out_file) as dhs_raster:
                    combine_dhs_pop(inP, dhs_raster, out_file_pop, factor=100)
            # The un-weighted raster is only an intermediate product.
            if os.path.exists(out_file):
                os.remove(out_file)
            misc.tPrint(f'{key}: {field}')
            dhs_rasters[f'{key}_{field}'] = {
                'raster_file': f'{key}_{field}_pop.tif',
                'vars': ['SUM', 'MEAN'],
                'description': f'{key}_{field}'
            }
        except Exception as err:
            # Best effort: report the failure (with its cause) and move on to the
            # next field, without swallowing KeyboardInterrupt/SystemExit.
            misc.tPrint(f"Error processing {key} - {field}: {err}")

NameError: name 'unq_columns' is not defined

In [None]:
# Prepare the zonal-stats inputs: every raster definition gets its 'raster_file'
# rewritten to a full path inside country_folder.

# Base rasters: work on a private deep copy of the module-level definitions.
cur_rasters = copy.deepcopy(cov.hnp_categories)
for raster_def in cur_rasters.values():
    raster_def['raster_file'] = os.path.join(country_folder, raster_def['raster_file'])

# DHS rasters: the definitions inside dhs_rasters are updated IN PLACE, and
# dhs_final is a new dict that points at those very same definition objects
# (the DHS zonal-stats cell passes dhs_rasters and relies on the updated paths).
for raster_def in dhs_rasters.values():
    raster_def['raster_file'] = os.path.join(country_folder, raster_def['raster_file'])
dhs_final = dict(dhs_rasters)

# Every shapefile below country_folder, except those with "zonal" in the file name.
all_shps = []
for cur_dir, _sub_dirs, file_names in os.walk(country_folder):
    all_shps.extend(
        os.path.join(cur_dir, file_name)
        for file_name in file_names
        if file_name.endswith(".shp") and "zonal" not in file_name
    )


In [None]:
# Reload infrasap.covid_data_extraction so edits made to the module on disk are
# picked up without restarting the kernel.
importlib.reload(cov)
# Run cov.run_zonal for the base raster definitions over every shapefile in all_shps.
# NOTE(review): out_suffix="_BASE" presumably tags the output file names — confirm in run_zonal.
cov.run_zonal(all_shps, cur_rasters, out_suffix="_BASE")

In [None]:
# Same zonal run for the DHS rasters. dhs_rasters (not dhs_final) is passed; its
# 'raster_file' entries were already rewritten to full paths, in place, by the
# "Run zonal stats" preparation cell, so that cell must have been executed first.
cov.run_zonal(all_shps, dhs_rasters, out_suffix="_DHS")

# DEBUGGING BELOW 

In [20]:
# Merge urban fishnets at the national level: for every country folder, combine all
# "HD_*" files found below FINAL_GEOMS into one "<ISO3>_COMBO_HD_URBAN_fishnet.geojson".
base_folder = "/home/wb411133/data/Projects/CoVID"
in_countries = os.listdir(base_folder)
all_fishnets = []
# The loop variable is deliberately NOT called iso3, so the notebook-level iso3
# (the country selected for processing) is not overwritten by this cell.
for country in in_countries:
    cur_folder = os.path.join(base_folder, country, "FINAL_GEOMS")
    # Remove the un-prefixed output written by an earlier version of this cell.
    bad_fishnet = os.path.join(cur_folder, "COMBO_HD_URBAN_fishnet.geojson")
    if os.path.exists(bad_fishnet):
        os.remove(bad_fishnet)
    out_fishnet = os.path.join(cur_folder, "%s_COMBO_HD_URBAN_fishnet.geojson" % country)
    # The path is recorded even if the file ends up not being created; the zip
    # cell checks for existence before adding it.
    all_fishnets.append(out_fishnet)
    print(country)
    if os.path.exists(out_fishnet):
        continue  # already merged on a previous run
    fishnets = [
        os.path.join(root, f)
        for root, dirs, files in os.walk(cur_folder)
        for f in files
        if f[:3] == "HD_"
    ]
    if len(fishnets) > 0:
        # One concat over all pieces replaces the removed DataFrame.append and the
        # try/del/except accumulator; row order and index values are unchanged.
        final = pd.concat([gpd.read_file(f) for f in fishnets])
        final.to_file(out_fishnet, driver="GeoJSON")

VNM
ARG
PAK
ZAF
COL
ZWE
MNG
SLE
CPV
KEN
GHA
AFG
YEM
ECU
PRY
MRT
MDV
KGZ
HTI
DJI
KHM
TJK
GMB
LKA
SEN
STP
SLV
VEN
MLI
RWA
BOL
TZA
MAR
IND
IDN
SDN
AGO
BEN
BWA
BFA
BDI
CMR
CAF
TCD
COM
COG
CIV
COD
SSD
ERI
ETH
GAB
GNB
GIN
LSO
LBR
MDG
MWI
MUS
MOZ
NAM
NER
NGA
SYC
SOM
SWZ
TGO
UGA
ZMB
LCA
PHL
GTM
BGD
BRA
MEX
EGY
UKR
PER
LAO
PSE
NPL
PNG
DZA
BLR
BTN
BIH
NIC
FJI
GEO
HND
JOR
MHL
MDA
MMR
MKD
PAN
WSM
SLB
TUN
TUR
URY
UZB
ALB
HRV
IRN
SRB
TTO
ATG
CHN
IRQ


In [22]:
# Bundle every national fishnet that exists on disk into one flat, compressed archive;
# entries are stored under their bare file name (no directory structure).
out_zip = "/home/wb411133/temp/HNP_Fishnets.zip"
with zipfile.ZipFile(out_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
    for fishnet_path in filter(os.path.exists, all_fishnets):
        archive.write(fishnet_path, arcname=os.path.basename(fishnet_path))

In [19]:
# Debug display of the collected fishnet paths.
# NOTE(review): the output captured below lists paths without the "<ISO3>_" prefix
# that the merge cell now builds, so it appears to come from an earlier version of
# that cell — re-run before trusting it.
all_fishnets

['/home/wb411133/data/Projects/CoVID/VNM/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/ARG/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/PAK/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/ZAF/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/COL/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/ZWE/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/MNG/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/SLE/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/CPV/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/KEN/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/GHA/FINAL_GEOMS/COMBO_HD_URBAN_fishnet.geojson',
 '/home/wb411133/data/Projects/CoVID/AFG/FINAL_GEOMS/C

In [9]:
# Debug check: (rows, columns) of `final`, i.e. whatever merged fishnet the merge
# loop assigned last. Depends on leftover kernel state from that cell.
final.shape

(7015, 3)

# Join Mobility Results

In [36]:
# Inputs/outputs for attaching the mobility table to the IDN urban fishnet
in_shp = "/home/wb411133/data/Projects/CoVID/IDN/FINAL_GEOMS/IDN_COMBO_HD_URBAN_fishnet.geojson"
out_shp = "/home/wb411133/data/Projects/CoVID/IDN/FINAL_GEOMS/IDN_COMBO_HD_URBAN_fishnet_20200101.shp"
in_mobility = "/home/wb411133/data/Projects/CoVID/IDN/FINAL_GEOMS/part-00000-tid-5083628924017813989-ab511d05-022c-4ed9-be33-6f51d59fc4b5-10404-1-c000.csv"

# Fishnet cells without their 'FID' column, indexed by geohash
inS = gpd.read_file(in_shp).drop(columns=['FID']).set_index('geohash')
# Mobility records, indexed by the same geohash key
inM = pd.read_csv(in_mobility).set_index('geohash')
# Index-on-index join with DataFrame.join's default (left) behaviour: every
# fishnet cell is kept, with or without a matching mobility row.
xx = inS.join(inM)

In [37]:
# Write the joined fishnet + mobility table to out_shp (a .shp path; no driver is
# passed, so GeoPandas' default driver is used).
# NOTE(review): if this is written as an ESRI Shapefile, field names longer than
# 10 characters get truncated — check the mobility column names.
xx.to_file(out_shp)