In [27]:
import sys, os, importlib
import rasterio, geojson

import pandas as pd
import geopandas as gpd

import skimage.graph as graph
import GOSTRocks.misc as misc
import GOSTRocks.rasterMisc as rMisc

sys.path.append("/home/wb411133/Code/GOSTNets_Raster/src")

import GOSTNetsRaster.market_access as ma

In [2]:
# ISO3 code of the country being processed. The country-specific folder and
# file names below are derived from it so the code only has to change here.
iso3 = 'PAK'

# Global input datasets
global_friction_surface = "/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff"
global_population = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
urban_centres = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
admin0_layer = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"

# Local working folder; every derived file below is placed inside it.
output_dir = f"/home/wb411133/temp/{iso3}_ZON"
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(output_dir, exist_ok=True)

# District polygons are looked up inside the working folder; nothing in this
# cell creates them.
input_districts_file = os.path.join(output_dir, 'District.shp')

# Paths of the derived outputs
friction_file = os.path.join(output_dir, "2020_friction.tif")
pop_file = os.path.join(output_dir, "2020_population.tif")
travel_time_file = os.path.join(output_dir, "2020_tt_cities.tif")
ntl_2020 = os.path.join(output_dir, "ntl_2020.tif")
sel_cities = os.path.join(output_dir, f"UCDB_cities_{iso3}.shp")
ntl_res = os.path.join(output_dir, "ntl_zonal_stats.csv")
tt_pop_res = os.path.join(output_dir, "tt_pop.shp")

In [5]:
# Open the global rasters and read the vector layers.
inF = rasterio.open(global_friction_surface)
inPop = rasterio.open(global_population)
inU = gpd.read_file(urban_centres)
inD = gpd.read_file(input_districts_file)
inAdmin = gpd.read_file(admin0_layer)

# Country boundary, selected with the configured ISO3 code rather than a
# second hard-coded copy of it.
inA = inAdmin.loc[inAdmin['ISO3'] == iso3]

# The clipped friction raster is opened just below, so it has to exist first.
# On a fresh run nothing has produced it yet; create it here when missing.
if not os.path.exists(friction_file):
    rMisc.clipRaster(inF, inA, friction_file)

inR = rasterio.open(friction_file)
# First band of the clipped friction raster, scaled by 1000.
# NOTE(review): presumably converts per-metre friction to per-kilometre cells
# - confirm against the friction surface documentation.
frictionD = inR.read()[0,:,:] * 1000
# Least-cost-path graph built on the scaled friction array.
mcp = graph.MCP_Geometric(frictionD)

In [62]:
# Re-import the market access module so edits to it are picked up.
importlib.reload(ma)

# Clip the global rasters to the country boundary; skipped when the clipped
# file is already on disk.
if not os.path.exists(friction_file):
    rMisc.clipRaster(inF, inA, friction_file)

if not os.path.exists(pop_file):
    rMisc.clipRaster(inPop, inA, pop_file)

# Urban centres of the selected country, cached as a shapefile.
if not os.path.exists(sel_cities):
    selU = inU.loc[inU['CTR_MN_ISO'] == iso3]
    selU.to_file(sel_cities)
else:
    selU = gpd.read_file(sel_cities)

# Travel time raster to the city centroids; only computed when not cached.
if not os.path.exists(travel_time_file):
    tempU = selU.copy()
    tempU['geometry'] = tempU['geometry'].apply(lambda x: x.centroid)
    travel_costs, traceback = ma.calculate_travel_time(inR, mcp, tempU)
    with rasterio.open(travel_time_file, 'w', **inR.meta) as out_file:
        out_file.write_band(1, travel_costs)

# Summarize populations in travel time bands
popR = rasterio.open(pop_file)
# NOTE(review): ttR opens friction_file, not travel_time_file. Left unchanged
# because mcp is passed to the helper as well - confirm which raster
# ma.summarize_travel_time_populations expects.
ttR = rasterio.open(friction_file)
# Each city polygon is replaced by its centroid for use as a destination.
tempU = selU.copy()
tempU['geometry'] = tempU['geometry'].apply(lambda x: x.centroid)

inD_ttPop = ma.summarize_travel_time_populations(popR, ttR, tempU, mcp, inD)
# Write the result both as a shapefile and as an attribute-only CSV.
inD_ttPop.to_file(tt_pop_res)
pd.DataFrame(inD_ttPop.drop(['geometry'], axis=1)).to_csv(tt_pop_res.replace(".shp",".csv"))


In [20]:
# Values passed as minVal to the zonal statistics; one set of columns is
# produced per threshold.
thresholds = [0, 0.01, 0.1, 0.5, 1]
# Calculate zonal NTL
aws_base = 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/'
ntl_files = [
    'VNL_v2_npp_201204-201303_global_vcmcfg_c202101211500.average.tif',
    'VNL_v2_npp_2013_global_vcmcfg_c202101211500.average.tif',
    'VNL_v2_npp_2014_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2015_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2016_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2017_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2018_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2019_global_vcmslcfg_c202101211500.average.tif',
    'VNL_v2_npp_2020_global_vcmslcfg_c202101211500.average.tif'
]

for ntl_file in ntl_files:
    # The fourth underscore-separated token of the file name is the period
    # label (e.g. "2013" or "201204-201303").
    year = ntl_file.split("_")[3]
    cur_file = os.path.join(aws_base, ntl_file)
    # Open each raster in a context manager so its handle is released before
    # the next file is opened.
    with rasterio.open(cur_file) as curR:
        for thresh in thresholds:
            out_col_sum = f'ntl_{year}_{thresh}_SUM'
            out_col_cnt = f'ntl_{year}_{thresh}_CNT'
            res = rMisc.zonalStats(inD, curR, minVal=thresh)
            # Index the stats on inD.index so the column assignments below are
            # positional rather than dependent on inD having a 0..n-1 index.
            res = pd.DataFrame(res, columns=['SUM','MIN','MAX','MEAN'], index=inD.index)
            inD[out_col_sum] = res['SUM']
            # Count recovered arithmetically as SUM / MEAN.
            inD[out_col_cnt] = res['SUM'] / res['MEAN']
            misc.tPrint(f"Completed {out_col_sum}")
        

16:01:15	Completed ntl_201204-201303_0_SUM
16:01:16	Completed ntl_201204-201303_0.01_SUM
16:01:16	Completed ntl_201204-201303_0.1_SUM
16:01:17	Completed ntl_201204-201303_0.5_SUM
16:01:17	Completed ntl_201204-201303_1_SUM
16:02:13	Completed ntl_2013_0_SUM
16:02:14	Completed ntl_2013_0.01_SUM
16:02:14	Completed ntl_2013_0.1_SUM
16:02:15	Completed ntl_2013_0.5_SUM
16:02:15	Completed ntl_2013_1_SUM
16:03:09	Completed ntl_2014_0_SUM
16:03:10	Completed ntl_2014_0.01_SUM
16:03:10	Completed ntl_2014_0.1_SUM
16:03:11	Completed ntl_2014_0.5_SUM
16:03:11	Completed ntl_2014_1_SUM
16:04:12	Completed ntl_2015_0_SUM
16:04:13	Completed ntl_2015_0.01_SUM
16:04:13	Completed ntl_2015_0.1_SUM
16:04:14	Completed ntl_2015_0.5_SUM
16:04:15	Completed ntl_2015_1_SUM
16:05:08	Completed ntl_2016_0_SUM
16:05:09	Completed ntl_2016_0.01_SUM
16:05:09	Completed ntl_2016_0.1_SUM
16:05:10	Completed ntl_2016_0.5_SUM
16:05:10	Completed ntl_2016_1_SUM
16:06:05	Completed ntl_2017_0_SUM
16:06:06	Completed ntl_2017_0.01_SUM

In [22]:
# Write the district attributes to CSV without the geometry column.
ntl_table = pd.DataFrame(inD.drop(columns=['geometry']))
ntl_table.to_csv(ntl_res)

In [None]:
# Extract the most recent ntl imagery: clip the last file in ntl_files to the
# country boundary, unless the clipped raster is already on disk.
if not os.path.exists(ntl_2020):
    # Context manager closes the remote dataset once the clip is written.
    with rasterio.open(os.path.join(aws_base, ntl_files[-1])) as tempR:
        rMisc.clipRaster(tempR, inA, ntl_2020)