http://wiki.worldbank.org/display/GEOS/MEX_AGEBS_Zonal

In [29]:
import sys, os, importlib
import rasterio, ee, geojson

import pandas as pd
import geopandas as gpd
import skimage.graph as graph

from shapely.geometry import box

sys.path.append("../../../../gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
from GOSTRocks.misc import tPrint

sys.path.append("../../../../GEE_Zonal/src")
import gee_tools as gee
from gee_helpers import *

sys.path.append("../../../../GOST_Urban/")
from src import UrbanRaster

sys.path.append("../../../../GOSTNets_Raster/src")
import GOSTNets_Raster.market_access as ma

# Initialise the Earth Engine client library before any GEE-backed call is made
ee.Initialize()
# Catalog object from gee_tools; the cells below query it with search_tags / search_title
cat = gee.Catalog()

In [24]:
# Define input data
# Everything for this country lives under in_folder; tabular results go to
# ZONAL_OUTPUTS (with NDVI / CHIRPS sub-folders for the chunked GEE results)
# and derived rasters go to GIS_DATA.
in_folder = "/home/wb411133/data/Country/MEX"
raster_folder = os.path.join(in_folder, "GIS_DATA")
zonal_out_folder = os.path.join(in_folder, "ZONAL_OUTPUTS")
ndvi_folder = os.path.join(zonal_out_folder, "NDVI")
chirps_folder = os.path.join(zonal_out_folder, "CHIRPS")

# Create whichever working folders do not exist yet
for tFolder in (zonal_out_folder, raster_folder, ndvi_folder, chirps_folder):
    if os.path.exists(tFolder):
        continue
    os.makedirs(tFolder)

# Source shapefiles: every .shp found directly inside AGEB/AGEBS
agebs_folder = os.path.join(in_folder, 'AGEB', 'AGEBS')
shp_names = filter(lambda name: name.endswith(".shp"), os.listdir(agebs_folder))
agebs_files = [os.path.join(agebs_folder, name) for name in shp_names]

# Global input rasters
pop_file = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
ghsl_file = '/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif'
global_access_map = '/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff'


In [26]:
# Define output data
# All outputs share one prefix: the combined AGEBS shapefile name minus its
# 4-character extension.
master_agebs = f"{agebs_folder}.shp"
cur_file = master_agebs
out_stem = os.path.basename(cur_file)[:-4]


def _zonal_path(suffix):
    """Return the path of a tabular output inside zonal_out_folder."""
    return os.path.join(zonal_out_folder, f'{out_stem}{suffix}')


def _raster_path(suffix):
    """Return the path of a spatial output inside raster_folder."""
    return os.path.join(raster_folder, f'{out_stem}{suffix}')


# Tabular (CSV) summaries, one per indicator
out_ghsl = _zonal_path('_GHSL.csv')
out_pop_summaries = _zonal_path('_Pop2020.csv')
ndvi_zonal = _zonal_path('_NDVI_monthly.csv')
chirps_zonal = _zonal_path('_CHIRPS_monthly.csv')
ntl_zonal_csv = _zonal_path('_ntl_zonal_res.csv')
urban_pop = _zonal_path('_UrbanPop2020.csv')
hd_pop = _zonal_path('_HD_UrbanPop2020.csv')
urban_access_res = _zonal_path('_urban_market_access.csv')
hd_urban_access_res = _zonal_path('_hd_urban_market_access.csv')

# Local rasters and vector extents derived from the global inputs
local_pop = _raster_path('_WorldPop.tif')
local_urban = _raster_path('_Urban.tif')
local_urban_pop = _raster_path('_UrbanPop.tif')
local_urban_hd = _raster_path('_HDUrban.tif')
local_urban_hdpop = _raster_path('_HDUrbanPop.tif')
local_access_map = _raster_path('_friction_surface.tif')
urban_extents_file = _raster_path('_UrbanExtents.shp')
hd_urban_extents_file = _raster_path('_HD_UrbanExtents.shp')
urban_access = _raster_path('_access_to_urban.tif')
hd_urban_access = _raster_path('_access_to_hd_urban.tif')

In [4]:
# Combine the individual AGEBS shapefiles into a single dataset; the result is
# cached on disk as master_agebs so the merge only happens once.
if not os.path.exists(master_agebs):
    if not agebs_files:
        raise FileNotFoundError(f"No .shp files found in {agebs_folder}")
    # pd.concat replaces the removed DataFrame.append. ignore_index=True gives
    # the merged frame a clean 0..n-1 index, the same index gpd.read_file
    # returns on later runs, so index-aligned column assignments further down
    # behave identically on the first run and on re-runs.
    inD = pd.concat([gpd.read_file(agebs_file) for agebs_file in agebs_files],
                    ignore_index=True)
    inD.to_file(master_agebs)
else:  # or read them in if the combo file already exists
    inD = gpd.read_file(master_agebs)

# NDVI - Vegetation

In [None]:
# Narrow the catalog to entries tagged "ndvi" whose title mentions both
# "Landsat" and "32-day"
results = cat.search_tags("ndvi").search_title("Landsat").search_title("32-day")

# The first match is the collection used below; the four characters that
# follow its first "/" become the prefix of the zonal output name.
landsat_collection = results.datasets.iloc[0, 0]
slash_pos = landsat_collection.find("/")
lc_id = landsat_collection[slash_pos + 1:slash_pos + 5]
stat = 'MAX'

# These are the datasets that are related to landsat NDVI
results.datasets

In [None]:
importlib.reload(gee)

# The GEE zonal work fails when too complex vector shapes are sent to GEE,
#    so the zonal calculation is run on chunks of `step_count` features at a time.
step_count = 100
# Chunk end positions: every multiple of step_count plus the final row count
steps = list(range(step_count, inD.shape[0], step_count))
steps.append(inD.shape[0])
start_idx = 0
broken_chunks = []  # chunks of inD whose request failed
for end_idx in steps:
    curD = inD.iloc[start_idx:end_idx,]
    out_file = os.path.join(ndvi_folder, f'MASTER_LIST_NDVI_{start_idx}_{end_idx}.csv')
    # Chunks that already have an output file are skipped, so the cell can be re-run to resume
    if not os.path.exists(out_file):
        try:
            tPrint(os.path.basename(out_file))
            cur_ee = gpd_to_gee(curD, 'CVEGEO')
            # Zonal statistics of the selected Landsat NDVI collection for this chunk
            zs = gee.ZonalStats(collection_id = landsat_collection,
                            target_features = cur_ee,
                            statistic_type = "all",
                            output_name=f"{lc_id}_ndvi_{stat}",
                            scale=1000,
                            min_threshold=0.1,
                            water_mask=True,
                            tile_scale = 16
                           )
            zonal_res = zs.runZonalStats()
            res = zonal_res.getInfo()
            pd_res = get_zonal_res(res)
            # Positional copy of the IDs so the table can be joined back to the AGEBS
            pd_res["CVEGEO"] = curD["CVEGEO"].values
            pd_res.to_csv(out_file)
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C still stops the loop,
            # and report the cause instead of hiding it. Processing continues
            # with the next chunk.
            broken_chunks.append(curD)
            tPrint(f'***ERROR{os.path.basename(out_file)}: {err!r}')
    start_idx = end_idx

# Zones whose request failed are collected in the GeoDataFrame brokenD
# (an empty slice of inD when every chunk succeeded)
brokenD = pd.concat(broken_chunks) if broken_chunks else inD.iloc[0:0]

In [None]:
# Aggregate the per-chunk results into a single output table
ndvi_chunks = []
# Only the chunk CSVs are read, in sorted order so the combined table is reproducible
for ndvi_file in sorted(x for x in os.listdir(ndvi_folder) if x.endswith(".csv")):
    # Read the IDs as text so purely numeric IDs keep any leading zeros
    curN = pd.read_csv(os.path.join(ndvi_folder, ndvi_file), index_col=0,
                       dtype={'CVEGEO': str})
    # Count of the most frequent ID (missing IDs pooled as -1). More than one
    # occurrence means the IDs in this chunk are missing or duplicated.
    bad_id_cnt = curN['CVEGEO'].fillna(-1).value_counts().iloc[0]
    if bad_id_cnt > 1:
        # The file name ends in _{start}_{end}.csv: the row range of inD this
        # chunk was built from, used here to restore the IDs positionally.
        start_idx = int(ndvi_file.split("_")[-2])
        end_idx = int(ndvi_file.split("_")[-1].replace(".csv", ""))
        curN['CVEGEO'] = inD.iloc[start_idx:end_idx,]['CVEGEO'].values
    tPrint(f"{ndvi_file}: {bad_id_cnt}")
    ndvi_chunks.append(curN)

if not ndvi_chunks:
    raise FileNotFoundError(f"No chunk CSVs found in {ndvi_folder}")
# pd.concat replaces the removed DataFrame.append and avoids re-copying the table per chunk
final = pd.concat(ndvi_chunks)

In [None]:
# Write the combined NDVI table (`final`, built by the aggregation cell above) to ndvi_zonal
final.to_csv(ndvi_zonal)

# CHIRPS - Weather 

In [6]:
# Same workflow as NDVI, this time for rainfall: search the catalog for
# "weather"-tagged entries with "CHIRPS" in the title and keep the second match.
results = cat.search_tags("weather").search_title("CHIRPS")
chirps_collection = results.datasets.iloc[1, 0]

In [25]:
importlib.reload(gee)
# Process the AGEBS in chunks of `step_count` features
step_count = 100
# Chunk end positions: every multiple of step_count plus the final row count
steps = list(range(step_count, inD.shape[0], step_count))
steps.append(inD.shape[0])
start_idx = 0
broken_chunks = []  # chunks of inD whose request failed
for end_idx in steps:
    curD = inD.iloc[start_idx:end_idx,]
    out_file = os.path.join(chirps_folder, f'MASTER_LIST_CHIRPS_{start_idx}_{end_idx}.csv')
    # Chunks that already have an output file are skipped, so the cell can be re-run to resume
    if not os.path.exists(out_file):
        try:
            tPrint(os.path.basename(out_file))
            cur_ee = gpd_to_gee(curD, 'CVEGEO')
            # Zonal statistics of the CHIRPS collection for this chunk
            zs = gee.ZonalStats(collection_id = chirps_collection,
                            target_features = cur_ee,
                            statistic_type = "sum", #We only want to return TOTAL rainfall in the AGEB
                            output_name = '',
                            scale=1000,
                            min_threshold=0,
                            water_mask=True,
                            frequency='monthly', # the original temporal is daily, we want monthly
                            temporal_stat='sum', # when collapsing a month, sum the values
                            tile_scale = 16
                           )
            zonal_res = zs.runZonalStats()
            res = zonal_res.getInfo()
            pd_res = get_zonal_res(res)
            # Positional copy of the IDs so the table can be joined back to the AGEBS
            pd_res["CVEGEO"] = curD["CVEGEO"].values
            pd_res.to_csv(out_file)
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C is not swallowed,
            # and report the cause instead of hiding it.
            broken_chunks.append(curD)
            tPrint(f'***ERROR{os.path.basename(out_file)}: {err!r}')
            # This loop stops at the first failing chunk
            break
    start_idx = end_idx

# Zones whose request failed are collected in the GeoDataFrame brokenD
# (an empty slice of inD when nothing failed)
brokenD = pd.concat(broken_chunks) if broken_chunks else inD.iloc[0:0]

11:21:21	MASTER_LIST_CHIRPS_0_100.csv
11:21:26	MASTER_LIST_CHIRPS_100_200.csv
11:21:29	MASTER_LIST_CHIRPS_200_300.csv
11:21:33	MASTER_LIST_CHIRPS_300_400.csv
11:21:36	MASTER_LIST_CHIRPS_400_500.csv
11:21:40	MASTER_LIST_CHIRPS_500_600.csv
11:21:51	MASTER_LIST_CHIRPS_600_700.csv
11:21:59	MASTER_LIST_CHIRPS_700_800.csv
11:22:05	MASTER_LIST_CHIRPS_800_900.csv
11:22:09	MASTER_LIST_CHIRPS_900_1000.csv
11:22:12	MASTER_LIST_CHIRPS_1000_1100.csv
11:22:16	MASTER_LIST_CHIRPS_1100_1200.csv
11:22:18	MASTER_LIST_CHIRPS_1200_1300.csv
11:22:21	MASTER_LIST_CHIRPS_1300_1400.csv
11:22:24	MASTER_LIST_CHIRPS_1400_1500.csv
11:22:27	MASTER_LIST_CHIRPS_1500_1600.csv
11:22:30	MASTER_LIST_CHIRPS_1600_1700.csv
11:22:33	MASTER_LIST_CHIRPS_1700_1800.csv
11:22:36	MASTER_LIST_CHIRPS_1800_1900.csv
11:22:39	MASTER_LIST_CHIRPS_1900_2000.csv
11:22:43	MASTER_LIST_CHIRPS_2000_2100.csv
11:22:46	MASTER_LIST_CHIRPS_2100_2200.csv
11:22:49	MASTER_LIST_CHIRPS_2200_2300.csv
11:22:52	MASTER_LIST_CHIRPS_2300_2400.csv
11:22:56	MAST

11:33:57	MASTER_LIST_CHIRPS_19200_19300.csv
11:34:00	MASTER_LIST_CHIRPS_19300_19400.csv
11:34:02	MASTER_LIST_CHIRPS_19400_19500.csv
11:34:04	MASTER_LIST_CHIRPS_19500_19600.csv
11:34:07	MASTER_LIST_CHIRPS_19600_19700.csv
11:34:10	MASTER_LIST_CHIRPS_19700_19800.csv
11:34:13	MASTER_LIST_CHIRPS_19800_19900.csv
11:34:15	MASTER_LIST_CHIRPS_19900_20000.csv
11:34:17	MASTER_LIST_CHIRPS_20000_20100.csv
11:34:22	MASTER_LIST_CHIRPS_20100_20200.csv
11:34:29	MASTER_LIST_CHIRPS_20200_20300.csv
11:34:37	MASTER_LIST_CHIRPS_20300_20400.csv
11:34:48	MASTER_LIST_CHIRPS_20400_20500.csv
11:34:57	MASTER_LIST_CHIRPS_20500_20600.csv
11:35:07	MASTER_LIST_CHIRPS_20600_20700.csv
11:35:14	MASTER_LIST_CHIRPS_20700_20800.csv
11:35:18	MASTER_LIST_CHIRPS_20800_20900.csv
11:35:21	MASTER_LIST_CHIRPS_20900_21000.csv
11:35:24	MASTER_LIST_CHIRPS_21000_21100.csv
11:35:27	MASTER_LIST_CHIRPS_21100_21200.csv
11:35:31	MASTER_LIST_CHIRPS_21200_21300.csv
11:35:34	MASTER_LIST_CHIRPS_21300_21400.csv
11:35:37	MASTER_LIST_CHIRPS_2140

11:47:17	MASTER_LIST_CHIRPS_37900_38000.csv
11:47:20	MASTER_LIST_CHIRPS_38000_38100.csv
11:47:23	MASTER_LIST_CHIRPS_38100_38200.csv
11:47:27	MASTER_LIST_CHIRPS_38200_38300.csv
11:47:30	MASTER_LIST_CHIRPS_38300_38400.csv
11:47:33	MASTER_LIST_CHIRPS_38400_38500.csv
11:47:36	MASTER_LIST_CHIRPS_38500_38600.csv
11:47:38	MASTER_LIST_CHIRPS_38600_38700.csv
11:47:49	MASTER_LIST_CHIRPS_38700_38800.csv
11:48:03	MASTER_LIST_CHIRPS_38800_38900.csv
11:48:15	MASTER_LIST_CHIRPS_38900_39000.csv
11:48:26	MASTER_LIST_CHIRPS_39000_39100.csv
11:48:40	MASTER_LIST_CHIRPS_39100_39200.csv
11:48:52	MASTER_LIST_CHIRPS_39200_39300.csv
11:49:05	MASTER_LIST_CHIRPS_39300_39400.csv
11:49:12	MASTER_LIST_CHIRPS_39400_39500.csv
11:49:15	MASTER_LIST_CHIRPS_39500_39600.csv
11:49:18	MASTER_LIST_CHIRPS_39600_39700.csv
11:49:21	MASTER_LIST_CHIRPS_39700_39800.csv
11:49:24	MASTER_LIST_CHIRPS_39800_39900.csv
11:49:27	MASTER_LIST_CHIRPS_39900_40000.csv
11:49:30	MASTER_LIST_CHIRPS_40000_40100.csv
11:49:33	MASTER_LIST_CHIRPS_4010

12:02:52	MASTER_LIST_CHIRPS_56600_56700.csv
12:02:54	MASTER_LIST_CHIRPS_56700_56800.csv
12:02:57	MASTER_LIST_CHIRPS_56800_56900.csv
12:03:00	MASTER_LIST_CHIRPS_56900_57000.csv
12:03:02	MASTER_LIST_CHIRPS_57000_57100.csv
12:03:05	MASTER_LIST_CHIRPS_57100_57200.csv
12:03:07	MASTER_LIST_CHIRPS_57200_57300.csv
12:03:09	MASTER_LIST_CHIRPS_57300_57400.csv
12:03:12	MASTER_LIST_CHIRPS_57400_57500.csv
12:03:14	MASTER_LIST_CHIRPS_57500_57600.csv
12:03:17	MASTER_LIST_CHIRPS_57600_57700.csv
12:03:19	MASTER_LIST_CHIRPS_57700_57800.csv
12:03:22	MASTER_LIST_CHIRPS_57800_57900.csv
12:03:24	MASTER_LIST_CHIRPS_57900_58000.csv
12:03:26	MASTER_LIST_CHIRPS_58000_58100.csv
12:03:29	MASTER_LIST_CHIRPS_58100_58200.csv
12:03:31	MASTER_LIST_CHIRPS_58200_58300.csv
12:03:34	MASTER_LIST_CHIRPS_58300_58400.csv
12:03:36	MASTER_LIST_CHIRPS_58400_58500.csv
12:03:39	MASTER_LIST_CHIRPS_58500_58600.csv
12:03:41	MASTER_LIST_CHIRPS_58600_58700.csv
12:03:46	MASTER_LIST_CHIRPS_58700_58800.csv
12:03:52	MASTER_LIST_CHIRPS_5880

12:14:55	MASTER_LIST_CHIRPS_75300_75400.csv
12:14:58	MASTER_LIST_CHIRPS_75400_75500.csv
12:15:01	MASTER_LIST_CHIRPS_75500_75600.csv
12:15:03	MASTER_LIST_CHIRPS_75600_75700.csv
12:15:06	MASTER_LIST_CHIRPS_75700_75800.csv
12:15:09	MASTER_LIST_CHIRPS_75800_75900.csv
12:15:12	MASTER_LIST_CHIRPS_75900_76000.csv
12:15:16	MASTER_LIST_CHIRPS_76000_76100.csv
12:15:25	MASTER_LIST_CHIRPS_76100_76200.csv
12:15:36	MASTER_LIST_CHIRPS_76200_76300.csv
12:15:49	MASTER_LIST_CHIRPS_76300_76400.csv
12:15:57	MASTER_LIST_CHIRPS_76400_76500.csv
12:16:04	MASTER_LIST_CHIRPS_76500_76600.csv
12:16:14	MASTER_LIST_CHIRPS_76600_76700.csv
12:16:24	MASTER_LIST_CHIRPS_76700_76800.csv
12:16:33	MASTER_LIST_CHIRPS_76800_76900.csv
12:16:38	MASTER_LIST_CHIRPS_76900_77000.csv
12:16:41	MASTER_LIST_CHIRPS_77000_77100.csv
12:16:43	MASTER_LIST_CHIRPS_77100_77200.csv
12:16:46	MASTER_LIST_CHIRPS_77200_77300.csv
12:16:48	MASTER_LIST_CHIRPS_77300_77400.csv
12:16:51	MASTER_LIST_CHIRPS_77400_77500.csv
12:16:53	MASTER_LIST_CHIRPS_7750

In [27]:
# Aggregate the per-chunk CHIRPS results into a single output csv
chirps_chunks = []
# Only the chunk CSVs are read, in sorted order so the combined table is reproducible
for chirp_file in sorted(x for x in os.listdir(chirps_folder) if x.endswith(".csv")):
    # Read the IDs as text so purely numeric IDs keep any leading zeros
    curN = pd.read_csv(os.path.join(chirps_folder, chirp_file), index_col=0,
                       dtype={'CVEGEO': str})
    # Count of the most frequent ID (missing IDs pooled as -1). More than one
    # occurrence means the IDs in this chunk are missing or duplicated.
    bad_id_cnt = curN['CVEGEO'].fillna(-1).value_counts().iloc[0]
    if bad_id_cnt > 1:
        # The row range must be parsed from THIS file's name (_{start}_{end}.csv).
        # The previous code parsed a leftover NDVI file name here, which
        # restored IDs from the wrong rows of inD.
        start_idx = int(chirp_file.split("_")[-2])
        end_idx = int(chirp_file.split("_")[-1].replace(".csv", ""))
        curN['CVEGEO'] = inD.iloc[start_idx:end_idx,]['CVEGEO'].values
    tPrint(f"{chirp_file}: {bad_id_cnt}")
    chirps_chunks.append(curN)

if not chirps_chunks:
    raise FileNotFoundError(f"No chunk CSVs found in {chirps_folder}")
# pd.concat replaces the removed DataFrame.append and avoids re-copying the table per chunk
final = pd.concat(chirps_chunks)

final.to_csv(chirps_zonal)

12:17:34	MASTER_LIST_CHIRPS_0_100.csv: 1
12:17:34	MASTER_LIST_CHIRPS_100_200.csv: 1
12:17:34	MASTER_LIST_CHIRPS_200_300.csv: 1
12:17:34	MASTER_LIST_CHIRPS_300_400.csv: 1
12:17:34	MASTER_LIST_CHIRPS_400_500.csv: 1
12:17:34	MASTER_LIST_CHIRPS_500_600.csv: 1
12:17:34	MASTER_LIST_CHIRPS_600_700.csv: 1
12:17:34	MASTER_LIST_CHIRPS_700_800.csv: 1
12:17:34	MASTER_LIST_CHIRPS_800_900.csv: 1
12:17:34	MASTER_LIST_CHIRPS_900_1000.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1000_1100.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1100_1200.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1200_1300.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1300_1400.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1400_1500.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1500_1600.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1600_1700.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1700_1800.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1800_1900.csv: 1
12:17:34	MASTER_LIST_CHIRPS_1900_2000.csv: 1
12:17:34	MASTER_LIST_CHIRPS_2000_2100.csv: 1
12:17:34	MASTER_LIST_CHIRPS_2100_2200.csv: 1
12:17:34	MASTER_LIST_CHIRPS_220

12:17:42	MASTER_LIST_CHIRPS_18000_18100.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18100_18200.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18200_18300.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18300_18400.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18400_18500.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18500_18600.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18600_18700.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18700_18800.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18800_18900.csv: 1
12:17:42	MASTER_LIST_CHIRPS_18900_19000.csv: 1
12:17:42	MASTER_LIST_CHIRPS_19000_19100.csv: 1
12:17:42	MASTER_LIST_CHIRPS_19100_19200.csv: 1
12:17:42	MASTER_LIST_CHIRPS_19200_19300.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19300_19400.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19400_19500.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19500_19600.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19600_19700.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19700_19800.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19800_19900.csv: 1
12:17:43	MASTER_LIST_CHIRPS_19900_20000.csv: 1
12:17:43	MASTER_LIST_CHIRPS_20000_20100.csv: 1
12:17:43	MAST

12:17:56	MASTER_LIST_CHIRPS_35600_35700.csv: 1
12:17:56	MASTER_LIST_CHIRPS_35700_35800.csv: 1
12:17:56	MASTER_LIST_CHIRPS_35800_35900.csv: 1
12:17:56	MASTER_LIST_CHIRPS_35900_36000.csv: 1
12:17:56	MASTER_LIST_CHIRPS_36000_36100.csv: 1
12:17:56	MASTER_LIST_CHIRPS_36100_36200.csv: 1
12:17:56	MASTER_LIST_CHIRPS_36200_36300.csv: 1
12:17:56	MASTER_LIST_CHIRPS_36300_36400.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36400_36500.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36500_36600.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36600_36700.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36700_36800.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36800_36900.csv: 1
12:17:57	MASTER_LIST_CHIRPS_36900_37000.csv: 1
12:17:57	MASTER_LIST_CHIRPS_37000_37100.csv: 1
12:17:57	MASTER_LIST_CHIRPS_37100_37200.csv: 1
12:17:57	MASTER_LIST_CHIRPS_37200_37300.csv: 1
12:17:57	MASTER_LIST_CHIRPS_37300_37400.csv: 1
12:17:58	MASTER_LIST_CHIRPS_37400_37500.csv: 1
12:17:58	MASTER_LIST_CHIRPS_37500_37600.csv: 1
12:17:58	MASTER_LIST_CHIRPS_37600_37700.csv: 1
12:17:58	MAST

12:18:16	MASTER_LIST_CHIRPS_53100_53200.csv: 1
12:18:16	MASTER_LIST_CHIRPS_53200_53300.csv: 1
12:18:16	MASTER_LIST_CHIRPS_53300_53400.csv: 1
12:18:16	MASTER_LIST_CHIRPS_53400_53500.csv: 1
12:18:16	MASTER_LIST_CHIRPS_53500_53600.csv: 1
12:18:17	MASTER_LIST_CHIRPS_53600_53700.csv: 1
12:18:17	MASTER_LIST_CHIRPS_53700_53800.csv: 1
12:18:17	MASTER_LIST_CHIRPS_53800_53900.csv: 1
12:18:17	MASTER_LIST_CHIRPS_53900_54000.csv: 1
12:18:17	MASTER_LIST_CHIRPS_54000_54100.csv: 1
12:18:17	MASTER_LIST_CHIRPS_54100_54200.csv: 1
12:18:17	MASTER_LIST_CHIRPS_54200_54300.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54300_54400.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54400_54500.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54500_54600.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54600_54700.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54700_54800.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54800_54900.csv: 1
12:18:18	MASTER_LIST_CHIRPS_54900_55000.csv: 1
12:18:18	MASTER_LIST_CHIRPS_55000_55100.csv: 1
12:18:19	MASTER_LIST_CHIRPS_55100_55200.csv: 1
12:18:19	MAST

12:18:42	MASTER_LIST_CHIRPS_70700_70800.csv: 1
12:18:42	MASTER_LIST_CHIRPS_70800_70900.csv: 1
12:18:43	MASTER_LIST_CHIRPS_70900_71000.csv: 1
12:18:43	MASTER_LIST_CHIRPS_71000_71100.csv: 1
12:18:43	MASTER_LIST_CHIRPS_71100_71200.csv: 1
12:18:43	MASTER_LIST_CHIRPS_71200_71300.csv: 1
12:18:43	MASTER_LIST_CHIRPS_71300_71400.csv: 1
12:18:43	MASTER_LIST_CHIRPS_71400_71500.csv: 1
12:18:44	MASTER_LIST_CHIRPS_71500_71600.csv: 1
12:18:44	MASTER_LIST_CHIRPS_71600_71700.csv: 1
12:18:44	MASTER_LIST_CHIRPS_71700_71800.csv: 1
12:18:44	MASTER_LIST_CHIRPS_71800_71900.csv: 1
12:18:44	MASTER_LIST_CHIRPS_71900_72000.csv: 1
12:18:44	MASTER_LIST_CHIRPS_72000_72100.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72100_72200.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72200_72300.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72300_72400.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72400_72500.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72500_72600.csv: 1
12:18:45	MASTER_LIST_CHIRPS_72600_72700.csv: 1
12:18:46	MASTER_LIST_CHIRPS_72700_72800.csv: 1
12:18:46	MAST

# Population Density

In [None]:
# Summarize total population across the AGEBS from WorldPop
# The raster is opened in a context manager so the handle is released afterwards
with rasterio.open(pop_file) as inR:
    res = rMisc.zonalStats(inD, inR, minVal=0)
    pop_res = pd.DataFrame(res, columns=['SUM','MIN','MAX','MEAN'])
# Copy the IDs by position (same pattern as the NDVI/CHIRPS cells). An
# index-aligned assignment breaks when inD carries duplicate index labels.
# Assumes zonalStats returns one row per feature in inD order; confirm.
pop_res['CVEGEO'] = inD['CVEGEO'].values
pop_res.to_csv(out_pop_summaries)

# Construction Density

In [None]:
# Summarize the GHSL raster per AGEB as counts of categorical classes
# NOTE(review): ghsl_file is a GHS_SMOD raster according to its file name, while
# the original comment ("growth of urban extents") and classes 1-6 look like the
# multi-temporal GHS built-up coding. Confirm the raster matches the class list.
with rasterio.open(ghsl_file) as inR:
    # Reproject a local copy instead of overwriting the shared inD, so cells
    # that pair inD with rasters in its original CRS still work after this one.
    ghsl_zones = inD.to_crs(inR.crs)
    res = rMisc.zonalStats(ghsl_zones, inR, rastType='C', unqVals = [1,2,3,4,5,6])
    ghsl_res = pd.DataFrame(res, columns = [f'c{x}' for x in [1,2,3,4,5,6]])
# Copy the IDs by position (same pattern as the NDVI/CHIRPS cells); an
# index-aligned assignment breaks when inD carries duplicate index labels
ghsl_res['CVEGEO'] = ghsl_zones['CVEGEO'].values
ghsl_res.to_csv(out_ghsl)

# Infrastructure Density

In [None]:
### TODO: summarize road lengths/intersections in each AGEB

# Urbanization

In [None]:
# Calculate urban extents and summarize the urban population per AGEB
# Clip the global population raster to the AGEBS once; later runs reuse local_pop
if not os.path.exists(local_pop):
    with rasterio.open(pop_file) as global_pop:
        rMisc.clipRaster(global_pop, inD, local_pop)
# Zones and population raster must share a CRS for the zonal statistics below
with rasterio.open(local_pop) as inR:
    if inD.crs != inR.crs:
        inD = inD.to_crs(inR.crs)

urb = UrbanRaster.urbanGriddedPop(local_pop)

# Urban extents: density threshold 300, total population threshold 5000
try:
    urban_extents = urb.calculateUrban(densVal=300, totalPopThresh=5000,
                                       smooth=False, queen=False,
                                       raster=local_urban, raster_pop=local_urban_pop)
    # The per-AGEB summary is only computed when its CSV does not exist yet
    if not os.path.exists(urban_pop):
        res = rMisc.zonalStats(inD, local_urban_pop, minVal=0)
        pop_res = pd.DataFrame(res, columns=['SUM','MIN','MAX','MEAN'])
        # Positional ID copy; index alignment breaks on duplicate inD labels
        pop_res['CVEGEO'] = inD['CVEGEO'].values
        pop_res.to_csv(urban_pop)
    urban_extents.to_file(urban_extents_file)
except Exception as err:
    # Best-effort step: report the cause and carry on (Ctrl-C is no longer swallowed)
    print(f"Could not calculate urban population: {err!r}")

# High-density urban extents: density threshold 1500, total population threshold 50000
try:
    hd_extents = urb.calculateUrban(densVal=1500, totalPopThresh=50000,
                                    smooth=True, queen=True,
                                    raster=local_urban_hd, raster_pop=local_urban_hdpop)
    res = rMisc.zonalStats(inD, local_urban_hdpop, minVal=0)
    pop_res = pd.DataFrame(res, columns=['SUM','MIN','MAX','MEAN'])
    # Positional ID copy; index alignment breaks on duplicate inD labels
    pop_res['CVEGEO'] = inD['CVEGEO'].values
    pop_res.to_csv(hd_pop)
    hd_extents.to_file(hd_urban_extents_file)
except Exception as err:
    # Best-effort step: report the cause and carry on (Ctrl-C is no longer swallowed)
    print(f"Could not calculate high density urban population: {err!r}")

# Market Access

In [None]:
# Measure travel time to urban and HD urban areas
def _extent_centroids(extents_file):
    """Read an extents file and replace every geometry with its centroid."""
    dests = gpd.read_file(extents_file)
    dests['geometry'] = dests['geometry'].apply(lambda geom: geom.centroid)
    return dests


# Destinations are the centroids of the extents written by the urbanization cell
urban_dests = _extent_centroids(urban_extents_file)
hd_urban_dests = _extent_centroids(hd_urban_extents_file)

# Clip the global friction surface to the bounding box of the AGEBS (done once)
if not os.path.exists(local_access_map):
    inD = gpd.read_file(master_agebs)
    bounds_table = pd.DataFrame([[1, box(*inD.total_bounds)]],
                                columns=['id', 'geometry'])
    inD_bounds = gpd.GeoDataFrame(bounds_table, geometry="geometry", crs=inD.crs)
    rMisc.clipRaster(rasterio.open(global_access_map), inD_bounds, local_access_map)

# Build the least-cost graph from the first band of the local friction surface.
# The band is scaled by 1000 (presumably a unit conversion; confirm).
inR = rasterio.open(local_access_map)
frictionD = inR.read()[0] * 1000
mcp = graph.MCP_Geometric(frictionD)

# One travel-time raster per destination set; each call writes to its output path
for dests, out_tif in ((urban_dests, urban_access),
                       (hd_urban_dests, hd_urban_access)):
    travel_costs, traceback = ma.calculate_travel_time(inR, mcp, dests, out_tif)


In [None]:
# Standardize population to market access in order to calculate population-weighted
# access to urban and HD urban areas.
def _pop_weighted_access(access_file, std_pop, std_meta, out_csv):
    """Write per-AGEB population-weighted access for one travel-time raster.

    access_file: path of the travel-time raster to weight.
    std_pop, std_meta: population data and metadata as returned by
        rMisc.standardizeInputRasters.
    out_csv: path of the CSV to write.
    Returns the table that was written.
    """
    with rasterio.open(access_file) as access_r:
        access_data = access_r.read()
    # Population times travel time, cell by cell
    pop_access = std_pop * access_data
    with rMisc.create_rasterio_inmemory(std_meta, pop_access) as pop_access_R:
        zonal = rMisc.zonalStats(inD, pop_access_R, minVal=0)
        zonal = pd.DataFrame(zonal, columns=['SUM','MIN','MAX','MEAN'])
    # Divide the weighted sum by the total population of each AGEB
    pop_stats = pd.read_csv(out_pop_summaries)
    zonal['urban_access'] = zonal['SUM']/pop_stats['SUM']
    # Carry the zone ID (taken by position from the zones the statistics were
    # run on) so the table can be joined back to the AGEBS
    zonal['CVEGEO'] = inD['CVEGEO'].values
    zonal.to_csv(out_csv)
    return zonal


# Both travel-time rasters are weighted with the population standardized to the
# urban access raster, as before. The rasters are opened in a context manager
# so the handles are released afterwards.
with rasterio.open(local_pop) as inR1, rasterio.open(urban_access) as inR2:
    sPop, metadata = rMisc.standardizeInputRasters(inR1, inR2)
    res = _pop_weighted_access(urban_access, sPop, metadata, urban_access_res)
    res = _pop_weighted_access(hd_urban_access, sPop, metadata, hd_urban_access_res)

# Nighttime Lights

In [None]:
# Get a list of the VIIRS images in S3. This example leverages the GOST team's S3 bucket
s3_base = 's3://wbgdecinternal-ntl/'
# Text listing of the bucket contents; the object key is the last space-separated token of each line
ntl_file_list = "/home/wb411133/temp/YEM/AWS_NTL_S3.txt"
focal_tile = "TILE1"

all_files = []
with open(ntl_file_list, 'r') as in_aws:
    for line in in_aws:
        # Keep only the lines naming the focal tile and 'avg_rade9'
        if focal_tile in line and 'avg_rade9' in line:
            # strip() rather than [:-1]: a final line without a newline no
            # longer loses its last character, and "\r\n" endings are handled
            s3_key = line.strip().split(" ")[-1]
            # Build the URL with "/" explicitly. os.path.join is for local
            # paths and would drop s3_base if the key began with "/".
            all_files.append(f'{s3_base.rstrip("/")}/{s3_key.lstrip("/")}')

all_files[:5]

In [None]:
# Run zonal statistics against the admin area, one column of summed radiance per image
for cur_tif in all_files:
    res = rMisc.zonalStats(inD, cur_tif, minVal=0.05)
    res = pd.DataFrame(res,columns=['SUM','MIN','MAX','MEAN'])
    # Column name is the 6th "/"-separated token of the S3 path.
    # Values are assigned by position: an index-aligned assignment silently
    # gives rows the wrong value when inD carries duplicate index labels.
    # Assumes zonalStats returns one row per feature in inD order; confirm.
    inD[cur_tif.split("/")[5]] = res['SUM'].values
    tPrint(os.path.basename(cur_tif))

# Attribute table only: the geometry column is dropped before export
ntl_res = pd.DataFrame(inD.drop(['geometry'], axis=1))

In [None]:
# Write the nighttime-lights table (`ntl_res`, built in the previous cell) to ntl_zonal_csv
ntl_res.to_csv(ntl_zonal_csv)