In [97]:
import pandas as pd
import os, sys
sys.path.append(r'C:\Users\charl\Documents\GitHub\GOST_PublicGoods\GOSTNets\GOSTNets')
sys.path.append(r'C:\Users\charl\Documents\GitHub\GOST')
import GOSTnet as gn
import importlib
importlib.reload(gn)
import geopandas as gpd
import rasterio as rt
from rasterio import features
from shapely.wkt import loads
import numpy as np
import networkx as nx
from shapely.geometry import box, Point

peartree version: 0.6.0 
networkx version: 2.2 
matplotlib version: 2.2.2 
osmnx version: 0.8.2 


In [98]:
basepth = r'C:\Users\charl\Documents\GOST\Yemen'
pth = r'C:\Users\charl\Documents\GOST\Yemen\graphtool'
net_pth = pth
pckle = r'walk_graph.pickle'
WGS = {'init':'epsg:4326'}
measure_crs = {'init':'epsg:32638'}
subset = r'YEHNP_PHCs_driving_24th_newwalk' 
OD_name = r'output_driving_24th.csv'
srtm_pth = r'C:\Users\charl\Documents\GOST\Yemen\SRTM'
offroad_speed = 4

### Import All-Destination OD

In [99]:
OD = pd.read_csv(os.path.join(pth, OD_name))
OD = OD.rename(columns = {'Unnamed: 0':'O_ID'})
OD = OD.set_index('O_ID')
OD = OD.replace([np.inf, -np.inf], np.nan)
OD_original = OD.copy()

### Optional: Subset to Accepted Nodes

In [100]:

"""
acceptable_df = pd.read_csv(os.path.join(pth, 'HeRAMs_2016v2018_damages and functionality_cleaned_snapped.csv'))
print(acceptable_df.columns)
hosp_types = ['District L Rural\r Hospital','Governorate/General\r Hospital','Hospital','1']
def hospcheck(x, hosp_types):
    if x in hosp_types:
        return 'HOS'
    else:
        return 'PHC'
acceptable_df['hosp'] = acceptable_df['Facility Type2'].apply(lambda x: hospcheck(x, hosp_types))
acceptable_df = acceptable_df.loc[acceptable_df['hosp'] == 'PHC']
function_level = ['1','2',1,2]
acceptable_df = acceptable_df.loc[acceptable_df['Functionality2018'].isin(function_level)]
#service_level = ['1']
#acceptable_df = acceptable_df.loc[acceptable_df['Comprehensive Emergency Obstetric Care (S424)'].isin(service_level)]
"""
acceptable_df = pd.read_csv(os.path.join(pth, 'YEHNP PHCs_unicef_snapped.csv'))


len(acceptable_df)

1003

In [101]:
acceptable_df['geometry'] = acceptable_df['geometry'].apply(loads)
acceptable_gdf = gpd.GeoDataFrame(acceptable_df, geometry = 'geometry', crs = {'init':'epsg:4326'})
accepted_facilities = list(set(list(acceptable_df.NN)))
accepted_facilities_str = [str(i) for i in accepted_facilities]
OD = OD[accepted_facilities_str]
acceptable_df.to_csv(os.path.join(basepth,'output_layers','%s.csv' % subset))

In [102]:
print(OD_original.shape)
print(OD.shape)

(36158, 4114)
(36158, 944)


### Define function to add elevation to a point GeoDataFrame

In [103]:
def add_elevation(df, x, y, srtm_pth):
    # walk all tiles, find path
    
    tiles = []
    for root, folder, files in os.walk(os.path.join(srtm_pth,'high_res')):
        for f in files:
            if f[-3:] == 'hgt':
                tiles.append(f[:-4])

    # load dictionary of tiles
    arrs = {}
    for t in tiles:
        arrs[t] = rt.open(srtm_pth+r'\high_res\{}.hgt\{}.hgt'.format(t, t), 'r')

    # assign a code
    uniques = []
    df['code'] = 'placeholder'
    def tile_code(z):
        E = str(z[x])[:2]
        N = str(z[y])[:2]
        return 'N{}E0{}'.format(N, E)
    df['code'] = df.apply(lambda z: tile_code(z), axis = 1)
    unique_codes = list(set(df['code'].unique()))
    
    z = {}
    # Match on High Precision Elevation
    property_name = 'elevation'
    for code in unique_codes:
        
        df2 = df.copy()
        df2 = df2.loc[df2['code'] == code]
        dataset = arrs[code]
        b = dataset.bounds
        datasetBoundary = box(b[0], b[1], b[2], b[3])
        selKeys = []
        selPts = []
        for index, row in df2.iterrows():
            if Point(row[x], row[y]).intersects(datasetBoundary):
                selPts.append((row[x],row[y]))
                selKeys.append(index)
        raster_values = list(dataset.sample(selPts))
        raster_values = [x[0] for x in raster_values]

        # generate new dictionary of {node ID: raster values}
        z.update(zip(selKeys, raster_values))
        
    elev_df = pd.DataFrame.from_dict(z, orient='index')
    elev_df.columns = ['elevation']
    
    missing = elev_df.copy()
    missing = missing.loc[missing.elevation < 0]
    if len(missing) > 0:
        missing_df = df.copy()
        missing_df = missing_df.loc[missing.index]
        low_res_tifpath = os.path.join(srtm_pth, 'clipped', 'clipped_e20N40.tif')
        dataset = rt.open(low_res_tifpath, 'r')
        b = dataset.bounds
        datasetBoundary = box(b[0], b[1], b[2], b[3])
        selKeys = []
        selPts = []
        for index, row in missing_df.iterrows():
            if Point(row[x], row[y]).intersects(datasetBoundary):
                selPts.append((row[x],row[y]))
                selKeys.append(index)
        raster_values = list(dataset.sample(selPts))
        raster_values = [x[0] for x in raster_values]
        z.update(zip(selKeys, raster_values))

        elev_df = pd.DataFrame.from_dict(z, orient='index')
        elev_df.columns = ['elevation']
    df['point_elev'] = elev_df['elevation']
    df = df.drop('code', axis = 1)
    return df

### Define function to convert distances to walk times

In [104]:
def generate_walktimes(df, start = 'point_elev', end = 'node_elev', dist = 'NN_dist', max_walkspeed = 6, min_speed = 0.1):
    # Tobler's hiking function: https://en.wikipedia.org/wiki/Tobler%27s_hiking_function
    def speed(incline_ratio, max_speed):
        walkspeed = max_speed * np.exp(-3.5 * abs(incline_ratio + 0.05)) 
        return walkspeed

    speeds = {}
    times = {}

    for index, data in df.iterrows():
        if data[dist] > 0:
            delta_elevation = data[end] - data[start]
            incline_ratio = delta_elevation / data[dist]
            speed_kmph = speed(incline_ratio = incline_ratio, max_speed = max_walkspeed)
            speed_kmph = max(speed_kmph, min_speed)
            speeds[index] = (speed_kmph)
            times[index] = (data[dist] / 1000 * 3600 / speed_kmph)

    speed_df = pd.DataFrame.from_dict(speeds, orient = 'index')
    time_df = pd.DataFrame.from_dict(times, orient = 'index')

    df['walkspeed'] = speed_df[0]
    df['walk_time'] = time_df[0]
    
    return df

### Add elevation for destination nodes

In [105]:
dest_df = acceptable_df[['NN','NN_dist','Latitude','Longitude']]
dest_df = add_elevation(dest_df, 'Longitude','Latitude', srtm_pth).set_index('NN')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


### Add elevation from graph nodes (reference)

In [106]:
G = nx.read_gpickle(os.path.join(pth, pckle))
G_node_df = gn.node_gdf_from_graph(G)
G_node_df = add_elevation(G_node_df, 'x', 'y', srtm_pth)
match_node_elevs = G_node_df[['node_ID','point_elev']].set_index('node_ID')

### Match on node elevations for dest_df; calculate travel times to nearest node

In [107]:
dest_df['node_elev'] = match_node_elevs['point_elev']
dest_df = generate_walktimes(dest_df, start = 'node_elev', end = 'point_elev', dist = 'NN_dist', max_walkspeed = offroad_speed)
dest_df = dest_df.sort_values(by = 'walk_time', ascending = False)

### Add Walk Time to all travel times in OD matrix

In [108]:
dest_df = dest_df[['walk_time']]
dest_df.index = dest_df.index.map(str)

d_f = OD.transpose()

for i in d_f.columns:
    dest_df[i] = d_f[i]
    
for i in dest_df.columns:
    if i == 'walk_time':
        pass
    else:
        dest_df[i] = dest_df[i] + dest_df['walk_time']

dest_df = dest_df.drop('walk_time', axis = 1)

dest_df = dest_df.transpose()

dest_df['min_time'] = dest_df.min(axis = 1)

### Prep Grid

In [109]:
# Match on network time from origin node (time travelling along network + walking to destination)
grid_name = r'origins_1km_snapped.csv'
grid = pd.read_csv(os.path.join(pth, grid_name))
grid = grid.rename(columns = {'NN':'O_ID'})
grid = grid.set_index(grid['O_ID'])
grid = grid.rename({'Unnamed: 0':'PointID'}, axis = 1)
grid['on_network_time'] = dest_df['min_time']
grid['geometry'] = grid['geometry'].apply(loads)
grid = grid.set_index('PointID')

In [110]:
# Add origin node distance to network - walking time
grid = add_elevation(grid, 'Longitude','Latitude', srtm_pth)
grid = grid.reset_index()
grid = grid.set_index('O_ID')
grid['node_elev'] = match_node_elevs['point_elev']
grid = grid.set_index('PointID')
grid = generate_walktimes(grid, start = 'point_elev', end = 'node_elev', dist = 'NN_dist', max_walkspeed = offroad_speed)
grid = grid.rename({'node_elev':'nr_node_on_net_elev', 
                    'walkspeed':'walkspeed_to_net', 
                    'walk_time':'walk_time_to_net',
                   'NN_dist':'NN_dist_to_net'}, axis = 1)
grid['total_time_net'] = grid['on_network_time'] + grid['walk_time_to_net']

### Calculate Direct Walking Time (not using road network), vs. network Time

In [111]:
grid_gdf = gpd.GeoDataFrame(grid, crs = WGS, geometry = 'geometry')
grid = gn.pandana_snap_points(grid_gdf, 
                              acceptable_gdf, 
                              source_crs = 'epsg:4326', 
                              target_crs = 'epsg:32638', 
                              add_dist_to_node_col = True)

In [112]:
grid = grid.set_index('NN')
grid['dest_NN_elev'] = match_node_elevs['point_elev']
grid = grid.reset_index()
grid2 = grid.copy()
grid2 = generate_walktimes(grid2, start = 'point_elev', end = 'dest_NN_elev', dist = 'NN_dist', max_walkspeed = offroad_speed).reset_index()
grid = grid2
grid = grid.rename({'walkspeed':'walkspeed_direct', 
                    'walk_time':'walk_time_direct',
                   'NN_dist':'NN_dist_direct'}, axis = 1)
grid['PLOT_TIME_SECS'] = grid[['walk_time_direct','total_time_net']].min(axis = 1)
grid['PLOT_TIME_MINS'] = grid['PLOT_TIME_SECS'] / 60

### Burn Raster

In [113]:
rst_fn = os.path.join(pth,'pop18_resampled.tif')
out_fn = os.path.join(basepth,'output_layers','%s.tif' % subset)

# Update metadata
rst = rt.open(rst_fn, 'r')
meta = rst.meta.copy()
D_type = rt.float64
meta.update(compress='lzw', dtype = D_type, count = 2)

with rt.open(out_fn, 'w', **meta) as out:
    with rt.open(rst_fn, 'r') as pop:
        
        # this is where we create a generator of geom, value pairs to use in rasterizing
        shapes = ((geom,value) for geom, value in zip(grid.geometry, grid.PLOT_TIME_MINS))

        population = pop.read(1).astype(D_type)
        cpy = population.copy()

        travel_times = features.rasterize(shapes=shapes, fill=0, out=cpy, transform=out.transform)

        out.write_band(1, population)
        out.write_band(2, travel_times)

### Generate Zonal Stats

In [114]:
def zonalStats(inShp, inRaster, bandNum=1, mask_A = None, reProj = False, minVal = '', maxVal = '', verbose=False , rastType='N', unqVals=[]):
    import sys, os, inspect, logging, json
    import rasterio, affine

    import pandas as pd
    import geopandas as gpd
    import numpy as np

    from collections import Counter
    from shapely.geometry import box
    from affine import Affine
    from rasterio import features
    from rasterio.mask import mask
    from rasterio.features import rasterize
    from rasterio.warp import reproject, Resampling
    from osgeo import gdal
    
    ''' Run zonal statistics against an input shapefile
    
    INPUT VARIABLES
    inShp [string or geopandas object] - path to input shapefile
    inRaster [string or rasterio object] - path to input raster
    
    OPTIONAL
    bandNum [integer] - band in raster to analyze
    reProj [boolean] -  whether to reproject data to match, if not, raise an error
    minVal [number] - if defined, will only calculation statistics on values above this number
    verbose [boolean] - whether to be loud with responses
    rastType [string N or C] - N is numeric and C is categorical. Categorical returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics, tabulates all these numbers, will report 0 counts
    mask_A [numpy boolean mask] - mask the desired band using an identical shape boolean mask. Useful for doing conditional zonal stats
    
    RETURNS
    array of arrays, one for each feature in inShp
    '''   
    if isinstance(inShp, str):
        inVector = gpd.read_file(inShp) 
    else:
        inVector = inShp
    if isinstance(inRaster, str):
        curRaster = rasterio.open(inRaster, 'r+')
    else:
        curRaster = inRaster
        
    # If mask is not none, apply mask 
    if mask_A is not None:
        
        curRaster.write_mask(np.invert(mask_A))
    
    outputData=[]
    if inVector.crs != curRaster.crs:
        if reProj:
            inVector = inVector.to_crs(curRaster.crs)
        else:
            raise ValueError("Input CRS do not match")
    fCount = 0
    tCount = len(inVector['geometry'])
    #generate bounding box geometry for raster bbox
    b = curRaster.bounds
    rBox = box(b[0], b[1], b[2], b[3])
    for geometry in inVector['geometry']:
        #This test is used in case the geometry extends beyond the edge of the raster
        #   I think it is computationally heavy, but I don't know of an easier way to do it
        if not rBox.contains(geometry):
            geometry = geometry.intersection(rBox)            
        try:
            fCount = fCount + 1
            if fCount % 1000 == 0 and verbose:
                tPrint("Processing %s of %s" % (fCount, tCount) )
            # get pixel coordinates of the geometry's bounding box
            ul = curRaster.index(*geometry.bounds[0:2])
            lr = curRaster.index(*geometry.bounds[2:4])
            '''
            TODO: There is a problem with the indexing - if the shape falls outside the boundaries, it errors
                I want to change it to just grab what it can find, but my brain is wrecked and I cannot figure it out
            print(geometry.bounds)
            print(curRaster.shape)
            print(lr)
            print(ul)
            lr = (max(lr[0], 0), min(lr[1], curRaster.shape[1]))
            ul = (min(ul[0], curRaster.shape[0]), min(ul[1]))
            '''
            # read the subset of the data into a numpy array
            window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1)))
            
            if mask is not None:
                data = curRaster.read(bandNum, window=window, masked = True)
            else:
                data = curRaster.read(bandNum, window=window, masked = False)
            
            # create an affine transform for the subset data
            t = curRaster.transform
            shifted_affine = Affine(t.a, t.b, t.c+ul[1]*t.a, t.d, t.e, t.f+lr[0]*t.e)

            # rasterize the geometry
            mask = rasterize(
                [(geometry, 0)],
                out_shape=data.shape,
                transform=shifted_affine,
                fill=1,
                all_touched=False,
                dtype=np.uint8)

            # create a masked numpy array
            masked_data = np.ma.array(data=data, mask=mask.astype(bool))
            if rastType == 'N':                
                if minVal != '' or maxVal != '':
                    if minVal != '':
                        masked_data = np.ma.masked_where(masked_data < minVal, masked_data)
                    if maxVal != '':
                        masked_data = np.ma.masked_where(masked_data > maxVal, masked_data)                    
                    if masked_data.count() > 0:                        
                        results = [masked_data.sum(), masked_data.min(), masked_data.max(), masked_data.mean()]
                    else :
                        results = [-1, -1, -1, -1]                
                else:
                    results = [masked_data.sum(), masked_data.min(), masked_data.max(), masked_data.mean()]
            if rastType == 'C':
                if len(unqVals) > 0:                          
                    xx = dict(Counter(data.flatten()))
                    results = [xx.get(i, 0) for i in unqVals]                
                else:
                    results = np.unique(masked_data, return_counts=True)                    
            outputData.append(results)
        except Exception as e: 
            print(e)
            outputData.append([-1, -1, -1, -1])            
    return outputData   

In [118]:
subset = r'HeRAMS Hospitals_driving_24th_newwalk'
out_fn = r'C:\Users\charl\Documents\GOST\Yemen\output_layers\%s.tif' % subset

In [119]:
sys.path.append(r'C:\Users\charl\Documents\GitHub\GOST\GOSTRocks')
#from GOSTRocks.rasterMisc import *
### MODIFIED FUNCTION BELOW!!

utils = r'C:\Users\charl\Documents\GOST\Yemen\util_files'

yemen_shp_name = os.path.join(utils, r'Yemen_bound.shp')
yemen_shp = gpd.read_file(yemen_shp_name)
yemen_shp = yemen_shp.to_crs({'init': 'epsg:4326'})

district_shp_name = os.path.join(utils, r'Yemen_adm2.shp')
district_shp = gpd.read_file(district_shp_name)
district_shp = district_shp.to_crs({'init': 'epsg:4326'})

inraster = out_fn
ras = rt.open(inraster, mode = 'r+')
pop = ras.read(1)
tt_matrix = ras.read(2)

resolution = 'district'

if resolution == 'national':
    target_shp = yemen_shp
elif resolution == 'district':
    target_shp = district_shp

## First, add on the total population of the district to each district shape

mask_pop = np.ma.masked_where(pop > (200000), pop).mask

base_pop = zonalStats(target_shp, 
                        inraster, 
                        bandNum = 1,
                        mask_A = mask_pop,
                        reProj = False, 
                        minVal = 0,
                        maxVal = np.inf, 
                        verbose = True, 
                        rastType='N')

cols = ['total_pop','min','max','mean']

temp_df = pd.DataFrame(base_pop, columns = cols)

target_shp['total_pop'] = temp_df['total_pop']
target_shp['total_pop'].loc[target_shp['total_pop'] == -1] = 0

## Now, calculate the population within a range of time thresholds from the destination set
for time_thresh in [30,60,120, 240]:
    
    mask_obj = np.ma.masked_where(tt_matrix > (time_thresh), tt_matrix).mask

    raw = zonalStats(target_shp, 
                        inraster, 
                        bandNum = 1,
                        mask_A = mask_obj,
                        reProj = False, 
                        minVal = 0,
                        maxVal = np.inf, 
                        verbose = True, 
                        rastType='N')

    cols = ['pop_%s' % time_thresh,'min','max','mean']

    temp_df = pd.DataFrame(raw, columns = cols)

    target_shp['pop_%s' % time_thresh] = temp_df['pop_%s' % time_thresh]
    target_shp['pop_%s' % time_thresh].loc[target_shp['pop_%s' % time_thresh] == -1] = 0
    target_shp['frac_%s' % time_thresh] = (target_shp['pop_%s' % time_thresh]) / (target_shp['total_pop']).fillna(0)
    target_shp['frac_%s' % time_thresh].replace([np.inf, -np.inf], 0)
    target_shp['frac_%s' % time_thresh] = target_shp['frac_%s' % time_thresh].fillna(0)
    
# Save to file
                  
if resolution == 'national':
    print(out_fn)
    print(target_shp.frac_30.head(1))
    print(target_shp.frac_60.head(1))
    print(target_shp.frac_120.head(1))
    print(target_shp.frac_240.head(1))
else:
    target_shp['abs_pop_iso'] = target_shp['total_pop'] - target_shp['pop_30']
    target_shp.to_file(os.path.join(basepth, 'output_layers','webmap_batch2','%s_zonal_%s.shp' % (subset, resolution)), driver = 'ESRI Shapefile')