In [185]:
import sys, os, importlib, requests, json, traceback
from datetime import datetime
from pathlib import Path
from shapely.geometry import box
import rasterio
from rasterstats import zonal_stats
import GOSTRocks.rasterMisc as rMisc

import random
import pandas as pd
import numpy as np
import geopandas as gpd

import warnings
# NOTE(review): this silences every warning category for the whole kernel session,
# including pandas/geopandas diagnostics raised by the cells below.
warnings.simplefilter('ignore')

# Setup

## Data directories

In [120]:
# Kernel working directory and its local "data" folder.
# NOTE(review): the input paths below are written relative to the parent
# directory ("../data") and do not use DATA_DIR.
CWD = Path.cwd()
DATA_DIR = CWD / 'data'

# Vector inputs: selected health zones, their building counts, OSM building footprints.
zones_geojson = "../data/AOIs/DRC/health_zones_selected.geojson"
bld_cnts = "../data/AOIs/DRC/health_zone_building_counts.csv"
osm_bld = "../data/AOIs/DRC/gis_osm_buildings_a_free_1.shp"

# Raster inputs: population grid and settlement-class grid.
pop = "../data/AOIs/DRC/GHS_POP_wgs84.tif"
urb_classes_file = "../data/AOIs/DRC/GHS_SMOD_wgs84.tif"

In [104]:
# Building counts per health zone (plain table) and the zone polygons (GeoDataFrame).
df_bld_cnts = pd.read_csv(bld_cnts)
df_zones = gpd.read_file(zones_geojson)

In [17]:
# Attach the 'gBuildings' count to every zone that appears in both tables.
# indicator=True adds a '_merge' column to the result.
dfz = df_zones.merge(
    df_bld_cnts[['Health_Zone', 'gBuildings']],
    how='inner',
    on='Health_Zone',
    indicator=True,
)

# Get population of each zone

In [43]:
# Open the population raster only for the duration of the zonal statistics so
# the file handle is released afterwards (the original never closed it).
# `src` stays bound after the block but refers to a closed dataset.
with rasterio.open(pop) as src:
    res = rMisc.zonalStats(inShp=dfz, inRaster=src)

In [44]:
# Keep the first statistic returned for each zone as its population
# (presumably the pixel sum — confirm against rMisc.zonalStats).
dfz['pop'] = [zone_stats[0] for zone_stats in res]

# Get urbanization classes

In [106]:
# Raster class codes tallied per zone
# (presumably the GHS-SMOD settlement classes — confirm against the dataset documentation).
urb_classes = [10, 11, 12, 13, 21, 22, 23, 30]

# Categorical zonal statistics: pixel count per class value inside each zone.
# geojson_out=True returns the zone features with the counts added to their properties.
urb_stats = zonal_stats(
    zones_geojson,
    urb_classes_file,
    categorical=True,
    geojson_out=True,
)

In [110]:
# One summary record per zone: its three most frequent classes and the share of class 11.
urb_res = []

for feature in urb_stats:
    props = feature['properties']
    zone_name = props['Health_Zone']

    # Pixel count per class; classes absent from the zone count as 0.
    counts = {cls: props.get(cls, 0) for cls in urb_classes}
    total_pixels = np.sum(list(counts.values()))

    # Classes ordered from most to fewest pixels (ties keep the urb_classes order).
    ranked = sorted(counts, key=counts.get, reverse=True)

    urb_res.append({
        'Health_Zone': zone_name,
        'first_urb': ranked[0],
        'second_urb': ranked[1],
        'third_urb': ranked[2],
        'prop11': counts.get(11) / total_pixels,
    })

In [111]:
# Tabulate the per-zone urbanization records (one row per health zone).
df = pd.DataFrame(data=urb_res)

In [115]:
# Remove the merge indicator left by the building-count merge.
# errors='ignore' keeps this cell re-runnable once the column is already gone.
dfz = dfz.drop(columns=['_merge'], errors='ignore')

# Attach the urbanization summary to each zone. No indicator column is requested:
# with an inner merge it would only ever read 'both', and a leftover '_merge'
# column makes any later merge(..., indicator=True) on df_zones raise ValueError.
df_zones = dfz.merge(df, on='Health_Zone', how='inner')

In [None]:
# Zones listed by increasing share of class-11 pixels.
summary_cols = ['Health_Zone', 'gBuildings', 'pop', 'prop11']
df_zones[summary_cols].sort_values(by='prop11', ascending=True)

# Get OSM buildings count

In [128]:
# OSM building footprints.
dfb = gpd.read_file(osm_bld)

# Left spatial join: one row per (zone, building the zone contains).
# A zone that contains no building still yields a single row with empty right-hand columns.
df_zones_bld = gpd.sjoin(
    df_zones,
    dfb,
    how='left',
    predicate='contains',
)

In [140]:
# Number of OSM buildings contained in each zone.
# The join above is a LEFT join, so a zone without any building still has one
# row whose 'index_right' is NaN. Counting non-null 'index_right' values gives
# 0 for such zones, whereas groupby().size() would report 1.
df_zones_bld_grp = (
    df_zones_bld.groupby('Health_Zone')['index_right']
    .count()
    .reset_index(name='osmBuildings')
)

In [162]:
# df_zones may still carry a '_merge' column from an earlier indicator merge;
# pandas refuses indicator=True when that column already exists, so drop it
# first (on a copy, leaving df_zones itself untouched).
df_zones2 = df_zones.drop(columns=['_merge'], errors='ignore').merge(
    df_zones_bld_grp, on='Health_Zone', how='inner', indicator=True
)

In [163]:
# The merge indicator is no longer needed once the OSM counts are attached.
df_zones2 = df_zones2.drop(columns='_merge')

In [164]:
# Buildings still to capture = reference building count minus buildings already in OSM.
# Column arithmetic replaces the row-wise apply: same values, computed in one vectorized step.
df_zones2['bldToCapture'] = df_zones2['gBuildings'] - df_zones2['osmBuildings']

# Same gap expressed as a share of the reference count
# (a zone with gBuildings == 0 yields inf/NaN here).
df_zones2['bldToCaptureProp'] = df_zones2['bldToCapture'] / df_zones2['gBuildings']

In [166]:
# The five zones with the most buildings still to capture (largest value last).
report_cols = ['Health_Zone', 'gBuildings', 'pop', 'prop11',
               'osmBuildings', 'bldToCapture', 'bldToCaptureProp']
df_zones2[report_cols].sort_values(by='bldToCapture', ascending=True).tail(5)

Unnamed: 0,Health_Zone,gBuildings,pop,prop11,osmBuildings,bldToCapture,bldToCaptureProp
13,Luambo,58899,244753.405219,0.93789,11,58888,0.999813
18,Mikalayi,63997,34393.212113,0.95391,53,63944,0.999172
8,Kamonia,64108,202753.278927,0.925674,48,64060,0.999251
4,Demba,72922,215078.384645,0.94001,5843,67079,0.919873
9,Kamwesha,82191,528143.18468,0.886568,22,82169,0.999732


# Add scores

In [167]:
# Rank the zones on three criteria; on every criterion rank 1 is the highest priority.
# A stable sort (mergesort) keeps tied zones in their incoming order, so the
# ranks are reproducible from run to run.

# Buildings: rank 1 goes to the zone with the largest proportion of buildings still to capture.
df_bld_score = df_zones2.sort_values(by='bldToCaptureProp', ascending=False, kind='mergesort')
df_bld_score['bld_score'] = np.arange(1, len(df_bld_score) + 1)

# Population: rank 1 goes to the zone with the largest population.
df_pop_score = df_bld_score.sort_values(by='pop', ascending=False, kind='mergesort')
df_pop_score['pop_score'] = np.arange(1, len(df_pop_score) + 1)

# Urbanization: rank 1 goes to the zone with the smallest share of class-11 pixels (prop11).
df_urb_score = df_pop_score.sort_values(by='prop11', ascending=True, kind='mergesort')
df_urb_score['urb_score'] = np.arange(1, len(df_urb_score) + 1)

# Overall score: the sum of the three ranks. Because rank 1 means highest priority,
# the zones to focus on are those with the LOWEST aggregate score (sort ascending).
df_urb_score['agg_score'] = df_urb_score['bld_score'] + df_urb_score['pop_score'] + df_urb_score['urb_score']

# Remove zones which are being worked on now and sort by aggregate score

In [178]:
# Remove zones which are being worked on now.
exclude = ['Demba', 'Mutoto', 'Bena Tshiadi', 'Yangala', 'Mutena']

# Keep every scored zone whose name is not in `exclude`.
# (The original referenced an undefined name, `exclud_zones`, and raised NameError.)
# .copy() gives an independent frame, so adding columns to it later does not
# operate on a slice of df_urb_score.
df_scored_zones = df_urb_score[~df_urb_score['Health_Zone'].isin(exclude)].copy()

In [180]:
# Order the remaining zones from lowest to highest aggregate score.
df_scored_zones = df_scored_zones.sort_values(by='agg_score', ascending=True)

In [183]:
# Running total of buildings to capture, accumulated in the current row order of df_scored_zones.
df_scored_zones['cumBldToCapture'] = df_scored_zones['bldToCapture'].cumsum()

In [186]:
# Save the ranked zones twice: as a shapefile (with geometry) and as a plain CSV table.
# NOTE(review): shapefile field names are limited to 10 characters, so longer
# column names (e.g. 'bldToCapture', 'bldToCaptureProp', 'cumBldToCapture')
# will be shortened by the driver — confirm the written schema.
out_shp = "../data/AOIs/DRC/zone_ranks.shp"
df_scored_zones.to_file(out_shp, driver='ESRI Shapefile')

out_csv = "../data/AOIs/DRC/zone_ranks.csv"
# Drop the geometry on a copy rather than in place: df_scored_zones keeps its
# geometry, so this cell can be re-run without to_file failing.
df_scored_zones.drop(columns=['geometry']).to_csv(out_csv, index=False)

In [None]:
def min_above_threshold(x):
    """Return the minimum of the elements of ``x`` above ``NTL_VAL_THRESHOLD``.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The smallest element strictly greater than the module-level
    ``NTL_VAL_THRESHOLD``, or NaN when no element qualifies or ``x`` cannot
    be compared/indexed this way.

    Notes
    -----
    Only data-related errors are converted to NaN. A missing
    ``NTL_VAL_THRESHOLD`` now surfaces as NameError instead of silently
    producing NaN for every input.
    """
    try:
        return np.min(x[x > NTL_VAL_THRESHOLD])
    except (TypeError, ValueError, IndexError):
        # np.min raises ValueError on an empty selection; TypeError/IndexError
        # cover inputs that cannot be compared or masked.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def mean_above_threshold(x):
    """Return the mean of the elements of ``x`` above ``NTL_VAL_THRESHOLD``.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The mean of the elements strictly greater than the module-level
    ``NTL_VAL_THRESHOLD``, or NaN when ``x`` cannot be compared/indexed this
    way. For a plain NumPy array, an empty selection also yields NaN
    (np.mean of an empty array).

    Notes
    -----
    Only data-related errors are converted to NaN. A missing
    ``NTL_VAL_THRESHOLD`` now surfaces as NameError instead of silently
    producing NaN for every input.
    """
    try:
        return np.mean(x[x > NTL_VAL_THRESHOLD])
    except (TypeError, ValueError, IndexError):
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def median_above_threshold(x):
    """Return the median of the elements of ``x`` above ``NTL_VAL_THRESHOLD``.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The median of the elements strictly greater than the module-level
    ``NTL_VAL_THRESHOLD``, or NaN when ``x`` cannot be compared/indexed this
    way. For a plain NumPy array, an empty selection also yields NaN
    (np.median of an empty array).

    Notes
    -----
    Only data-related errors are converted to NaN. A missing
    ``NTL_VAL_THRESHOLD`` now surfaces as NameError instead of silently
    producing NaN for every input.
    """
    try:
        return np.median(x[x > NTL_VAL_THRESHOLD])
    except (TypeError, ValueError, IndexError):
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def sum_above_threshold(x):
    """Return the sum of the elements of ``x`` above ``NTL_VAL_THRESHOLD``.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The sum of the elements strictly greater than the module-level
    ``NTL_VAL_THRESHOLD``, or NaN when ``x`` cannot be compared/indexed this
    way. For a plain NumPy array, an empty selection sums to 0.

    Notes
    -----
    Only data-related errors are converted to NaN. A missing
    ``NTL_VAL_THRESHOLD`` now surfaces as NameError instead of silently
    producing NaN for every input.
    """
    try:
        return np.sum(x[x > NTL_VAL_THRESHOLD])
    except (TypeError, ValueError, IndexError):
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def sum25(x):
    """Return the sum of the elements of ``x`` strictly greater than 0.25.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The sum of the qualifying elements (0 for a NumPy array with none), or
    NaN when ``x`` cannot be compared/indexed this way (e.g. a plain list
    or None).
    """
    try:
        return np.sum(x[x > 0.25])
    except (TypeError, ValueError, IndexError):
        # Best-effort fallback for inputs that cannot be masked.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def sum5(x):
    """Return the sum of the elements of ``x`` strictly greater than 0.5.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The sum of the qualifying elements (0 for a NumPy array with none), or
    NaN when ``x`` cannot be compared/indexed this way (e.g. a plain list
    or None).
    """
    try:
        return np.sum(x[x > 0.5])
    except (TypeError, ValueError, IndexError):
        # Best-effort fallback for inputs that cannot be masked.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def sum75(x):
    """Return the sum of the elements of ``x`` strictly greater than 0.75.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The sum of the qualifying elements (0 for a NumPy array with none), or
    NaN when ``x`` cannot be compared/indexed this way (e.g. a plain list
    or None).
    """
    try:
        return np.sum(x[x > 0.75])
    except (TypeError, ValueError, IndexError):
        # Best-effort fallback for inputs that cannot be masked.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan

In [None]:
def sum1(x):
    """Return the sum of the elements of ``x`` strictly greater than 1.

    Parameters
    ----------
    x : array supporting element-wise comparison and boolean-mask indexing
        (e.g. a NumPy array).

    Returns
    -------
    The sum of the qualifying elements (0 for a NumPy array with none), or
    NaN when ``x`` cannot be compared/indexed this way (e.g. a plain list
    or None).
    """
    try:
        return np.sum(x[x > 1])
    except (TypeError, ValueError, IndexError):
        # Best-effort fallback for inputs that cannot be masked.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan