In [1]:
### Script for adding avalanche statistics (buildings, people and km2) to admin polygons
#dissolve across district and province (done in previous script)
#cut by new district and province
#calculate area
#zonal stats on pop
#zonal stats on building centroids
#save to postgres + join back on admin areas

In [2]:
import json
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
import psycopg2 # required for exporting to postgis
import rioxarray as rxr
from rasterio.crs import CRS
from sqlalchemy import create_engine
import rasterstats
from shapely.ops import transform
from datetime import datetime
import numpy as np

### Setting connection and parameters

In [3]:
# Load database configuration from file
with open(r'D:\iMMAP\code\db_config\hsdc_local_db_config.json', 'r') as f:
    config = json.load(f)

# Create database URL with credentials
db_url = f"postgresql://{config['username']}:{config['password']}@{config['host']}:{config['port']}/{config['database']}"

# Connect to the database
con = create_engine(db_url)

In [4]:
pd.set_option('display.max_columns', None)

In [5]:
# Define projection
repro_crs = '+proj=cea'

### Defining functions

In [6]:
# Clipping admin and flood zones
def clip(adm, av):
    av = adm.clip(av)
    return av

In [7]:
# Extracting area
def area(av):
    col_name = 'm2'
    av[col_name] = av['geom'].area #.div(1000000)
    return av

In [8]:
# Extracting buildings
def buildings(flood, build_centroids, group_by_parameter):
    
    build_count_column = 'build'
    
    # Join building centroids to flood polygons
    print('    Joining buildings to flood polygons   Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    joined_df = gpd.sjoin(
        build_centroids,
        flood,
        how='inner',
        predicate='intersects'
    )
    print('    Joining buildings to flood polygons   End:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    # Count number of buildings within admin polygons
    print('    Counting number of buildings          Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    build_count = joined_df.groupby(
        [group_by_parameter],
        as_index=False,
    )['geom'].count() # vuid is arbitrary, we just count the row
    print('    Counting number of buildings          End:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    # Change column name to build_count
    build_count.rename(columns = {'geom': build_count_column}, inplace = True)

    # Merge build count back on to admin dataset
    flood = flood.merge(
        build_count, 
        on=group_by_parameter, 
        how='left')
    
    return flood

In [9]:
# Extracting population
def ZonalStats(shape_gdf, raster, stats):

    zonalSt = rasterstats.zonal_stats(shape_gdf, raster, stats = stats)
    
    df = pd.DataFrame(zonalSt)
    
    df_concat = pd.concat([df, shape_gdf], axis=1)
    
    final_gdf = gpd.GeoDataFrame(df_concat, geometry=df_concat.geom) #wkb_geometry
    
    final_gdf_ordered = final_gdf[[c for c in final_gdf if c not in [stats]] + [stats]]
    
    flood = final_gdf_ordered.rename(columns={"sum": "pop_sum"}) #, inplace=True)
    
    flood = flood.drop(columns=['geometry'])
    
    flood = flood.set_geometry('geom')
    
    return flood

### Defining reference paths and lists

In [10]:
pop_path = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained.tif'
pop_path_cea = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained_projCEA.tif'

admin_list = [
    'afg_admbnda_region',
    'afg_admbnda_adm1',
    'afg_admbnda_adm2'   
]

admin_abbreviations = [
    'region',
    'adm1',
    'adm2'
]

admin_list_groupby = [
    'reg_pcode',
    'adm1_pcode',
    'adm2_pcode'       
]

#av_list = [
#    'afg_avsa_clean_dissolve_testClip'
#]

av_code_list = [
    'High',
    'Moderate'
]

### Running functions

In [11]:
print("Loading buildings. Starting: ", datetime.now().strftime("%H:%M:%S"))
build_centroids = gpd.GeoDataFrame.from_postgis('SELECT * from afg_buildings_microsoft_centroids', con).to_crs(repro_crs) #afg_buildings_microsoft_centroids_testclip #build_testclip2
#build_centroids = gpd.GeoDataFrame.from_postgis('SELECT * from afg_buildings_microsoft_centroids_testclip1_tiny', con).to_crs(repro_crs) #afg_buildings_microsoft_centroids_testclip #build_testclip2
print("Loading buildings. Finished: ", datetime.now().strftime("%H:%M:%S"))

Loading buildings. Starting:  14:53:59
Loading buildings. Finished:  14:57:19


In [12]:
print('\n\n\nALL PROCESSING START', datetime.now().strftime("%H:%M:%S"))

for i in range(len(admin_list)):
    
    print('\n\n\n\nStarting admin loop nr {}: {}'.format(i, admin_list[i]))
    
    adm = gpd.GeoDataFrame.from_postgis('SELECT * from {}'.format(admin_list[i]), con)
    
    adm = adm.to_crs(repro_crs)
    
    group_by_parameter = admin_list_groupby[i]
    
    av_name = 'afg_avsa_clean_dissolve'
    
    
    #for av_level in range(len(av_list)):
        
    print("Current Time =", datetime.now().strftime("%H:%M:%S"))
    
    av = gpd.GeoDataFrame.from_postgis('SELECT * from {}'.format(av_name), con, 'wkb_geometry')
    
    print('Reprojecting - Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    av = av.to_crs(repro_crs) #OBS, if no flood level extent in admin area, then row dissapears from final output
    print('Reprojecting - End:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    print('Clip - Start:         {}'.format(datetime.now().strftime("%H:%M:%S")))
    av = clip(adm, av) #.explode()
    print('Clip - End:           {}'.format(datetime.now().strftime("%H:%M:%S")))
    print('Area - Start:         {}'.format(datetime.now().strftime("%H:%M:%S")))
    av = area(av)
    print('Area - End:           {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    print('Buildings - Start:    {}'.format(datetime.now().strftime("%H:%M:%S")))
    av = buildings(av, build_centroids, group_by_parameter)
    print('Buildings - End:      {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    print('Zonalstats - Start:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    av = ZonalStats(av, pop_path_cea, 'sum')
    print('Zonalstats - End:     {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    print('Merging - Start:      {}'.format(datetime.now().strftime("%H:%M:%S")))
    # Subsetting the table to only the columns we want to join
    av = pd.DataFrame(av[[
            '{}'.format(group_by_parameter),
            'm2',
            'pop_sum',
            'build']]
    )
    
    
    # Summaries across columns
    av = av.groupby(group_by_parameter).sum()
    
    
    
    adm = adm.merge(
        av, 
        on=group_by_parameter, 
        how='left')
    print('Merging - End:        {}'.format(datetime.now().strftime("%H:%M:%S")))
   
    print("Current Time =", datetime.now().strftime("%H:%M:%S"))
    
    
    
    # Convering NaN to 0 across stats columns
    test_m2 = adm['m2'].isnull()
    test_pop = adm['pop_sum'].isnull()
    test_build = adm['build'].isnull()

    if len(test_m2.unique()) == 2:
        adm.loc[adm.m2.isnull(), 'm2'] = 0
    
    if len(test_pop.unique()) == 2:
        adm.loc[adm.pop_sum.isnull(), 'pop_sum'] = 0
    
    if len(test_build.unique()) == 2:
        adm.loc[adm.build.isnull(), 'build'] = 0
        
    
    print(adm)
    
    
    # Save to postgis if all flood levels have been processed
    adm = adm.to_crs('epsg:4326')
    adm.to_postgis('afg_admbnda_{}_av_stats'.format(admin_abbreviations[i]), con, if_exists='replace')
    print('\n\nFINISHED WITH  -  afg_admbnda_{}_av_stats  - {}'
                  .format(admin_abbreviations[i], datetime.now().strftime("%H:%M:%S")))
    
print('\n\n\nALL PROCESSING END', datetime.now().strftime("%H:%M:%S"))




ALL PROCESSING START 14:57:19




Starting admin loop nr 0: afg_admbnda_region
Current Time = 14:57:19
Reprojecting - Start: 14:58:41
Reprojecting - End:   14:58:51
Clip - Start:         14:58:51
Clip - End:           15:42:45
Area - Start:         15:42:45
Area - End:           15:42:45
Buildings - Start:    15:42:45
    Joining buildings to flood polygons   Start: 15:42:45
    Joining buildings to flood polygons   End:   17:51:59
    Counting number of buildings          Start: 17:51:59
    Counting number of buildings          End:   17:51:59
Buildings - End:      17:51:59
Zonalstats - Start:   17:51:59
Zonalstats - End:     17:53:56
Merging - Start:      17:53:56
Merging - End:        17:54:00
Current Time = 17:54:00
   id                                               geom            reg_en  \
0   1  MULTIPOLYGON (((7617702.961 3735993.537, 76174...          Northern   
1   2  MULTIPOLYGON (((7304671.677 3483929.798, 73045...  Central Highland   
2   3  MULTIPOLYGON (((7588786.3



FINISHED WITH  -  afg_admbnda_adm1_av_stats  - 19:05:25




Starting admin loop nr 2: afg_admbnda_adm2
Current Time = 19:05:25
Reprojecting - Start: 19:06:45
Reprojecting - End:   19:06:54
Clip - Start:         19:06:54
Clip - End:           19:58:00
Area - Start:         19:58:00
Area - End:           19:58:00
Buildings - Start:    19:58:00
    Joining buildings to flood polygons   Start: 19:58:00
    Joining buildings to flood polygons   End:   20:00:56
    Counting number of buildings          Start: 20:00:56
    Counting number of buildings          End:   20:00:56
Buildings - End:      20:00:56
Zonalstats - Start:   20:00:56
Zonalstats - End:     20:03:18
Merging - Start:      20:03:18
Merging - End:        20:03:23
Current Time = 20:03:23
      id                                               geom  shape_leng  \
0      1  MULTIPOLYGON (((7867133.864 3802368.500, 78671...    0.803005   
1      2  MULTIPOLYGON (((7616915.466 3779589.826, 76168...    3.296682   
2      3  MULTIPOL

Unnamed: 0,ogc_fid,avalanche_,avalanch_1,avalanch_2,avalanch_3,avalanch_4,basin_id,source,sum_area_s,avalanch_5,area_build,shape_leng,shape_area,basinmembe,_sum,area,geometry
0,25500.0,High,84703.0,Chute and run-out area,868769.0,2274.0,2.023305e+09,E. Hagen. iMMAP 2015,543196.0,,,0.066591,5.271763e-05,3793.0,18.557033,542926,"POLYGON ((66.53948 33.56908, 66.53959 33.56899..."
1,192764.0,Moderate,81374.0,Chute and run-out area,71357.0,476.0,2.023305e+09,E. Hagen. iMMAP 2015,25.0,,,0.000235,2.472588e-09,3790.0,0.000773,25,"POLYGON ((66.36637 33.50999, 66.36630 33.51003..."
2,193333.0,Moderate,81642.0,Chute and run-out area,67429.0,358.0,2.023305e+09,E. Hagen. iMMAP 2015,22.0,,,0.000229,2.154863e-09,3790.0,0.001321,22,"POLYGON ((66.36778 33.51321, 66.36778 33.51322..."
3,193911.0,Moderate,81940.0,Chute and run-out area,245715.0,954.0,2.023305e+09,E. Hagen. iMMAP 2015,928.0,,,0.001652,9.002248e-08,3790.0,0.028118,928,"POLYGON ((66.37829 33.52253, 66.37830 33.52265..."
4,194475.0,Moderate,82221.0,Chute and run-out area,57366.0,556.0,2.023305e+09,E. Hagen. iMMAP 2015,237.0,,,0.001004,2.296441e-08,3901.0,0.004862,237,"POLYGON ((66.77621 33.53051, 66.77617 33.53051..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6699,249488.0,Moderate,111647.0,Trigger area,,493.0,2.023305e+09,E. Hagen. iMMAP 2015,24007.0,,,0.007019,2.340339e-06,3787.0,1.421876,23993,"POLYGON ((66.44671 33.96202, 66.44677 33.96200..."
6700,249503.0,Moderate,111656.0,Chute and run-out area,127759.0,1060.0,2.023305e+09,E. Hagen. iMMAP 2015,110599.0,,,0.022930,1.078099e-05,3787.0,5.704434,110535,"POLYGON ((66.44336 33.95863, 66.44337 33.95859..."
6701,249504.0,Moderate,111656.0,Trigger area,,1060.0,2.023305e+09,E. Hagen. iMMAP 2015,17396.0,,,0.007035,1.695786e-06,3787.0,0.713512,17386,"POLYGON ((66.44336 33.95863, 66.44335 33.95863..."
6702,249577.0,Moderate,111696.0,Trigger area,,414.0,2.023305e+09,E. Hagen. iMMAP 2015,2018.0,,,0.001858,1.967375e-07,3787.0,0.100846,2017,"POLYGON ((66.45015 33.96018, 66.45029 33.96006..."


11384.583295929046

240861330