In [1]:
### Script for adding flood statistics (buildings, people and km2) to admin polygons

##Steps:
#dissolve flood data by district and province
#cut by new district and province
#calculate area
#zonal stats on buildings centroids
#zonal stats on population
#joining stats back to admin areas
#save to db

#Estimated run time: 48h

In [2]:
import os
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
import psycopg2 # required for exporting to postgis
import rioxarray as rxr
from rasterio.crs import CRS
from sqlalchemy import create_engine
import rasterstats
from shapely.ops import transform
from datetime import datetime

### Setting connection and parameters

In [3]:
# Read the PostgreSQL password from the environment (PGPASSWORD, the variable libpq
# itself uses) so the secret is never saved inside the notebook. When the variable
# is unset this falls back to the empty string that was previously hard-coded here.
password = os.environ.get('PGPASSWORD', '')
db_connection_url = "postgresql://postgres:{}@localhost:5432/HSDC".format(password)
# SQLAlchemy engine shared by every from_postgis / to_postgis call below.
con = create_engine(db_connection_url)

In [4]:
# Show every column when a frame is printed (no "..." truncation of wide tables).
pd.options.display.max_columns = None

In [5]:
# Define projection
# '+proj=cea' is the PROJ definition of the cylindrical equal-area projection.
# Every layer is reprojected to it before any measurement; area() divides the
# geometry area by 1,000,000 to label it km2, which presumably relies on this
# CRS being in metres -- confirm if the projection string is ever changed.
repro_crs = '+proj=cea'

### Defining functions

In [7]:
# Clipping admin and flood zones
def clip(adm, flood):
    """Cut the admin polygons down to the flooded extent.

    Calls ``adm.clip(flood)``, i.e. `flood` is used as the mask and the
    returned object is whatever `adm.clip` produces (the admin rows with
    their geometry restricted to the mask).
    """
    return adm.clip(flood)

In [8]:
# Extracting area
def area(flood, flood_level_code):
    """Add a ``km2_<flood_level_code>`` column holding each row's area.

    The area of the 'geom' column is divided by 1,000,000 (square metres to
    square kilometres, assuming a metre-based CRS). The column is written onto
    `flood` itself, and that same object is returned.
    """
    area_km2 = flood['geom'].area / 1_000_000
    flood[f'km2_{flood_level_code}'] = area_km2
    return flood

In [9]:
# Extracting buildings
def buildings(flood, build_centroids, group_by_parameter, flood_level_code=None):
    """Count the building centroids that fall inside the flood polygons.

    Parameters
    ----------
    flood : GeoDataFrame
        Flood polygons (already clipped to the admin units). Must contain the
        column named by `group_by_parameter`.
    build_centroids : GeoDataFrame
        Building centroid points. The count is taken on its 'geom' column, so
        that column must exist.
    group_by_parameter : str
        Column identifying the admin unit (e.g. a pcode column); counts are
        aggregated per value of this column.
    flood_level_code : str, optional
        Suffix for the output column (``build_<code>``). When omitted, the
        function falls back to the notebook globals
        ``flood_code_list[flood_level]`` so existing three-argument calls keep
        working.

    Returns
    -------
    GeoDataFrame
        `flood` left-merged with a ``build_<code>`` column. Rows whose admin
        unit has no intersecting building get NaN in that column.
    """
    if flood_level_code is None:
        # Backward-compatible fallback: the run loop sets these globals.
        flood_level_code = flood_code_list[flood_level]

    build_count_column = 'build_{}'.format(flood_level_code)

    # Join building centroids to flood polygons
    print('    Joining buildings to flood polygons   Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    joined_df = gpd.sjoin(
        build_centroids,
        flood,
        how='inner',
        predicate='intersects'
    )
    print('    Joining buildings to flood polygons   End:   {}'.format(datetime.now().strftime("%H:%M:%S")))

    # Count number of buildings within admin polygons.
    # The 'geom' column is an arbitrary choice: count() just tallies the
    # joined rows of each group (rows with a non-null value in that column).
    print('    Counting number of buildings          Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    build_count = joined_df.groupby(
        [group_by_parameter],
        as_index=False,
    )['geom'].count()
    print('    Counting number of buildings          End:   {}'.format(datetime.now().strftime("%H:%M:%S")))

    # Give the count column its flood-level specific name
    build_count = build_count.rename(columns={'geom': build_count_column})

    # Merge build count back on to the flood/admin rows.
    # NOTE(review): if `flood` holds several rows with the same
    # `group_by_parameter` value, each of them receives the full group count.
    flood = flood.merge(
        build_count,
        on=group_by_parameter,
        how='left')

    return flood

In [10]:
# Extracting population
def ZonalStats(shape_gdf, raster, stats, flood_level_code=None):
    """Attach raster zonal statistics (population) to each polygon.

    Parameters
    ----------
    shape_gdf : GeoDataFrame
        Polygons to summarise; must have a geometry column named 'geom'.
    raster : str
        Path to the raster handed to ``rasterstats.zonal_stats``.
        NOTE(review): presumably must be in the same CRS as `shape_gdf` --
        the run loop passes the CEA-projected population raster.
    stats : str
        Name of a single statistic (the run loop passes ``'sum'``). It is used
        both as the rasterstats request and as the result column name.
    flood_level_code : str, optional
        Suffix for the renamed column (``pop_sum_<code>``). When omitted, the
        function falls back to the notebook globals
        ``flood_code_list[flood_level]`` so existing three-argument calls keep
        working.

    Returns
    -------
    GeoDataFrame
        The columns of `shape_gdf` followed by the statistic column, with
        'geom' as the active geometry. A column called 'sum' is renamed to
        ``pop_sum_<code>``; any other statistic keeps its own name.
    """
    if flood_level_code is None:
        # Backward-compatible fallback: the run loop sets these globals.
        flood_level_code = flood_code_list[flood_level]

    zonalSt = rasterstats.zonal_stats(shape_gdf, raster, stats=stats)

    # zonal_stats returns one dict per input feature, in input order. Give the
    # stats frame the same index as the polygons so the column-wise concat
    # below pairs each statistic with its own polygon even when `shape_gdf`
    # does not carry a default 0..n-1 index (e.g. straight after a clip).
    df = pd.DataFrame(zonalSt, index=shape_gdf.index)

    df_concat = pd.concat([df, shape_gdf], axis=1)

    # Rebuild a GeoDataFrame from the concatenated frame; this adds a
    # 'geometry' column copied from 'geom', which is removed again below.
    final_gdf = gpd.GeoDataFrame(df_concat, geometry=df_concat.geom)

    # Move the statistic column to the end
    final_gdf_ordered = final_gdf[[c for c in final_gdf if c not in [stats]] + [stats]]

    flood = final_gdf_ordered.rename(columns={"sum": "pop_sum_{}".format(flood_level_code)})

    # Drop the duplicate geometry column and make 'geom' the active one again
    flood = flood.drop(columns=['geometry'])

    flood = flood.set_geometry('geom')

    return flood

### Defining reference paths and lists

In [11]:
# Population rasters (WorldPop 2020, UN-adjusted, unconstrained, per the file names).
# Only the second path is passed to ZonalStats() in the run loop below.
pop_path = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained.tif'
pop_path_cea = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained_projCEA.tif'

# The next three lists are parallel: entry i of each describes the same admin
# level, and the run loop indexes all of them with the same counter.
# PostGIS tables holding the admin boundaries.
admin_list = [
    'afg_admbnda_region',
    'afg_admbnda_adm1',
    'afg_admbnda_adm2'   
]

# Short names inserted into the output table name for each admin level.
admin_abbreviations = [
    'region',
    'adm1',
    'adm2'
]

# Column in each admin table that the statistics are grouped and merged on.
admin_list_groupby = [
    'reg_pcode',
    'adm1_pcode',
    'adm2_pcode'       
]

# The next two lists are parallel as well: entry i of flood_code_list is the
# column suffix used for the flood table at entry i of flood_list.
# PostGIS tables holding the flood extents, one per flood level.
flood_list = [
    'afg_fldzonea_100k_ncia_v2_029cm_dissolve_all_v2',
    'afg_fldzonea_100k_ncia_v2_121cm_dissolve_all_v2',
    'afg_fldzonea_100k_ncia_v2_271cm_dissolve_all_v2'
]

# Suffixes for the km2_ / build_ / pop_sum_ output columns.
flood_code_list = [
    '029cm',
    '121cm',
    '271cm'
]

### Running functions

In [12]:
# Load all building centroids once (they are reused for every admin/flood
# combination) and reproject them to the working CRS.
# Alternative tables noted by the author, presumably smaller test subsets:
# afg_buildings_microsoft_centroids_testclip, build_testclip2
print("Loading buildings. Starting: ", datetime.now().strftime("%H:%M:%S"))
build_centroids = gpd.GeoDataFrame.from_postgis(
    'SELECT * from afg_buildings_microsoft_centroids',
    con,
)
build_centroids = build_centroids.to_crs(repro_crs)
print("Loading buildings. Finished: ", datetime.now().strftime("%H:%M:%S"))

Loading buildings. Starting:  12:47:25
Loading buildings. Finished:  12:50:50


In [13]:
def _timestamp():
    """Return the current wall-clock time as HH:MM:SS for the progress log."""
    return datetime.now().strftime("%H:%M:%S")


# The admin lists and the flood lists are indexed in parallel, so they must line up.
assert len(admin_list) == len(admin_abbreviations) == len(admin_list_groupby)
assert len(flood_list) == len(flood_code_list)

print('\n\n\nALL PROCESSING START', _timestamp())

# NOTE: admin_level, flood_level and group_by_parameter are deliberately kept as
# module-level loop variables -- buildings() and ZonalStats() look up
# flood_code_list[flood_level] from the notebook globals.
for admin_level, admin_table in enumerate(admin_list):

    print('\n\n\n\nStarting admin loop nr {}: {}'.format(admin_level, admin_table))

    group_by_parameter = admin_list_groupby[admin_level]

    # Table the finished statistics for this admin level are written to.
    out_table = 'afg_admbnda_{}_flood_stats_fullsize_v02'.format(admin_abbreviations[admin_level])

    # Admin polygons; the flood statistics of every flood level are merged onto this frame.
    adm = gpd.GeoDataFrame.from_postgis('SELECT * from {}'.format(admin_table), con)
    adm = adm.to_crs(repro_crs)

    for flood_level, flood_table in enumerate(flood_list):

        flood_code = flood_code_list[flood_level]
        km2_column = 'km2_{}'.format(flood_code)
        pop_column = 'pop_sum_{}'.format(flood_code)
        build_column = 'build_{}'.format(flood_code)

        print('\n\nStarting flood loop nr {}'.format(flood_table))
        print("Current Time =", _timestamp())

        flood = gpd.GeoDataFrame.from_postgis(
            'SELECT * from {}'.format(flood_table), con, geom_col='wkb_geometry')

        print('Reprojecting - Start: {}'.format(_timestamp()))
        flood = flood.to_crs(repro_crs)
        print('Reprojecting - End:   {}'.format(_timestamp()))

        # OBS: if an admin area has no flood extent at this level, its row
        # disappears from the clipped frame (it gets NaN after the left merge below).
        print('Clip - Start:         {}'.format(_timestamp()))
        flood = clip(adm, flood)
        print('Clip - End:           {}'.format(_timestamp()))

        print('Area - Start:         {}'.format(_timestamp()))
        flood = area(flood, flood_code)
        print('Area - End:           {}'.format(_timestamp()))

        print('Buildings - Start:    {}'.format(_timestamp()))
        flood = buildings(flood, build_centroids, group_by_parameter)
        print('Buildings - End:      {}'.format(_timestamp()))

        print('Zonalstats - Start:   {}'.format(_timestamp()))
        flood = ZonalStats(flood, pop_path_cea, 'sum')
        print('Zonalstats - End:     {}'.format(_timestamp()))

        print('Merging - Start:      {}'.format(_timestamp()))
        # Subsetting the table to only the columns we want to join
        flood = pd.DataFrame(flood[[group_by_parameter, km2_column, pop_column, build_column]])

        # One row per admin unit: sum the statistics of all its flood pieces
        flood = flood.groupby(group_by_parameter).sum()

        # Left merge keeps every admin unit, including those without flooding
        adm = adm.merge(
            flood,
            on=group_by_parameter,
            how='left')
        print('Merging - End:        {}'.format(_timestamp()))

        print(adm)

        print('Finished flood loop nr {}'.format(flood_table))
        print("Current Time =", _timestamp())

        # Save to postgis if all flood levels have been processed
        if flood_level == len(flood_list) - 1:
            adm = adm.to_crs('epsg:4326')
            adm.to_postgis(out_table, con, if_exists='replace')
            # Report the table that was actually written
            print('\n\nFINISHED WITH  -  {}  - {}'.format(out_table, _timestamp()))

print('\n\n\nALL PROCESSING END', _timestamp())




ALL PROCESSING START 12:50:50




Starting admin loop nr 0: afg_admbnda_region


Starting flood loop nr afg_fldzonea_100k_ncia_v2_029cm_dissolve_all_v2
Current Time = 12:50:50
Reprojecting - Start: 12:51:17
Reprojecting - End:   12:51:19
Clip - Start:         12:51:19
Clip - End:           14:33:28
Area - Start:         14:33:28
Area - End:           14:33:28
Buildings - Start:    14:33:28
    Joining buildings to flood polygons   Start: 14:33:28
    Joining buildings to flood polygons   End:   01:10:20
    Counting number of buildings          Start: 01:10:20
    Counting number of buildings          End:   01:10:20
Buildings - End:      01:10:20
Zonalstats - Start:   01:10:20
Zonalstats - End:     01:18:54
Merging - Start:      01:18:54
Merging - End:        01:18:54
   id                                               geom            reg_en  \
0   1  MULTIPOLYGON (((7617702.961 3735993.537, 76174...          Northern   
1   2  MULTIPOLYGON (((7304671.677 3483929.798, 73045...  Cen

   id                                               geom            reg_en  \
0   1  MULTIPOLYGON (((7617702.961 3735993.537, 76174...          Northern   
1   2  MULTIPOLYGON (((7304671.677 3483929.798, 73045...  Central Highland   
2   3  MULTIPOLYGON (((7588786.360 3345978.293, 75886...     South Eastern   
3   4  MULTIPOLYGON (((7523765.643 3314826.127, 75237...          Southern   
4   5  MULTIPOLYGON (((7792666.678 3551912.629, 77925...           Eastern   
5   6  MULTIPOLYGON (((7488668.638 3545395.217, 74884...           Capital   
6   7  MULTIPOLYGON (((7925357.856 3733126.513, 79252...     North Eastern   
7   8  MULTIPOLYGON (((6859234.301 3303643.897, 68575...           Western   

            reg_da reg_pcode reg_ref regalt1en regalt2en regalt1da regalt2da  \
0       سهیلی حوزه        NR    None      None      None      None      None   
1  لوړه مرکزی حوزه        CH    None      None      None      None      None   
2  جنوب ختیځه حوزه        SE    None      None      None 

Reprojecting - Start: 01:05:06
Reprojecting - End:   01:05:09
Clip - Start:         01:05:09
Clip - End:           02:05:16
Area - Start:         02:05:16
Area - End:           02:05:16
Buildings - Start:    02:05:16
    Joining buildings to flood polygons   Start: 02:05:16
    Joining buildings to flood polygons   End:   05:59:32
    Counting number of buildings          Start: 05:59:32
    Counting number of buildings          End:   05:59:32
Buildings - End:      05:59:32
Zonalstats - Start:   05:59:32
Zonalstats - End:     06:02:24
Merging - Start:      06:02:24
Merging - End:        06:02:24
    id                                               geom  shape_leng  \
0    1  MULTIPOLYGON (((7580084.127 3398346.617, 75801...   10.487964   
1    2  MULTIPOLYGON (((6963422.952 3112388.252, 69613...   10.325135   
2    3  MULTIPOLYGON (((7925673.149 3667998.587, 79256...    6.917788   
3    4  MULTIPOLYGON (((7295527.260 3403656.802, 72954...    8.156241   
4    5  MULTIPOLYGON (((7772198

Reprojecting - Start: 06:02:36
Reprojecting - End:   06:02:37
Clip - Start:         06:02:37
Clip - End:           06:07:19
Area - Start:         06:07:19
Area - End:           06:07:19
Buildings - Start:    06:07:19
    Joining buildings to flood polygons   Start: 06:07:19
    Joining buildings to flood polygons   End:   07:12:43
    Counting number of buildings          Start: 07:12:43
    Counting number of buildings          End:   07:12:43
Buildings - End:      07:12:43
Zonalstats - Start:   07:12:43
Zonalstats - End:     07:13:20
Merging - Start:      07:13:20
Merging - End:        07:13:20
    id                                               geom  shape_leng  \
0    1  MULTIPOLYGON (((7580084.127 3398346.617, 75801...   10.487964   
1    2  MULTIPOLYGON (((6963422.952 3112388.252, 69613...   10.325135   
2    3  MULTIPOLYGON (((7925673.149 3667998.587, 79256...    6.917788   
3    4  MULTIPOLYGON (((7295527.260 3403656.802, 72954...    8.156241   
4    5  MULTIPOLYGON (((7772198



FINISHED WITH  -  afg_admbnda_adm1_flood_stats  - 07:13:21




Starting admin loop nr 2: afg_admbnda_adm2


Starting flood loop nr afg_fldzonea_100k_ncia_v2_029cm_dissolve_all_v2
Current Time = 07:13:22
Reprojecting - Start: 07:13:47
Reprojecting - End:   07:13:50
Clip - Start:         07:13:50
Clip - End:           09:08:30
Area - Start:         09:08:30
Area - End:           09:08:30
Buildings - Start:    09:08:30
    Joining buildings to flood polygons   Start: 09:08:30
    Joining buildings to flood polygons   End:   09:32:00
    Counting number of buildings          Start: 09:32:00
    Counting number of buildings          End:   09:32:00
Buildings - End:      09:32:00
Zonalstats - Start:   09:32:00
Zonalstats - End:     09:33:41
Merging - Start:      09:33:41
Merging - End:        09:33:41
      id                                               geom  shape_leng  \
0      1  MULTIPOLYGON (((7867133.864 3802368.500, 78671...    0.803005   
1      2  MULTIPOLYGON (((7616915.466 377

Reprojecting - Start: 11:14:44
Reprojecting - End:   11:14:45
Clip - Start:         11:14:45
Clip - End:           11:21:41
Area - Start:         11:21:41
Area - End:           11:21:41
Buildings - Start:    11:21:41
    Joining buildings to flood polygons   Start: 11:21:41
    Joining buildings to flood polygons   End:   11:29:15
    Counting number of buildings          Start: 11:29:15
    Counting number of buildings          End:   11:29:15
Buildings - End:      11:29:15
Zonalstats - Start:   11:29:15
Zonalstats - End:     11:29:51
Merging - Start:      11:29:51
Merging - End:        11:29:51
      id                                               geom  shape_leng  \
0      1  MULTIPOLYGON (((7867133.864 3802368.500, 78671...    0.803005   
1      2  MULTIPOLYGON (((7616915.466 3779589.826, 76168...    3.296682   
2      3  MULTIPOLYGON (((7670834.369 3570001.122, 76708...    1.984007   
3    118  MULTIPOLYGON (((7600682.499 3516871.898, 76005...    1.197111   
4    119  MULTIPOLYGO