In [14]:
### Script for adding flood statistics (buildings, people and km2) to admin areas
### Steps:
# 1. Dissolve flood data by district and province
# 2. Cut (clip) by the new district and province boundaries
# 3. Calculate flooded area
# 4. Zonal stats on building centroids
# 5. Zonal stats on population
# 6. Join the stats back to the admin areas
# 7. Save to the database

In [15]:
import os
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
import psycopg2 # required for exporting to postgis
import rioxarray as rxr
from rasterio.crs import CRS
from sqlalchemy import create_engine
import rasterstats
from shapely.ops import transform
from datetime import datetime

### Setting connection and parameters

In [16]:
# Database connection.
# The password is read from the HSDC_DB_PASSWORD environment variable so that no
# credential is stored in (or leaked through) the notebook file. Set it before
# starting Jupyter, or with `%env HSDC_DB_PASSWORD=...` in a scratch cell.
from urllib.parse import quote_plus

password = os.environ.get('HSDC_DB_PASSWORD')
if not password:
    raise RuntimeError(
        "Set the HSDC_DB_PASSWORD environment variable to the password of the "
        "'postgres' database user before running this notebook."
    )

# quote_plus escapes characters such as '@', ':' or '/' that would otherwise
# break the connection URL.
db_connection_url = "postgresql://postgres:{}@localhost:5432/HSDC".format(quote_plus(password))
con = create_engine(db_connection_url)

In [17]:
# Show every column when a DataFrame is printed/displayed (None = no column limit).
pd.set_option('display.max_columns', None)

In [18]:
# Define projection
# PROJ string for a cylindrical equal-area projection. Admin areas, flood zones
# and building centroids are all reprojected to it before clipping and before
# areas are computed, so area values are not distorted by the projection.
# NOTE(review): area() divides by 1,000,000 to get km2, which presumes the
# projected units are metres - confirm.
repro_crs = '+proj=cea'

In [19]:
#flood_test = gpd.GeoDataFrame.from_postgis('SELECT * from afg_fldzonea_100k_ncia_v2_271cm_dissolve_all_v2_testclip', con, 'wkb_geometry')

### Defining functions

In [20]:
# Clipping admin and flood zones
def clip(adm, flood):
    """Clip the admin areas to the flood extent.

    Calls ``adm.clip(flood)``, i.e. the admin frame is the one being cut and
    the flood frame is the mask, and returns the result unchanged.
    """
    return adm.clip(flood)

In [21]:
# Extracting area
def area(flood, flood_level_code):
    """Add a ``km2_<flood_level_code>`` column holding each geometry's area.

    The area of the ``geom`` column is divided by 1,000,000 (square metres to
    square kilometres, assuming a metre-based CRS). The column is written onto
    ``flood`` itself, and that same object is returned.
    """
    area_km2 = flood['geom'].area / 1000000
    flood[f'km2_{flood_level_code}'] = area_km2
    return flood

In [22]:
# Extracting buildings
def buildings(flood, build_centroids, group_by_parameter, flood_level_code=None):
    """Count the building centroids that fall inside the flood polygons.

    Parameters
    ----------
    flood : GeoDataFrame
        Flood polygons (already clipped to the admin areas); must contain the
        ``group_by_parameter`` column.
    build_centroids : GeoDataFrame
        Building centroid points, in the same CRS as ``flood``.
    group_by_parameter : str
        Column identifying the admin unit (e.g. a pcode column) to count by.
    flood_level_code : str, optional
        Suffix of the output column (``build_<code>``). When omitted, the
        function falls back to the notebook globals
        ``flood_code_list[flood_level]`` so existing calls keep working.

    Returns
    -------
    GeoDataFrame
        ``flood`` with an extra ``build_<code>`` column. Admin units that have
        flood extent but no intersecting building get 0 rather than NaN.
    """
    if flood_level_code is None:
        # Backward-compatible fallback on the loop state of the calling cell.
        flood_level_code = flood_code_list[flood_level]
    build_count_column = 'build_{}'.format(flood_level_code)

    def _now():
        return datetime.now().strftime("%H:%M:%S")

    # Join building centroids to flood polygons
    print('    Joining buildings to flood polygons   Start: {}'.format(_now()))
    joined_df = gpd.sjoin(
        build_centroids,
        flood,
        how='inner',
        predicate='intersects'
    )
    print('    Joining buildings to flood polygons   End:   {}'.format(_now()))

    # Count number of buildings per admin unit. Every row of the inner join is
    # one building/polygon match, so the group size is the building count.
    print('    Counting number of buildings          Start: {}'.format(_now()))
    build_count = (
        joined_df.groupby(group_by_parameter)
        .size()
        .rename(build_count_column)
        .reset_index()
    )
    print('    Counting number of buildings          End:   {}'.format(_now()))

    # Merge build count back on to the flood polygons
    flood = flood.merge(
        build_count,
        on=group_by_parameter,
        how='left')

    # Units without any matched building come out of the left merge as NaN.
    flood[build_count_column] = flood[build_count_column].fillna(0)

    return flood

In [23]:
# Extracting population
def ZonalStats(shape_gdf, raster, stats, flood_level_code=None):
    """Attach raster zonal statistics to each polygon of ``shape_gdf``.

    Parameters
    ----------
    shape_gdf : GeoDataFrame
        Zones to summarise; must have a ``geom`` geometry column.
    raster : str
        Path of the raster passed to ``rasterstats.zonal_stats``.
    stats : str or list
        Statistic(s) requested from ``rasterstats.zonal_stats`` (the notebook
        uses ``'sum'``).
    flood_level_code : str, optional
        Suffix for the population column (``pop_sum_<code>``). When omitted,
        the function falls back to the notebook globals
        ``flood_code_list[flood_level]`` so existing calls keep working.

    Returns
    -------
    GeoDataFrame
        A copy of ``shape_gdf`` with one appended column per statistic; the
        ``sum`` statistic is named ``pop_sum_<code>``. ``geom`` is the active
        geometry. The input frame is not modified.
    """
    if flood_level_code is None:
        # Backward-compatible fallback on the loop state of the calling cell.
        flood_level_code = flood_code_list[flood_level]
    pop_column = "pop_sum_{}".format(flood_level_code)

    # One dict per input feature, in the same order as the rows of shape_gdf.
    zonal = rasterstats.zonal_stats(shape_gdf, raster, stats=stats)
    stats_df = pd.DataFrame(zonal)

    flood = shape_gdf.copy()
    for stat_name, values in stats_df.items():
        column = pop_column if stat_name == "sum" else stat_name
        # Assign by position, not by index label: shape_gdf's index is not
        # guaranteed to be 0..n-1, and label alignment would misplace values.
        flood[column] = values.to_numpy()

    return flood.set_geometry('geom')

### Defining reference paths and lists

In [24]:
# Population rasters. Judging by the file names, the second one is the same
# WorldPop raster reprojected to the cylindrical equal-area CRS.
pop_path = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained.tif'
pop_path_cea = r'D:\iMMAP\data\Afghanistan\afg_worldpop_2020_UNadj_unconstrained_projCEA.tif'

# The three admin lists below are parallel: position i in each list describes
# the same admin level.
admin_abbreviations = ['region', 'adm1', 'adm2']

# PostGIS tables holding the admin boundaries (test-clip versions).
admin_list = ['afg_admbnda_{}_testclip2'.format(abbreviation) for abbreviation in admin_abbreviations]

# Column that identifies an admin unit at each level.
admin_list_groupby = ['reg_pcode', 'adm1_pcode', 'adm2_pcode']

# Flood level codes and the PostGIS flood tables they belong to (parallel lists).
flood_code_list = ['029cm', '121cm', '271cm']

flood_list = [
    'afg_fldzonea_100k_ncia_v2_{}_dissolve_all_v2_testclip'.format(code)
    for code in flood_code_list
]

### Running functions

In [25]:
# Load the building centroids once, up front, and reproject them to the
# working CRS; the same frame is reused for every admin and flood level.
# (Alternative source table: afg_buildings_microsoft_centroids_testclip)
print("Loading buildings. Starting: ", datetime.now().strftime("%H:%M:%S"))
build_centroids = (
    gpd.GeoDataFrame
    .from_postgis('SELECT * from build_testclip2', con)
    .to_crs(repro_crs)
)
print("Loading buildings. Finished: ", datetime.now().strftime("%H:%M:%S"))

Loading buildings. Starting:  14:11:49
Loading buildings. Finished:  14:11:50


In [32]:
def _timestamp():
    """Return the current wall-clock time as an HH:MM:SS string."""
    return datetime.now().strftime("%H:%M:%S")


def _log(template):
    """Print `template` with its `{}` placeholder replaced by the current time."""
    print(template.format(_timestamp()))


print('\n\n\nALL PROCESSING START', _timestamp())

# Index of the last flood level; the admin table is written once it is done.
final_flood_level = len(flood_list) - 1

for admin_level, admin_table in enumerate(admin_list):

    print('\n\n\n\nStarting admin loop nr {}: {}'.format(admin_level, admin_table))

    # Admin boundaries for this level, reprojected to the working CRS. The
    # statistics of every flood level are merged onto this frame in turn.
    adm = gpd.GeoDataFrame.from_postgis('SELECT * from {}'.format(admin_table), con)
    adm = adm.to_crs(repro_crs)

    group_by_parameter = admin_list_groupby[admin_level]

    # The loop variable must stay named `flood_level`: buildings() and
    # ZonalStats() look up `flood_code_list[flood_level]` from the notebook's
    # global scope to name their output columns.
    for flood_level, flood_table in enumerate(flood_list):

        print('\n\nStarting flood loop nr {}'.format(flood_table))
        print("Current Time =", _timestamp())

        flood = gpd.GeoDataFrame.from_postgis('SELECT * from {}'.format(flood_table), con, 'wkb_geometry')

        _log('Reprojecting - Start: {}')
        flood = flood.to_crs(repro_crs)
        _log('Reprojecting - End:   {}')

        # From here on `flood` holds the admin polygons cut to the flood extent.
        # NOTE(review): the original comment warned that an admin area without
        # any flood extent disappears from the final output; with the left
        # merge onto `adm` below it looks like such rows stay, with NaN
        # statistics - confirm.
        _log('Clip - Start:         {}')
        flood = clip(adm, flood)
        _log('Clip - End:           {}')

        _log('Area - Start:         {}')
        flood_code = flood_code_list[flood_level]
        flood = area(flood, flood_code)
        _log('Area - End:           {}')

        _log('Buildings - Start:    {}')
        flood = buildings(flood, build_centroids, group_by_parameter)
        _log('Buildings - End:      {}')

        _log('Zonalstats - Start:   {}')
        flood = ZonalStats(flood, pop_path_cea, 'sum')
        _log('Zonalstats - End:     {}')

        _log('Merging - Start:      {}')
        # Keep only the admin key and this flood level's three statistics,
        # total them per admin unit, and attach them to the admin frame.
        stat_columns = [
            group_by_parameter,
            'km2_{}'.format(flood_code),
            'pop_sum_{}'.format(flood_code),
            'build_{}'.format(flood_code),
        ]
        flood = pd.DataFrame(flood[stat_columns]).groupby(group_by_parameter).sum()
        adm = adm.merge(flood, on=group_by_parameter, how='left')
        _log('Merging - End:        {}')

        print(adm)

        print('Finished flood loop nr {}'.format(flood_table))
        print("Current Time =", _timestamp())

        # Save to postgis if all flood levels have been processed
        if flood_level == final_flood_level:
            output_abbreviation = admin_abbreviations[admin_level]
            adm = adm.to_crs('epsg:4326')
            adm.to_postgis('afg_admbnda_{}_flood_stats'.format(output_abbreviation), con, if_exists='replace')
            print('\n\nFINISHED WITH  -  afg_admbnda_{}_flood_stats  - {}'
                  .format(output_abbreviation, _timestamp()))

print('\n\n\nALL PROCESSING END', _timestamp())




ALL PROCESSING START 14:48:03




Starting admin loop nr 0: afg_admbnda_region_testclip2


Starting flood loop nr afg_fldzonea_100k_ncia_v2_029cm_dissolve_all_v2_testclip
Current Time = 14:48:03
Reprojecting - Start: 14:48:04
Reprojecting - End:   14:48:04
Clip - Start:         14:48:04
Clip - End:           14:48:13
Area - Start:         14:48:13
Area - End:           14:48:13
Buildings - Start:    14:48:13
    Joining buildings to flood polygons   Start: 14:48:13
    Joining buildings to flood polygons   End:   14:48:27
    Counting number of buildings          Start: 14:48:27
    Counting number of buildings          End:   14:48:27
Buildings - End:      14:48:27
Zonalstats - Start:   14:48:27
Zonalstats - End:     14:48:28
Merging - Start:      14:48:28
Merging - End:        14:48:28
   id                                               geom         reg_en  \
0   3  MULTIPOLYGON (((7588786.360 3345978.293, 75886...  South Eastern   
1   5  MULTIPOLYGON (((7792666.678 3551912.629, 

Reprojecting - Start: 14:49:35
Reprojecting - End:   14:49:35
Clip - Start:         14:49:35
Clip - End:           14:49:45
Area - Start:         14:49:45
Area - End:           14:49:45
Buildings - Start:    14:49:45
    Joining buildings to flood polygons   Start: 14:49:45
    Joining buildings to flood polygons   End:   14:49:56
    Counting number of buildings          Start: 14:49:56
    Counting number of buildings          End:   14:49:56
Buildings - End:      14:49:56
Zonalstats - Start:   14:49:56
Zonalstats - End:     14:49:58
Merging - Start:      14:49:58
Merging - End:        14:49:58
   id                                               geom  shape_leng  \
0   5  MULTIPOLYGON (((7772198.026 3621013.781, 77721...    2.393424   
1  16  MULTIPOLYGON (((7735346.358 3562491.163, 77352...    4.380275   
2  19  MULTIPOLYGON (((7789915.066 3512799.047, 77899...    4.938876   
3  24  MULTIPOLYGON (((7848069.570 3595197.756, 78478...    3.327006   
4  26  MULTIPOLYGON (((7656798.748 3



Starting flood loop nr afg_fldzonea_100k_ncia_v2_029cm_dissolve_all_v2_testclip
Current Time = 14:50:08
Reprojecting - Start: 14:50:09
Reprojecting - End:   14:50:09
Clip - Start:         14:50:09
Clip - End:           14:50:20
Area - Start:         14:50:20
Area - End:           14:50:20
Buildings - Start:    14:50:20
    Joining buildings to flood polygons   Start: 14:50:20
    Joining buildings to flood polygons   End:   14:50:21
    Counting number of buildings          Start: 14:50:21
    Counting number of buildings          End:   14:50:21
Buildings - End:      14:50:21
Zonalstats - Start:   14:50:21
Zonalstats - End:     14:50:23
Merging - Start:      14:50:23
Merging - End:        14:50:23
     id                                               geom  shape_leng  \
0     3  MULTIPOLYGON (((7670834.369 3570001.122, 76708...    1.984007   
1     6  MULTIPOLYGON (((7761820.825 3536335.297, 77621...    0.900783   
2    69  MULTIPOLYGON (((7833952.331 3608855.591, 78339...    1.4414

Reprojecting - Start: 14:50:41
Reprojecting - End:   14:50:41
Clip - Start:         14:50:41
Clip - End:           14:50:45
Area - Start:         14:50:45
Area - End:           14:50:45
Buildings - Start:    14:50:45
    Joining buildings to flood polygons   Start: 14:50:45
    Joining buildings to flood polygons   End:   14:50:46
    Counting number of buildings          Start: 14:50:46
    Counting number of buildings          End:   14:50:46
Buildings - End:      14:50:46
Zonalstats - Start:   14:50:46
Zonalstats - End:     14:50:47
Merging - Start:      14:50:47
Merging - End:        14:50:47
     id                                               geom  shape_leng  \
0     3  MULTIPOLYGON (((7670834.369 3570001.122, 76708...    1.984007   
1     6  MULTIPOLYGON (((7761820.825 3536335.297, 77621...    0.900783   
2    69  MULTIPOLYGON (((7833952.331 3608855.591, 78339...    1.441451   
3    25  MULTIPOLYGON (((7876603.357 3620544.353, 78767...    0.763970   
4    26  MULTIPOLYGON (((7