In [48]:
# Objective: Getting pop, buildings and area per landslide risk zone

# Input: admin0, admin1, region, landslide_raster (with 8 different risk categories)
# Output: admin0, admin1 and region with pop, buildings and area per risk category per admin polygon

# Steps:

## Pop
# Load population data and landslide data
# Reproject so pixel size matche between pop and landslide
# Mask pop with land slide categories
# Apply zonal stats across masked population dataset

## Area
# Get count of none-masked pixels from population data above for each risk category (already masked by risk category)
# Get pixel size
# Multiply pixel size with count

## Buildings
# Extract pixel from landslide to each building centroid (using zonal stats on point)
# Join buildings and admin polygons
# Group by each risk category for each polygon

## Aggregate all columns to 5 risk categories
## Join final columns to adm_stats original

In [49]:
import os
import json
import geopandas as gpd
import pandas as pd
import psycopg2 # required for exporting to postgis
import rioxarray as rxr
from rasterio.crs import CRS
from sqlalchemy import create_engine
import rasterstats
from shapely.ops import transform
from datetime import datetime
import numpy as np

from rasterio.enums import Resampling

import xarray
import matplotlib.pyplot as plt

In [50]:
pcode = 'adm1_pcode'

In [51]:
os.chdir('D:\iMMAP\proj\ASDC\data\Landslide\process_v10')

In [52]:
# Load database configuration from file
with open(r'D:\iMMAP\code\db_config\hsdc_local_db_config.json', 'r') as f:
    config = json.load(f)

# Create database URL with credentials
db_url = f"postgresql://{config['username']}:{config['password']}@{config['host']}:{config['port']}/{config['database']}"

# Connect to the database
con = create_engine(db_url)

In [53]:
pd.set_option('display.max_columns', None)

### Select input data

In [54]:
print('Loading adm')

# This needs to be the admin dataset with at least population statistics already included (i.e. the '_stats' table)
adm = gpd.GeoDataFrame.from_postgis('SELECT * from afg_admbnda_adm1_testclip2_stats', con) #.to_crs(repro_crs)
print('Loading landslide')
landslide = xarray.open_dataarray(r'D:\iMMAP\data\Afghanistan\HSDC-Official\afg_ls_30m_wb_s3.tif').squeeze() # D:\iMMAP\proj\ASDC\data\afg_ls_30m_wb_s3_compressed.tif
print('Loading pop')
pop = xarray.open_dataarray(r'D:\iMMAP\data\Afghanistan\HSDC-Official\afg_worldpop_2020_UNadj_unconstrained.tif').squeeze() # D:\iMMAP\data\Afghanistan\HSDC-Official\afg_worldpop_2020_UNadj_unconstrained.tif
print('Loading build')
build = gpd.GeoDataFrame.from_postgis('SELECT * from afg_buildings_microsoft_centroids_testclip1_tiny', con) #.to_crs(repro_crs)
print('Loadingl landslide - reprojected version')
landslide_repro = xarray.open_dataarray(r'D:\iMMAP\proj\ASDC\data\Landslide\process_v10\landslide_repro.tif').squeeze()

#adm_stats_original = gpd.GeoDataFrame.from_postgis('SELECT * FROM afg_admbnda_adm1_stats', con)

Loading adm
Loading landslide
Loading pop
Loading build
Loadingl landslide - reprojected version


## 1. Population

### 1.1. Reproject landslide to match pop

In [55]:
# # Define function to describe raster
# def print_raster(raster):
#     print(
#         f"shape: {raster.rio.shape}\n"
#         f"resolution: {raster.rio.resolution()}\n"
#         f"bounds: {raster.rio.bounds()}\n"
#         f"sum: {raster.sum().item()}\n"
#         f"CRS: {raster.rio.crs}\n"
#     )

In [56]:
# print("Original Raster:\n----------------\n")
# print_raster(landslide)
# print("Raster to Match:\n----------------\n")
# print_raster(pop)

In [57]:
# # Reproject to match target raster
# landslide_repro = landslide.rio.reproject_match(pop)

In [58]:
# print("Reprojected Raster:\n-------------------\n")
# print_raster(landslide_repro)
# print("Raster to Match:\n----------------\n")
# print_raster(pop)

### 1.2. Save reprojected landslide to file

In [59]:
# # Save reprojected landslide data to file
# landslide_repro.rio.to_raster("landslide_repro.tif")

### 1.3. Create masked population rasters

In [60]:
# # Loop through risk cats and create one dataset per cat

# risk_category_list = [0, 1, 2, 3, 4, 5, 6, 7, 8]

# for risk_category in risk_category_list:
#     pop_masked = pop.where(landslide_repro == risk_category)
#     pop_masked.rio.to_raster('pop_masked_ls_risk_cat_{}.tif'.format(risk_category))

### 1.4. Apply zonal stats on masked population rasters

In [61]:
# Defining zonal stats function

def ZonalStats(shape_gdf, raster, stats, nodata_value):

    zonalSt = rasterstats.zonal_stats(shape_gdf, raster, stats = stats, nodata=nodata_value)
    
    df = pd.DataFrame(zonalSt)
    
    df_concat = pd.concat([df, shape_gdf], axis=1)
    
    final_gdf = gpd.GeoDataFrame(df_concat, geometry=df_concat.geom) #wkb_geometry
    
    final_gdf_ordered = final_gdf[[c for c in final_gdf if c not in [stats]] + [stats]]
    
    final = final_gdf_ordered.drop(columns=['geometry'])
    
    final = final.set_geometry('geom')
    
    return final

In [62]:
# Apply zonal stats to each raster

adm_stats = adm
risk_category_list = [0, 1, 2, 3, 4, 5, 6, 7, 8]

for category in risk_category_list:
    raster = 'pop_masked_ls_risk_cat_{}.tif'.format(category)
    print(raster)
    
    adm_stats = ZonalStats(adm_stats, raster, 'sum', 'NaN') #change column name
    
    adm_stats = adm_stats.rename(columns={'sum': 'pop_{}'.format(category)})
    
    col = 'pop_{}'.format(category)
    
    adm_stats[col] = adm_stats[col].round(decimals=2)

pop_masked_ls_risk_cat_0.tif
pop_masked_ls_risk_cat_1.tif
pop_masked_ls_risk_cat_2.tif
pop_masked_ls_risk_cat_3.tif
pop_masked_ls_risk_cat_4.tif
pop_masked_ls_risk_cat_5.tif
pop_masked_ls_risk_cat_6.tif
pop_masked_ls_risk_cat_7.tif
pop_masked_ls_risk_cat_8.tif


In [63]:
# Create column with pop sum

sum_list = [
    'pop_0',
    'pop_1',
    'pop_2',
    'pop_3',
    'pop_4',
    'pop_5',
    'pop_6',
    'pop_7',
    'pop_8'
]

adm_stats['pop_sum'] = adm_stats[sum_list].sum(axis=1)

### 1.5. Aggregate from 8 to 5 risk categories

In [64]:
# Aggregated to match desired risk categories
#0=Null; 1-3=Low; 4-5=Moderate; 6=High; 7-9=Very High
adm_stats['pop_ls_0'] = adm_stats['pop_0']
adm_stats['pop_ls_1'] = adm_stats[['pop_1', 'pop_2', 'pop_3']].sum(axis=1)
adm_stats['pop_ls_2'] = adm_stats[['pop_4', 'pop_5']].sum(axis=1)
adm_stats['pop_ls_3'] = adm_stats['pop_6']
adm_stats['pop_ls_4'] = adm_stats[['pop_7', 'pop_8']].sum(axis=1)

In [65]:
adm_stats[:1]

Unnamed: 0,id,geom,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,pop_0,pop_1,pop_2,pop_3,pop_4,pop_5,pop_6,pop_7,pop_8,pop_sum
0,5.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34...",2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,12649.78,17077.31,124025.84,53222.23,39556.77,482.27,95.33,725.99,294.7,280.92,59062,428,5161,3083,63,12649.78,3.41,,17073.9,53716.25,70309.59,53222.23,33612.81,5943.96,246531.93


## 2. Area


### 2.1. Get pixel size

In [66]:
# Get km2 per cell

# Reproject to equal area crs
landslide_cea = landslide_repro.rio.reproject('+proj=cea')
#landslide_cea.rio.to_raster("landslide_repro_cea.tif")

res = landslide_cea.rio.resolution()[0]
print('resolution: ', res)

m2_per_cell =res * res
km2_per_cell = m2_per_cell / 1000000

print('m2_per_cell ', m2_per_cell)
print('km2_per_cell ', km2_per_cell)

resolution:  88.45348854407813
m2_per_cell  7824.01963561736
km2_per_cell  0.00782401963561736


### 2.2. Get area per risk category per polygon

In [67]:
# Apply zonal stats with count parameter to each raster
# Multiply count with pixel size to get area

risk_category_list = [0, 1, 2, 3, 4, 5, 6, 7, 8]

for category in risk_category_list:
    raster = 'pop_masked_ls_risk_cat_{}.tif'.format(category)
    print(raster)
    
    adm_stats = ZonalStats(adm_stats, raster, 'count', 0)
 
    adm_stats = adm_stats.rename(columns={'count': 'km2_{}'.format(category)})
    
    col = 'km2_{}'.format(category)
    
    adm_stats[col] = (adm_stats[col] * (87.84301333486803 * 87.84301333486803) / 1000000).round(decimals=2)

pop_masked_ls_risk_cat_0.tif
pop_masked_ls_risk_cat_1.tif
pop_masked_ls_risk_cat_2.tif
pop_masked_ls_risk_cat_3.tif
pop_masked_ls_risk_cat_4.tif
pop_masked_ls_risk_cat_5.tif
pop_masked_ls_risk_cat_6.tif
pop_masked_ls_risk_cat_7.tif
pop_masked_ls_risk_cat_8.tif


In [68]:
adm_stats

Unnamed: 0,id,geom,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,pop_0,pop_1,pop_2,pop_3,pop_4,pop_5,pop_6,pop_7,pop_8,pop_sum,km2_0,km2_1,km2_2,km2_3,km2_4,km2_5,km2_6,km2_7,km2_8
0,5.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34...",2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,12649.78,17077.31,124025.84,53222.23,39556.77,482.27,95.33,725.99,294.7,280.92,59062,428,5161,3083,63,12649.78,3.41,,17073.9,53716.25,70309.59,53222.23,33612.81,5943.96,246531.93,73.51,0.04,0.0,95.29,311.06,414.93,294.7,250.66,30.26
1,16.0,"MULTIPOLYGON (((69.48780 34.16098, 69.48661 34...",4.380275,0.460366,Kabul,کابل,AF01,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,6182515,4687.95,1028.0,643926,1318.81,1933975,459.2,187525,1361481,286.8,118442,697241,136.94,56794,364.16,86245,7881,40280.58,18036.06,495259.05,124378.91,49877.8,2229.87,374.98,1423.22,395.7,264.18,584435,932,49819,7964,776,40280.58,27.84,5.84,18002.38,210293.55,284965.5,124378.91,41340.66,8537.14,727832.4,314.24,0.18,0.12,374.68,594.92,828.3,395.7,244.7,19.48
2,19.0,"MULTIPOLYGON (((69.97800 33.62184, 69.97799 33...",4.938876,0.531138,Paktya,پکتیا,AF13,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,South Eastern,جنوب ختیځه حوزه,SE,840036,5468.04,916.0,95548,153.63,120165,561.48,28771,69739,318.65,13418,19251,77.79,2486,277.15,35697,988,427697.75,20598.05,173456.59,155943.25,58680.61,2158.25,157.96,1307.26,1379.95,464.62,87667,781,2723,4326,51,427697.75,,3595.77,17002.28,20675.86,152780.73,155943.25,33847.46,24833.15,836376.25,2567.55,0.0,28.9,129.06,164.59,1142.67,1379.95,231.05,233.57
3,24.0,"MULTIPOLYGON (((70.50041 34.51768, 70.49845 34...",3.327006,0.38402,Laghman,لغمان,AF07,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,702218,3900.67,675.0,87840,180.03,107324,238.25,8742,76139,163.16,3930,43887,78.73,1461,347.64,28073,1048,411409.03,50201.19,144873.27,74358.65,21336.13,999.14,474.36,1505.56,637.42,284.19,73941,2736,7368,3697,98,411409.03,,26.49,50174.7,70909.38,73963.89,74358.65,15401.67,5934.46,702178.27,1363.56,0.0,0.27,474.09,703.97,801.59,637.42,209.63,74.56
4,26.0,"MULTIPOLYGON (((68.78219 33.61425, 68.78328 33...",4.247632,0.428761,Logar,لوگر,AF05,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,576898,4394.3,781.0,83454,131.28,128092,526.13,25805,76735,291.54,13575,33653,96.23,4519,221.85,13646,455,12543.67,24363.91,49951.55,41386.68,7988.65,2479.09,347.14,762.13,638.77,167.17,80139,526,1305,1472,12,12543.67,,9.25,24354.66,25868.4,24083.15,41386.68,3070.65,4918.0,136234.46,211.67,0.0,0.13,347.01,406.82,355.31,638.77,69.13,98.04
5,27.0,"MULTIPOLYGON (((71.03569 34.56707, 71.03462 34...",4.027321,0.416132,Kunar,کنر ها,AF15,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,617596,4217.67,994.0,51466,146.43,53989,184.29,9297,37894,126.37,4728,19847,55.27,1612,324.06,31223,762,151248.53,95685.86,210065.47,85236.06,74839.59,110.57,864.53,1919.18,599.88,723.51,36632,3217,7870,3308,439,151248.53,,5366.71,90319.15,51524.89,158540.58,85236.06,49958.62,24880.97,617075.51,504.11,0.0,48.72,815.81,452.31,1466.87,599.88,536.53,186.98
6,30.0,"MULTIPOLYGON (((68.91945 34.80273, 68.91932 34...",5.324653,0.550813,Parwan,پروان,AF03,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,868496,5581.57,1037.0,105118,155.6,84418,212.5,9254,52823,134.73,4491,29845,58.62,1817,776.79,62263,5847,58740.66,18270.9,141787.11,127886.48,58034.47,589.73,362.84,1637.72,2039.34,951.94,79347,1550,17240,6574,407,58740.66,,141.87,18129.03,59772.15,82014.96,127886.48,21152.43,36882.04,404719.62,520.92,0.0,1.97,360.87,727.62,910.1,2039.34,269.0,682.94
7,31.0,"MULTIPOLYGON (((69.35163 33.05242, 69.35296 33...",3.993206,0.39814,Khost,خوست,AF14,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,South Eastern,جنوب ختیځه حوزه,SE,1038852,4111.29,1025.0,154165,252.68,156810,334.3,20764,90374,190.16,8828,34527,62.16,1893,135.57,15784,669,709550.38,43018.87,155374.23,95918.28,31589.55,1717.79,316.77,1173.62,635.01,268.1,143343,2102,2900,5793,27,709550.38,,5718.64,37300.23,79897.12,75477.11,95918.28,19042.84,12546.71,1035451.31,1983.51,0.0,57.65,259.12,543.27,630.35,635.01,160.17,107.93
8,33.0,"MULTIPOLYGON (((70.90162 33.98614, 70.90083 33...",5.325266,0.721955,Nangarhar,ننگرهار,AF06,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,2267027,7376.5,1563.0,343146,307.33,403357,723.56,66713,215275,449.24,30221,106846,199.31,12332,310.85,27800,784,1785877.38,76047.8,236244.98,136777.81,31647.34,3405.47,844.65,1900.38,922.93,303.07,325867,2432,8074,6658,115,1785877.38,,25.21,76022.59,93206.12,143038.86,136777.81,20508.01,11139.33,2266595.31,3968.79,0.0,0.38,844.27,934.13,966.25,922.93,228.95,74.12


### 2.3. Aggregate from 8 to 5 risk categories


In [69]:
# Group to match desired categories
#0=Null; 1-3=Low; 4-5=Moderate; 6=High; 7-9=Very High
adm_stats['km2_ls_0'] = adm_stats['km2_0']
adm_stats['km2_ls_1'] = adm_stats[['km2_1', 'km2_2', 'km2_3']].sum(axis=1)
adm_stats['km2_ls_2'] = adm_stats[['km2_4', 'km2_5']].sum(axis=1)
adm_stats['km2_ls_3'] = adm_stats['km2_6']
adm_stats['km2_ls_4'] = adm_stats[['km2_7', 'km2_8']].sum(axis=1)

In [70]:
adm_stats[:1]

Unnamed: 0,id,geom,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,pop_0,pop_1,pop_2,pop_3,pop_4,pop_5,pop_6,pop_7,pop_8,pop_sum,km2_0,km2_1,km2_2,km2_3,km2_4,km2_5,km2_6,km2_7,km2_8
0,5.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34...",2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,12649.78,17077.31,124025.84,53222.23,39556.77,73.51,95.33,725.99,294.7,280.92,59062,428,5161,3083,63,12649.78,3.41,,17073.9,53716.25,70309.59,53222.23,33612.81,5943.96,246531.93,73.51,0.04,0.0,95.29,311.06,414.93,294.7,250.66,30.26


## 3. Buildings

In [71]:
print(datetime.now().strftime("%H:%M:%S"))
# Input: building centroids, landslide dataset
# Output: number of buildings per land slide risk catagory per admin polygon

# Approach:
# Extract pixel value to building centroids
# Loop over landslide risk category
# For each risk category
    # filter the points so the dataset only includes points from that category
    # perform spatial join on remaining points

15:36:12


### 3.1. Visualize data

In [72]:
#### Explored data
#
#fig, ax = plt.subplots(figsize=(10, 10))
#
#adm.plot(ax=ax)
#landslide_repro.plot(ax=ax)
#build.plot(ax=ax, color='red')
#
#plt.show()

In [73]:
# Check matching crs
print('adm crs', adm_stats.crs)
print('build crs', build.crs)
print('landslide_repro crs', landslide_repro.rio.crs)

adm crs epsg:4326
build crs epsg:4326
landslide_repro crs EPSG:4326


### 3.2. Zonal stats - cell to point

In [74]:
# Applying point stats and concatenating output to input geometry
# Output: buildings with associated landslide risk column

def point_stats(shape_gdf, raster_file_path):

    pointSt = rasterstats.point_query(shape_gdf, raster_file_path, interpolate='nearest')

    df = pd.DataFrame(pointSt)
    
    df_concat = pd.concat([df, shape_gdf], axis=1)
    
    gdf = gpd.GeoDataFrame(df_concat, geometry=df_concat.geom) #wkb_geometry
    
    gdf_ordered = gdf[[c for c in gdf if c not in [0]] + [0]] # 0 refers to column name
    
    gdf_clean = gdf_ordered.drop(columns=['geometry']).set_geometry('geom').rename(columns={0:'risk'})
    
    return gdf_clean

In [75]:
print(datetime.now().strftime("%H:%M:%S"))
build_risk = point_stats(build, 'landslide_repro.tif')

15:36:12


### 3.3. Spatial join and grouping

In [76]:
print(datetime.now().strftime("%H:%M:%S"))
# Joining points to polygon
# Group points by pcode and get count (=number of points within each pcode)
# Merge point back to original polygon dataset

# Group by parameter = pcode

def buildings(polygons, build_centroids, group_by_parameter, build_count_column):
    
    # Join building centroids to flood polygons
    #print('    Joining buildings to flood polygons   Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    joined_df = gpd.sjoin(
        build_centroids,
        polygons,
        how='inner',
        predicate='intersects'
    )
    #print('    Joining buildings to flood polygons   End:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    # Count number of buildings within admin polygons
    #print('    Counting number of buildings          Start: {}'.format(datetime.now().strftime("%H:%M:%S")))
    build_count = joined_df.groupby(
        [group_by_parameter],
        as_index=False,
    )['geom'].count() # vuid is arbitrary, we just count the row
    #print('    Counting number of buildings          End:   {}'.format(datetime.now().strftime("%H:%M:%S")))
    
    # Change column name to build_count
    build_count.rename(columns = {'geom': build_count_column}, inplace = True)

    # Merge build count back on to admin dataset
    polygons = polygons.merge(
        build_count, 
        on=group_by_parameter, 
        how='left')
    
    return polygons

15:37:17


In [77]:
print(datetime.now().strftime("%H:%M:%S"))
# loop over risk categories

risk_category_list = [0, 1, 2, 3, 4, 5, 6, 7, 8]

for category in risk_category_list:
    
    build_filtered = build_risk.query('risk == {}'.format(category))
    
    build_count_column = 'build_{}'.format(category)
    
    adm_stats = buildings(adm_stats, build_filtered, pcode, build_count_column)

15:37:17


In [78]:
adm_stats[:1]

Unnamed: 0,id,geom,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,pop_0,pop_1,pop_2,pop_3,pop_4,pop_5,pop_6,pop_7,pop_8,pop_sum,km2_0,km2_1,km2_2,km2_3,km2_4,km2_5,km2_6,km2_7,km2_8,build_0,build_1,build_2,build_3,build_4,build_5,build_6,build_7,build_8
0,5.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34...",2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,12649.78,17077.31,124025.84,53222.23,39556.77,73.51,95.33,725.99,294.7,280.92,59062,428,5161,3083,63,12649.78,3.41,,17073.9,53716.25,70309.59,53222.23,33612.81,5943.96,246531.93,73.51,0.04,0.0,95.29,311.06,414.93,294.7,250.66,30.26,,,,,,,,,


### 3.4. Aggregate from 8 to 5 risk categories

In [79]:
# Group to match desired categories
#0=Null; 1-3=Low; 4-5=Moderate; 6=High; 7-9=Very High
adm_stats['build_ls_0'] = adm_stats['build_0']
adm_stats['build_ls_1'] = adm_stats[['build_1', 'build_2', 'build_3']].sum(axis=1)
adm_stats['build_ls_2'] = adm_stats[['build_4', 'build_5']].sum(axis=1)
adm_stats['build_ls_3'] = adm_stats['build_6']
adm_stats['build_ls_4'] = adm_stats[['build_7', 'build_8']].sum(axis=1)

In [80]:
adm_stats[:1]

Unnamed: 0,id,geom,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,pop_0,pop_1,pop_2,pop_3,pop_4,pop_5,pop_6,pop_7,pop_8,pop_sum,km2_0,km2_1,km2_2,km2_3,km2_4,km2_5,km2_6,km2_7,km2_8,build_0,build_1,build_2,build_3,build_4,build_5,build_6,build_7,build_8
0,5.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34...",2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,12649.78,17077.31,124025.84,53222.23,39556.77,73.51,95.33,725.99,294.7,280.92,,0.0,0.0,,0.0,12649.78,3.41,,17073.9,53716.25,70309.59,53222.23,33612.81,5943.96,246531.93,73.51,0.04,0.0,95.29,311.06,414.93,294.7,250.66,30.26,,,,,,,,,


In [81]:
# List columns to drop
drop_cols = ['pop_0',
             'pop_1',
             'pop_2',
             'pop_3',
             'pop_4',
             'pop_5',
             'pop_6',
             'pop_7',
             'pop_8',
             'pop_sum',
             'km2_0',
             'km2_1',
             'km2_2',
             'km2_3',
             'km2_4',
             'km2_5',
             'km2_6',
             'km2_7',
             'km2_8',
             'build_0',
             'build_1',
             'build_2',
             'build_3',
             'build_4',
             'build_5',
             'build_6',
             'build_7',
             'build_8']

In [82]:
# Drop cols
adm_stats = adm_stats.drop(columns=drop_cols)

# Move geom column to end of table
cols_at_end = ['geom']
adm_stats = adm_stats[[c for c in adm_stats if c not in cols_at_end] 
       + [c for c in cols_at_end if c in adm_stats]]

In [83]:
adm_stats['pop_ls_0'] = adm_stats.loc[:,'pop'] - adm_stats.loc[:,['pop_ls_1', 'pop_ls_2', 'pop_ls_3', 'pop_ls_4']].sum(axis=1) #.round(decimals=2)


In [84]:
adm_stats['km2_ls_0'] = adm_stats.loc[:,'km2'] - adm_stats.loc[:,['km2_ls_1', 'km2_ls_2', 'km2_ls_3', 'km2_ls_4']].sum(axis=1) #.round(decimals=2)


In [85]:
adm_stats['build_ls_0'] = adm_stats.loc[:,'build'] - adm_stats.loc[:,['build_ls_1', 'build_ls_2', 'build_ls_3', 'build_ls_4']].sum(axis=1) #.round(decimals=2)


In [86]:
adm_stats[:3]

Unnamed: 0,id,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,geom
0,5.0,2.393424,0.185458,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,381382.85,17077.31,124025.84,53222.23,39556.77,482.27,95.33,725.99,294.7,280.92,67797.0,0.0,0.0,,0.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34..."
1,16.0,4.380275,0.460366,Kabul,کابل,AF01,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,6182515,4687.95,1028.0,643926,1318.81,1933975,459.2,187525,1361481,286.8,118442,697241,136.94,56794,364.16,86245,7881,5494963.18,18036.06,495259.05,124378.91,49877.8,2229.87,374.98,1423.22,395.7,264.18,643781.0,46.0,74.0,25.0,0.0,"MULTIPOLYGON (((69.48780 34.16098, 69.48661 34..."
2,19.0,4.938876,0.531138,Paktya,پکتیا,AF13,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,South Eastern,جنوب ختیځه حوزه,SE,840036,5468.04,916.0,95548,153.63,120165,561.48,28771,69739,318.65,13418,19251,77.79,2486,277.15,35697,988,431357.5,20598.05,173456.59,155943.25,58680.61,2158.25,157.96,1307.26,1379.95,464.62,95548.0,0.0,0.0,,0.0,"MULTIPOLYGON (((69.97800 33.62184, 69.97799 33..."


In [87]:
# Specify the column label to start the slice from
start_column = 'pop'

# Get the index of the start column
start_index = adm_stats.columns.get_loc(start_column)

# Slice the GeoDataFrame from the start column to the last column
sliced_gdf = adm_stats.iloc[:, start_index:]

# Replace NaN values with 0 in the sliced GeoDataFrame
sliced_gdf = sliced_gdf.fillna(0).round(decimals=2)

In [88]:
sliced_gdf

Unnamed: 0,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,geom
0,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,381382.85,17077.31,124025.84,53222.23,39556.77,482.27,95.33,725.99,294.7,280.92,67797.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34..."
1,6182515,4687.95,1028.0,643926,1318.81,1933975,459.2,187525,1361481,286.8,118442,697241,136.94,56794,364.16,86245,7881,5494963.18,18036.06,495259.05,124378.91,49877.8,2229.87,374.98,1423.22,395.7,264.18,643781.0,46.0,74.0,25.0,0.0,"MULTIPOLYGON (((69.48780 34.16098, 69.48661 34..."
2,840036,5468.04,916.0,95548,153.63,120165,561.48,28771,69739,318.65,13418,19251,77.79,2486,277.15,35697,988,431357.5,20598.05,173456.59,155943.25,58680.61,2158.25,157.96,1307.26,1379.95,464.62,95548.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.97800 33.62184, 69.97799 33..."
3,702218,3900.67,675.0,87840,180.03,107324,238.25,8742,76139,163.16,3930,43887,78.73,1461,347.64,28073,1048,411448.76,50201.19,144873.27,74358.65,21336.13,999.14,474.36,1505.56,637.42,284.19,87840.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((70.50041 34.51768, 70.49845 34..."
4,576898,4394.3,781.0,83454,131.28,128092,526.13,25805,76735,291.54,13575,33653,96.23,4519,221.85,13646,455,453207.21,24363.91,49951.55,41386.68,7988.65,2479.09,347.14,762.13,638.77,167.17,83454.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((68.78219 33.61425, 68.78328 33..."
5,617596,4217.67,994.0,51466,146.43,53989,184.29,9297,37894,126.37,4728,19847,55.27,1612,324.06,31223,762,151769.02,95685.86,210065.47,85236.06,74839.59,110.57,864.53,1919.18,599.88,723.51,51466.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((71.03569 34.56707, 71.03462 34..."
6,868496,5581.57,1037.0,105118,155.6,84418,212.5,9254,52823,134.73,4491,29845,58.62,1817,776.79,62263,5847,522517.04,18270.9,141787.11,127886.48,58034.47,589.73,362.84,1637.72,2039.34,951.94,105118.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((68.91945 34.80273, 68.91932 34..."
7,1038852,4111.29,1025.0,154165,252.68,156810,334.3,20764,90374,190.16,8828,34527,62.16,1893,135.57,15784,669,712951.07,43018.87,155374.23,95918.28,31589.55,1717.79,316.77,1173.62,635.01,268.1,154165.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.35163 33.05242, 69.35296 33..."
8,2267027,7376.5,1563.0,343146,307.33,403357,723.56,66713,215275,449.24,30221,106846,199.31,12332,310.85,27800,784,1786309.07,76047.8,236244.98,136777.81,31647.34,3405.47,844.65,1900.38,922.93,303.07,342987.0,10.0,116.0,33.0,0.0,"MULTIPOLYGON (((70.90162 33.98614, 70.90083 33..."


In [89]:
# Add the modified columns back to the original GeoDataFrame
adm_stats = gpd.GeoDataFrame(pd.concat([adm_stats.iloc[:, :start_index], sliced_gdf], axis=1).round(decimals=2), geometry=adm_stats.geometry).drop(columns=['geometry'])


In [90]:
adm_stats

# Specify the column label to start the slice from
start_column = 'pop'

# Get the index of the start column
start_index = adm_stats.columns.get_loc(start_column)

# Slice the GeoDataFrame from the start column to the last column
sliced_gdf = adm_stats.iloc[:, start_index:]

# Replace NaN values with 0 in the sliced GeoDataFrame
sliced_gdf = sliced_gdf.fillna(0)

In [91]:

#adm_stats_with_geom = adm_stats.set_geometry('geom').round(decimals=2)
#adm_stats_with_geom.set_geometry('geom').geometry

#adm_stats_with_geometry = adm_stats.set_geometry('geom')

#adm_stats.set_geometry('geom').round(decimals=2).to_postgis('adm1_stats_landslide_sample_v2', con, if_exists='replace')


In [92]:
# Round just before saving to postgres to make sure only two decimals are displayed
adm_stats_with_geometry = adm_stats.round(decimals=2)

In [93]:
# Save to postgres table
adm_stats_with_geometry.set_geometry('geom').to_postgis('adm1_stats_landslide_sample_v3', con, if_exists='replace')

In [94]:
# Open and view table
adm_stats_test = gpd.GeoDataFrame.from_postgis('SELECT * from adm1_stats_landslide_sample_v3', con)
adm_stats_test

Unnamed: 0,id,shape_leng,shape_area,adm1_en,adm1_da,adm1_pcode,adm1_ref,adm1alt1en,adm1alt2en,adm1alt1da,adm1alt2da,adm0_en,adm0_da,adm0_pcode,date,validon,validto,reg_en,reg_da,reg_pcode,pop,km2,sett,build,pop_densit,pop_f029,km2_f029,build_f029,pop_f121,km2_f121,build_f121,pop_f271,km2_f271,build_f271,km2_av,pop_av,build_av,pop_ls_0,pop_ls_1,pop_ls_2,pop_ls_3,pop_ls_4,km2_ls_0,km2_ls_1,km2_ls_2,km2_ls_3,km2_ls_4,build_ls_0,build_ls_1,build_ls_2,build_ls_3,build_ls_4,geom
0,5.0,2.39,0.19,Kapisa,کاپیسا,AF02,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,615265,1879.21,631.0,67797,327.41,70688,112.19,8258,35988,63.59,2460,15380,27.09,603,369.02,59995,3280,381382.85,17077.31,124025.84,53222.23,39556.77,482.27,95.33,725.99,294.7,280.92,67797.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.81884 34.80029, 69.81822 34..."
1,16.0,4.38,0.46,Kabul,کابل,AF01,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,6182515,4687.95,1028.0,643926,1318.81,1933975,459.2,187525,1361481,286.8,118442,697241,136.94,56794,364.16,86245,7881,5494963.18,18036.06,495259.05,124378.91,49877.8,2229.87,374.98,1423.22,395.7,264.18,643781.0,46.0,74.0,25.0,0.0,"MULTIPOLYGON (((69.48780 34.16098, 69.48661 34..."
2,19.0,4.94,0.53,Paktya,پکتیا,AF13,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,South Eastern,جنوب ختیځه حوزه,SE,840036,5468.04,916.0,95548,153.63,120165,561.48,28771,69739,318.65,13418,19251,77.79,2486,277.15,35697,988,431357.5,20598.05,173456.59,155943.25,58680.61,2158.25,157.96,1307.26,1379.95,464.62,95548.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.97800 33.62184, 69.97799 33..."
3,24.0,3.33,0.38,Laghman,لغمان,AF07,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,702218,3900.67,675.0,87840,180.03,107324,238.25,8742,76139,163.16,3930,43887,78.73,1461,347.64,28073,1048,411448.76,50201.19,144873.27,74358.65,21336.13,999.14,474.36,1505.56,637.42,284.19,87840.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((70.50041 34.51768, 70.49845 34..."
4,26.0,4.25,0.43,Logar,لوگر,AF05,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,576898,4394.3,781.0,83454,131.28,128092,526.13,25805,76735,291.54,13575,33653,96.23,4519,221.85,13646,455,453207.21,24363.91,49951.55,41386.68,7988.65,2479.09,347.14,762.13,638.77,167.17,83454.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((68.78219 33.61425, 68.78328 33..."
5,27.0,4.03,0.42,Kunar,کنر ها,AF15,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,617596,4217.67,994.0,51466,146.43,53989,184.29,9297,37894,126.37,4728,19847,55.27,1612,324.06,31223,762,151769.02,95685.86,210065.47,85236.06,74839.59,110.57,864.53,1919.18,599.88,723.51,51466.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((71.03569 34.56707, 71.03462 34..."
6,30.0,5.32,0.55,Parwan,پروان,AF03,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Capital,مرکزی حوزه,CR,868496,5581.57,1037.0,105118,155.6,84418,212.5,9254,52823,134.73,4491,29845,58.62,1817,776.79,62263,5847,522517.04,18270.9,141787.11,127886.48,58034.47,589.73,362.84,1637.72,2039.34,951.94,105118.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((68.91945 34.80273, 68.91932 34..."
7,31.0,3.99,0.4,Khost,خوست,AF14,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,South Eastern,جنوب ختیځه حوزه,SE,1038852,4111.29,1025.0,154165,252.68,156810,334.3,20764,90374,190.16,8828,34527,62.16,1893,135.57,15784,669,712951.07,43018.87,155374.23,95918.28,31589.55,1717.79,316.77,1173.62,635.01,268.1,154165.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((69.35163 33.05242, 69.35296 33..."
8,33.0,5.33,0.72,Nangarhar,ننگرهار,AF06,,,,,,Afghanistan,افغانستان,AF,2017-12-12,2021-11-17,,Eastern,ختیځه حوزه,ER,2267027,7376.5,1563.0,343146,307.33,403357,723.56,66713,215275,449.24,30221,106846,199.31,12332,310.85,27800,784,1786309.07,76047.8,236244.98,136777.81,31647.34,3405.47,844.65,1900.38,922.93,303.07,342987.0,10.0,116.0,33.0,0.0,"MULTIPOLYGON (((70.90162 33.98614, 70.90083 33..."


### 5. Post-processing