# Flood summaries for ThinkHazard

This script performs flood hazard ranking by administrative unit using global-extent
Fathom tiles hosted on AWS S3, rather than country-extent locally downloaded data.

The hazard ranking is based on:
- Value threshold: Minimum flood depth (cm) to consider
- Area threshold: Minimum percentage of area affected
- Hazard score: Count of return periods meeting both thresholds (0-3)


In [5]:
import os, time, io, json, sys
import urllib3
import boto3
import h3ronpy
import rasterio

import geopandas as gpd
import pandas as pd
import numpy as np
import folium as flm
import matplotlib.pyplot as plt
import GOSTrocks.rasterMisc as rMisc
import GOSTrocks.dataMisc as dMisc
import GOSTrocks.mapMisc as mapMisc

from functools import reduce
from GOSTrocks.misc import tPrint
from h3ronpy.pandas.vector import geodataframe_to_cells, cells_dataframe_to_geodataframe
from h3ronpy import ContainmentMode
from dotenv import load_dotenv
from shapely.geometry import shape, box
#from geojson_pydantic import Feature, Polygon
from urllib3.exceptions import InsecureRequestWarning
from botocore import UNSIGNED
from botocore.config import Config
from pystac_client import Client
from tqdm.notebook import tqdm

# Import helper functions
from gfdrr_helper import *

sys.path.append("../../src")

import global_zonal
import h3_helper

# Silence urllib3's InsecureRequestWarning; the S3 client created in this cell
# is built with verify=False, which would otherwise emit that warning.
urllib3.disable_warnings(InsecureRequestWarning)

def tPrint(s):
    """Print ``s`` preceded by the current local time (HH:MM:SS) and a tab.

    This local definition replaces the ``tPrint`` imported from
    ``GOSTrocks.misc`` earlier in the cell.
    """
    stamp = time.strftime("%H:%M:%S")
    print(stamp, s, sep="\t")

# S3 client that sends unsigned (anonymous) requests.
# NOTE(review): verify=False turns off TLS certificate verification for every
# request made through this client - confirm this is still required.
s3_client = boto3.client('s3', verify=False, config=Config(signature_version=UNSIGNED))

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# --- Local output locations ---
local_folder = "C:/WBG/Work/Projects/ThinkHazard"
out_folder = os.path.join(local_folder, "FATHOM_summaries")  # per-country summary CSVs
map_folder = os.path.join(local_folder, "FATHOM_maps")  # rendered map PNGs
for tF in [out_folder, map_folder]:
    # exist_ok=True replaces the exists()-then-makedirs() pair, which can fail
    # if the folder is created between the check and the call.
    os.makedirs(tF, exist_ok=True)

# --- FATHOM inputs ---
vrt_folder = r"C:\WBG\Work\data\FATHOM"
s3_bucket = "wbg-geography01"
s3_prefix = "FATHOM"
return_periods = [10, 100, 500, 1000]
# Each entry is [label, VRT file-name template]; "{rp}" is filled with a return period.
flood_files = [
    ["FU", "FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-FLUVIAL-UNDEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt"],
    ["CU", "FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-COASTAL-UNDEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt"],
    ['PD', "FLOOD_MAP-1ARCSEC-NW_OFFSET-1in{rp}-PLUVIAL-DEFENDED-DEPTH-2020-PERCENTILE50-v3.1.vrt"],
]

# --- Administrative boundaries ---
admin_boundaries_file = r"C:\WBG\Work\data\ADMIN\NEW_WB_BOUNDS\FOR_PUBLICATION\crs_4326\parquet\WB_GAD_ADM2.parquet"

In [7]:
# Load the administrative boundaries parquet and preview the first rows.
inA = gpd.read_parquet(admin_boundaries_file)
inA.head()

Unnamed: 0,ISO_A3,ISO_A2,WB_A3,WB_REGION,WB_STATUS,NAM_0,NAM_1,ADM1CD_c,GEOM_SRCE,geometry,NAM_2,ADM2CD_c
0,AUS,AU,AUS,Other,Member State,Australia,Australian Capital Territory,AUS002,WB GAD,"POLYGON ((149.2007 -35.20541, 149.20589 -35.21...",Unincorporated ACT,AUS002001
1,AUS,AU,AUS,Other,Member State,Australia,Other Territories,AUS006,WB GAD,"MULTIPOLYGON (((150.76958 -35.1223, 150.76522 ...",Unincorp. Other Territories,AUS006001
2,AUS,AU,AUS,Other,Member State,Australia,New South Wales,AUS004,WB GAD,"POLYGON ((151.15052 -33.8721, 151.14489 -33.88...",Ashfield (A),AUS004003
3,AUS,AU,AUS,Other,Member State,Australia,New South Wales,AUS004,WB GAD,"POLYGON ((151.08142 -33.84999, 151.07837 -33.8...",Auburn (C),AUS004004
4,AUS,AU,AUS,Other,Member State,Australia,New South Wales,AUS004,WB GAD,"POLYGON ((151.01316 -33.87841, 151.02907 -33.8...",Bankstown (C),AUS004007


In [None]:
# Build one summary CSV per country: for every flood type and return period,
# compute the flooded-area fraction per admin unit and merge all results on ADM2CD_c.
excluded_countries = {"FJI"}  # countries deliberately left out of this run

with rasterio.Env(GDAL_HTTP_UNSAFESSL='YES'):
    for sel_country in inA['ISO_A3'].unique():
        out_file = os.path.join(out_folder, f"FATHOM_ThinkHazard_summary_{sel_country}.csv")

        # Report the real reason for skipping: an excluded country has no file,
        # so it must not be logged as "File already exists".
        if sel_country in excluded_countries:
            tPrint(f"{sel_country} is excluded from processing, skipping...")
            continue
        if os.path.exists(out_file):
            tPrint(f"File already exists for {sel_country}, skipping...")
            continue

        tPrint(f"Processing country: {sel_country}")
        sel_a = inA[inA['ISO_A3'] == sel_country]
        all_res = []
        for lbl, raster_file in flood_files:
            for return_period in return_periods:
                tPrint(f"Processing {lbl} for {return_period} year return period")
                sel_raster_file = raster_file.format(rp=return_period)
                sel_raster = f"s3://{s3_bucket}/{s3_prefix}/{sel_raster_file}"
                res_a = calculate_think_hazard_score(sel_a, sel_raster, depth_threshold=50, idx_col='ADM2CD_c')
                # Give the generic column a name unique to this flood type / return period
                # so the per-layer results can be merged side by side.
                res_a = res_a.rename(columns={'frac_area_flooded': f'frac_area_flooded_{lbl}_{return_period}yr'})
                all_res.append(res_a)

        # Outer-join every per-layer result on the admin unit code.
        all_res_df = reduce(lambda left, right: pd.merge(left, right, on='ADM2CD_c', how='outer'), all_res)
        all_res_df.to_csv(out_file, index=False)

20:30:15	File already exists for AUS, skipping...


NameError: name 'plt' is not defined

In [None]:
# Map floods for all results: one 100-year map per country that has a summary CSV.
for sel_country in tqdm(inA['ISO_A3'].unique()):
    out_file = os.path.join(out_folder, f"FATHOM_ThinkHazard_summary_{sel_country}.csv")
    # Only the read is guarded, so a FileNotFoundError raised while merging or
    # writing the map is not misreported as a missing summary file.
    try:
        all_res_df = pd.read_csv(out_file)
    except FileNotFoundError:
        tPrint(f"No summary file for {sel_country}, skipping mapping...")
        continue

    sel_a = inA[inA['ISO_A3'] == sel_country]
    # Left join keeps every admin unit, including those without a summary row.
    map_adm = pd.merge(sel_a, all_res_df, on='ADM2CD_c', how='left')
    map_flood(map_adm, return_period=100, out_file=os.path.join(map_folder, f"flood_map_{sel_country}_100yr.png"))
    

FileNotFoundError: [Errno 2] No such file or directory: 'C:/WBG/Work/Projects/ThinkHazard\\FATHOM_summaries\\FATHOM_ThinkHazard_summary_FJI.csv'

# Debugging
In the initial run there was an error in the coastal processing; this section opens each existing summary file and re-processes the coastal flood columns.

In [10]:
# Re-run only the coastal layer (flood_files[1]) for every country that already
# has a summary CSV, replacing its coastal columns in place.

# Country codes are the last "_"-separated token of each summary file name.
processed_countries = [
    f.split("_")[-1].split(".")[0]
    for f in os.listdir(out_folder)
    if f.startswith("FATHOM_ThinkHazard_summary_") and f.endswith(".csv")
]

# The layer being re-processed does not change between countries.
lbl, raster_file = flood_files[1]

with rasterio.Env(GDAL_HTTP_UNSAFESSL='YES'):
    for sel_country in tqdm(processed_countries):
        sel_a = inA[inA['ISO_A3'] == sel_country]
        existing_file = os.path.join(out_folder, f"FATHOM_ThinkHazard_summary_{sel_country}.csv")
        existing_df = pd.read_csv(existing_file)

        for return_period in return_periods:
            flood_col = f'frac_area_flooded_{lbl}_{return_period}yr'
            sel_raster_file = raster_file.format(rp=return_period)
            sel_raster = f"s3://{s3_bucket}/{s3_prefix}/{sel_raster_file}"
            # The helper receives the raster path (as in the main processing loop),
            # so the raster is not opened separately here.
            res_a = calculate_think_hazard_score(sel_a, sel_raster, depth_threshold=50, idx_col='ADM2CD_c')
            res_a = res_a.rename(columns={'frac_area_flooded': flood_col})

            # Drop coastal flooding column if they exist and add new columns
            existing_df = existing_df.drop(columns=[flood_col], errors='ignore')
            existing_df = pd.merge(existing_df, res_a, on='ADM2CD_c', how='left')
        existing_df.to_csv(existing_file, index=False)

  0%|          | 0/222 [00:00<?, ?it/s]

In [9]:
# Display the folder the per-country summary CSVs are written to.
out_folder

'C:/WBG/Work/Projects/ThinkHazard\\FATHOM_summaries'

In [None]:
# Display the boundaries subset left over from the most recent loop iteration.
sel_a

In [None]:
# Scratch check: attach the most recent single-layer result (res_a) to the
# boundaries subset; the left join keeps every row of sel_a.
map_adm = pd.merge(sel_a, res_a, on='ADM2CD_c', how='left')
map_adm.head()

In [None]:
# List every column except the first.
# NOTE(review): presumably the first column is the ADM2CD_c key - confirm.
all_res_df.columns[1:]

In [None]:
# Save one static map per result column for the current country.
map_adm = pd.merge(sel_a, all_res_df, on='ADM2CD_c', how='left')
for col in all_res_df.columns[1:]:
    # Scale the column by 100 for mapping; units without a value are drawn as 0.
    map_adm['FLOOD_COL'] = (map_adm[col] * 100).fillna(0)
    # Keep the result in its own name: assigning it to `plt` would overwrite
    # the matplotlib.pyplot alias imported at the top of the notebook.
    map_plot = mapMisc.static_map_vector(map_adm, 'FLOOD_COL', thresh=[0, 5, 10, 15, 100], colormap='Blues')
    map_plot.savefig(os.path.join(map_folder, f"{sel_country}_{col}_map.png"))



In [None]:
mapMisc.static_map_vector?