In [1]:
import sys
sys.path.insert(0, '..')
sys.path.insert(0, '../exposure_to_hazards/')

from config import DATA_DIR

In [2]:
from pathlib import Path
from climada.entity import Exposures
import numpy as np
import pandas as pd

# Load one CLIMADA Exposures object per age group and year into
# exposures_dict[age][year]. Year keys are strings ("2003" ... "2022").
path_population_files = DATA_DIR / "population"
path_exposures = path_population_files / "worldpop/climada_exposures"

exposures_dict = {}
for age in ["0_1", "all", "65_70_75_80"]:
    exposures_dict[age] = {}

    for year in map(str, range(2003, 2023)):
        exposure = Exposures.from_hdf5(path_exposures / f"{age}_era5_025_compatible_{year}.hdf5")

        # Constant column; presumably the CLIMADA impact-function id for a
        # hazard type "cs" -- TODO confirm against the impact function set.
        exposure.gdf["impf_cs"] = 1

        # Shift longitudes greater than 180 down by 360 (vectorised).
        lon = exposure.gdf["longitude"]
        exposure.gdf["longitude"] = lon.mask(lon > 180, lon - 360)

        # Reset negative exposure values to zero. A single .loc assignment is
        # used because chained indexing (gdf['value'][mask] = 0) is not
        # guaranteed to write back to the frame and is a no-op under pandas
        # Copy-on-Write.
        exposure.gdf.loc[exposure.gdf["value"] < 0, "value"] = 0

        exposures_dict[age][year] = exposure


In [3]:
# 1. Spatially join the exposure points with the ADM1 boundaries. Any age group and any year will do: only the lat/lon points per admin1 unit are needed.

In [4]:
import geopandas as gpd
from shapely.geometry import Point


# Load the ADM1 boundary polygons, reproject them to EPSG:4326, keep only the
# rows typed 'ADM1' and rename the identifying columns used downstream.
gdf_adm1 = gpd.read_file(path_population_files / "geoBoundariesCGAZ_ADM1/geoBoundariesCGAZ_ADM1.shp")
gdf_adm1 = gdf_adm1.to_crs("EPSG:4326")
gdf_adm1 = gdf_adm1[gdf_adm1['shapeType'] == 'ADM1']
gdf_adm1 = gdf_adm1.rename(columns={'shapeGroup': 'ISO3', 'shapeName': 'ADM1_NAME', 'shapeID': 'ADM1_ID'})

# Persist the renamed boundaries; create the target folder first so the write
# does not fail on a fresh checkout.
renamed_adm1_path = path_population_files / "geoBoundariesCGAZ_ADM1_renamed/geoBoundariesCGAZ_ADM1.shp"
renamed_adm1_path.parent.mkdir(parents=True, exist_ok=True)
gdf_adm1.to_file(renamed_adm1_path)

# Reference exposure used for the point-in-polygon join. It is named
# explicitly instead of relying on loop variables left over from the loading
# cell; these are the values those variables hold after a fresh run.
reference_gdf = exposures_dict["65_70_75_80"]["2022"].gdf
geometry = gpd.points_from_xy(reference_gdf.longitude, reference_gdf.latitude)

# Left join: every exposure point is kept; points outside all polygons get
# missing ADM1 attributes.
gdf = gpd.GeoDataFrame(reference_gdf, crs="EPSG:4326", geometry=geometry)
gdf = gpd.sjoin(gdf, gdf_adm1, how="left", predicate='within')


In [5]:
import pandas as pd
# Write, for each age group, a CSV that maps every exposure point to the ADM1
# unit it falls in.
# The year is fixed explicitly (the value the leftover loop variable holds
# after a fresh run of the loading cell) instead of being read from hidden
# notebook state.
reference_year = "2022"

for age in ['all', '0_1', '65_70_75_80']:
    exposure_gdf = exposures_dict[age][reference_year].gdf

    # Build the points from this frame's own coordinates so geometry and rows
    # are guaranteed to line up, whatever the row order of other age groups.
    gdf = gpd.GeoDataFrame(
        exposure_gdf,
        crs="EPSG:4326",
        geometry=gpd.points_from_xy(exposure_gdf.longitude, exposure_gdf.latitude),
    )
    merged = gpd.sjoin(gdf, gdf_adm1, how="left", predicate='within')

    # A point matched to several polygons yields several rows; keep the first.
    merged = merged.drop_duplicates(subset=['latitude', 'longitude'], keep='first')
    merged.to_csv(path_population_files / f"{age}_worldpop_admin_by_point.csv")

In [6]:
# population by admin1 by year

In [9]:
# Sum the exposure `value` per ADM1 unit for every age group and year, and
# write one long-format CSV per age group.
# NOTE(review): the loop covers 2003-2021 although exposures are loaded through
# 2022 and the file name says "2000_2022" -- confirm the intended year range.
pop_by_adm1_by_year = {}
for age in ['all', '65_70_75_80', '0_1']:
    total_pop_by_year = []
    for year in np.arange(2003, 2022):
        exposure_gdf = exposures_dict[age][str(year)].gdf

        # Build the points from this frame's own coordinates instead of reusing
        # geometry taken from a different age/year, which is only correct when
        # every frame shares the same row order.
        gdf = gpd.GeoDataFrame(
            exposure_gdf,
            crs="EPSG:4326",
            geometry=gpd.points_from_xy(exposure_gdf.longitude, exposure_gdf.latitude),
        )
        gdf = gpd.sjoin(gdf, gdf_adm1, how="left", predicate='within')

        # The join result already carries every exposure column next to the
        # ADM1 attributes, so it is aggregated directly. Rows without an ADM1
        # match are dropped by groupby's default handling of missing keys.
        # NOTE(review): unlike the admin-by-point export, points matched to
        # several polygons are not de-duplicated here -- confirm this is intended.
        merged = gdf[['value', 'ADM1_ID', 'ISO3', 'ADM1_NAME']].groupby(['ADM1_ID', 'ISO3', 'ADM1_NAME']).sum()
        merged['year'] = year
        total_pop_by_year.append(merged)

    pop_by_adm1_by_year[age] = pd.concat(total_pop_by_year).reset_index()
    pop_by_adm1_by_year[age].to_csv(path_population_files / f"{age}_worldpop_by_admin1_by_year_2000_2022.csv")
    

In [20]:
# Ratio of the summed 2021 exposure value of the "0_1" group to that of the
# "all" group; the bare last expression is displayed as the cell output.
value_0_1_2021 = exposures_dict['0_1']['2021'].gdf.value.sum()
value_all_2021 = exposures_dict['all']['2021'].gdf.value.sum()
value_0_1_2021 / value_all_2021

0.08643703464341572