In [1]:
import os
import sys
from pathlib import Path

import dask
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from cartopy import crs as ccrs
from scipy import stats
from tqdm.notebook import tqdm

# Make the project root importable before pulling in the local configuration module.
project_path = os.path.abspath(os.path.join('..', '..', '..'))
if project_path not in sys.path:
    sys.path.insert(0, project_path)

from source.config import DATA_SRC, POP_DATA_SRC, WEATHER_SRC

# Year bounds used when building the input/output file names in this notebook.
MIN_YEAR = 1980
MAX_YEAR = 2023
# Bounds of the reference period (presumably the baseline of the "change" products - confirm).
REFERENCE_YEAR_START = 1986
REFERENCE_YEAR_END = 2005

# Folder the exposure files are read from and written to; note that the folder name
# carries the year after MAX_YEAR.
RESULTS_FOLDER = DATA_SRC / 'lancet' / 'results' / f'results_{MAX_YEAR + 1}/worldpop_hw_exposure'




In [2]:
# Figure settings: on-screen and saved resolution, default figure size, and medium-sized
# figure/axes titles, applied in one go.
plt.rcParams.update({
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'figure.figsize': (5, 2.5),
    'figure.titlesize': 'medium',
    'axes.titlesize': 'medium',
})

In [3]:
# Population inputs for the two age groups named in the files (infants and elderly).
INFANTS_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / 'worldpop_infants_1950_2023_era5_compatible.nc'
ELDERLY_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / 'worldpop_elderly_1950_2023_era5_compatible.nc'

population_infants = xr.open_dataarray(INFANTS_TOTALS_FILE)
population_over_65 = xr.open_dataarray(ELDERLY_TOTALS_FILE)
# Label the elderly array with age_band_lower_bound = 65 before stacking the two arrays along
# that dimension (the infants array is presumably already labelled - confirm).
population_over_65['age_band_lower_bound'] = 65

population = xr.concat([population_infants, population_over_65], dim='age_band_lower_bound')
population.name = 'population'
# One age band and 20 years per chunk so later reductions can run in parallel.
population = population.chunk({'age_band_lower_bound': 1, 'year': 20})
# Map longitudes through ((lon + 180) % 360) - 180, then order them from high to low.
population = population.assign_coords(
    longitude=(((population.longitude + 180) % 360) - 180)
)
population = population.sortby('longitude', ascending=False)


In [4]:
def _wrap_and_sort_longitude(data):
    """Return ``data`` with longitude mapped to ((lon + 180) % 360) - 180 and sorted descending."""
    wrapped = ((data.longitude + 180) % 360) - 180
    return data.assign_coords(longitude=wrapped).sortby('longitude', ascending=False)


# Exposure-change grids, one file per age group.
exposures_infants = _wrap_and_sort_longitude(xr.open_dataset(
    RESULTS_FOLDER / f'heatwave_exposure_change_infants_multi_threshold_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
))
exposures_over65 = _wrap_and_sort_longitude(xr.open_dataset(
    RESULTS_FOLDER / f'heatwave_exposure_change_over65_multi_threshold_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
))

# Stack the two age groups along a new age_band_lower_bound dimension labelled 0 and 65,
# chunk per age band / 20 years, and apply the longitude convention once more.
exposures_change = xr.concat(
    [exposures_infants, exposures_over65],
    dim=pd.Index([0, 65], name='age_band_lower_bound'),
)
exposures_change = exposures_change.chunk({'age_band_lower_bound': 1, 'year': 20})
exposures_change = _wrap_and_sort_longitude(exposures_change)

# Absolute exposure grid, opened directly with the same chunking.
exposures_abs = _wrap_and_sort_longitude(xr.open_dataset(
    RESULTS_FOLDER / f'heatwave_exposure_multi_threshold_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc',
    chunks={'age_band_lower_bound': 1, 'year': 20},
))


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [5]:
# INFANTS_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / f'worldpop_infants_1950_2023_era5_compatible.nc'
# ELDERLY_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / f'worldpop_elderly_1950_2023_era5_compatible.nc'

# population_over_65 = xr.open_dataarray(ELDERLY_TOTALS_FILE)
# population_infants = xr.open_dataarray(INFANTS_TOTALS_FILE)

In [5]:
# Country names and groupings table; header=1 takes the column names from the second sheet row.
country_lc_grouping = pd.read_excel(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'Country Names and Groupings - 2024 Report.xlsx',
    header=1,
)

In [6]:
# Country (ADM0) boundary polygons; later cells read OBJECTID, ISO_3_CODE and WHO_REGION from it.
country_polygons = gpd.read_file(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'Detailed_Boundary_ADM0' / 'GLOBAL_ADM0.shp'
)

In [8]:
# Display the resolved path of the country raster file (inspection only; nothing is assigned).
DATA_SRC / 'lancet' / 'admin_boundaries' / 'admin0_raster_report_2024.nc'

PosixPath('/nfs/n2o/wcr/szelie/lancet/admin_boundaries/admin0_raster_report_2024.nc')

## Calculate Exposure weighted change by country (population normalised)

In [7]:
# Gridded country raster; its OBJECTID variable is compared against country_polygons.OBJECTID
# in the per-country loops below.
countries_raster = xr.open_dataset(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'admin0_raster_report_2024.nc'
)

In [13]:
# NOTE(review): this re-import replaces the `tqdm.notebook` progress bar imported in the first
# cell with the plain-text one for every later cell - confirm this is intended.
from tqdm import tqdm

# Population-normalised exposure change per country: for each country polygon, sum the gridded
# exposure change and the gridded population over the raster cells whose OBJECTID matches the
# polygon, divide the former by the latter, and label the result with the ISO3 code.
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    weighted_results = []

    for _, row in tqdm(country_polygons.iterrows(), total=len(country_polygons)):
        grid_code = row.OBJECTID
        country_mask = countries_raster['OBJECTID'] == grid_code
        country_population = (country_mask * population).sum(dim=['latitude', 'longitude'])
        country_exposures = (country_mask * exposures_change).sum(dim=['latitude', 'longitude']) / country_population
        country_exposures = country_exposures.expand_dims(dim={'country': [row.ISO_3_CODE]})
        weighted_results.append(country_exposures)

    weighted_results = xr.concat(weighted_results, dim='country')
    # The start year comes from MIN_YEAR, like every other output name in this notebook,
    # so the file name follows the configured analysis window.
    weighted_results.to_netcdf(RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_weighted_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

100%|██████████| 241/241 [00:03<00:00, 63.62it/s]
  return func(*(_execute_task(a, cache) for a in args))


# Exposure to change by country, total


In [14]:
# Total (not population-normalised) exposure change per country: sum the gridded exposure
# change over the raster cells whose OBJECTID matches each polygon and label the result with
# the ISO3 code. No population total is needed here, so none is built.
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    results_tot = []

    for _, row in tqdm(country_polygons.iterrows(), total=len(country_polygons)):
        grid_code = row.OBJECTID
        country_mask = countries_raster['OBJECTID'] == grid_code
        country_exposures = (country_mask * exposures_change).sum(dim=['latitude', 'longitude'])
        country_exposures = country_exposures.expand_dims(dim={'country': [row.ISO_3_CODE]})
        results_tot.append(country_exposures)

    results_tot = xr.concat(results_tot, dim='country')
    results_tot.to_netcdf(RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

100%|██████████| 241/241 [00:02<00:00, 83.22it/s]


# Exposures absolute by country

In [33]:
# Per-country values on the absolute exposure grid: population, the summed `heatwaves_days`
# variable, and that sum divided by population.
pop, results, results_weight = [], [], []

for _, row in tqdm(country_polygons.iterrows(), total=len(country_polygons)):
    grid_code = row.OBJECTID
    country_mask = countries_raster['OBJECTID'] == grid_code

    # Sum over the cells of this country, label with the ISO3 code, and evaluate eagerly.
    country_population = (country_mask * population).sum(dim=['latitude', 'longitude'])
    country_population = country_population.expand_dims(dim={'country': [row.ISO_3_CODE]}).compute()
    pop.append(country_population)

    country_exposures = (exposures_abs * country_mask).sum(dim=['latitude', 'longitude'])
    country_exposures = country_exposures.expand_dims(dim={'country': [row.ISO_3_CODE]}).compute()
    results.append(country_exposures.heatwaves_days)

    country_exposure_per_person = country_exposures.heatwaves_days / country_population
    results_weight.append(country_exposure_per_person.compute())

# Stack the per-country pieces and give each quantity its variable name.
results_pop = xr.concat(pop, dim='country').to_dataset(name='population')
results_abs = xr.concat(results, dim='country').to_dataset(name='exposures_total')
results_weight = xr.concat(results_weight, dim='country').to_dataset(name='exposures_weighted')

exposures_countries = xr.merge([results_pop, results_abs, results_weight])

exposures_countries.to_netcdf(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
)

100%|██████████| 241/241 [17:06<00:00,  4.26s/it]


## Exposures absolute by WHO region


In [48]:
## first let's rasterize the WHO regions

In [15]:
# Number the WHO regions 1, 2, ... in order of first appearance in the polygon table.
region_to_id = {
    region_name: region_id
    for region_id, region_name in enumerate(country_polygons['WHO_REGION'].unique(), 1)
}
# Store each country's numeric WHO-region identifier in a new column.
country_polygons['WHO_REGION_ID'] = country_polygons['WHO_REGION'].map(region_to_id)

# Open the WHO-region raster from disk (nothing is rasterised in this cell).
# NOTE(review): presumably the raster was produced with the same region-to-id mapping - confirm.
who_region_raster = xr.open_dataset(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'WHO_regions_raster_report_2024.nc'
)

In [16]:
# One row per distinct (WHO region, numeric id) pair.
who_regions = country_polygons[['WHO_REGION', 'WHO_REGION_ID']].drop_duplicates()

In [17]:
# Display the WHO region / id lookup table.
who_regions

Unnamed: 0,WHO_REGION,WHO_REGION_ID
0,AMRO,1
1,EMRO,2
2,AFRO,3
6,WPRO,4
12,SEARO,5
158,EURO,6


In [18]:
import dask

# Per-WHO-region values on the absolute exposure grid: population, the summed `heatwaves_days`
# variable, and that sum divided by population.
pop, results, results_weight = [], [], []

with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(who_regions.iterrows(), total=len(who_regions)):
        # Grid cells whose raster id equals this region's id.
        mask = who_region_raster['WHO_REGION_ID'] == row.WHO_REGION_ID

        masked_population = (mask * population).sum(dim=['latitude', 'longitude'])
        masked_population = masked_population.expand_dims(dim={'who_region': [row.WHO_REGION]}).compute()
        pop.append(masked_population)

        masked_exposures = (exposures_abs * mask).sum(dim=['latitude', 'longitude'])
        masked_exposures = masked_exposures.expand_dims(dim={'who_region': [row.WHO_REGION]}).compute()
        results.append(masked_exposures.heatwaves_days)

        masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
        results_weight.append(masked_exposure_per_person.compute())

    # Stack the per-region pieces and give each quantity its variable name.
    results_pop = xr.concat(pop, dim='who_region').to_dataset(name='population')
    results_abs = xr.concat(results, dim='who_region').to_dataset(name='exposures_total')
    results_weight = xr.concat(results_weight, dim='who_region').to_dataset(name='exposures_weighted')

    exposures_who = xr.merge([results_pop, results_abs, results_weight])

100%|██████████| 6/6 [00:23<00:00,  3.89s/it]


In [83]:
# Write the per-WHO-region population / exposure dataset to the results folder.
exposures_who.to_netcdf(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/who_regions_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
)

In [20]:
# Spot check: the 2020 population summed over everything that remains (regions, age bands).
exposures_who['population'].sel(year=2020).sum()

In [21]:
# Total exposure change per WHO region: sum the gridded exposure change over the cells whose
# WHO_REGION_ID in the raster matches the region, stack the regions, and write them out.
results = []
with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(who_regions.iterrows(), total=len(who_regions)):
        # Select the raster *variable* so the mask is a DataArray. Comparing the whole Dataset
        # gives a Dataset mask, and Dataset * Dataset only keeps data variables present in
        # both operands, which drops every exposure variable from the product.
        mask = who_region_raster['WHO_REGION_ID'] == row.WHO_REGION_ID

        masked_exposures = (exposures_change * mask).sum(dim=['latitude', 'longitude'])
        masked_exposures = masked_exposures.expand_dims(dim={'who_region': [row.WHO_REGION]})
        results.append(masked_exposures)

    results = xr.concat(results, dim='who_region')
    results.to_netcdf(RESULTS_FOLDER / f'exposure_by_region_or_grouping/who_regions_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

100%|██████████| 6/6 [00:00<00:00, 357.15it/s]


# Exposures by HDI

In [31]:
# Country names and groupings table; header=1 takes the column names from the second sheet row.
country_lc_grouping = pd.read_excel(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'Country Names and Groupings - 2024 Report.xlsx',
    header=1,
)

# Attach the grouping columns to the polygons. The grouping table's ISO3 column is renamed to
# ISO_3_CODE first; the merge itself uses pandas' defaults (inner join on the shared columns).
country_polygons = country_polygons.merge(
    country_lc_grouping.rename(columns={'ISO3': 'ISO_3_CODE'})
)

hdi_levels = []

# Number the HDI levels 1, 2, ... in order of first appearance and store the id per country.
region_to_id = {
    level_name: level_id
    for level_id, level_name in enumerate(country_polygons['HDI Level (2021)'].unique(), 1)
}
country_polygons['HDI_ID'] = country_polygons['HDI Level (2021)'].map(region_to_id)

# Open the HDI-group raster from disk.
# NOTE(review): presumably the raster was produced with the same level-to-id mapping - confirm.
hdi_raster = xr.open_dataset(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'HDI_group_raster_report_2024.nc'
)


In [32]:
# One row per distinct (HDI id, HDI level) pair.
hdi = (
    country_polygons[['HDI_ID', 'HDI Level (2021)']]
    .drop_duplicates()
)

In [33]:
# Display the HDI id / level lookup table.
hdi

Unnamed: 0,HDI_ID,HDI Level (2021)
0,1,
1,2,Low
2,3,Medium
4,4,Very High
7,5,High


In [35]:
import dask

# Per-HDI-level values on the absolute exposure grid: population, the summed `heatwaves_days`
# variable, and that sum divided by population.
pop, results, results_weight = [], [], []

# Keep ids above 1 only; in the lookup table displayed earlier, id 1 is the row without an
# HDI level.
# NOTE(review): this depends on the missing level being numbered first - confirm.
hdi = hdi[hdi['HDI_ID'] > 1]

with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(hdi.iterrows(), total=len(hdi)):
        # Grid cells whose raster id equals this HDI level's id.
        mask = hdi_raster['HDI_ID'] == row.HDI_ID

        masked_population = (mask * population).sum(dim=['latitude', 'longitude'])
        masked_population = masked_population.expand_dims(
            dim={'level_of_human_development': [row['HDI Level (2021)']]}
        ).compute()
        pop.append(masked_population)

        masked_exposures = (exposures_abs * mask).sum(dim=['latitude', 'longitude'])
        masked_exposures = masked_exposures.expand_dims(
            dim={'level_of_human_development': [row['HDI Level (2021)']]}
        ).compute()
        results.append(masked_exposures.heatwaves_days)

        masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
        results_weight.append(masked_exposure_per_person.compute())

    # Stack the per-level pieces and give each quantity its variable name.
    results_pop = xr.concat(pop, dim='level_of_human_development').to_dataset(name='population')
    results_abs = xr.concat(results, dim='level_of_human_development').to_dataset(name='exposures_total')
    results_weight = xr.concat(results_weight, dim='level_of_human_development').to_dataset(name='exposures_weighted')

    exposures_hdi = xr.merge([results_pop, results_abs, results_weight])

exposures_hdi.to_netcdf(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/hdi_regions_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
)

  0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
import dask

# NOTE(review): this cell repeats the HDI aggregation of the cell directly above it, except
# that it does not write the result to disk - consider deleting one of the two.
# Per-HDI-level values on the absolute exposure grid: population, the summed `heatwaves_days`
# variable, and that sum divided by population.
pop, results, results_weight = [], [], []

# Keep ids above 1 only; in the lookup table displayed earlier, id 1 is the row without an
# HDI level.
hdi = hdi[hdi['HDI_ID'] > 1]

with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(hdi.iterrows(), total=len(hdi)):
        # Grid cells whose raster id equals this HDI level's id.
        mask = hdi_raster['HDI_ID'] == row.HDI_ID

        masked_population = (mask * population).sum(dim=['latitude', 'longitude'])
        masked_population = masked_population.expand_dims(
            dim={'level_of_human_development': [row['HDI Level (2021)']]}
        ).compute()
        pop.append(masked_population)

        masked_exposures = (exposures_abs * mask).sum(dim=['latitude', 'longitude'])
        masked_exposures = masked_exposures.expand_dims(
            dim={'level_of_human_development': [row['HDI Level (2021)']]}
        ).compute()
        results.append(masked_exposures.heatwaves_days)

        masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
        results_weight.append(masked_exposure_per_person.compute())

    # Stack the per-level pieces and give each quantity its variable name.
    results_pop = xr.concat(pop, dim='level_of_human_development').to_dataset(name='population')
    results_abs = xr.concat(results, dim='level_of_human_development').to_dataset(name='exposures_total')
    results_weight = xr.concat(results_weight, dim='level_of_human_development').to_dataset(name='exposures_weighted')

    exposures_hdi = xr.merge([results_pop, results_abs, results_weight])

# The dataset is deliberately not written to disk in this cell.

In [57]:
# Write the per-HDI-level population / exposure dataset to the results folder.
exposures_hdi.to_netcdf(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/hdi_regions_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
)

In [28]:
# Display the merged per-HDI-level dataset.
exposures_hdi

In [29]:
# Spot check: every variable for age band 0 in the 'Very High' HDI group in 2020.
exposures_hdi.sel(
    year=2020,
    age_band_lower_bound=0,
    level_of_human_development='Very High',
).sum()

In [30]:
# Spot check: every variable for age band 0 in the 'Low' HDI group in 2020.
exposures_hdi.sel(
    year=2020,
    age_band_lower_bound=0,
    level_of_human_development='Low',
).sum()

In [59]:
# Total exposure change per HDI level: sum the gridded exposure change over the cells whose
# HDI_ID in the raster matches the level, stack the levels, and write them out.
results = []
with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(hdi.iterrows(), total=len(hdi)):
        mask = hdi_raster['HDI_ID'] == row.HDI_ID

        masked_exposures = (
            (exposures_change * mask)
            .sum(dim=['latitude', 'longitude'])
            .expand_dims(dim={'level_of_human_development': [row['HDI Level (2021)']]})
        )
        results.append(masked_exposures)

    results = xr.concat(results, dim='level_of_human_development')
    results.to_netcdf(
        RESULTS_FOLDER / f'exposure_by_region_or_grouping/hdi_regions_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
    )

100%|██████████| 4/4 [00:00<00:00, 129.13it/s]


In [32]:
# NOTE(review): leftover inspection cell. It reads `row`, the loop variable left behind by
# whichever loop ran last, so its output depends on execution order - consider removing it.
[row['HDI Level (2021)']]

['High']

# Exposure to change weighted by LC Grouping


In [36]:
# Display the 'LC Grouping' column of the country table.
country_polygons['LC Grouping']

0        SIDS
1        Asia
2      Africa
3        SIDS
4        Asia
        ...  
213      SIDS
214      SIDS
215      SIDS
216      SIDS
217      Asia
Name: LC Grouping, Length: 218, dtype: object

In [30]:
# Number the LC groupings 1, 2, ... in order of first appearance in the country table.
region_to_id = {
    group_name: group_id
    for group_id, group_name in enumerate(country_polygons['LC Grouping'].unique(), 1)
}
# Store each country's numeric LC-grouping identifier in a new column.
country_polygons['LC_GROUPING_ID'] = country_polygons['LC Grouping'].map(region_to_id)

# Open the LC-grouping raster from disk.
# NOTE(review): presumably the raster was produced with the same group-to-id mapping - confirm.
lc_grouping_raster = xr.open_dataset(
    DATA_SRC / 'lancet' / 'admin_boundaries' / 'LC_group_raster_report_2024.nc'
)
          

In [37]:
# One row per distinct (LC grouping id, LC grouping name) pair.
lc_grouping = (
    country_polygons[['LC_GROUPING_ID', 'LC Grouping']]
    .drop_duplicates()
)

In [38]:
# Display the LC grouping id / name lookup table.
lc_grouping

Unnamed: 0,LC_GROUPING_ID,LC Grouping
0,1,SIDS
1,2,Asia
2,3,Africa
5,4,Latin America
23,5,Northern America
100,6,Oceania
155,7,Europe


In [39]:
# Per-LC-grouping values on the absolute exposure grid: population, the summed `heatwaves_days`
# variable, and that sum divided by population.
pop, results, results_weight = [], [], []

with dask.config.set(**{'array.slicing.split_large_chunks': False}):

    for _, row in tqdm(lc_grouping.iterrows(), total=len(lc_grouping)):
        # Grid cells whose raster id equals this grouping's id.
        mask = lc_grouping_raster['LC_GROUPING_ID'] == row.LC_GROUPING_ID

        masked_population = (mask * population).sum(dim=['latitude', 'longitude'])
        masked_population = masked_population.expand_dims(dim={'lc_group': [row['LC Grouping']]}).compute()
        pop.append(masked_population)

        masked_exposures = (exposures_abs * mask).sum(dim=['latitude', 'longitude'])
        masked_exposures = masked_exposures.expand_dims(dim={'lc_group': [row['LC Grouping']]}).compute()
        results.append(masked_exposures.heatwaves_days)

        masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
        results_weight.append(masked_exposure_per_person.compute())

    # Stack the per-grouping pieces and give each quantity its variable name.
    results_pop = xr.concat(pop, dim='lc_group').to_dataset(name='population')
    results_abs = xr.concat(results, dim='lc_group').to_dataset(name='exposures_total')
    results_weight = xr.concat(results_weight, dim='lc_group').to_dataset(name='exposures_weighted')


exposures_lc_grouping = xr.merge([results_pop, results_abs, results_weight])

exposures_lc_grouping.to_netcdf(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/exposures_abs_by_lc_group_worldpop.nc'
)

100%|██████████| 7/7 [00:27<00:00,  3.88s/it]
