# Heatwave exposures by country, WHO region, HDI index class etc

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

import xarray as xr
import matplotlib.pyplot as plt

from cartopy import crs as ccrs
from scipy import stats
from tqdm.notebook import tqdm
import os
import sys

# Put the directory three levels above the current working directory on the
# import path (only once), ahead of the existing entries.
project_path = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir))
if project_path not in sys.path:
    sys.path.insert(0, project_path)

from source.config import DATA_SRC, POP_DATA_SRC, WEATHER_SRC

In [2]:
# Figure settings: display/saved resolution, default size and title sizes
plt.rcParams.update({
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'figure.figsize': (5, 2.5),
    'figure.titlesize': 'medium',
    'axes.titlesize': 'medium',
})

In [3]:
# Year bounds used to build input/output file names and to extend the
# population data.
MIN_YEAR = 1980
MAX_YEAR = 2023

# Reference-period bounds.
REFERENCE_YEAR_START = 1986
REFERENCE_YEAR_END = 2005

# Output locations. Both folders currently resolve to the same path, which is
# labelled with the year after MAX_YEAR.
RESULTS_FOLDER = (
    DATA_SRC / 'lancet' / 'heatwaves' / f'results_{MAX_YEAR + 1}/gpw_exposure'
)
INTERMEDIATE_RESULTS_FOLDER = (
    DATA_SRC / 'lancet' / 'heatwaves' / f'results_{MAX_YEAR + 1}/gpw_exposure'
)

# GPW population-count file; it also carries the national identifier grid
# that is read further down.
GPW_FILE = (
    DATA_SRC
    / 'lancet/population/UN WPP-Adjusted Population Count v4.11 2000-2020 15min era compat.nc'
)

# Demographic totals: the hybrid 1950-2020 file is used. The alternative that
# was used before is kept here for reference:
# DEMOGRAPHICS_TOTALS_FILE = POP_DATA_SRC / 'demographics_2000_2020_15_min_era_compat.nc'
DEMOGRAPHICS_TOTALS_FILE = (
    POP_DATA_SRC / 'demographics_hybrid_1950_2020_15_min_era_compat.nc'
)

In [4]:
# Path to the infant population totals (the file name indicates the hybrid
# 1950-2020 product).
INFANTS_TOTALS_FILE = POP_DATA_SRC / 'infants_1950_2020_hybrid_15_min_era_compat.nc'

In [5]:
# NOTE(review): GPW_FILE is assigned the identical path in an earlier cell;
# this repeat looks redundant — confirm before removing.
GPW_FILE = DATA_SRC/ 'lancet/population/UN WPP-Adjusted Population Count v4.11 2000-2020 15min era compat.nc'

# Cartopy projection object (Eckert III) kept as a notebook-wide constant.
MAP_PROJECTION = ccrs.EckertIII()

## GPW data

In [6]:
# Open the GPW dataset; the national identifier grid is read from it below.
gpw_dat = xr.open_dataset(GPW_FILE)

## Load population and demographic data

In [7]:
# Open the demographic totals and pull the age band whose lower bound is 65.
# NOTE(review): population_over_65 is rebuilt a few cells further down with
# the extrapolated years appended, so this eager .load() may be unnecessary.
demographics_totals = xr.open_dataarray(DEMOGRAPHICS_TOTALS_FILE)
population_over_65 = demographics_totals.sel(age_band_lower_bound=65).load()

In [8]:
# Years 2021..MAX_YEAR (inclusive) for which population is extrapolated.
# Presumably 2020 is the last year in the population files — TODO confirm.
extrapolated_years = np.arange(2020+1, MAX_YEAR+1)

In [9]:
# Build the age-65 band series including the extrapolated years.
# The age band is selected *before* extrapolating, so only that band is
# interpolated; previously every age band was extrapolated and all but one
# were then discarded. Interpolation runs along `year` only, so the selected
# values are unchanged.
population_over_65 = demographics_totals.sel(age_band_lower_bound=65)
population_over_65 = xr.concat(
    [
        population_over_65,
        # fill_value="extrapolate" lets interp return values beyond the last
        # year present in the data.
        population_over_65.interp(
            year=extrapolated_years, kwargs=dict(fill_value="extrapolate")
        ).compute(),
    ],
    'year',
).load()

In [10]:
# Open the infant population totals as a single DataArray.
population_infants = xr.open_dataarray(INFANTS_TOTALS_FILE)

In [11]:
# Append the extrapolated years to the infant series and load it into memory.
population_infants = xr.concat(
    [
        population_infants,
        population_infants.interp(
            year=extrapolated_years,
            kwargs={'fill_value': "extrapolate"},
        ).compute(),
    ],
    'year',
).load()

In [12]:
# Stack the infant and age-65 arrays along the age-band dimension.
population = xr.concat(
    [population_infants, population_over_65],
    dim='age_band_lower_bound',
)
population.name = 'population'

# Chunk (one age band, 20 years per chunk) so later reductions can run in
# parallel.
population = population.chunk({'age_band_lower_bound': 1, 'year': 20})

## Load country data

In [13]:
# Tab-separated table describing the national identifiers used in the raster.
COUNTRY_ID_LOOKUP = DATA_SRC/ 'lancet/population/gpwv4/gpw_v4_national_identifier_lookup.txt'
COUNTRIES_LOOKUP = pd.read_csv(COUNTRY_ID_LOOKUP, sep='\t')

# National identifier grid taken from the GPW dataset, loaded into memory.
COUNTRIES_RASTER = gpw_dat[
    'National Identifier Grid, v4.11 (2010): National Identifier Grid'
].load()

In [14]:
# Display the raster as the cell output for a quick inspection.
COUNTRIES_RASTER

In [15]:
# Country names and groupings spreadsheet; the header is on the second row.
# NOTE(review): hard-coded absolute path — this only works on the machine
# where that path exists; consider deriving it from the configured data roots.
COUNTRIES_LC_GROUPINGS = pd.read_excel('/nfs/n2o/wcr/szelie/lancet/reports/Country Names and groupings - 2023 Report.xlsx', header=1)

In [16]:
# Attach the 'LC Grouping' column to the lookup table by matching the lookup's
# ISOCODE against the spreadsheet's ISO3 column. DataFrame.join is a left join
# by default, so lookup rows without a match keep a missing 'LC Grouping'.
COUNTRIES_LOOKUP = COUNTRIES_LOOKUP.join(COUNTRIES_LC_GROUPINGS.set_index('ISO3')[['LC Grouping']], on='ISOCODE' )

In [17]:
# Boolean raster: True where the national identifier is below 2000.
# NOTE(review): presumably larger identifiers / fill values mark cells that
# are not land — confirm against the GPW identifier grid documentation.
land_mask = (COUNTRIES_RASTER < 2000)

## Load region raster references

In [18]:
# Region rasters used to build per-region masks further down (cells are
# compared against an integer region index there).
who_regions = xr.open_dataarray(POP_DATA_SRC / 'who_regions_15min_era_compat.nc')

hdi_regions = xr.open_dataarray(POP_DATA_SRC / 'hdi_regions_15min_era_compat.nc')

## Load exposure data

Using the multi-threshold version of the indicator

In [19]:
# First year in the exposure file names.
# NOTE(review): MIN_YEAR is already set to the same value in the constants
# cell near the top; this repeat looks redundant.
MIN_YEAR = 1980

In [20]:
# Change-in-exposure datasets, one file per age group.
exposures_infants = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER
    / f'heatwave_exposure_change_infants_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc'
)
exposures_over65 = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER
    / f'heatwave_exposure_change_over65_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc'
)

# Combine both age groups along a new age-band dimension (0 = infants,
# 65 = over 65), then chunk per age band and per 20 years.
exposures_change = xr.concat(
    [exposures_infants, exposures_over65],
    dim=pd.Index([0, 65], name='age_band_lower_bound'),
)
exposures_change = exposures_change.chunk({'age_band_lower_bound': 1, 'year': 20})

# Absolute exposure, opened directly with the same chunking.
exposures_abs = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER
    / f'heatwave_exposure_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc',
    chunks={'age_band_lower_bound': 1, 'year': 20},
)


## Trim population to the exposure years

In [21]:
# population_over_65 = population_over_65.sel(year=exposures_over65.year)

In [22]:
# population_infants = population_infants.sel(year=exposures_infants.year)

In [23]:
# Keep only the population years that are present in the absolute exposure
# dataset, so both share the same `year` coordinate.
population = population.sel(year=exposures_abs.year)

# Calculate Exposure weighted change by country (population normalised)

Total exposure divided by the country's total **vulnerable population**, which makes it easy to compare between countries.

In [24]:
# Per-country change in exposure, normalised by the country's population in
# the matching age band and year. One entry per row of COUNTRIES_LOOKUP.
weighted_results = []

for _, row in tqdm(COUNTRIES_LOOKUP.iterrows(), total=len(COUNTRIES_LOOKUP)):
    grid_code = row.GRIDCODE
    # Boolean raster: True on the grid cells assigned to this country
    country_mask = COUNTRIES_RASTER == grid_code
    country_population = (country_mask * population).sum(dim=['latitude', 'longitude'])
    country_exposures = (
        (country_mask * exposures_change).sum(dim=['latitude', 'longitude'])
        / country_population
    )
    # Label the result with the ISO code so the pieces concatenate on `country`
    country_exposures = country_exposures.expand_dims(dim={'country': [row.ISOCODE]})
    weighted_results.append(country_exposures)

weighted_results = xr.concat(weighted_results, dim='country')
# The start year comes from MIN_YEAR (it was a hard-coded 1980), matching how
# the other output files in this notebook are named.
weighted_results.to_netcdf(
    INTERMEDIATE_RESULTS_FOLDER
    / f'countries_heatwaves_exposure_weighted_change_{MIN_YEAR}-{MAX_YEAR}.nc'
)

  0%|          | 0/250 [00:00<?, ?it/s]

  return func(*(_execute_task(a, cache) for a in args))


# Exposure to change by country, total

Calculate the sum of each heatwave (HW) metric by country, for all metrics (this makes more sense for some metrics than for others).

In [25]:
# Per-country totals of the change-in-exposure dataset (no normalisation).
results_tot = []

for _, row in tqdm(COUNTRIES_LOOKUP.iterrows(), total=len(COUNTRIES_LOOKUP)):
    grid_code = row.GRIDCODE
    # Boolean raster: True on the grid cells assigned to this country
    country_mask = COUNTRIES_RASTER == grid_code
    # The per-country population sum that used to be built here was never
    # used for the totals, so it has been dropped.
    country_exposures = (exposures_change * country_mask).sum(dim=['latitude', 'longitude'])
    country_exposures = country_exposures.expand_dims(dim={'country': [row.ISOCODE]})
    results_tot.append(country_exposures)

results_tot = xr.concat(results_tot, dim='country')
results_tot.to_netcdf(INTERMEDIATE_RESULTS_FOLDER / f'countries_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}.nc')

  0%|          | 0/250 [00:00<?, ?it/s]

# Exposures absolute by country

In [26]:
# Chunk per age band and per 20 years.
# NOTE(review): exposures_abs is already opened with these same chunk sizes,
# so this re-chunk looks redundant — confirm.
exposures_abs = exposures_abs.chunk(dict(age_band_lower_bound=1, year=20))

In [27]:
# Per-country population, total absolute exposure (heatwaves_days) and
# exposure per person, collected into one dataset indexed by `country`.
pop = []
results = []
results_weight = []

for _, row in tqdm(COUNTRIES_LOOKUP.iterrows(), total=len(COUNTRIES_LOOKUP)):
    grid_code = row.GRIDCODE
    country_mask = COUNTRIES_RASTER == grid_code

    # Population inside the country, labelled with its ISO code
    country_population = (country_mask * population).sum(dim=['latitude', 'longitude'])
    country_population = country_population.expand_dims(dim={'country': [row.ISOCODE]})
    country_population = country_population.compute()
    pop.append(country_population)

    # Exposure inside the country, labelled the same way
    country_exposures = (exposures_abs * country_mask).sum(dim=['latitude', 'longitude'])
    country_exposures = country_exposures.expand_dims(dim={'country': [row.ISOCODE]})
    country_exposures = country_exposures.compute()
    results.append(country_exposures.heatwaves_days)

    # Exposure divided by the population of the same country
    country_exposure_per_person = country_exposures.heatwaves_days / country_population
    results_weight.append(country_exposure_per_person.compute())

results_pop = xr.concat(pop, dim='country').to_dataset(name='population')
results_abs = xr.concat(results, dim='country').to_dataset(name='exposures_total')
results_weight = xr.concat(results_weight, dim='country').to_dataset(name='exposures_weighted')

exposures_countries = xr.merge([results_pop, results_abs, results_weight])

  0%|          | 0/250 [00:00<?, ?it/s]

In [2]:
# Write the merged per-country dataset to the intermediate results folder.
exposures_countries.to_netcdf(
    INTERMEDIATE_RESULTS_FOLDER / f'countries_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}.nc'
)

NameError: name 'exposures_countries' is not defined

# Exposures by WHO region

In [None]:
# Per-WHO-region population, total absolute exposure (heatwaves_days) and
# exposure per person, collected into one dataset indexed by `who_region`.
pop = []
results = []
results_weight = []

for i, region_name in tqdm(enumerate(who_regions.who_region), total=len(who_regions.who_region)):
    # Raster cells whose value equals the position of this region label
    mask = who_regions == i

    masked_population = (mask * population).sum(dim=['latitude', 'longitude'])
    masked_population = masked_population.expand_dims(dim={'who_region': [region_name]})
    masked_population = masked_population.compute()
    pop.append(masked_population)

    masked_exposures = (exposures_abs * mask).sum(dim=['latitude', 'longitude'])
    masked_exposures = masked_exposures.expand_dims(dim={'who_region': [region_name]})
    masked_exposures = masked_exposures.compute()
    results.append(masked_exposures.heatwaves_days)

    # Exposure divided by the population of the same region
    masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
    results_weight.append(masked_exposure_per_person.compute())

results_pop = xr.concat(pop, dim='who_region').to_dataset(name='population')
results_abs = xr.concat(results, dim='who_region').to_dataset(name='exposures_total')
results_weight = xr.concat(results_weight, dim='who_region').to_dataset(name='exposures_weighted')

exposures_who = xr.merge([results_pop, results_abs, results_weight])

In [None]:
# Write the merged per-WHO-region dataset to the intermediate results folder.
exposures_who.to_netcdf(INTERMEDIATE_RESULTS_FOLDER / f'who_regions_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}.nc')

In [None]:
# Per-WHO-region totals of the change-in-exposure dataset.
results = []

for i, region_name in tqdm(enumerate(who_regions.who_region), total=len(who_regions.who_region)):
    # Raster cells whose value equals the position of this region label
    mask = who_regions == i

    masked_exposures = (
        (exposures_change * mask)
        .sum(dim=['latitude', 'longitude'])
        .expand_dims(dim={'who_region': [region_name]})
    )
    results.append(masked_exposures)

results = xr.concat(results, dim='who_region')
results.to_netcdf(
    INTERMEDIATE_RESULTS_FOLDER
    / f'who_regions_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}.nc'
)

# Exposures by HDI

In [None]:
# Per-HDI-class population, total absolute exposure (heatwaves_days) and
# exposure per person, collected into one dataset indexed by
# `level_of_human_development`.
pop = []
results = []
results_weight = []
for i, region_name in tqdm(enumerate(hdi_regions.level_of_human_development), total=len(hdi_regions.level_of_human_development)):
    # Build the mask from the HDI raster. This used to compare against the
    # WHO-region raster, which produced WHO-region sums under HDI labels.
    mask = hdi_regions == i

    masked_population = ((mask * population)
                          .sum(dim=['latitude', 'longitude'])
                          .expand_dims(dim={'level_of_human_development':[region_name]})
                          .compute())
    pop.append(masked_population)

    masked_exposures = ((exposures_abs * mask)
                        .sum(dim=['latitude', 'longitude'])
                        .expand_dims(dim={'level_of_human_development':[region_name]})
                        .compute()
                       )
    results.append(masked_exposures.heatwaves_days)

    # Exposure divided by the population of the same HDI class
    masked_exposure_per_person = masked_exposures.heatwaves_days / masked_population
    results_weight.append(masked_exposure_per_person.compute())


results_pop = xr.concat(pop, dim='level_of_human_development')
results_pop = results_pop.to_dataset(name='population')

results_abs = xr.concat(results, dim='level_of_human_development')
results_abs = results_abs.to_dataset(name='exposures_total')

results_weight = xr.concat(results_weight, dim='level_of_human_development')
results_weight = results_weight.to_dataset(name='exposures_weighted')

exposures_hdi = xr.merge([results_pop, results_abs, results_weight])

In [None]:
# Write the merged per-HDI-class dataset to the intermediate results folder.
exposures_hdi.to_netcdf(INTERMEDIATE_RESULTS_FOLDER / f'hdi_regions_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}.nc')

In [None]:
# Per-HDI-class totals of the change-in-exposure dataset.
results = []

for i, region_name in tqdm(enumerate(hdi_regions.level_of_human_development), total=len(hdi_regions.level_of_human_development)):
    # Raster cells whose value equals the position of this HDI class label
    mask = hdi_regions == i

    masked_exposures = (
        (exposures_change * mask)
        .sum(dim=['latitude', 'longitude'])
        .expand_dims(dim={'level_of_human_development': [region_name]})
    )
    results.append(masked_exposures)

results = xr.concat(results, dim='level_of_human_development')
results.to_netcdf(
    INTERMEDIATE_RESULTS_FOLDER
    / f'hdi_regions_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}.nc'
)

# Exposure to change weighted by LC Grouping


In [None]:
# Change in exposure aggregated by 'LC Grouping': for every group, sum the
# population and the heatwaves_days change over its member countries, then
# divide the two to get the population-normalised value.
pop_results = []
abs_results = []
weighted_results = []

# Loop-invariant: population restricted to the years of exposures_change.
# Selecting it once avoids repeating the selection for every country.
population_for_change = population.sel(year=exposures_change.year)

lc_groups = COUNTRIES_LOOKUP.groupby('LC Grouping')
for lc_grp, countries in tqdm(lc_groups, total=len(lc_groups)):
    # Fresh zero-filled accumulators for this group (added to in place below)
    grp_population = xr.zeros_like(population_for_change.sum(dim=['latitude', 'longitude']))
    grp_exposures = xr.zeros_like(exposures_change.heatwaves_days.sum(dim=['latitude', 'longitude']))

    for _, row in tqdm(countries.iterrows(), total=len(countries)):
        grid_code = row.GRIDCODE
        country_mask = COUNTRIES_RASTER == grid_code

        country_population = (country_mask * population_for_change).sum(dim=['latitude', 'longitude'])
        grp_population += country_population

        country_exposures_ts = (country_mask * exposures_change).sum(dim=['latitude', 'longitude']).heatwaves_days
        grp_exposures += country_exposures_ts

    # Label with the group name and materialise the lazy sums
    grp_population = (grp_population
                      .expand_dims(dict(lc_group=[lc_grp]))
                      .compute())
    grp_exposures = (grp_exposures
                     .expand_dims(dict(lc_group=[lc_grp]))
                     .compute())
    pop_results.append(grp_population)
    abs_results.append(grp_exposures)

    grp_exposures_norm = grp_exposures / grp_population

    weighted_results.append(grp_exposures_norm)

pop_results = xr.concat(pop_results, dim='lc_group')
pop_results = pop_results.to_dataset(name='population')

abs_results = xr.concat(abs_results, dim='lc_group')
abs_results = abs_results.to_dataset(name='exposures_total')

weighted_results = xr.concat(weighted_results, dim='lc_group')
weighted_results = weighted_results.to_dataset(name='exposures_weighted')

In [None]:
# Merge population, total and weighted change per LC group into one dataset
# and write it to the intermediate results folder.
exposures_lc_groups = xr.merge([pop_results, abs_results, weighted_results])
exposures_lc_groups.to_netcdf(INTERMEDIATE_RESULTS_FOLDER / 'exposures_change_by_lc_group.nc')

In [None]:
# Export the per-group totals as a flat CSV, with the group column named
# 'LC Grouping'.
(
    abs_results
    .to_dataframe()
    .reset_index()
    .rename(columns={'lc_group': 'LC Grouping'})
    .to_csv(RESULTS_FOLDER / 'heatwave_totals_change_by_lc_group.csv', index=False)
)

In [None]:
# Export the per-group population-normalised values as a flat CSV, with the
# group column named 'LC Grouping'.
(
    weighted_results
    .to_dataframe()
    .reset_index()
    .rename(columns={'lc_group': 'LC Grouping'})
    .to_csv(RESULTS_FOLDER / 'heatwave_weighted_change_by_lc_group.csv', index=False)
)

## Exposure absolute value weighted by LC Grouping

In [None]:
# Absolute exposure aggregated by 'LC Grouping': for every group, sum the
# population and heatwaves_days over its member countries, then divide the two
# to get exposure per person.
pop_results = []
abs_results = []
weighted_results = []

# Loop-invariant: population restricted to the years of exposures_abs.
# Selecting it once avoids repeating the selection for every country.
population_for_abs = population.sel(year=exposures_abs.year)

lc_groups = COUNTRIES_LOOKUP.groupby('LC Grouping')
for lc_grp, countries in tqdm(lc_groups, total=len(lc_groups)):
    # Fresh zero-filled accumulators for this group (added to in place below)
    grp_population = xr.zeros_like(population_for_abs.sum(dim=['latitude', 'longitude']))
    grp_exposures = xr.zeros_like(exposures_abs.heatwaves_days.sum(dim=['latitude', 'longitude']))

    for _, row in tqdm(countries.iterrows(), total=len(countries)):
        grid_code = row.GRIDCODE
        country_mask = COUNTRIES_RASTER == grid_code

        country_population = (country_mask * population_for_abs).sum(dim=['latitude', 'longitude'])
        grp_population += country_population

        # NOTE: the stored exposures are only masked and summed here; the
        # exposure calculation itself is not repeated.
        country_exposures_ts = (country_mask * exposures_abs).sum(dim=['latitude', 'longitude']).heatwaves_days
        grp_exposures += country_exposures_ts

    # Label with the group name and materialise the lazy sums
    grp_population = (grp_population
                      .expand_dims(dict(lc_group=[lc_grp]))
                      .compute())
    grp_exposures = (grp_exposures
                     .expand_dims(dict(lc_group=[lc_grp]))
                     .compute())
    pop_results.append(grp_population)
    abs_results.append(grp_exposures)

    grp_exposures_norm = (grp_exposures / grp_population).compute()
    weighted_results.append(grp_exposures_norm)


pop_results = xr.concat(pop_results, dim='lc_group')
pop_results = pop_results.to_dataset(name='population')

abs_results = xr.concat(abs_results, dim='lc_group')
abs_results = abs_results.to_dataset(name='exposures_total')

weighted_results = xr.concat(weighted_results, dim='lc_group')
weighted_results = weighted_results.to_dataset(name='exposures_weighted')

exposures_abs_lc_groups = xr.merge([pop_results, abs_results, weighted_results])


In [None]:
# NOTE(review): the previous cell already ends with this same merge, so this
# cell looks redundant — confirm before removing.
exposures_abs_lc_groups = xr.merge([pop_results, abs_results, weighted_results])


In [None]:
# Write the merged per-LC-group dataset to the intermediate results folder.
exposures_abs_lc_groups.to_netcdf(INTERMEDIATE_RESULTS_FOLDER / 'exposures_abs_by_lc_group.nc')

In [None]:
# Bar chart of total exposure per LC group for 2022, age band 65.
exposures_abs_lc_groups['exposures_total'].sel(
    year=2022, age_band_lower_bound=65
).to_dataframe()['exposures_total'].plot.bar()

In [None]:
# Bar chart of total exposure per LC group for 2023, age band 65.
exposures_abs_lc_groups['exposures_total'].sel(
    year=2023, age_band_lower_bound=65
).to_dataframe()['exposures_total'].plot.bar()

In [None]:
# Bar chart of population per LC group for 2022, age band 65.
exposures_abs_lc_groups['population'].sel(
    year=2022, age_band_lower_bound=65
).to_dataframe()['population'].plot.bar()

In [None]:
# Bar chart of exposure per person per LC group for 2022, age band 65.
exposures_abs_lc_groups['exposures_weighted'].sel(
    year=2022, age_band_lower_bound=65
).to_dataframe()['exposures_weighted'].plot.bar()