# Present data for report



In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

import xarray as xr
import matplotlib.pyplot as plt

from cartopy import crs as ccrs
from scipy import stats
from tqdm.notebook import tqdm
import os
import sys

# Put the directory three levels above the current working directory on
# sys.path so that the `source` package can be imported from this notebook.
project_path = os.path.abspath(os.path.join('..', '..', '..'))
if project_path not in sys.path:
    sys.path.insert(0, project_path)

# Base data directories used throughout the notebook (defined in the project config).
from source.config import DATA_SRC, POP_DATA_SRC, WEATHER_SRC

# Folder that the map and bar-chart figures below are saved into.
# NOTE(review): absolute, machine-specific path — confirm it exists where this runs;
# a second, relative FIGURES_FOLDER is defined further down and used by other plots.
PATH_FIGURES = Path("/cluster/project/climate/szelie/lancet_countdown/figures")


In [2]:
# Figure settings: screen/export resolution, default size, title sizes and tight export box
plt.rcParams.update({
    'figure.dpi': 120,
    'savefig.dpi': 600,
    'figure.figsize': (5, 2.5),
    'figure.titlesize': 'medium',
    'axes.titlesize': 'medium',
    'savefig.bbox': 'tight',
})

In [3]:
# Last year covered by the result files; the results folder is labelled MAX_YEAR + 1.
MAX_YEAR = 2023
# Baseline period that recent years are compared against.
REFERENCE_YEAR_START = 1986
REFERENCE_YEAR_END = 2005

# Folder holding the pre-computed exposure results.
# This previously pointed at 'heatwaves/.../worldpop_exposure', a path that was never
# read and that disagreed with the definition used by every loading cell below
# ('results/.../worldpop_hw_exposure'); both definitions now agree.
RESULTS_FOLDER = DATA_SRC / 'lancet' / 'results' / f'results_{MAX_YEAR + 1}/worldpop_hw_exposure'

# Rasterised country boundaries; its OBJECTID variable is used later as a land mask.
countries_raster = xr.open_dataset(DATA_SRC / 'lancet' / 'admin_boundaries' / 'admin0_raster_report_2024.nc')




In [4]:
MAP_PROJECTION = ccrs.EckertIII()

In [5]:
MIN_YEAR = 1980

## Load basic country data

In [6]:
country_lc_grouping = pd.read_excel(DATA_SRC / 'lancet' / 'admin_boundaries' /'Country Names and Groupings - 2024 Report.xlsx', header=1)

In [7]:
# COUNTRIES_LOOKUP = COUNTRIES_LOOKUP.join(
#     country_lc_grouping.set_index('ISO3')[['LC Grouping']], on='ISOCODE' )

In [8]:
import geopandas as gpd
countries = gpd.read_file(DATA_SRC / 'lancet' / 'admin_boundaries' / 'Detailed_Boundary_ADM0' / 'GLOBAL_ADM0.shp')

## Load population data

In [9]:
# Gridded population totals for the two age groups (file names say 1950-2023, ERA5-compatible)
INFANTS_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / 'worldpop_infants_1950_2023_era5_compatible.nc'
ELDERLY_TOTALS_FILE = POP_DATA_SRC / 'hybrid_2024' / 'worldpop_elderly_1950_2023_era5_compatible.nc'

# Each file is opened as a single DataArray
population_over_65 = xr.open_dataarray(
    ELDERLY_TOTALS_FILE
)
population_infants = xr.open_dataarray(
    INFANTS_TOTALS_FILE
)

 Extrapolate demographic data to 2022

In [10]:
# Label the over-65 array with its age band (assigned in place on the loaded array).
population_over_65['age_band_lower_bound'] = 65
# Stack infants and over-65s along the age-band dimension.
# NOTE(review): presumably population_infants already carries age_band_lower_bound=0 — confirm.
population = xr.concat([population_infants, population_over_65], dim='age_band_lower_bound')
# Name the array so that to_dataframe() later yields a 'population' column.
population.name = 'population'
# # chunk for parallel
# population = population.chunk(dict(age_band_lower_bound=1, year=20))

## Load Heatwave metrics data

In [11]:
# Heatwave-day grids written by the data-generation step: every NetCDF file in the
# folder is opened and merged by coordinates into one dataset.
# The folder is derived from DATA_SRC and MAX_YEAR rather than a hard-coded absolute
# path, so it follows the configured data root like RESULTS_FOLDER does
# (for the configured root shown in the outputs this is the same location as before).
heatwave_metrics_folder = DATA_SRC / 'lancet' / 'results' / f'results_{MAX_YEAR + 1}' / 'heatwave_days_era5'
heatwave_metrics_files = sorted(heatwave_metrics_folder.glob('*.nc'))
heatwave_metrics = xr.open_mfdataset(heatwave_metrics_files, combine='by_coords')

## Load exposure to change results

In [12]:
MAX_YEAR = 2023
RESULTS_FOLDER = DATA_SRC / 'lancet' / 'results' / f'results_{MAX_YEAR + 1}/worldpop_hw_exposure'


def _load_exposure_change(age_label, age_band):
    """Load the gridded exposure-to-change result for one age group as a DataArray.

    Parameters
    ----------
    age_label : str
        Age-group token used in the file name ('over65' or 'infants').
    age_band : int
        Value stored in the 'age_band_lower_bound' coordinate of the result.

    Returns
    -------
    xarray.DataArray
        The dataset converted with ``to_array()``, squeezed, and with the
        'variable' coordinate created by ``to_array()`` removed.
    """
    dataset = xr.open_dataset(
        RESULTS_FOLDER / f'heatwave_exposure_change_{age_label}_multi_threshold_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')
    data = dataset.to_array()
    data['age_band_lower_bound'] = age_band
    # drop_vars replaces the deprecated DataArray.drop for removing a coordinate
    return data.squeeze().drop_vars('variable')


exposures_over65 = _load_exposure_change('over65', 65)
exposures_infants = _load_exposure_change('infants', 0)

# Stack both age groups along a new 'age_band_lower_bound' dimension (0 = infants, 65 = over 65)
exposures_change = xr.concat([exposures_infants, exposures_over65],
                             dim=pd.Index([0, 65], name='age_band_lower_bound'))

In [13]:
# Global exposure to change per year and age band: sum over every grid cell
total_exposures = exposures_change.sum(dim=['latitude', 'longitude'])

# One single-column table per age band
total_exposures_change_over65 = (
    total_exposures
    .sel(age_band_lower_bound=65, drop=True)
    .to_dataframe(name='elderly')
)
total_exposures_change_infants = (
    total_exposures
    .sel(age_band_lower_bound=0, drop=True)
    .to_dataframe(name='infants')
)

## Load exposure absolute values (not exposure to change)

In [14]:
exposures_abs = xr.open_dataset(RESULTS_FOLDER / f'heatwave_exposure_multi_threshold_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

In [15]:
population_df = population.sum(dim=['latitude', 'longitude']).to_dataframe().reset_index()

In [16]:
exposures_abs_df = exposures_abs.sum(dim=['latitude', 'longitude']).to_dataframe().reset_index()

In [17]:
exposures_abs_df

Unnamed: 0,age_band_lower_bound,year,heatwaves_days
0,0,1980,2.788381e+08
1,0,1981,2.891575e+08
2,0,1982,1.944532e+08
3,0,1983,4.924692e+08
4,0,1984,3.037111e+08
...,...,...,...
83,65,2019,8.035450e+09
84,65,2020,6.755217e+09
85,65,2021,6.403054e+09
86,65,2022,9.985383e+09


In [18]:
# Give the exposure column its report-facing name. `columns=` is required: a bare
# mapping passed to DataFrame.rename targets the row index and left the column unchanged.
exposures_abs_df = exposures_abs_df.rename(columns={'heatwaves_days': 'total heatwave days'})

# Create the summary workbook (overwriting any previous one) with the global table:
# population and exposure are merged on their shared columns.
with pd.ExcelWriter(RESULTS_FOLDER / 'indicator_1_1_2_heatwaves_summary.xlsx') as writer:
    pd.merge(population_df, exposures_abs_df).to_excel(writer, sheet_name='Global', index=False)

## Load the country exposure results

In [19]:
country_weighted = xr.open_dataset(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_weighted_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

In [20]:
country_exposure_change = xr.open_dataset(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

In [21]:
country_exposure_abs = xr.open_dataset(
    RESULTS_FOLDER / f'exposure_by_region_or_grouping/countries_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc')

## Load aggregated by hdi and WHO region data

In [22]:
# Absolute exposure and exposure-to-change results, aggregated by HDI group and by
# WHO region. Files are opened in the order: hdi, who, hdi change, who change.
hdi_exposure, who_exposure, hdi_exposure_change, who_exposure_change = (
    xr.open_dataset(
        RESULTS_FOLDER
        / f'exposure_by_region_or_grouping/{grouping}_heatwaves_exposure{kind}_{MIN_YEAR}-{MAX_YEAR}_worldpop.nc'
    )
    for kind in ('', '_change')
    for grouping in ('hdi_regions', 'who_regions')
)

In [23]:
RESULTS_FOLDER / f'exposure_by_region_or_grouping'

PosixPath('/nfs/n2o/wcr/szelie/lancet/results/results_2024/worldpop_hw_exposure/exposure_by_region_or_grouping')

## Load results by LC grouping

In [24]:
# Results aggregated by Lancet Countdown grouping: exposure to change, then absolute exposure
exposures_change_lc_groups = xr.open_dataset(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping' / 'exposures_change_by_lc_group_worldpop.nc'
)
exposures_abs_lc_groups = xr.open_dataset(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping' / 'exposures_abs_by_lc_group_worldpop.nc'
)



# Re-export data tables as csv

In [25]:
# Weighted change per country -> CSV (output file name reproduced exactly as before)
country_weighted.heatwaves_days.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_wieghted_change_days_by_country_w_hdi_worldpop.csv'
)

# Exposure to change per country -> CSV
country_exposure_change.heatwaves_days.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_by_country_worldpop.csv'
)

In [26]:
country_exposure_abs_df = country_exposure_abs.sel(year=slice(1980,None)).to_dataframe().reset_index().rename(columns={"country":"ISO3"})


In [27]:
country_exposure_abs_df.merge(country_lc_grouping).dropna(axis='index').to_csv(RESULTS_FOLDER / 'heatwave_exposure_abs_days_by_country.csv')


In [28]:
# Keep only the total exposure for the workbook and give it its report-facing name
country_exposure_abs_df = (
    country_exposure_abs_df
    .drop(columns='exposures_weighted')
    .rename(columns={'exposures_total': 'total heatwave days'})
)

In [29]:
# Append the per-country table (joined with the country grouping table) as the
# 'Country' sheet. if_sheet_exists='replace' lets this cell be re-run: in append
# mode pandas otherwise raises when the sheet is already present.
with pd.ExcelWriter(RESULTS_FOLDER / 'indicator_1_1_2_heatwaves_summary.xlsx',
                    engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    country_exposure_abs_df.merge(country_lc_grouping).to_excel(writer, sheet_name='Country', index=False)
    

In [30]:
# WHO-region tables -> CSV: absolute exposure first, then exposure to change
who_exposure.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_by_who_region_worldpop.csv'
)

who_exposure_change.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_change_by_who_region_worldpop.csv'
)

In [31]:
# Flatten the WHO-region results, use report-facing column names and keep only
# the total exposure (the weighted column is dropped).
who_exposure_df = (
    who_exposure.to_dataframe()
    .reset_index()
    .rename(columns={"who_region": "WHO Region",
                     'exposures_total': 'total heatwave days'})
    .drop(columns='exposures_weighted')
)

# Append as the 'WHO Region' sheet; 'replace' makes re-running this cell safe,
# since append mode otherwise raises when the sheet already exists.
with pd.ExcelWriter(RESULTS_FOLDER / 'indicator_1_1_2_heatwaves_summary.xlsx',
                    engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    who_exposure_df.to_excel(writer, sheet_name='WHO Region', index=False)

In [33]:
# Flatten the HDI-group results for the analysis period. The year bounds come from
# MIN_YEAR / MAX_YEAR (1980 and 2023 + 1 here, i.e. the values previously hard-coded).
hdi_exposure_df = (
    hdi_exposure.sel(year=slice(MIN_YEAR, MAX_YEAR + 1))
    .to_dataframe()
    .reset_index()
    .rename(columns={'exposures_total': 'total heatwave days'})
    .drop(columns='exposures_weighted')
    .rename(columns={"level_of_human_development": "HDI Group"})
)

# Append as the 'HDI Group' sheet; 'replace' makes re-running this cell safe,
# since append mode otherwise raises when the sheet already exists.
with pd.ExcelWriter(RESULTS_FOLDER / 'indicator_1_1_2_heatwaves_summary.xlsx',
                    engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    hdi_exposure_df.to_excel(writer, sheet_name='HDI Group', index=False)

In [34]:
# HDI-group tables -> CSV: absolute exposure first, then exposure to change
hdi_exposure.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_by_hdi_worldpop.csv'
)

hdi_exposure_change.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_change_by_hdi_worldpop.csv'
)

In [35]:
LC_exposures_abs_lc_groups_df = exposures_abs_lc_groups.sel(year=slice(1980,2024)).to_dataframe().reset_index()

In [36]:
LC_exposures_abs_lc_groups_df

Unnamed: 0,year,lc_group,age_band_lower_bound,population,exposures_total,exposures_weighted
0,1980,SIDS,0,2.201534e+06,2.482086e+05,0.112744
1,1980,SIDS,65,1.287640e+07,5.441439e+05,0.042259
2,1980,Asia,0,6.444686e+07,1.989588e+08,3.087176
3,1980,Asia,65,1.173616e+08,3.434761e+08,2.926649
4,1980,Africa,0,1.662339e+07,1.600789e+07,0.962974
...,...,...,...,...,...,...
611,2023,Northern America,65,6.555742e+07,8.173071e+08,12.467042
612,2023,Oceania,0,2.915025e+05,5.040989e+05,1.729312
613,2023,Oceania,65,3.612950e+06,6.259816e+06,1.732605
614,2023,Europe,0,7.628086e+06,1.052487e+08,13.797523


In [37]:
# Use report-facing column names and keep only the total exposure
# (the weighted column is dropped).
LC_exposures_abs_lc_groups_df = (
    LC_exposures_abs_lc_groups_df
    .rename(columns={"lc_group": "Lancet Countdown Region",
                     'exposures_total': 'total heatwave days'})
    .drop(columns='exposures_weighted')
)

# Append as the 'LC Region' sheet; 'replace' avoids the error pandas raises in
# append mode when a sheet of that name is already in the workbook.
with pd.ExcelWriter(RESULTS_FOLDER / 'indicator_1_1_2_heatwaves_summary.xlsx',
                    engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    LC_exposures_abs_lc_groups_df.to_excel(writer, sheet_name='LC Region', index=False)

In [38]:
# Lancet Countdown group tables -> CSV: absolute exposure first, then exposure to change
exposures_abs_lc_groups.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_by_lc_group_worldpop.csv'
)

exposures_change_lc_groups.to_dataframe().to_csv(
    RESULTS_FOLDER / 'exposure_by_region_or_grouping/heatwave_exposure_days_change_by_lc_group_worldpop.csv'
)

# Plots

## Plot days of heatwave experienced (exposure weighted days)

This is again different from previous plots because we weight the absolute exposures instead of the changes. The idea is to be able to say something like in the 90s you would typically experience X days of heatwave per year while in the 2010s you experience Y days

> Important: when showing averages, don't average the weighted number of days per country. The average must always be population-weighted; otherwise heatwaves in China count the same as heatwaves in Luxembourg.

In [None]:
# lc_map = countries.dissolve('LC Grouping')

In [None]:
# Mean annual heatwave days over the baseline period. The period comes from the
# REFERENCE_YEAR_* constants (1986-2005) instead of repeating the literal years.
plot_data = heatwave_metrics
# plot_data = plot_data.where(population.sel(age_band_lower_bound=65) > 10)
plot_data = plot_data.sel(year=slice(REFERENCE_YEAR_START, REFERENCE_YEAR_END)).mean(dim='year')
f, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(projection=MAP_PROJECTION))

# Data are on a regular lat/lon grid, hence the PlateCarree transform; colour scale capped at 15
plot_data.heatwaves_days.plot(
    vmax=15,
    transform=ccrs.PlateCarree(),
    ax=ax)


In [None]:
# Mean annual heatwave days over the years 2013 to 2023, drawn on the map projection
plot_data = (
    heatwave_metrics
    .sel(year=slice(2013, 2023))
    .mean(dim='year')
)
f, ax = plt.subplots(
    figsize=(6, 3),
    subplot_kw={'projection': MAP_PROJECTION},
)

plot_data.heatwaves_days.plot(ax=ax, transform=ccrs.PlateCarree(), vmax=15)


In [None]:
countries_raster = xr.open_dataset(DATA_SRC / 'lancet' / 'admin_boundaries' / 'admin0_raster_report_2024.nc')

In [None]:
# Year shown on the map. The data selection, the title and the file name all use this
# one value; previously the data was 2022 while the title printed MAX_YEAR (2023).
# NOTE(review): set to MAX_YEAR if the map is meant to show the latest year — confirm.
map_year = 2022

# Anomaly of the selected year relative to the baseline-period mean
baseline_mean = heatwave_metrics.sel(year=slice(REFERENCE_YEAR_START, REFERENCE_YEAR_END)).mean(dim='year')
plot_data = heatwave_metrics.sel(year=map_year) - baseline_mean
# Re-express longitudes in [-180, 180) and sort them (descending, as before)
plot_data = plot_data.assign_coords(longitude=(((plot_data.longitude + 180) % 360) - 180)).sortby('longitude', ascending=False)
# Cells whose country OBJECTID is below 2000 are kept; multiplying by the boolean
# mask sets every other cell to zero.
land_mask = countries_raster['OBJECTID'] < 2000
plot_data = land_mask * plot_data
# plot_data = plot_data.sel(year=slice(2013,2022)).mean(dim='year')
f, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(projection=MAP_PROJECTION))

plot_data.heatwaves_days.plot(
    transform=ccrs.PlateCarree(),
    ax=ax,
    vmin=-40, vmax=40,
    cmap='RdBu_r',
)
ax.coastlines()
ax.set_title(f'Change in number of heatwave days in {map_year} \n relative to {REFERENCE_YEAR_START}-{REFERENCE_YEAR_END} baseline')
ax.figure.savefig(PATH_FIGURES / f'map_hw_change_{map_year}.png')

In [None]:
import geoplot

Heatwave days per person in 2022. Don't show trend b/c too much variance, more just to give first idea.

In [None]:
exposures_abs_ts = exposures_abs.sum(['latitude', 'longitude']) / population.sel(year=slice(1980,2023)).sum(['latitude', 'longitude'])

In [None]:
exposures_abs_ts

In [None]:
# Table form of the per-person series: unstack index level 1 into columns, then flip rows/columns
exposures_abs_ts_df = (
    exposures_abs_ts
    .to_dataframe()
    .unstack(1)
    .T
)

In [None]:
exposures_abs_ts_df.to_csv(RESULTS_FOLDER / 'heatwave_days_experienced.csv')

In [None]:
# Index by year and keep just the two age-band columns (0 and 65)
exposures_abs_ts_df = (
    exposures_abs_ts_df
    .reset_index()
    .set_index("year")
    .loc[:, [0, 65]]
)

In [None]:
FIGURES_FOLDER = Path("../../../figures")

In [None]:
# Line plot of heatwave days experienced per person, one line per age group
ax = (
    exposures_abs_ts_df
    .rename(columns={0: 'Infants', 65: 'Over 65'})
    .rename_axis(columns="Age group")
    .plot(
        title='Average number of heatwave days experienced',
        ylabel='Days/year',
    )
)

# Save as PNG, then PDF
ax.figure.savefig(FIGURES_FOLDER / 'global_hw_per_person.png')
ax.figure.savefig(FIGURES_FOLDER / 'global_hw_per_person.pdf')

In [None]:
# Global total exposure per year, reshaped to one column per age band (0 and 65)
plot_data = (
    exposures_abs
    .sum(['latitude', 'longitude'])
    .to_dataframe()
    .unstack(1)
    .transpose()
    .reset_index()
    .set_index("year")
    .loc[:, [0, 65]]
)

In [None]:

# Plot totals in billions of person-days; the CSV written last keeps the unscaled values
ax = (
    (plot_data / 1e9)
    .rename(columns={0: 'Infants', 65: 'Over 65'})
    .rename_axis(columns="Age group")
    .plot(
        title='Total number of heatwave days experienced',
        ylabel='billion person-days',
    )
)

ax.figure.savefig(FIGURES_FOLDER / 'heatwaves_exposure_total.png')
ax.figure.savefig(FIGURES_FOLDER / 'heatwaves_exposure_total.pdf')
plot_data.to_csv(FIGURES_FOLDER / 'heatwaves_exposure_total.csv')

### Compare changes in total exposures with changes in population and heatwave days

Simple attribution: given the total increase in heatwave exposure (person-days), what part comes from climate and what part comes from population increase?

- Don't really do it (according to Xiang isn't that obvious), just report % changes in HW, Persons, and person-days between two reference periods
- Choose a 'recent' period, could do ten-years to date so 2013-2022, bit random. Otherwise 2010-2020

In [None]:
exposures_abs_rolling =  exposures_abs_ts_df.rolling(10).mean().dropna()

In [None]:
exposures_abs_rolling.unstack().to_csv(RESULTS_FOLDER / 'heatwave_days_experienced_10_year_rolling_mean.csv')

# IMPORTANT: use an area-weighted average for the 'raw' heatwave days; simply summing across pixels is not meaningful.

In [None]:
cos_lat = np.cos(np.radians(heatwave_metrics.latitude))

In [None]:
def _summary_weight(data, yrs):
    """Latitude-weighted spatial mean of ``data``, averaged over the years ``yrs``.

    Uses xarray's weighted mean with ``cos_lat`` as weights, i.e.
    sum(data * w) / sum(w). The earlier ``(data * cos_lat).mean(...)`` never
    divided by the sum of the weights, so absolute values were scaled down by
    the mean weight (relative changes between periods were unaffected).

    Parameters
    ----------
    data : xarray.DataArray
        Array with 'year', 'latitude' and 'longitude' dimensions.
    yrs : slice
        Years to select, e.g. ``slice(1986, 2005)``.

    Returns
    -------
    xarray.DataArray
        The computed (in-memory) weighted mean.
    """
    return (data.sel(year=yrs)
            .weighted(cos_lat)
            .mean(['latitude', 'longitude'])
            .mean(dim='year')
            .compute())

# Baseline period, recent decade, and 10-year rolling mean of the weighted spatial mean
hw_ref = _summary_weight(heatwave_metrics.heatwaves_days, slice(1986, 2005))
hw_dec = _summary_weight(heatwave_metrics.heatwaves_days, slice(2013, 2022))
hw_rol = (heatwave_metrics.heatwaves_days
          .weighted(cos_lat)
          .mean(['latitude', 'longitude'])
          .rolling(year=10).mean().compute())


In [None]:
hw_rol.name = 'heatwave_days'

In [None]:
(100*(hw_dec - hw_ref) / hw_ref).item()

In [None]:
def _summary(data, yrs):
    """Sum ``data`` over the grid for the years ``yrs``, then average those yearly totals."""
    yearly_totals = data.sel(year=yrs).sum(['latitude', 'longitude'])
    return yearly_totals.mean(dim='year').compute()

# (The unweighted heatwave-day summaries that used to live here are superseded by
#  the hw_ref / hw_dec / hw_rol values computed in the weighted cell.)

# Population: baseline mean, recent-decade mean, 10-year rolling mean of the global total
po_ref = _summary(population, slice(1986, 2005))
po_dec = _summary(population, slice(2013, 2022))
po_rol = (
    population
    .sum(['latitude', 'longitude'])
    .rolling(year=10)
    .mean()
    .compute()
)

# Exposure (person-days): same three summaries
ex_ref = _summary(exposures_abs.heatwaves_days, slice(1986, 2005))
ex_dec = _summary(exposures_abs.heatwaves_days, slice(2013, 2022))
ex_rol = (
    exposures_abs.heatwaves_days
    .sum(['latitude', 'longitude'])
    .rolling(year=10)
    .mean()
    .compute()
)
ex_rol.name = 'heatwave_person_days'

In [None]:
# Write each 10-year rolling mean to CSV, leaving out rows that contain NaN
ex_rol.to_dataframe().dropna().to_csv(
    RESULTS_FOLDER / 'heatwave_exposure_days_10_year_rolling_mean.csv'
)
hw_rol.to_dataframe().dropna().to_csv(
    RESULTS_FOLDER / 'heatwave_days_10_year_rolling_mean.csv'
)
po_rol.to_dataframe().dropna().to_csv(
    RESULTS_FOLDER / 'population_10_year_rolling_mean.csv'
)

In [None]:
hw_ref

In [None]:
hw_dec

In [None]:
(hw_dec - hw_ref).item()

In [None]:
(100*(hw_dec - hw_ref) / hw_ref).item()

In [None]:
# (100*(hw_dec) / hw_ref).item()

In [None]:
po_ref.to_dataframe()

In [None]:
po_dec.to_dataframe()

In [None]:
100*((po_dec - po_ref) / po_ref).to_dataframe()

In [None]:
po_rol.to_dataframe().unstack().T

In [None]:
100*((ex_dec - ex_ref) / ex_ref).to_dataframe()

By LC group

In [None]:
(100*(po_rol - po_ref) / po_ref).to_dataframe().unstack(0).plot()

In [None]:
ax = (100*(ex_rol - ex_ref) / ex_ref).to_dataframe('heatwave days').unstack(0).plot()
ax.axhline(0)

In [None]:
# Map by LC group is not very interesting
# e = (exposures_abs_lc_groups
#  .exposures_weighted
#  .sel(year=2022, age_band_lower_bound=65)
#  .to_dataframe()
#  .join(lc_map)
#  .set_geometry('geometry')
# )

# e.plot(column='exposures_weighted', legend=True)

By country

In [None]:
# Name the ISO3 column like the 'country' coordinate of the exposure results so the
# two can be joined. (A second rename that omitted `columns=` was a no-op and is removed.)
countries = countries.rename(columns={"ISO_3_CODE": "country"})


def _map_mean_exposure_over65(years):
    """Choropleth of the mean weighted exposure of over-65s per country for ``years``.

    The merge is called on the GeoDataFrame (``countries``) so the result is still
    a GeoDataFrame with its geometry; merging from the plain DataFrame side returns
    a pandas DataFrame, which has no ``set_geometry`` and cannot be mapped.

    Parameters
    ----------
    years : slice
        Years to average over, e.g. ``slice(1986, 2005)``.

    Returns
    -------
    tuple
        The merged GeoDataFrame and the matplotlib Axes of the map.
    """
    mean_exposure = (country_exposure_abs.exposures_weighted
                     .sel(age_band_lower_bound=65, drop=True)
                     .sel(year=years)
                     .mean(dim='year')
                     .to_dataframe()
                     .reset_index())
    merged = countries.merge(mean_exposure, on='country')
    axes = merged.plot(column='exposures_weighted', vmin=0, vmax=20, legend=True)
    return merged, axes


# Baseline period first, then the recent decade (same colour scale for comparison)
plot_df, g = _map_mean_exposure_over65(slice(1986, 2005))
plot_df, g = _map_mean_exposure_over65(slice(2013, 2022))


# f.colorbar(ax, ax=axs, shrink=0.6, location='bottom')


In [None]:
countries['country']


In [None]:
# Mean weighted exposure of over-65s per country: baseline period ...
ref = (country_exposure_abs.exposures_weighted
       .sel(age_band_lower_bound=65, drop=True)
       .sel(year=slice(1986, 2005))
       .mean(dim='year')
       .to_dataframe()
       )
# ... and recent decade
yr = (country_exposure_abs.exposures_weighted
      .sel(age_band_lower_bound=65, drop=True)
      .sel(year=slice(2013, 2022))
      .mean(dim='year')
      .to_dataframe())

# Map the difference. The merge is called on the GeoDataFrame so the result keeps
# its geometry; DataFrame.merge(GeoDataFrame) returns a plain DataFrame, which has
# no set_geometry and cannot be drawn as a map.
e = (countries
     .merge((yr - ref).reset_index(), on="country")
     .plot(column='exposures_weighted',
           legend=True, vmin=0, vmax=14,
           cmap='plasma'
           )
     )

In [None]:
# NOTE(review): this cell repeats the previous one (same data, same map) — confirm
# whether it was meant to show something else, e.g. another age band.
# Mean weighted exposure of over-65s per country: baseline period ...
ref = (country_exposure_abs.exposures_weighted
       .sel(age_band_lower_bound=65, drop=True)
       .sel(year=slice(1986, 2005))
       .mean(dim='year')
       .to_dataframe()
       )
# ... and recent decade
yr = (country_exposure_abs.exposures_weighted
      .sel(age_band_lower_bound=65, drop=True)
      .sel(year=slice(2013, 2022))
      .mean(dim='year')
      .to_dataframe())

# Map the difference. The merge is called on the GeoDataFrame so the result keeps
# its geometry; DataFrame.merge(GeoDataFrame) returns a plain DataFrame, which has
# no set_geometry and cannot be drawn as a map.
e = (countries
     .merge((yr - ref).reset_index(), on="country")
     .plot(column='exposures_weighted',
           legend=True, vmin=0, vmax=14,
           cmap='plasma'
           )
     )

In [None]:
# Mean weighted exposure per Lancet Countdown group (all age bands kept):
# `ref` covers 1986-2005 and `yr` covers 2013-2022.
ref, yr = (
    exposures_abs_lc_groups.exposures_weighted
    .sel(year=period)
    .mean(dim='year')
    .to_dataframe()
    for period in (slice(1986, 2005), slice(2013, 2022))
)

In [None]:
# Bar chart of weighted exposure per Lancet Countdown group for the single year 2022.
# The title now says so; it previously claimed a "10 year mean 2013-2022", which is
# what the next figure shows, not this one.
# Note: `ax` ends up holding the Legend returned by .legend(); its .figure is the plot's figure.
ax = (exposures_abs_lc_groups.exposures_weighted
      .sel(year=2022)
      .to_dataframe()
      .exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')
      .rename(index={'South and Central America': 'South and \nCentral America'})
      .plot
      .bar(ylabel='days/year',
           title='Heatwave days per vulnerable person\n in 2022'
           )
      .legend(bbox_to_anchor=(1.04, 0.5),
              loc="center left",
              borderaxespad=0, title='Age group')
      )
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_lc_group_2022.png')
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_lc_group_2022.pdf')

In [None]:
# Bar chart of the 2013-2022 mean per Lancet Countdown group, one bar per age band.
# `ax` holds the Legend returned by .legend(); its .figure is the plot's figure.
ax = (
    yr['exposures_weighted']
    .unstack(1)
    .rename_axis(index='', columns='Heatwave days')
    .rename(index={'South and Central America': 'South and \nCentral America'})
    .plot.bar(
        ylabel='days/year',
        title='Heatwave days per vulnerable person\n 10 year mean 2013-2022',
    )
    .legend(
        title='Age group',
        loc="center left",
        bbox_to_anchor=(1.04, 0.5),
        borderaxespad=0,
    )
)
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_lc_group_2013-2022.png')
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_lc_group_2013-2022.pdf')

In [None]:
e = (yr-ref).exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')

In [None]:
e

In [None]:
# Bar chart of the change (recent decade minus baseline) per Lancet Countdown group.
# `ax` holds the Legend returned by .legend(); its .figure is the plot's figure.
ax = (
    e
    .rename(index={'South and Central America': 'South and \nCentral America'})
    .plot.bar(
        ylabel='days/year',
        title='Mean change in heatwave days per vulnerable person by region\n from 1986-2005 to 2013-2022 ',
    )
    .legend(
        title='Age group',
        loc="center left",
        bbox_to_anchor=(1.04, 0.5),
        borderaxespad=0,
    )
)
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_change_to_baseline_lc_group_2013-2022.png')
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_change_to_baseline_lc_group_2013-2022.pdf')

In [None]:
p = (100*(yr-ref) / ref).exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')

In [None]:
p.columns = ['Infants', '65+']

In [None]:
p

In [None]:
# Bar chart of the percentage change relative to the baseline per Lancet Countdown group.
# The title read "heatwave days per by region"; the missing words are restored to match
# the wording of the sibling figure ("per vulnerable person by region").
# Note: `ax` ends up holding the Legend returned by .legend(); its .figure is the plot's figure.
ax = (p
      .rename(index={'South and Central America': 'South and \nCentral America'})
      .plot
      .bar(ylabel='%',
           title='Increase in mean heatwave days per vulnerable person by region\n in 2013-2022 relative to baseline'
           )
      .legend(bbox_to_anchor=(1.04, 0.5),
              loc="center left",
              borderaxespad=0, title='Age group')
      )
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_pct_to_baseline_lc_group_2013-2022.png')
ax.figure.savefig(PATH_FIGURES / 'heatwave_days_pct_to_baseline_lc_group_2013-2022.pdf')

## Plot exposures to change

**NOTE** Some of this is already saved out automatically in the data gen notebook

> Plot exposures combining the 1980-2000 values calculated using histsoc with the 2000-2020 values. Highlight that the data sources are different

In [None]:
exposures_over65

In [None]:
import seaborn as sns

# Over-65 exposure to change in billions of person-days, drawn as two segments that
# meet at the year 2000: 2000 onwards labelled 'WorldPop', up to 2000 labelled 'ISIMIP'.
with sns.axes_style("whitegrid"):
    var = 'heatwaves_days'

    f, ax = plt.subplots()

    billions_per_year = exposures_over65.sum(dim=['latitude', 'longitude']) / 1e9
    billions_per_year.loc[2000:].plot(ax=ax, label='WorldPop')
    billions_per_year.loc[:2000].plot(ax=ax, label='ISIMIP')

    ax.legend()
    ax.set_ylabel('Billion person-days')
    f.savefig(PATH_FIGURES / 'heatwave person-days hybrid 1980-2020.png')
    f.savefig(PATH_FIGURES / 'heatwave person-days hybrid 1980-2020.pdf')
    
    

In [None]:
plot_data

In [None]:
# 10-year rolling mean of the over-65 exposure to change, in billions of person-days
plot_data = (
    (exposures_over65.sum(dim=['latitude', 'longitude']) / 1e9)
    .rolling(year=10)
    .mean()
)
with sns.axes_style("whitegrid"):
    var = 'heatwaves_days'

    f, ax = plt.subplots()

    plot_data.plot(label='10-year rolling mean', ax=ax)
    # (an ISIMIP-only dotted line and the figure export are currently switched off)
    ax.legend()
    ax.set_ylabel('Billion person-days')
    
    

In [None]:
plot_data

In [None]:
# Global exposure to change per year and age band, in billions of person-days so the
# values match the axis label (the raw person-day totals were plotted before).
plot_data = (exposures_change.sum(['latitude', 'longitude']) / 1e9).to_dataframe('').unstack('age_band_lower_bound')
plot_data.columns = ['infants', 'over 65']
plot_data = plot_data[['over 65', 'infants']]

f, ax = plt.subplots(figsize=(6, 2.7))
ax = plot_data.plot.bar(stacked=True, width=0.89, ax=ax)
ax.set_ylabel('Billion person-days')
ax.set_title('Exposures of vulnerable populations to \nchange in heatwave occurance')
ax.legend(title='Age')

# Hatch the bars of the years before 2000 (the years the hybrid line plot labels 'ISIMIP').
# ax.patches holds all bars of the first plotted column, then all bars of the second,
# so each column's block starts at a multiple of the number of years. Deriving the
# slices from the data replaces the hard-coded [:20] / [44:65], where the second
# slice covered 21 bars (one year more than the first).
n_years = len(plot_data)
n_pre_2000 = int((plot_data.index < 2000).sum())
hatch_styles = [('...', 'C0'), ('xxxx', 'C1')]  # (hatch, edge colour) in column order
for column_position, (hatch, edge_colour) in enumerate(hatch_styles):
    first_patch = column_position * n_years
    for p in ax.patches[first_patch:first_patch + n_pre_2000]:
        p.set_hatch(hatch)
        p.set_edgecolor(edge_colour)
        p.set_facecolor('w')

plt.savefig(PATH_FIGURES / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.png')
plt.savefig(PATH_FIGURES / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.pdf')

In [None]:
ax.patches

## Absolute exposures

In [None]:
# Global absolute exposure per year and age band, in billions of person-days so the
# values match the axis label (the raw person-day totals were plotted before).
plot_data = (exposures_abs.sum(['latitude', 'longitude']) / 1e9).to_dataframe().unstack('age_band_lower_bound')
# sns.barplot(data=plot_data, x='year', y='heatwaves_days', )

plot_data.columns = ['infants', 'over 65']
plot_data = plot_data[['over 65', 'infants']]

f, ax = plt.subplots(figsize=(6, 2.7))

ax = plot_data.plot.bar(stacked=True, width=0.89, ax=ax)
ax.set_ylabel('Billion person-days')
ax.set_title('Exposures of vulnerable populations to heatwaves')
ax.legend(title='Age ')

# NOTE: wasn't an easy way to set the different hatches so have to set manually the indexes
# for p in ax.patches[:20]:
#     p.set_hatch('...')
#     p.set_edgecolor('C0')
#     p.set_facecolor('w')

# for p in ax.patches[43:63]:
#     p.set_hatch('xxxx')
#     p.set_edgecolor('C1')
#     p.set_facecolor('w')

# plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.png')
# plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.pdf')

## By country

In [None]:
sns.color_palette("Paired")

In [None]:
# _namelookup = COUNTRIES_LOOKUP.set_index('ISOCODE').NAME0.to_dict()
# _namelookup['Other'] = 'Rest of World'

In [None]:
c = sns.color_palette("Paired")

In [None]:
# Fixed colour per ISO3 code (and for 'Other'), taken from the palette `c`,
# so a country keeps the same colour across figures
consistent_colors = {
    'CHN': c[5],
    'EGY': c[6],
    'IDN': c[7],
    'IND': c[3],
    'JPN': c[8],
    'NGA': c[11],
    'Other': c[0],
    'USA': c[1],
    'ITA': c[10],
    'PHL': c[9],
}

In [None]:
country_lc_grouping = pd.read_excel(DATA_SRC / 'lancet' / 'admin_boundaries' /'Country Names and Groupings - 2024 Report.xlsx', header=1)

In [None]:
list(country_lc_grouping[country_lc_grouping.ISO3 == 'IDN']['Country Name to use'])

In [None]:
var = 'heatwaves_days'
age_band = 65

# Five countries with the highest mean exposure to change over 2015-2020 for this
# age band. The ranking now uses `age_band` instead of a second hard-coded 65, so
# the age group is set in exactly one place.
top_codes = (country_exposure_change[var]
             .sel(year=slice(2015, 2020), age_band_lower_bound=age_band, drop=True)
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=var, ascending=False)
             .head(5)[var].index.to_list()
             )

# One DataArray per top country, stacked along a 'country' dimension
selected_data_list = [country_exposure_change[var].sel(country=country_code)
                      for country_code in top_codes]
combined_data = xr.concat(selected_data_list, dim='country')

# Table with one row per year and one column per top country
results = (combined_data
           .sel(age_band_lower_bound=age_band, drop=True)
           .to_dataframe()[var]
           .unstack().T)

# Sum over all countries per year for this age band
total_exposures_over65 = (country_exposure_change
                          .sel(age_band_lower_bound=age_band, drop=True)
                          .sum(dim="country").to_dataframe().reset_index())

# Difference between sum of top5 countries and total gives the 'other' category
# (plain arrays: the subtraction is positional, row by row)
results['Other'] = np.array(total_exposures_over65[var]) - np.array(results.sum(axis=1))
# invert column order
results = results[results.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(results / 1e9).plot.bar(stacked=True,
                         width=0.9,
                         ax=ax,
                         color=consistent_colors
                         )

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of over 65s to \nchange in heatwave occurance',
)
ax.xaxis.set_tick_params(labelsize='small')
ax.yaxis.set_tick_params(labelsize='small')

# Rebuild the legend with full country names in place of ISO3 codes
handles, labels = ax.get_legend_handles_labels()
d = dict(zip(labels, handles))
iso_codes = d.keys()

ordered_handles = [d[l] for l in iso_codes]
ordered_labels = [
    "Other" if i == "Other"
    else country_lc_grouping.loc[country_lc_grouping.ISO3 == i, 'Country Name to use'].iloc[0]
    for i in iso_codes
]

ax.legend(ordered_handles, ordered_labels, fontsize='small')

f.savefig(PATH_FIGURES / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.png')
f.savefig(PATH_FIGURES / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.pdf')



In [None]:
top_codes

In [None]:
# Stacked bar chart of infants' exposure to the change in heatwave days:
# the five highest-ranked countries individually, everything else as 'Other'.
var = 'heatwaves_days'
age_band = 0

# Codes of the five countries with the largest 2015-2020 mean for this age band.
top_codes = (country_exposure_change[var]
             .sel(year=slice(2015, 2020), age_band_lower_bound=age_band, drop=True)
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=var, ascending=False)
             .head(5)[var].index.to_list()
            )

# One DataArray per selected country, stacked along 'country' in ranking order.
selected_data_list = [
    country_exposure_change[var].sel(country=country_code)
    for country_code in top_codes
]
combined_data = xr.concat(selected_data_list, dim='country')

# Years as rows, one column per selected country code.
results = (combined_data
           .sel(age_band_lower_bound=age_band, drop=True)
           .to_dataframe()[var]
           .unstack().T)

# Same age band, summed over every country (one row per year after reset_index).
total_exposures_infants = (country_exposure_change
                           .sel(age_band_lower_bound=age_band, drop=True)
                           .sum(dim="country")
                           .to_dataframe()
                           .reset_index())

# Difference between sum of top5 countries and total gives the 'other' category.
# The subtraction is positional (plain numpy arrays, no index alignment), so it
# relies on both tables listing the same years in the same order.
results['Other'] = np.array(total_exposures_infants['heatwaves_days']) - np.array(results.sum(axis=1))
# invert column order
results = results[results.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(results / 1e9).plot.bar(stacked=True,
                         width=0.9,
                         ax=ax,
                         color=consistent_colors)

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of infants to \nchange in heatwave occurrence',
)
ax.tick_params(axis='both', labelsize='small')

# Legend: entries stay in plotting order; only the text changes, from the
# column label (ISO3 code) to the report's country name. 'Other' is kept as is.
handles, labels = ax.get_legend_handles_labels()
legend_labels = [
    code if code == "Other"
    else country_lc_grouping.loc[country_lc_grouping.ISO3 == code, 'Country Name to use'].iloc[0]
    for code in labels
]
ax.legend(handles, legend_labels, fontsize='small')

f.savefig(PATH_FIGURES / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.png')
f.savefig(PATH_FIGURES / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.pdf')


In [None]:
# Collapse the age-band dimension: per-country values summed over every age band.
country_exposure_allages = country_exposure_change.sum(dim='age_band_lower_bound')

In [None]:
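# NOTE(review): this whole cell is commented out (combined over-65 + infants
# figure). Re-enable it or delete it once it is confirmed to be obsolete.
# var = 'heatwaves_days'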
# top_codes = (country_exposure_allages
#              .sel(year=slice(2015,2020), drop=True)
#              .mean(dim='year')
#              .to_dataframe()
#              .sort_values(by=var, ascending=False)
#              .head(5)[var].index.to_list()
#             )


# var = 'heatwaves_days'
# age_band = 0


# selected_data_list = []

# # Loop through each country code
# for country_code in top_codes:
#     # Select the data for the current country
#     selected_data = country_exposure_allages[var].sel(country=country_code)
    
#     # Append the selected data to the list
#     selected_data_list.append(selected_data)

# # You now have a list of xarray DataArrays, one for each country
# # You can combine these into a single DataArray or Dataset if needed
# combined_data = xr.concat(selected_data_list, dim='country')


# results = (combined_data
#            .to_dataframe()[var]
#            .unstack().T)



# # Difference between sum of top5 countries and total gives the 'other' category
# results['Other'] = (np.array(total_exposures_over65[var]) + np.array(total_exposures_infants[var]) ) - np.array(results.sum(axis=1))
# # invert column order
# results = results[results.columns[::-1]]

# # with sns.color_palette("Paired"):
# f, ax = plt.subplots(figsize=(6.2, 2.5))
# (results / 1e9).plot.bar(stacked=True, 
#                          width=0.9, 
#                          ax=ax,
#                          color=consistent_colors
#                         )

# ax.set(
#     xlabel='Year',
#     ylabel='Billion person-days',
#     title='Exposures of over 65s and newborns to \nchange in heatwave occurrence',
# )
# ax.xaxis.set_tick_params(labelsize='small')
# ax.yaxis.set_tick_params(labelsize='small')

# # Manually order the legend
# handles, labels = ax.get_legend_handles_labels()
# d = dict(zip(labels, handles))
# iso_codes = dict(zip(labels, handles)).keys()

# ordered_handles = [d[l] for l in iso_codes]
# ordered_labels = [country_lc_grouping['Country Name to use'][country_lc_grouping.ISO3 == i].iloc[0] if i != "Other" else "Other" for i in iso_codes]

# ax.legend(ordered_handles, ordered_labels, fontsize='small')

# f.savefig(PATH_FIGURES / f'hw_exposure_over65_newborn_countries_1980-{MAX_YEAR}.png')
# f.savefig(PATH_FIGURES / f'hw_exposure_over65_newborn_countries_1980-{MAX_YEAR}.pdf')


In [None]:
# Rank countries by their 2015-2020 mean absolute exposure in the 65+ age band
# and keep the codes of the five largest.
# NOTE(review): the following cell reassigns `top_codes` from
# `country_exposure_change`, so this ranking is not the one its plot uses -
# confirm which ranking is intended.
var = 'exposures_total'
mean_exposure_by_country = (
    country_exposure_abs[var]
    .sel(year=slice(2015, 2020), age_band_lower_bound=65, drop=True)
    .mean(dim='year')
    .to_dataframe()
)
top_codes = (
    mean_exposure_by_country
    .sort_values(by=var, ascending=False)[var]
    .head(5)
    .index
    .to_list()
)

In [None]:

# Stacked bar chart of absolute heatwave exposure of over 65s: five selected
# countries individually, everything else as 'Other'.
age_band = 65

# The countries shown are the top five by *change* in heatwave days (2015-2020
# mean), not by absolute exposure.
# NOTE(review): this replaces any `top_codes` computed from
# `country_exposure_abs` in the previous cell - confirm that is intended.
ranking_var = 'heatwaves_days'
top_codes = (country_exposure_change[ranking_var]
             .sel(year=slice(2015, 2020), age_band_lower_bound=age_band, drop=True)
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=ranking_var, ascending=False)
             .head(5)[ranking_var].index.to_list()
            )

var = 'exposures_total'

# Total for this age band: gridded exposures summed over latitude/longitude.
_total_exposures = (exposures_abs
                    .sum(['latitude', 'longitude'])
                    .sel(age_band_lower_bound=age_band, drop=True)
                    .to_dataframe()
                    .heatwaves_days)

# Years (1980 onwards) as rows, one column per selected country. The countries
# are picked with a boolean mask, so they keep the dataset's own order.
plot_data = (country_exposure_abs[var]
             .sel(country=country_exposure_abs.country.isin(top_codes))
             .sel(age_band_lower_bound=age_band, year=slice(1980, None), drop=True)
             .to_dataframe()[var]
             .unstack().T)
# Difference between sum of top5 countries and total gives the 'other' category
# (pandas aligns the two operands on their index here).
plot_data['Other'] = _total_exposures - plot_data.sum(axis=1)
# invert column order
plot_data = plot_data[plot_data.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(plot_data / 1e9).plot.bar(stacked=True,
                           width=0.9,
                           ax=ax,
                           color=consistent_colors)

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of over 65s to heatwaves',
)
ax.tick_params(axis='both', labelsize='small')

# Legend: entries stay in plotting order; only the text changes, from the
# column label (ISO3 code) to the report's country name. 'Other' is kept as is.
handles, labels = ax.get_legend_handles_labels()
legend_labels = [
    code if code == "Other"
    else country_lc_grouping.loc[country_lc_grouping.ISO3 == code, 'Country Name to use'].iloc[0]
    for code in labels
]
ax.legend(handles, legend_labels, fontsize='small')

# NOTE(review): FIGURES_FOLDER is not defined in the visible part of this
# notebook (other cells save to PATH_FIGURES). The file names below are also
# identical to those used for the "change in heatwave occurance" over-65
# figure, so if both folders are the same one figure overwrites the other.
f.savefig(FIGURES_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.png')
f.savefig(FIGURES_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.pdf')


In [None]:

# Stacked bar chart of absolute heatwave exposure of infants: five selected
# countries individually, everything else as 'Other'.
age_band = 0

# The countries shown are the top five by *change* in heatwave days (2015-2020
# mean), not by absolute exposure.
ranking_var = 'heatwaves_days'
top_codes = (country_exposure_change[ranking_var]
             .sel(year=slice(2015, 2020), age_band_lower_bound=age_band, drop=True)
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=ranking_var, ascending=False)
             .head(5)[ranking_var].index.to_list()
            )

var = 'exposures_total'

# Total for this age band: gridded exposures summed over latitude/longitude.
_total_exposures = (exposures_abs
                    .sum(['latitude', 'longitude'])
                    .sel(age_band_lower_bound=age_band, drop=True)
                    .to_dataframe()
                    .heatwaves_days)

# Years (1980 onwards) as rows, one column per selected country. The countries
# are picked with a boolean mask, so they keep the dataset's own order.
plot_data = (country_exposure_abs[var]
             .sel(country=country_exposure_abs.country.isin(top_codes))
             .sel(age_band_lower_bound=age_band, year=slice(1980, None), drop=True)
             .to_dataframe()[var]
             .unstack().T)
# Difference between sum of top5 countries and total gives the 'other' category
# (pandas aligns the two operands on their index here).
plot_data['Other'] = _total_exposures - plot_data.sum(axis=1)
# invert column order
plot_data = plot_data[plot_data.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(plot_data / 1e9).plot.bar(stacked=True,
                           width=0.9,
                           ax=ax,
                           color=consistent_colors)

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of infants to heatwaves',
)
ax.tick_params(axis='both', labelsize='small')

# Legend: entries stay in plotting order; only the text changes, from the
# column label (ISO3 code) to the report's country name. 'Other' is kept as is.
handles, labels = ax.get_legend_handles_labels()
legend_labels = [
    code if code == "Other"
    else country_lc_grouping.loc[country_lc_grouping.ISO3 == code, 'Country Name to use'].iloc[0]
    for code in labels
]
ax.legend(handles, legend_labels, fontsize='small')

# NOTE(review): FIGURES_FOLDER is not defined in the visible part of this
# notebook (other cells save to PATH_FIGURES). The file names below are also
# identical to those used for the "change in heatwave occurance" infants
# figure, so if both folders are the same one figure overwrites the other.
f.savefig(FIGURES_FOLDER / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.png')
f.savefig(FIGURES_FOLDER / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.pdf')


## HDI and WHO plots for appendix

In [None]:
import seaborn as sns

In [None]:
# Display the HDI-grouped exposure dataset that the next cell plots.
# NOTE(review): `hdi_exposure` is expected to be created earlier in the
# notebook (not visible in this section) - confirm.
hdi_exposure

In [None]:
import seaborn as sns

# 10-year rolling mean of the weighted exposure, flattened to a long table with
# human-readable column names for the facet plot.
hdi_rolling_mean = hdi_exposure.exposures_weighted.rolling(year=10).mean()
hdi_column_labels = {
    'age_band_lower_bound': 'Age group',
    'exposures_weighted': 'Heatwave days',
    'level_of_human_development': 'HDI class',
}
plot_data = (
    hdi_rolling_mean
    .to_dataframe()
    .reset_index()
    .rename(columns=hdi_column_labels)
    # drop rows whose HDI class is the empty string
    .loc[lambda table: table['HDI class'] != '']
)

# One panel per age group, one line per HDI class, shared y axis.
grid = sns.relplot(
    data=plot_data,
    kind='line',
    x='year',
    y='Heatwave days',
    col='Age group',
    hue='HDI class',
    facet_kws={'sharey': True},
)
grid.figure.suptitle('10 year rolling mean of population-weighted heatwave days by HDI category', y=1.02)
grid.figure.savefig(PATH_FIGURES / 'heatwave_days_by_hdi.png')

In [None]:
# 10-year rolling mean of the weighted exposure, flattened to a long table with
# human-readable column names for the facet plot.
who_rolling_mean = who_exposure.exposures_weighted.rolling(year=10).mean()
who_column_labels = {
    'age_band_lower_bound': 'Age group',
    'exposures_weighted': 'Heatwave days',
    'who_region': 'WHO region',
}
plot_data = (
    who_rolling_mean
    .to_dataframe()
    .reset_index()
    .rename(columns=who_column_labels)
)

# One panel per age group, one line per WHO region, shared y axis.
grid = sns.relplot(
    data=plot_data,
    kind='line',
    x='year',
    y='Heatwave days',
    col='Age group',
    hue='WHO region',
    facet_kws={'sharey': True},
)
grid.figure.suptitle('10 year rolling mean of population-weighted heatwave days by WHO Region', y=1.02)
grid.figure.savefig(PATH_FIGURES / 'heatwave_days_by_who.png')