# Present data for report



In [31]:
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpdx
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns

from cartopy import crs as ccrs
from scipy import stats

from config import DATA_SRC, POP_DATA_SRC, WEATHER_SRC

In [32]:
# Figure settings, applied globally to every figure created below.
plt.rcParams.update({
    'figure.dpi': 120,
    'savefig.dpi': 600,
    'figure.figsize': (5, 2.5),
    'figure.titlesize': 'medium',
    'axes.titlesize': 'medium',
    'savefig.bbox': 'tight',
})

In [33]:
# Last year of data, and the reference period used for baseline means.
MAX_YEAR = 2023
REFERENCE_YEAR_START, REFERENCE_YEAR_END = 1986, 2005

# Final and intermediate results currently resolve to the same folder.
_heatwaves_dir = DATA_SRC / 'lancet' / 'heatwaves'
RESULTS_FOLDER = _heatwaves_dir / f'results_{MAX_YEAR + 1}/gpw_exposure'
INTERMEDIATE_RESULTS_FOLDER = _heatwaves_dir / f'results_{MAX_YEAR + 1}/gpw_exposure'

# Machine-specific locations (home directory and an absolute NFS path).
_home_dir = Path('~').expanduser()
SD_FOLDER = _home_dir / 'Documents' / 'Lancet' / 'Data' / 'Standardisation guidelines - LIVE FOR AUTHORS'
_reports_dir = Path('/nfs/n2o/wcr/szelie/lancet/reports')
COUNTRY_SHAPEFILE = _reports_dir / 'Shapefiles' / 'detailed_2013.shp'

# Country identifier lookup table and country-code raster.
COUNTRY_ID_LOOKUP = DATA_SRC / 'lancet/population/gpwv4/gpw_v4_national_identifier_lookup.txt'
COUNTRY_RASTER_FILES = DATA_SRC / 'lancet/population/country_codes_30min_era_compat.nc'

# Gridded population inputs (all demographics, and infants only).
DEMOGRAPHICS_TOTALS_FILE = POP_DATA_SRC / 'demographics_hybrid_1950_2020_15_min_era_compat.nc'
INFANTS_TOTALS_FILE = POP_DATA_SRC / 'infants_1950_2020_hybrid_15_min_era_compat.nc'

In [34]:
MAP_PROJECTION = ccrs.EckertIII()

In [35]:
MIN_YEAR = 1980

In [36]:
COUNTRIES_LOOKUP = pd.read_csv(COUNTRY_ID_LOOKUP,sep='\t')

In [37]:
# Population file that also carries the national identifier grid.
GPW_FILE = DATA_SRC/ 'lancet/population/UN WPP-Adjusted Population Count v4.11 2000-2020 15min era compat.nc'
_national_id_var = 'National Identifier Grid, v4.11 (2010): National Identifier Grid'

gpw_dat = xr.open_dataset(GPW_FILE)
COUNTRIES_RASTER = gpw_dat[_national_id_var].load()
# True where the identifier is below 2000.
# NOTE(review): presumably identifiers >= 2000 mark non-land cells — confirm against the GPW documentation.
land_mask = COUNTRIES_RASTER < 2000

## Load basic country data

In [38]:
COUNTRIES_LC_GROUPINGS = pd.read_excel('/nfs/n2o/wcr/szelie/lancet/reports/Country Names and groupings - 2023 Report.xlsx', header=1)

In [39]:
# Attach the 'LC Grouping' column to the lookup, matching ISO3 codes to ISOCODE.
_lc_grouping_by_iso3 = COUNTRIES_LC_GROUPINGS.set_index('ISO3')[['LC Grouping']]
COUNTRIES_LOOKUP = COUNTRIES_LOOKUP.join(_lc_grouping_by_iso3, on='ISOCODE')

In [40]:
import requests
import geopandas as gpd

# Query endpoint of the WHO MapServer layer holding the country boundaries.
url = "https://extranet.who.int/maps/rest/services/GHO/world_2013_detailed_basemap_layer1/MapServer/2/query"

# Request every feature ("1=1") with all attribute fields, in EPSG:4326, as GeoJSON.
params = {
    "where": "1=1",
    "outFields": "*",  # Request all fields
    "outSR": "4326",
    "f": "geojson"
}

# A timeout stops the notebook from hanging indefinitely if the server does
# not answer.
response = requests.get(url, params=params, timeout=120)

# Fail immediately on a bad response. Printing a message and carrying on would
# leave `countries` undefined and only surface as a NameError in a later cell.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data: HTTP Status {response.status_code}")

# Convert the GeoJSON features to a GeoDataFrame.
data = response.json()
countries = gpd.GeoDataFrame.from_features(data["features"])
# NOTE(review): later cells call `.join(countries)` on frames indexed by
# country, which aligns on the index. This frame is not re-indexed here (the
# `set_index('ISO_3_CODE')` cell below is commented out) — confirm the join
# still matches rows as intended.


In [41]:
#countries = gpd.read_file(COUNTRY_SHAPEFILE)

In [42]:
SD_FOLDER

PosixPath('/cluster/home/szelie/Documents/Lancet/Data/Standardisation guidelines - LIVE FOR AUTHORS')

In [43]:
# #country_classification = pd.read_excel(SD_FOLDER / 'Country names - ISO3 codes - WHO and HDI classifications2021.xlsx', skiprows=1)

# countries = countries.set_index('ISO_3_CODE')

# countries = countries.join(country_classification.set_index('ISO ALPHA-3 code'))

# countries.index.name = 'country'

In [44]:
#countries = countries.join(COUNTRIES_LOOKUP.set_index('ISOCODE')['LC Grouping'])

In [45]:
#countries.plot(column='LC Grouping')

In [46]:
# UN WPP 2019 totals: keep the medium variant, attach ISO codes through the
# UNSD code, and drop locations that have no ISO code in the lookup.
_wpp_all_variants = pd.read_csv(POP_DATA_SRC / 'UN' / 'WPP2019_TotalPopulationBySex.csv')
_wpp_medium = _wpp_all_variants[_wpp_all_variants.VarID == 2]  # medium variant

_iso_by_unsd_code = COUNTRIES_LOOKUP.set_index('UNSDCODE')['ISOCODE']
un_pop_totals = _wpp_medium.set_index('LocID').join(_iso_by_unsd_code)

un_pop_totals = un_pop_totals[un_pop_totals.ISOCODE.notna()]

In [47]:
# un_pop_totals_xr = un_pop_totals[['ISOCODE', 'Time', 'PopTotal']].set_index(['ISOCODE', 'Time']).sort_index().to_xarray()

# un_pop_totals_xr = un_pop_totals_xr.rename({'Time': 'year', 'ISOCODE': 'country'}).PopTotal

## Load population data

In [48]:
demographics_totals = xr.open_dataarray(DEMOGRAPHICS_TOTALS_FILE)
# population_over_65 = demographics_totals.sel(age_band_lower_bound=65).load()

population_infants = xr.open_dataarray(INFANTS_TOTALS_FILE)

 Extrapolate demographic data beyond 2020, up to `MAX_YEAR` (2023)

In [49]:
population_infants

In [50]:
extrapolated_years = np.arange(2020+1, MAX_YEAR+1)

In [None]:
# Extend the demographic grid to the extrapolated years and append them along 'year'.
_demographics_extension = demographics_totals.interp(
    year=extrapolated_years, kwargs={'fill_value': 'extrapolate'})
demographics_totals = xr.concat([demographics_totals, _demographics_extension], 'year')

In [None]:
population_over_65 = demographics_totals.sel(age_band_lower_bound=65).compute()

In [None]:
# Same extension for infants; the result is loaded into memory.
_infants_extension = population_infants.interp(
    year=extrapolated_years, kwargs={'fill_value': 'extrapolate'})
population_infants = xr.concat([population_infants, _infants_extension], 'year').load()


In [None]:
# Stack both groups along a new age dimension (0 = infants, 65 = over 65).
_age_bands = pd.Index([0, 65], name='age_band_lower_bound')
population = xr.concat([population_infants, population_over_65], dim=_age_bands)

## Load Heatwave metrics data

In [None]:
INTERMEDIATE_RESULTS_FOLDER / 'heatwave_days_era5'

In [None]:
# Open every NetCDF file in the heatwave-days folder as one dataset,
# combined by coordinates.
_heatwave_days_dir = Path('/nfs/n2o/wcr/szelie/lancet/heatwaves/results_2024/heatwave_days_era5')
heatwave_metrics_files = sorted(_heatwave_days_dir.glob('*.nc'))
heatwave_metrics = xr.open_mfdataset(heatwave_metrics_files, combine='by_coords')

## Load exposure to change results

In [None]:
# Gridded exposure-to-change results per age group, stacked along a new
# age dimension (0 = infants, 65 = over 65).
exposures_infants = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER / f'heatwave_exposure_change_infants_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc')
exposures_over65 = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER / f'heatwave_exposure_change_over65_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc')

_age_bands = pd.Index([0, 65], name='age_band_lower_bound')
exposures_change = xr.concat([exposures_infants, exposures_over65], dim=_age_bands)

In [None]:
# Sum over the grid, then split into one data frame per age group.
total_exposures = exposures_change.sum(['latitude', 'longitude'])

total_exposures_change_infants, total_exposures_change_over65 = (
    total_exposures.sel(age_band_lower_bound=_age, drop=True).to_dataframe()
    for _age in (0, 65)
)

## Load exposure absolute values (not exposure to change)

In [None]:
exposures_abs = xr.open_dataset(INTERMEDIATE_RESULTS_FOLDER / f'heatwave_exposure_multi_threshold_{MIN_YEAR}-{MAX_YEAR}.nc')

## Load the country exposure results

In [None]:
country_weighted = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER / f'countries_heatwaves_exposure_weighted_change_{MIN_YEAR}-{MAX_YEAR}.nc')

In [None]:
country_exposure_change = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER / f'countries_heatwaves_exposure_change_{MIN_YEAR}-{MAX_YEAR}.nc')

In [None]:
country_exposure_abs = xr.open_dataset(
    INTERMEDIATE_RESULTS_FOLDER / f'countries_heatwaves_exposure_{MIN_YEAR}-{MAX_YEAR}.nc')

## Load aggregated by hdi and WHO region data

In [None]:
def _open_regional_result(stem):
    """Open `<stem>_<MIN_YEAR>-<MAX_YEAR>.nc` from the intermediate results folder."""
    return xr.open_dataset(INTERMEDIATE_RESULTS_FOLDER / f'{stem}_{MIN_YEAR}-{MAX_YEAR}.nc')


# Absolute exposures aggregated by HDI class and by WHO region.
hdi_exposure = _open_regional_result('hdi_regions_heatwaves_exposure')
who_exposure = _open_regional_result('who_regions_heatwaves_exposure')

# Exposure to change, same aggregations.
hdi_exposure_change = _open_regional_result('hdi_regions_heatwaves_exposure_change')
who_exposure_change = _open_regional_result('who_regions_heatwaves_exposure_change')

### Create country table with the WHO and HDI region columns

In [None]:
# country_exposure_change = xr.merge([
#     country_exposure_change,
#     countries.loc[~pd.isna(countries.index),
#                ['WHO_REGION','WHO Region', 'Level of human development', 'Human Development Index in 2018 (HDI)']]
#           .to_xarray()
#          ])

In [None]:
# country_weighted = xr.merge([
#     country_weighted,
#     countries.loc[~pd.isna(countries.index),
#                ['WHO_REGION','WHO Region', 'Level of human development', 'Human Development Index in 2018 (HDI)']]
#           .to_xarray()
#          ])

## Load results by LC grouping

In [None]:
exposures_change_lc_groups = xr.open_dataset(INTERMEDIATE_RESULTS_FOLDER / 'exposures_change_by_lc_group.nc')
exposures_abs_lc_groups = xr.open_dataset(INTERMEDIATE_RESULTS_FOLDER / 'exposures_abs_by_lc_group.nc')



# Re-export data tables as csv

In [None]:
# Country-level heatwave-day tables, written out as CSV.
# File names are reproduced exactly (including spelling) so existing
# consumers of these files still find them.
_weighted_days = country_weighted.heatwaves_days.to_dataframe()
_weighted_days.to_csv(RESULTS_FOLDER / 'heatwave_exposure_wieghted_change_days_by_country_w_hdi.csv')

_change_days = country_exposure_change.heatwaves_days.to_dataframe()
_change_days.to_csv(RESULTS_FOLDER / 'heatwave_exposure_days_by_country.csv')

In [None]:
# (country_exposure_abs
#  .sel(year=slice(1980,None))
#  .to_dataframe()
#  .join(countries[['WHO Region', 'Level of human development', 'LC Grouping']]
#        .dropna(axis='index')
#        .rename_axis(index='country')
#       )
#   .to_csv(RESULTS_FOLDER / 'heatwave_exposure_abs_days_by_country.csv')
# )

In [None]:
# WHO-region tables: absolute exposure first, then exposure to change.
for _dataset, _csv_name in (
    (who_exposure, 'heatwave_exposure_days_by_who_region.csv'),
    (who_exposure_change, 'heatwave_exposure_days_change_by_who_region.csv'),
):
    _dataset.to_dataframe().to_csv(RESULTS_FOLDER / _csv_name)

In [None]:
# HDI-class tables: absolute exposure first, then exposure to change.
for _dataset, _csv_name in (
    (hdi_exposure, 'heatwave_exposure_days_by_hdi.csv'),
    (hdi_exposure_change, 'heatwave_exposure_days_change_by_hdi.csv'),
):
    _dataset.to_dataframe().to_csv(RESULTS_FOLDER / _csv_name)

In [None]:
# LC-grouping tables: absolute exposure first, then exposure to change.
for _dataset, _csv_name in (
    (exposures_abs_lc_groups, 'heatwave_exposure_days_by_lc_group.csv'),
    (exposures_change_lc_groups, 'heatwave_exposure_days_change_by_lc_group.csv'),
):
    _dataset.to_dataframe().to_csv(RESULTS_FOLDER / _csv_name)

# Plots

## Plot days of heatwave experienced (exposure weighted days)

This differs from the previous plots because we weight the absolute exposures rather than the changes. The idea is to be able to say something like: "in the 1990s you would typically experience X days of heatwave per year, while in the 2010s you experience Y days".

> Important: when showing averages, don't take the average of the weighted number of days per country. The average must always be population-weighted; otherwise a heatwave in China counts the same as a heatwave in Luxembourg.

In [None]:
# lc_map = countries.dissolve('LC Grouping')

In [None]:
# Map of `heatwaves_days` averaged over 1986-2005.
# (Optional population filter, currently disabled:
#  plot_data.where(population.sel(age_band_lower_bound=65) > 10))
plot_data = heatwave_metrics.sel(year=slice(1986, 2005)).mean(dim='year')
f, ax = plt.subplots(figsize=(6, 3), subplot_kw={'projection': MAP_PROJECTION})

plot_data.heatwaves_days.plot(ax=ax, transform=ccrs.PlateCarree(), vmax=15)


In [None]:
# Map of `heatwaves_days` averaged over 2013-2023.
plot_data = heatwave_metrics.sel(year=slice(2013, 2023)).mean(dim='year')
f, ax = plt.subplots(figsize=(6, 3), subplot_kw={'projection': MAP_PROJECTION})

plot_data.heatwaves_days.plot(ax=ax, transform=ccrs.PlateCarree(), vmax=15)


In [None]:
# Change in heatwave days in the latest year relative to the reference-period
# mean. The year comes from MAX_YEAR so the data, the title and the file name
# always agree (previously the data was hard-coded to 2022 while the title
# printed MAX_YEAR).
_baseline = (heatwave_metrics
             .sel(year=slice(REFERENCE_YEAR_START, REFERENCE_YEAR_END))
             .mean(dim='year'))
plot_data = heatwave_metrics.sel(year=MAX_YEAR) - _baseline
# Multiplying by the boolean mask sets cells outside the mask to zero.
plot_data = land_mask * plot_data
f, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(projection=MAP_PROJECTION))

plot_data.heatwaves_days.plot(
    transform=ccrs.PlateCarree(),
    ax=ax,
    vmin=-40, vmax=40,  # symmetric limits keep zero at the centre of the diverging colormap
    cmap='RdBu_r',
)
ax.coastlines()
ax.set_title(
    f'Change in number of heatwave days in {MAX_YEAR} \n'
    f' relative to {REFERENCE_YEAR_START}-{REFERENCE_YEAR_END} baseline')
ax.figure.savefig(RESULTS_FOLDER / f'map_hw_change_{MAX_YEAR}.png')

In [None]:
import geoplot

Heatwave days per person in 2022. Don't show the trend because there is too much variance; this is just to give a first idea.

In [None]:
exposures_abs.sum(['latitude', 'longitude']).to_dataframe() / 1e9

In [None]:
exposures_abs_ts = exposures_abs.sum(['latitude', 'longitude']) / population.sel(year=slice(1980,2023)).sum(['latitude', 'longitude'])

In [None]:
exposures_abs_ts_df = exposures_abs_ts.to_dataframe().unstack(1)
exposures_abs_ts_df

In [None]:
exposures_abs_ts_df.to_csv(RESULTS_FOLDER / 'heatwave_days_experienced.csv')

In [None]:
exposures_abs_ts_df.loc[REFERENCE_YEAR_START:REFERENCE_YEAR_END].mean()

In [None]:
(exposures_abs_ts_df.rolling(10).mean().dropna() - exposures_abs_ts_df.loc[REFERENCE_YEAR_START:REFERENCE_YEAR_END].mean()) / exposures_abs_ts_df.loc[REFERENCE_YEAR_START:REFERENCE_YEAR_END].mean()

In [None]:
# Heatwave days per person, one line per age group.
_per_person = exposures_abs_ts.to_dataframe().unstack(1)['heatwaves_days']
_per_person = (_per_person
               .rename(columns={0: 'Infants', 65: 'Over 65'})
               .rename_axis(columns="Age group"))
ax = _per_person.plot(ylabel='Days/year', title='Average number of heatwave days experienced')

for _ext in ('png', 'pdf'):
    ax.figure.savefig(RESULTS_FOLDER / f'global_hw_per_person.{_ext}')

In [None]:
# Global totals of absolute exposure; plotted in billions, exported unscaled.
plot_data = exposures_abs.sum(['latitude', 'longitude'])
_billions_by_age = ((plot_data / 1e9)
                    .to_dataframe()
                    .unstack(1)['heatwaves_days']
                    .rename(columns={0: 'Infants', 65: 'Over 65'})
                    .rename_axis(columns="Age group"))
ax = _billions_by_age.plot(ylabel='billion person-days',
                           title='Total number of heatwave days experienced')

for _ext in ('png', 'pdf'):
    ax.figure.savefig(RESULTS_FOLDER / f'heatwaves_exposure_total.{_ext}')
plot_data.to_dataframe().to_csv(RESULTS_FOLDER / 'heatwaves_exposure_total.csv')

### Compare changes in total exposures with changes in population and heatwave days

Simple attribution: given the total increase in heatwave exposure (person-days), what part comes from climate and what part comes from population increase?

- Don't really do it (according to Xiang isn't that obvious), just report % changes in HW, Persons, and person-days between two reference periods
- Choose a 'recent' period, could do ten-years to date so 2013-2022, bit random. Otherwise 2010-2020

In [None]:
exposures_abs_rolling =  exposures_abs_ts_df.rolling(10).mean().dropna()

In [None]:
exposures_abs_rolling.unstack().to_csv(RESULTS_FOLDER / 'heatwave_days_experienced_10_year_rolling_mean.csv')

In [None]:
exposures_abs_rolling

# IMPORTANT: use an area-weighted average for the 'raw' heatwave days; simply summing across pixels is not meaningful.

In [None]:
cos_lat = np.cos(np.radians(heatwave_metrics.latitude))

In [None]:
def _summary_weight(data, yrs):
    """Return the cos(latitude)-weighted spatial mean of `data`, averaged over `yrs`.

    Parameters
    ----------
    data : xarray.DataArray
        Array with `year`, `latitude` and `longitude` dimensions.
    yrs : slice or label
        Selection passed to `data.sel(year=...)`.

    Returns
    -------
    xarray.DataArray
        The computed (in-memory) weighted mean.
    """
    # `.weighted(...).mean(...)` divides by the sum of the weights. Multiplying
    # by cos(lat) and taking a plain mean (the previous approach) divides by
    # the number of cells instead, which scales absolute values by the mean
    # weight. Ratios between periods are unaffected by that scaling.
    return (data.sel(year=yrs)
            .weighted(cos_lat)
            .mean(['latitude', 'longitude'])
            .mean(dim='year')
            .compute())


hw_ref = _summary_weight(heatwave_metrics.heatwaves_days, slice(1986, 2005))
hw_dec = _summary_weight(heatwave_metrics.heatwaves_days, slice(2013, 2022))
# 10-year rolling mean of the yearly area-weighted mean.
hw_rol = (heatwave_metrics.heatwaves_days
          .weighted(cos_lat)
          .mean(['latitude', 'longitude'])
          .rolling(year=10)
          .mean()
          .compute())


In [None]:
hw_rol.name = 'heatwave_days'

In [None]:
(100*(hw_dec - hw_ref) / hw_ref).item()

In [None]:
def _summary(data, yrs):
    """Sum `data` over the grid for the years `yrs`, then average over those years."""
    selected = data.sel(year=yrs)
    spatial_total = selected.sum(['latitude', 'longitude'])
    return spatial_total.mean(dim='year').compute()


# Heatwave-day summaries (hw_ref / hw_dec / hw_rol) are not computed with
# `_summary`; they use the cos(latitude)-weighted helper instead.
_reference_years = slice(1986, 2005)
_recent_years = slice(2013, 2022)

# Population: period means and 10-year rolling mean of the global total.
po_ref = _summary(population, _reference_years)
po_dec = _summary(population, _recent_years)
po_rol = population.sum(['latitude', 'longitude']).rolling(year=10).mean().compute()

# Exposure (person-days): same summaries.
_exposure_days = exposures_abs.heatwaves_days
ex_ref = _summary(_exposure_days, _reference_years)
ex_dec = _summary(_exposure_days, _recent_years)
ex_rol = _exposure_days.sum(['latitude', 'longitude']).rolling(year=10).mean().compute()
ex_rol.name = 'heatwave_person_days'

In [None]:
# Write each 10-year rolling mean to CSV, dropping rows with missing values.
for _rolling, _csv_name in (
    (ex_rol, 'heatwave_exposure_days_10_year_rolling_mean.csv'),
    (hw_rol, 'heatwave_days_10_year_rolling_mean.csv'),
    (po_rol, 'population_10_year_rolling_mean.csv'),
):
    _rolling.to_dataframe().dropna().to_csv(RESULTS_FOLDER / _csv_name)

In [None]:
hw_ref

In [None]:
hw_dec

In [None]:
(hw_dec - hw_ref).item()

In [None]:
(100*(hw_dec - hw_ref) / hw_ref).item()

In [None]:
# (100*(hw_dec) / hw_ref).item()

In [None]:
po_ref.to_dataframe()

In [None]:
po_dec.to_dataframe()

In [None]:
100*((po_dec - po_ref) / po_ref).to_dataframe()

In [None]:
po_rol.to_dataframe().unstack().T

In [None]:
100*((ex_dec - ex_ref) / ex_ref).to_dataframe()

By LC group

In [None]:
(100*(po_rol - po_ref) / po_ref).to_dataframe().unstack(0).plot()

In [None]:
ax = (100*(ex_rol - ex_ref) / ex_ref).to_dataframe().unstack(0).plot()
ax.axhline(0)

In [None]:
# Map by LC group is not very interesting
# e = (exposures_abs_lc_groups
#  .exposures_weighted
#  .sel(year=2022, age_band_lower_bound=65)
#  .to_dataframe()
#  .join(lc_map)
#  .set_geometry('geometry')
# )

# e.plot(column='exposures_weighted', legend=True)

By country

In [None]:
# Two stacked country maps of over-65 weighted exposure:
# top = 1986-2005 mean, bottom = 2013-2022 mean, on a shared 0-20 colour scale.
f, axs = plt.subplots(2, 1, sharex=True, figsize=(5, 4))

_over65_weighted = (country_exposure_abs.exposures_weighted
                    .sel(age_band_lower_bound=65, drop=True))

# NOTE(review): `.join(countries)` aligns on the index — confirm `countries`
# is indexed by the same country codes as the exposure table.
for ax, _years in zip(axs, (slice(1986, 2005), slice(2013, 2022))):
    _period_mean = _over65_weighted.sel(year=_years).mean(dim='year').to_dataframe()
    g = (_period_mean
         .join(countries)
         .set_geometry('geometry')
         .plot(column='exposures_weighted', vmin=0, vmax=20, ax=ax, legend=True))

# f.colorbar(ax, ax=axs, shrink=0.6, location='bottom')


In [None]:
# Country map of the change in over-65 weighted exposure between the
# 1986-2005 mean and the 2013-2022 mean.
_over65_weighted = (country_exposure_abs.exposures_weighted
                    .sel(age_band_lower_bound=65, drop=True))
ref = _over65_weighted.sel(year=slice(1986, 2005)).mean(dim='year').to_dataframe()
yr = _over65_weighted.sel(year=slice(2013, 2022)).mean(dim='year').to_dataframe()

# NOTE(review): `.join(countries)` aligns on the index — confirm `countries`
# is indexed by the same country codes as the exposure table.
_change_by_country = (yr - ref).join(countries).set_geometry('geometry')
e = _change_by_country.plot(column='exposures_weighted',
                            legend=True, vmin=0, vmax=14,
                            cmap='plasma')

In [None]:
# Period means by LC grouping. All age bands are kept (the over-65-only
# selection is deliberately not applied here).
_lc_weighted = exposures_abs_lc_groups.exposures_weighted

ref = _lc_weighted.sel(year=slice(1986, 2005)).mean(dim='year').to_dataframe()

yr = _lc_weighted.sel(year=slice(2013, 2022)).mean(dim='year').to_dataframe()

In [None]:
# Bar chart of weighted heatwave days by LC grouping for the single year 2022.
_days_2022 = (exposures_abs_lc_groups.exposures_weighted
              .sel(year=2022)
              .to_dataframe()
              .exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')
              # Line break keeps the long region name readable on the x axis.
              .rename(index={'South and Central America': 'South and \nCentral America'}))

# The title now matches the data: this cell plots 2022 only. The 10-year mean
# for 2013-2022 is plotted in the next cell, where the old title came from.
ax = _days_2022.plot.bar(ylabel='days/year',
                         title='Heatwave days per vulnerable person\n in 2022')
# Legend is placed outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.04, 0.5),
          loc="center left",
          borderaxespad=0, title='Age group')
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_lc_group_2022.png')
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_lc_group_2022.pdf')

In [None]:
# Bar chart of the 2013-2022 mean (`yr`) by LC grouping.
_decade_mean = (yr.exposures_weighted
                .unstack(1)
                .rename_axis(index='', columns='Heatwave days')
                .rename(index={'South and Central America': 'South and \nCentral America'}))
_bars = _decade_mean.plot.bar(
    ylabel='days/year',
    title='Heatwave days per vulnerable person\n 10 year mean 2013-2022')
# `ax` holds the Legend returned by `.legend(...)`; its `.figure` is used for saving.
ax = _bars.legend(bbox_to_anchor=(1.04, 0.5),
                  loc="center left",
                  borderaxespad=0, title='Age group')
for _ext in ('png', 'pdf'):
    ax.figure.savefig(RESULTS_FOLDER / f'heatwave_days_lc_group_2013-2022.{_ext}')

In [None]:
e = (yr-ref).exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')

In [None]:
e

In [None]:
# Bar chart of the change table `e` (recent mean minus reference mean) by LC grouping.
_change_table = e.rename(index={'South and Central America': 'South and \nCentral America'})
_bars = _change_table.plot.bar(
    ylabel='days/year',
    title='Mean change in heatwave days per vulnerable person by region\n from 1986-2005 to 2013-2022 ')
# `ax` holds the Legend returned by `.legend(...)`; its `.figure` is used for saving.
ax = _bars.legend(bbox_to_anchor=(1.04, 0.5),
                  loc="center left",
                  borderaxespad=0, title='Age group')
for _ext in ('png', 'pdf'):
    ax.figure.savefig(RESULTS_FOLDER / f'heatwave_days_change_to_baseline_lc_group_2013-2022.{_ext}')

In [None]:
p = (100*(yr-ref) / ref).exposures_weighted.unstack(1).rename_axis(index='', columns='Heatwave days')

In [None]:
p.columns = ['Infants', '65+']

In [None]:
p

In [None]:
# Bar chart of the percentage change table `p` by LC grouping.
_pct_table = p.rename(index={'South and Central America': 'South and \nCentral America'})
# Title fixed: it previously read "heatwave days per by region", missing the
# noun used by the sibling charts ("per vulnerable person").
ax = _pct_table.plot.bar(
    ylabel='%',
    title='Increase in mean heatwave days per vulnerable person by region\n in 2013-2022 relative to baseline')
# Legend is placed outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.04, 0.5),
          loc="center left",
          borderaxespad=0, title='Age group')
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_pct_to_baseline_lc_group_2013-2022.png')
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_pct_to_baseline_lc_group_2013-2022.pdf')

## Plot exposures to change

**NOTE** Some of this is already saved out automatically in the data gen notebook

> Plot exposures combining the 1980-2000 values calculated using histsoc with the 2000-2020 values. Highlight that the data sources are different

In [None]:
with sns.axes_style("whitegrid"):
    var = 'heatwaves_days'

    f, ax = plt.subplots()

    # Fix: the over-65 totals live in `total_exposures_change_over65`; the
    # name `total_exposures_over65` used before is never defined in this
    # notebook and raised a NameError.
    _billion_person_days = total_exposures_change_over65[var] / 1e9

    # Solid line from 2000 on and dotted line before 2000, to show that the
    # two periods come from different population data sources.
    _billion_person_days.loc[2000:].plot(ax=ax, label='GPWv4')
    _billion_person_days.loc[:1999].plot(style=':', label='ISIMIP', ax=ax)
    ax.legend()
    ax.set_ylabel('Billion person-days')
    f.savefig(RESULTS_FOLDER / 'heatwave person-days hybrid 1980-2020.png')
    f.savefig(RESULTS_FOLDER / 'heatwave person-days hybrid 1980-2020.pdf')
    
    

In [None]:
# `var` is set before it is used, so this cell no longer depends on an
# earlier cell having run.
var = 'heatwaves_days'

# Fix: use `total_exposures_change_over65`; `total_exposures_over65` is never
# defined in this notebook.
plot_data = (total_exposures_change_over65[var] / 1e9).rolling(10).mean()

# Exploratory figure only; it is not saved to disk.
with sns.axes_style("whitegrid"):
    f, ax = plt.subplots()

    plot_data.plot(ax=ax, label='10-year rolling mean')
    ax.legend()
    ax.set_ylabel('Billion person-days')
    
    

In [None]:
# Stacked bars of exposure to change per year, split by age group.
plot_data = exposures_change.sum(['latitude', 'longitude']).to_dataframe().unstack('age_band_lower_bound')
plot_data.columns = ['infants', 'over 65']
plot_data = plot_data[['over 65', 'infants']]

f, ax = plt.subplots(figsize=(6, 2.7))
# Fix: values are divided by 1e9 so they match the 'Billion person-days' axis
# label, as the other person-day charts in this notebook do.
ax = (plot_data / 1e9).plot.bar(stacked=True, width=0.89, ax=ax)
ax.set_ylabel('Billion person-days')
ax.set_title('Exposures of vulnerable populations to \nchange in heatwave occurrence')
ax.legend(title='Age')

# Hatch the pre-2000 bars of each series. The number of such bars is derived
# from the index instead of hard-coded patch positions: the old slices
# ([:20] and [43:63]) assumed 43 years, which is off by one for 1980-2023.
n_pre_2000 = int((plot_data.index < 2000).sum())
for container, (hatch, edge_colour) in zip(ax.containers, (('...', 'C0'), ('xxxx', 'C1'))):
    for patch in container.patches[:n_pre_2000]:
        patch.set_hatch(hatch)
        patch.set_edgecolor(edge_colour)
        patch.set_facecolor('w')

plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.png')
plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.pdf')

## Absolute exposures

In [None]:
# Stacked bars of absolute exposure per year, split by age group.
plot_data = exposures_abs.sum(['latitude', 'longitude']).to_dataframe().unstack('age_band_lower_bound')

plot_data.columns = ['infants', 'over 65']
plot_data = plot_data[['over 65', 'infants']]

f, ax = plt.subplots(figsize=(6, 2.7))

# Fix: values are divided by 1e9 so they match the 'Billion person-days' axis
# label, as the other person-day charts in this notebook do.
ax = (plot_data / 1e9).plot.bar(stacked=True, width=0.89, ax=ax)
ax.set_ylabel('Billion person-days')
ax.set_title('Exposures of vulnerable populations to heatwaves')
ax.legend(title='Age ')

# Hatch the pre-2000 bars of each series. The number of such bars is derived
# from the index instead of hard-coded patch positions: the old slices
# ([:20] and [43:63]) assumed 43 years, which is off by one for 1980-2023.
n_pre_2000 = int((plot_data.index < 2000).sum())
for container, (hatch, edge_colour) in zip(ax.containers, (('...', 'C0'), ('xxxx', 'C1'))):
    for patch in container.patches[:n_pre_2000]:
        patch.set_hatch(hatch)
        patch.set_edgecolor(edge_colour)
        patch.set_facecolor('w')

# Deliberately not saved: the 'heatwave person-days hybrid w newborn' file
# name is already used by the exposure-to-change chart.
# plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.png')
# plt.savefig(RESULTS_FOLDER / f'heatwave person-days hybrid w newborn 1980-{MAX_YEAR}.pdf')

## By country

In [None]:
sns.color_palette("Paired")

In [None]:
_namelookup = COUNTRIES_LOOKUP.set_index('ISOCODE').NAME0.to_dict()
_namelookup['Other'] = 'Rest of World'

In [None]:
c = sns.color_palette("Paired")

In [None]:
# Fixed position in the "Paired" palette `c` for each country code, so a
# country keeps the same colour in every chart.
_palette_slot = {
    'CHN': 5, 'EGY': 6, 'IDN': 7, 'IND': 3, 'JPN': 8,
    'NGA': 11, 'Other': 0, 'USA': 1, 'ITA': 10,
}
consistent_colors = {_code: c[_slot] for _code, _slot in _palette_slot.items()}

In [None]:
# Five countries with the largest mean over-65 exposure to change in 2015-2020.
var = 'heatwaves_days'
_mean_2015_2020 = (country_exposure_change[var]
                   .sel(year=slice(2015, 2020), age_band_lower_bound=65, drop=True)
                   .mean(dim='year')
                   .to_dataframe())
_ranked = _mean_2015_2020.sort_values(by=var, ascending=False)
top_codes = _ranked.head(5)[var].index.to_list()

In [None]:
top_codes

In [None]:
var = 'heatwaves_days'
age_band = 65

# `top_codes` is the list computed in the previous cell.
# Fix: the country table is `country_exposure_change`; the name
# `country_exposure` used before is never defined in this notebook.
results = (country_exposure_change[var]
           .sel(country=top_codes, age_band_lower_bound=age_band, drop=True)
           .to_dataframe()[var]
           .unstack().T)
# The global total minus the top countries gives the 'Other' category.
# Fix: the global totals are `total_exposures_change_over65`, not the
# undefined `total_exposures_over65`.
results['Other'] = total_exposures_change_over65[var] - results.sum(axis=1)
# Invert the column order.
results = results[results.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(results / 1e9).plot.bar(stacked=True,
                         width=0.9,
                         ax=ax,
                         color=consistent_colors
                        )

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of over 65s to \nchange in heatwave occurrence',
)
ax.xaxis.set_tick_params(labelsize='small')
ax.yaxis.set_tick_params(labelsize='small')

# Manually order the legend. Codes missing from the plot are skipped and
# unexpected ones appended, so a change in the top five cannot raise KeyError.
handles, labels = ax.get_legend_handles_labels()
d = dict(zip(labels, handles))
preferred_order = ['CHN', 'IND', 'JPN', 'USA', 'IDN', 'Other']
iso_codes = ([code for code in preferred_order if code in d]
             + [code for code in labels if code not in preferred_order])

ordered_handles = [d[code] for code in iso_codes]
# Fall back to the raw code when the lookup has no display name for it.
ordered_labels = [_namelookup.get(code, code) for code in iso_codes]

ax.legend(ordered_handles, ordered_labels, fontsize='small')

f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.png')
f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.pdf')
# Display the table (fix: this was the undefined name `result`).
results


In [None]:
var = 'heatwaves_days'
age_band = 0

# Five countries with the largest mean infant exposure to change in 2015-2020.
# Fix: the country table is `country_exposure_change`; the name
# `country_exposure` used before is never defined in this notebook.
top_codes = (country_exposure_change[var]
             .sel(year=slice(2015, 2020), age_band_lower_bound=age_band, drop=True)
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=var, ascending=False)
             .head(5)[var].index.to_list()
            )

results = (country_exposure_change[var]
           .sel(country=top_codes, age_band_lower_bound=age_band, drop=True)
           .to_dataframe()[var]
           .unstack().T)

# The global total minus the top countries gives the 'Other' category.
# Fix: the global totals are `total_exposures_change_infants`, not the
# undefined `total_exposures_infants`.
results['Other'] = total_exposures_change_infants[var] - results.sum(axis=1)
# Invert the column order.
results = results[results.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(results / 1e9).plot.bar(stacked=True,
                         width=0.9,
                         ax=ax,
                         color=consistent_colors
                        )

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of infants to \nchange in heatwave occurrence',
)
ax.xaxis.set_tick_params(labelsize='small')
ax.yaxis.set_tick_params(labelsize='small')

# Manually order the legend. Codes missing from the plot are skipped and
# unexpected ones appended, so a change in the top five cannot raise KeyError.
handles, labels = ax.get_legend_handles_labels()
d = dict(zip(labels, handles))
preferred_order = ['IND', 'CHN', 'IDN', 'EGY', 'NGA', 'Other']
iso_codes = ([code for code in preferred_order if code in d]
             + [code for code in labels if code not in preferred_order])

ordered_handles = [d[code] for code in iso_codes]
# Fall back to the raw code when the lookup has no display name for it.
ordered_labels = [_namelookup.get(code, code) for code in iso_codes]

ax.legend(ordered_handles, ordered_labels, fontsize='small')

f.savefig(RESULTS_FOLDER / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.png')
f.savefig(RESULTS_FOLDER / f'hw_exposure_infants_countries_1980-{MAX_YEAR}.pdf')


In [None]:
# Country exposure to change summed over both age bands.
# Fix: `country_exposure` is never defined in this notebook; the table loaded
# above is `country_exposure_change`.
country_exposure_allages = country_exposure_change.sum('age_band_lower_bound')

In [None]:
var = 'heatwaves_days'
# Five countries with the largest mean all-ages exposure to change in 2015-2020.
top_codes = (country_exposure_allages[var]
             .sel(year=slice(2015, 2020))
             .mean(dim='year')
             .to_dataframe()
             .sort_values(by=var, ascending=False)
             .head(5)[var]
             .index
             .to_list()
)
results = (country_exposure_allages[var]
           .sel(country=top_codes)
           .to_dataframe()[var]
           .unstack()
           .T)
# The global total (both age groups) minus the top countries gives 'Other'.
# Fix: the global totals are `total_exposures_change_over65` and
# `total_exposures_change_infants`; the names without `_change` are never
# defined in this notebook.
_global_total = total_exposures_change_over65[var] + total_exposures_change_infants[var]
results['Other'] = _global_total - results.sum(axis=1)
# Invert the column order.
results = results[results.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(results / 1e9).plot.bar(stacked=True,
                         width=0.9,
                         ax=ax,
                         color=consistent_colors
                        )

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of over 65s and newborns to \nchange in heatwave occurrence',
)
ax.xaxis.set_tick_params(labelsize='small')
ax.yaxis.set_tick_params(labelsize='small')

# Manually order the legend. Codes missing from the plot are skipped and
# unexpected ones appended, so a change in the top five cannot raise KeyError.
handles, labels = ax.get_legend_handles_labels()
d = dict(zip(labels, handles))
preferred_order = ['CHN', 'IND', 'JPN', 'USA', 'IDN', 'Other']
iso_codes = ([code for code in preferred_order if code in d]
             + [code for code in labels if code not in preferred_order])

ordered_handles = [d[code] for code in iso_codes]
# Fall back to the raw code when the lookup has no display name for it.
ordered_labels = [_namelookup.get(code, code) for code in iso_codes]

ax.legend(ordered_handles, ordered_labels, fontsize='small')

f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_newborn_countries_1980-{MAX_YEAR}.png')
f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_newborn_countries_1980-{MAX_YEAR}.pdf')


In [None]:
# Five countries with the largest mean over-65 absolute exposure in 2015-2020.
var = 'exposures_total'
_mean_2015_2020 = (country_exposure_abs[var]
                   .sel(year=slice(2015, 2020), age_band_lower_bound=65, drop=True)
                   .mean(dim='year')
                   .to_dataframe())
_ranked = _mean_2015_2020.sort_values(by=var, ascending=False)
top_codes = _ranked.head(5)[var].index.to_list()

In [None]:
var = 'exposures_total'
age_band = 65

# Global total for the age band, used to derive the 'Other' column.
_total_exposures = (exposures_abs
                    .sum(['latitude', 'longitude'])
                    .sel(age_band_lower_bound=age_band, drop=True)
                    .to_dataframe()
                    .heatwaves_days)

# Boolean selection keeps the top countries in the dataset's own order.
_is_top_country = country_exposure_abs.country.isin(top_codes)
plot_data = (country_exposure_abs[var]
             .sel(country=_is_top_country)
             .sel(age_band_lower_bound=age_band, year=slice(1980, None), drop=True)
             .to_dataframe()[var]
             .unstack()
             .T)
# The global total minus the selected countries gives the 'Other' category.
plot_data['Other'] = _total_exposures - plot_data.sum(axis=1)
# Invert the column order.
plot_data = plot_data[plot_data.columns[::-1]]

f, ax = plt.subplots(figsize=(6.2, 2.5))
(plot_data / 1e9).plot.bar(stacked=True, width=0.9, ax=ax, color=consistent_colors)

ax.set(
    xlabel='Year',
    ylabel='Billion person-days',
    title='Exposures of over 65s to heatwaves',
)
for _axis in (ax.xaxis, ax.yaxis):
    _axis.set_tick_params(labelsize='small')

# Manually order the legend
# handles, labels = ax.get_legend_handles_labels()
# d = dict(zip(labels, handles))
# iso_codes = ['CHN', 'IND', 'JPN', 'USA', 'IDN', 'Other']


# ordered_handles = [d[l] for l in iso_codes]
# ordered_labels = [_namelookup[l] for l in iso_codes]

# ordered_handles = [d[l] for l in iso_codes]
# ax.legend(ordered_handles, ordered_labels, fontsize='small')

# f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.png')
# f.savefig(RESULTS_FOLDER / f'hw_exposure_over65_countries_1980-{MAX_YEAR}.pdf')


## HDI and WHO plots for appendix

In [None]:
# 10-year rolling mean of weighted exposure by HDI class, one facet per age group.
_hdi_column_names = {
    'age_band_lower_bound': 'Age group',
    'exposures_weighted': 'Heatwave days',
    'level_of_human_development': 'HDI class',
}
_hdi_rolling = hdi_exposure.exposures_weighted.rolling(year=10).mean()
plot_data = _hdi_rolling.to_dataframe().reset_index().rename(columns=_hdi_column_names)

# `ax` is the object returned by `sns.relplot`; its `.figure` is used for the
# title and for saving.
ax = sns.relplot(data=plot_data, kind='line',
                 x='year', y='Heatwave days',
                 col='Age group', hue='HDI class',
                 facet_kws={'sharey': True})
ax.figure.suptitle('10 year rolling mean of population-weighted heatwave days by HDI category', y=1.02)
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_by_hdi.png')

In [None]:
# 10-year rolling mean of weighted exposure by WHO region, one facet per age group.
_who_column_names = {
    'age_band_lower_bound': 'Age group',
    'exposures_weighted': 'Heatwave days',
    'who_region': 'WHO region',
}
_who_rolling = who_exposure.exposures_weighted.rolling(year=10).mean()
plot_data = _who_rolling.to_dataframe().reset_index().rename(columns=_who_column_names)

# `ax` is the object returned by `sns.relplot`; its `.figure` is used for the
# title and for saving.
ax = sns.relplot(data=plot_data, kind='line',
                 x='year', y='Heatwave days',
                 col='Age group', hue='WHO region',
                 facet_kws={'sharey': True})
ax.figure.suptitle('10 year rolling mean of population-weighted heatwave days by WHO Region', y=1.02)
ax.figure.savefig(RESULTS_FOLDER / 'heatwave_days_by_who.png')