# Constraining FaIR

This notebook replicates `notebooks/160_WG3_constrain_fair_samples.ipynb`
from https://github.com/IPCC-WG1/Chapter-7.

The unconstrained FaIR samples must first be produced with the Chapter 7 code;
this notebook assumes they are available as the following NetCDF files.
```
./datain/fair_samples_unconstrained.nc
./datain/fair_samples_unconstrained_1pctCO2.nc
```

In [1]:
import json
import numpy as np
import pandas as pd
import scipy.stats as stats
import openpyxl
from netCDF4 import Dataset

In [3]:
from src.util import RetrieveGitHub, retrieve_url, dffilter, df2nc

In [4]:
# GitHub coordinates of the Chapter 7 repository that supplies the reference data.
owner, repo = 'IPCC-WG1', 'Chapter-7'
# Retriever bound to that repository, with './datain' as its third argument
# (the INFO log lines in this notebook show files being used from datain/<owner>/<repo>/...).
repo_ch7 = RetrieveGitHub(owner, repo, './datain')

## Reference data

In [5]:
# Repository-relative name of the GMST/GSAT assessment workbook; `path` ends up
# holding whatever retrieve() returns (the log line below reports a local copy).
path = repo_ch7.retrieve(
    'data_input/observations/AR6 FGD assessment time series - GMST and GSAT.xlsx'
)

[2024-06-23 14:23:33 mce.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/observations/AR6 FGD assessment time series - GMST and GSAT.xlsx retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/observations/AR6%20FGD%20assessment%20time%20series%20-%20GMST%20and%20GSAT.xlsx on 2024-06-20


In [6]:
# Read the observed GMST table from the assessment workbook.
# In read-only mode openpyxl keeps the file open and yields rows lazily, so the
# workbook must be closed explicitly; try/finally guarantees that happens even
# if the sheet name, the cell range or the DataFrame construction fails.
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
try:
    ws = wb['GMST data sets and changes']
    rows = ws.iter_rows(
        min_row=2, # 1-based index; this first row is consumed as the header
        max_row=2+(2020 - 1850 + 1),  # header row + one row per year 1850..2020
        min_col=12,
        max_col=20,
        values_only=True,
    )
    # The first yielded row holds the column labels; its first cell is replaced
    # by 'Year' so the year column can serve as the index.
    columns = next(rows)
    df_gmst_obs = (
        pd.DataFrame(list(rows), columns=('Year',) + columns[1:])
        .dropna(axis=1)  # drop every column that has any missing value
        .set_index('Year')
    )
finally:
    wb.close()

In [7]:
# Fetch the ocean-heat-content ensemble CSV from the Chapter 7 repository;
# `path` is rebound to the value returned by retrieve().
path = repo_ch7.retrieve(
    'data_input/observations/AR6_OHC_ensemble_FGDprelim.csv'
)

[2024-06-23 14:23:45 mce.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/observations/AR6_OHC_ensemble_FGDprelim.csv retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/observations/AR6_OHC_ensemble_FGDprelim.csv on 2024-06-20


In [8]:
# Load the OHC table: the first line of the file is skipped and the first
# column becomes the index (it is looked up with 1971.5 / 2018.5 further down).
df_ohc_obs = pd.read_csv(path, skiprows=1, index_col=0)

## Constraining process

### 1. GSAT 1995-2014

In [9]:
# Open the unconstrained FaIR sample file. The handle is shared by the
# temperature, OHU and CO2 steps and is closed explicitly after the CO2 step.
ncf_in = Dataset('./datain/fair_samples_unconstrained.nc')

In [10]:
# Wrap the 'Year' variable in a pandas Index so get_loc() can translate a
# calendar year into a position along the year axis of the other variables.
idx_year = pd.Index(ncf_in.variables['Year'][:])

In [11]:
# Positional window covering 1850..2014 inclusive (slice stops are exclusive,
# hence the +1 on the 2014 position).
i_first = idx_year.get_loc(1850)
i_last = idx_year.get_loc(2014)
slc = slice(i_first, i_last + 1)

# One row per entry along the first axis of 'T', one column per year.
df = pd.DataFrame(ncf_in.variables['T'][:, slc], columns=idx_year[slc])

# Express each row as an anomaly relative to its own 1850-1900 mean.
baseline_1850_1900 = df.loc[:, 1850:1900].mean(axis=1)
df_gsat = df.sub(baseline_1850_1900, axis=0)

In [12]:
# Observed series used as the reference ('4-set mean' column, 1850-2014).
obs_4set = df_gmst_obs.loc[1850:2014, '4-set mean']
# Per-year difference between each simulated row and the observations
# (the Series is aligned against the year columns).
df = df_gsat.sub(obs_4set)
# Root-mean-square error per row: squared differences summed over the year
# columns, divided by the number of columns, then square-rooted.
sum_sq = (df**2).sum(axis=1)
d_rmse_temp = np.sqrt(sum_sq / df.shape[1])

In [13]:
# Acceptance threshold on the RMSE computed above; a sample passes the
# temperature constraint when its RMSE is strictly below this value.
rmse_temp_crit = 0.135
accept_temp = d_rmse_temp < rmse_temp_crit
# Number of samples that pass.
accept_temp.sum()

311968

In [14]:
# Three accumulators filled in step by step:
#   df_ind    - the indicator value computed for each constraint
#   df_accept - the corresponding boolean pass/fail flag
#   gatts     - thresholds and reference values, later passed to df2nc as `gatts`
df_ind = pd.DataFrame(dict(temp=d_rmse_temp))
df_accept = pd.DataFrame(dict(temp=accept_temp))
gatts = dict(rmse_temp_crit=rmse_temp_crit)

### 2. Ocean heat uptake from 1971 to 2018

In [15]:
# Change in the 'OHU' variable between 1971 and 2018 for every sample,
# scaled by 1e-21.
# NOTE(review): presumably converts J to ZJ to match the observations - confirm units.
ohu_var = ncf_in.variables['OHU']
ohu_2018 = ohu_var[:, idx_year.get_loc(2018)]
ohu_1971 = ohu_var[:, idx_year.get_loc(1971)]
d_ohu = pd.Series(ohu_2018 - ohu_1971) * 1e-21

In [16]:
# Column labels of the observed OHC table.
name_central = 'Central Estimate Full-depth'
name_unc = 'Full-depth Uncertainty (1-sigma)'

# Observed change between the rows labelled 1971.5 and 2018.5.
d_ohu_obs = (
    df_ohc_obs.loc[2018.5, name_central]
    - df_ohc_obs.loc[1971.5, name_central]
)

# Uncertainty of that difference: the two 1-sigma values combined in quadrature.
unc_1971 = df_ohc_obs.loc[1971.5, name_unc]
unc_2018 = df_ohc_obs.loc[2018.5, name_unc]
d_ohu_obs_unc = np.sqrt(unc_1971**2 + unc_2018**2)

In [17]:
# Assumed factor of 0.90 applied to the simulated value before it is compared
# with the observed ocean heat change.
ohu_rate = 0.90
ohu_scaled = ohu_rate * d_ohu
# Open interval: observed change +/- its combined 1-sigma uncertainty.
ohu_lower = d_ohu_obs - d_ohu_obs_unc
ohu_upper = d_ohu_obs + d_ohu_obs_unc
accept_ohu = (ohu_scaled > ohu_lower) & (ohu_scaled < ohu_upper)
# Number of samples that pass.
accept_ohu.sum()

322738

In [18]:
# Record the OHU indicator (unscaled, i.e. before ohu_rate is applied), its
# pass/fail flag, and the parameters needed to reproduce the test.
df_ind['ohu'] = d_ohu
df_accept['ohu'] = accept_ohu
gatts.update(
    ohu_rate=ohu_rate,
    ohu_obs=d_ohu_obs,
    ohu_obs_unc=d_ohu_obs_unc,
)

### 3. CO2 concentrations in 2014

In [19]:
# RCMIP v5.1.0 annual-mean concentration file. retrieve_url() receives the
# local target path first and the source URL second; `path` is whatever it
# returns (the log line below reports a local copy under datain/rcmip/).
fn = 'rcmip-concentrations-annual-means-v5-1-0.csv'
path = retrieve_url(
    f'./datain/rcmip/{fn}',
    f'https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/{fn}',
)

[2024-06-23 14:27:47 mce.util] INFO:Use local file datain/rcmip/rcmip-concentrations-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-concentrations-annual-means-v5-1-0.csv on 2024-06-20


In [20]:
# Read the RCMIP concentration table as-is; it is re-indexed in the next cell.
# Note that `df` is reused here and no longer refers to the temperature frame.
df = pd.read_csv(path)

In [21]:
# The first seven columns are taken as identifier columns and moved into the
# index; the remaining column labels are converted with int() so that a year
# can be selected as an integer.
# Not idempotent: re-running this cell on the already re-indexed frame would
# pick seven of the year columns instead.
id_vars = list(df.columns[:7])
df = df.set_index(id_vars).rename(columns=int)

In [22]:
# Reference CO2 concentration for 2014: select the ssp245 / World entry whose
# Variable ends in 'CO2', take its 2014 column, and squeeze the one-element
# result down to a scalar (see the value displayed below).
# NOTE(review): keyword matching is implemented by dffilter in src.util - confirm its semantics there.
co2_obs_2014 = dffilter(
    df,
    Scenario='ssp245',
    Region='World',
    Variable=lambda x: x.endswith('CO2'),
).loc[:, 2014].squeeze()
co2_obs_2014

397.5469792683919

In [23]:
# Simulated 'C_CO2' value of every sample at the position of year 2014.
d_co2_2014 = pd.Series(
    ncf_in.variables['C_CO2'][:, idx_year.get_loc(2014)]
)

In [24]:
# Half-width of the acceptance window around the 2014 reference concentration.
co2_obs_2014_unc = 0.36
co2_lower = co2_obs_2014 - co2_obs_2014_unc
co2_upper = co2_obs_2014 + co2_obs_2014_unc
# A sample passes when its 2014 CO2 lies strictly inside the window.
accept_co2 = (d_co2_2014 > co2_lower) & (d_co2_2014 < co2_upper)
# Number of samples that pass.
accept_co2.sum()

21560

In [25]:
# Record the CO2 indicator, its pass/fail flag, and the window parameters.
df_ind['co2'] = d_co2_2014
df_accept['co2'] = accept_co2
gatts.update(
    co2_obs=co2_obs_2014,
    co2_obs_unc=co2_obs_2014_unc,
)

In [26]:
# Done with the unconstrained sample file opened at the start of step 1.
ncf_in.close()

### 4. Airborne fraction

In [32]:
# A sample is kept only if it passes the temperature, OHU and CO2 tests
# together (element-wise AND of the three boolean flag arrays).
passes_all_three = (
    df_accept['temp'].values
    & df_accept['ohu'].values
    & df_accept['co2'].values
)
df_accept['temp_ohu_co2'] = pd.Series(passes_all_three)

In [33]:
# Number of samples passing all three constraints so far.
df_accept['temp_ohu_co2'].sum()

3751

In [34]:
# Index labels of the samples that passed the first three constraints,
# in ascending order of appearance.
d1 = df_accept['temp_ohu_co2']
accept_inds = d1[d1].index.values
len(accept_inds)

3751

In [35]:
# Fetch the random seed list from the Chapter 7 repository; one of its entries
# seeds the airborne-fraction rejection sampling below.
path = repo_ch7.retrieve('data_input/random_seeds.json')

[2024-06-23 14:32:54 mce.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/random_seeds.json retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/random_seeds.json on 2024-06-12


In [36]:
# Parse the seed file; SEEDS is indexed by position further down (SEEDS[79]).
with path.open() as f1:
    SEEDS = json.load(f1)

In [37]:
# Read the 'af140' variable (the airborne-fraction indicator used in this
# section) from the 1pctCO2 sample file.
# The dataset is opened as a context manager so the file is closed even if the
# variable lookup or the read raises; after the block `ncf_in` refers to the
# closed dataset, exactly as it did after the explicit close().
with Dataset('./datain/fair_samples_unconstrained_1pctCO2.nc') as ncf_in:
    d_af140 = pd.Series(ncf_in.variables['af140'][:])

In [38]:
# Ensemble size, taken from the length of the airborne-fraction series; it
# sets how many uniform random numbers are drawn in the next cell.
SAMPLES = len(d_af140)
SAMPLES

1000000

In [39]:
# One uniform(0, 1) draw per sample, reproducible through the stored seed.
accept_prob = stats.uniform.rvs(loc=0, scale=1, size=SAMPLES, random_state=SEEDS[79])
# Target distribution for the airborne fraction and its density at the mean,
# used to normalise the density so that the peak has acceptance ratio 1.
norm_af140 = stats.norm(loc=.597, scale=.049)
pdf_ref = norm_af140.pdf(0.597)

# Rejection sampling restricted to the samples that passed the first three
# constraints. The k-th surviving sample is compared with the k-th uniform
# draw (a prefix of accept_prob), not with the draw at its own index.
n_candidates = len(accept_inds)
af_ratio = norm_af140.pdf(d_af140.loc[accept_inds].values) / pdf_ref
accept_af = af_ratio >= accept_prob[:n_candidates]

In [40]:
# Number of candidates that also pass the airborne-fraction test.
accept_af.sum()

2237

In [42]:
# Start from a copy of the combined temp/ohu/co2 mask and overwrite only its
# True entries with the airborne-fraction outcome, so that d1 flags samples
# passing all four constraints. accept_af is ordered like accept_inds, i.e.
# like the True positions of the mask.
d1 = df_accept['temp_ohu_co2'].copy()
d1.loc[d1] = accept_af
# Must equal accept_af.sum() shown above.
d1.sum()

2237

In [44]:
# Record the airborne-fraction indicator, the uniform draws used for the
# rejection test (needed to reproduce it), and the four-constraint flag.
df_ind['af'] = d_af140
df_ind['accept_prob'] = accept_prob
df_accept['af_combined'] = d1

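The following step is not part of the Chapter 7 approach; it applies the airborne-fraction test to every sample on its own (`af_single`), for consistency with the other single-constraint flags (`temp`, `ohu`, `co2`).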

In [45]:
# Airborne-fraction test applied to every sample independently: each sample is
# compared with the uniform draw at its own position.
af_ratio_all = norm_af140.pdf(d_af140.values) / pdf_ref
df_accept['af_single'] = pd.Series(af_ratio_all >= accept_prob)

## Save results

In [51]:
# Peek at the first row of the indicator table before writing it out.
df_ind.iloc[:1]

Unnamed: 0,temp,ohu,co2,af,accept_prob
0,0.136237,397.316364,419.263371,0.723033,0.808039


In [54]:
# Create the indicator file from a one-row frame whose axes are named
# 'Member' (rows) and 'Variable' (columns), passing the recorded thresholds as
# `gatts`; the consistency check reads them back with getncattr().
# NOTE(review): presumably df2nc defines one variable per column along a growable 'Member' dimension - confirm in src.util.
path_out = './dataout/constraining_fair_indicators.nc'
df2nc(
    path_out,
    df_ind.iloc[:1].rename_axis('Member').rename_axis(columns='Variable'),
    {}, gatts=gatts,
)
# Reopen the new file in read/write mode; the full columns are written in the
# next cell and the handle is closed in the cell after that.
ncf = Dataset(path_out, 'r+')

[2024-06-23 14:51:22 __main__] INFO:Old dataout/constraining_fair_indicators.nc is deleted
[2024-06-23 14:51:22 __main__] INFO:dataout/constraining_fair_indicators.nc is created


In [55]:
# Write the full member axis, then every indicator column, into the file
# variable of the same name.
ncf.variables['Member'][:] = df_ind.index.values

for name in df_ind.columns:
    d1 = df_ind[name]
    ncf.variables[name][:] = d1.values

In [56]:
# Close the indicator file so the written data reach disk.
ncf.close()

In [66]:
# Boolean pass/fail flags per sample (pandas truncates the display).
df_accept

Unnamed: 0,temp,ohu,co2,temp_ohu_co2,af_combined,af_single
0,False,True,False,False,False,False
1,False,False,False,False,False,True
2,False,True,False,False,False,True
3,False,False,False,False,False,False
4,True,True,False,False,False,False
...,...,...,...,...,...,...
999995,True,False,False,False,False,True
999996,True,True,False,False,False,True
999997,True,False,False,False,False,True
999998,True,True,False,False,False,True


In [67]:
# Convert the boolean flags to 0/1 stored as int8 before writing them out.
# Rebinds df_accept; re-running the cell is harmless.
df_accept = df_accept.astype('int8')

In [68]:
# Same table after the int8 conversion: False/True now appear as 0/1.
df_accept

Unnamed: 0,temp,ohu,co2,temp_ohu_co2,af_combined,af_single
0,0,1,0,0,0,0
1,0,0,0,0,0,1
2,0,1,0,0,0,1
3,0,0,0,0,0,0
4,1,1,0,0,0,0
...,...,...,...,...,...,...
999995,1,0,0,0,0,1
999996,1,1,0,0,0,1
999997,1,0,0,0,0,1
999998,1,1,0,0,0,1


In [69]:
# Create the acceptance-flag file from a one-row frame, with the same axis
# names as the indicator file; no extra attributes are passed here.
# NOTE(review): relies on df2nc (src.util) for the file layout - confirm there.
path_out = './dataout/constraining_fair_accept.nc'
df2nc(
    path_out,
    df_accept.iloc[:1].rename_axis('Member').rename_axis(columns='Variable'),
    {},
)
# Reopen in read/write mode; the full columns are written in the next cell.
ncf = Dataset(path_out, 'r+')

[2024-06-23 14:56:31 __main__] INFO:Old dataout/constraining_fair_accept.nc is deleted
[2024-06-23 14:56:31 __main__] INFO:dataout/constraining_fair_accept.nc is created


In [70]:
# Write the full member axis, then every flag column, into the file variable
# of the same name.
ncf.variables['Member'][:] = df_accept.index.values

for name in df_accept.columns:
    d1 = df_accept[name]
    ncf.variables[name][:] = d1.values

In [71]:
# Close the acceptance-flag file so the written data reach disk.
ncf.close()

## Consistency check

In [92]:
# Reopen both output files so the checks below run against what was actually
# written to disk rather than the in-memory frames.
ncf_accept = Dataset('./dataout/constraining_fair_accept.nc')
ncf_ind = Dataset('./dataout/constraining_fair_indicators.nc')

In [100]:
# Re-derive the temperature flag from the stored RMSE and the stored threshold
# attribute, and compare it with the stored flag for every sample.
d1 = ncf_ind.variables['temp'][:].filled()
rmse_temp_crit = ncf_ind.getncattr('rmse_temp_crit')
stored_temp = ncf_accept['temp'][:].filled().astype('bool')
np.all((d1 < rmse_temp_crit) == stored_temp)

True

In [107]:
# Re-derive the OHU flag from the stored indicator and the stored attributes,
# and compare it with the stored flag for every sample.
ohu_rate = ncf_ind.getncattr('ohu_rate')
ohu_obs = ncf_ind.getncattr('ohu_obs')
ohu_obs_unc = ncf_ind.getncattr('ohu_obs_unc')
d1 = ncf_ind.variables['ohu'][:].filled()
above_lower = ohu_rate * d1 > (ohu_obs - ohu_obs_unc)
below_upper = ohu_rate * d1 < (ohu_obs + ohu_obs_unc)
stored_ohu = ncf_accept['ohu'][:].filled().astype('bool')
np.all((above_lower & below_upper) == stored_ohu)

True

In [110]:
# Re-derive the CO2 flag from the stored indicator and the stored attributes,
# and compare it with the stored flag for every sample.
co2_obs = ncf_ind.getncattr('co2_obs')
co2_obs_unc = ncf_ind.getncattr('co2_obs_unc')
d1 = ncf_ind.variables['co2'][:].filled()
inside_window = (d1 > co2_obs - co2_obs_unc) & (d1 < co2_obs + co2_obs_unc)
stored_co2 = ncf_accept['co2'][:].filled().astype('bool')
np.all(inside_window == stored_co2)

True

In [112]:
# Take the uniform draws back from the file and rebuild the airborne-fraction
# target distribution with the same parameters as in section 4, so the two
# checks below do not depend on objects left over from the constraining step.
accept_prob = ncf_ind.variables['accept_prob'][:].filled()
norm_af140 = stats.norm(loc=.597, scale=.049)
pdf_ref = norm_af140.pdf(0.597)

In [115]:
# Re-derive the stand-alone airborne-fraction flag (each sample against the
# draw at its own position) and compare it with the stored 'af_single' flag.
d1 = ncf_ind.variables['af'][:].filled()
recomputed_single = norm_af140.pdf(d1) / pdf_ref >= accept_prob
stored_single = ncf_accept['af_single'][:].filled().astype('bool')
np.all(recomputed_single == stored_single)

True

In [124]:
# Re-derive the combined flag for the samples that passed temp/ohu/co2. As in
# the constraining step, the k-th such sample is compared with the k-th
# uniform draw (a prefix of accept_prob). d1 still holds the 'af' indicator
# loaded in the previous cell.
bl = ncf_accept.variables['temp_ohu_co2'][:].filled().astype('bool')
af_candidates = d1[bl]
recomputed_combined = (
    norm_af140.pdf(af_candidates) / pdf_ref
    >= accept_prob[:len(af_candidates)]
)
stored_combined = ncf_accept['af_combined'][:].filled().astype('bool')[bl]
np.all(recomputed_combined == stored_combined)

True

In [126]:
# Number of accepted samples per flag as stored on disk; the values should
# match the counts displayed during the constraining steps above.
{
    k: ncf_accept[k][:].sum()
    for k in ['temp', 'ohu', 'co2', 'temp_ohu_co2', 'af_single', 'af_combined']
}

{'temp': 311968,
 'ohu': 322738,
 'co2': 21560,
 'temp_ohu_co2': 3751,
 'af_single': 442554,
 'af_combined': 2237}

In [None]:
# Release both files opened for the consistency check.
ncf_ind.close()
ncf_accept.close()