# Constraining process

In [1]:
import yaml
import numpy as np
import pandas as pd
import openpyxl
import scipy.stats as stats
from netCDF4 import Dataset

In [2]:
from src.mh_sampler import sampling
from src.util import RetrieveGitHub, df2nc

In [3]:
# GitHub owner/repository from which the input files used below are retrieved.
owner = 'IPCC-WG1'
repo = 'Chapter-7'
# './datain' is passed as the third argument; presumably the local directory
# where retrieved files are stored (see src.util.RetrieveGitHub to confirm).
repo_ch7 = RetrieveGitHub(owner, repo, './datain')

## Reference data

In [4]:
# Resolve the repository-relative workbook path through the retriever;
# `path` ends up holding whatever `retrieve` returns for it.
path = repo_ch7.retrieve(
    'data_input/observations/AR6 FGD assessment time series - GMST and GSAT.xlsx'
)

[2024-07-07 14:51:12 src.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/observations/AR6 FGD assessment time series - GMST and GSAT.xlsx retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/observations/AR6%20FGD%20assessment%20time%20series%20-%20GMST%20and%20GSAT.xlsx on 2024-06-20


In [5]:
# Read the observed GMST table from the workbook into a Year-indexed DataFrame.
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
try:
    ws = wb['GMST data sets and changes']
    rows = ws.iter_rows(
        min_row=2,  # 1-based index; this first row is consumed as the header below
        max_row=2+(2020 - 1850 + 1),  # header row plus one row per year, 1850-2020
        min_col=12,  # spreadsheet column L
        max_col=20,  # spreadsheet column T
        values_only=True,
    )
    columns = next(rows)
    df_gmst_obs = (
        # The first column is relabelled 'Year'; the others keep their header text
        pd.DataFrame(list(rows), columns=('Year',) + columns[1:])
        .dropna(axis=1)  # discard every column that contains a missing value
        .set_index('Year')
    )
finally:
    # A read-only workbook keeps its file handle open until closed explicitly,
    # so release it even when reading or parsing above fails.
    wb.close()

In [6]:
# Resolve the repository-relative CSV path through the retriever;
# `path` ends up holding whatever `retrieve` returns for it.
path = repo_ch7.retrieve(
    'data_input/observations/AR6_OHC_ensemble_FGDprelim.csv'
)

[2024-07-07 14:51:18 src.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/observations/AR6_OHC_ensemble_FGDprelim.csv retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/observations/AR6_OHC_ensemble_FGDprelim.csv on 2024-06-20


In [7]:
# Load the OHC table: the first line of the file is skipped and the first
# column becomes the index (looked up below with labels such as 1971.5).
df_ohc_obs = pd.read_csv(path, skiprows=1, index_col=0)

In [8]:
name_central = 'Central Estimate Full-depth'
name_unc = 'Full-depth Uncertainty (1-sigma)'

# Observed change of the central estimate between the 1971.5 and 2018.5 rows
d_ohu_obs = df_ohc_obs.at[2018.5, name_central] - df_ohc_obs.at[1971.5, name_central]

# Uncertainty of that difference: the two end-point 1-sigma values added in quadrature
d_ohu_obs_unc = np.sqrt(
    np.square(df_ohc_obs.at[1971.5, name_unc])
    + np.square(df_ohc_obs.at[2018.5, name_unc])
)

## Results from the unconstrained runs

In [9]:
# Open the NetCDF file holding the unconstrained-run results.
ncf = Dataset('./dataout/unconstrained_run.nc')

In [10]:
# Method labels in the order they are stored in the file
method_order = ncf.variables['Method'][:].tolist()
# Column labels for the data frames built below: the file's 'Year' values
columns = pd.Index(ncf.variables['Year'][:])
# Row labels: every (Member, Method) pair, with Method varying fastest.
# NOTE(review): this presumes the data variables are laid out as
# (Member, Method, Year) — confirm against the file's dimensions.
index = pd.MultiIndex.from_product([ncf.variables['Member'][:], method_order])

In [11]:
# Display the method labels read from the file
method_order

['ar6', 'ar6_orig', 'mce-2l', 's21']

In [12]:
# Flatten the 'tg' variable to one row per (Member, Method) and one column per year
df = pd.DataFrame(
    ncf.variables['tg'][:].reshape(-1, len(columns)),
    index=index,
    columns=columns,
)
# Express each row as an anomaly relative to its own 1850-1900 mean
df_gsat = df.sub(df.loc[:, 1850:1900].mean(axis=1), axis='index')

In [13]:
# Flatten the 'ohc' variable to one row per (Member, Method) and one column per year
df = pd.DataFrame(
    ncf.variables['ohc'][:].reshape(-1, len(columns)),
    index=index,
    columns=columns,
)
ohu_rate = 0.90  # Use an assumed factor of 0.90
# Change between the 1971 and 2018 columns, scaled by the assumed factor
d_ohu = (df[2018] - df[1971]) * ohu_rate

## Constraining based on AR6 Chapter 7

In [14]:
# Yearly difference between each simulated series and the observed '4-set mean', 1850-2014
df = df_gsat.loc[:, 1850:2014] - df_gmst_obs.loc[1850:2014, '4-set mean']
# Root-mean-square error per row over the number of year columns
d_rmse_temp = np.sqrt(df.pow(2).sum(axis=1) / df.shape[1])
rmse_temp_crit = 0.135
# A row is accepted when its RMSE is strictly below the threshold
accept_temp = d_rmse_temp.lt(rmse_temp_crit)

In [15]:
# Accept rows whose simulated change lies strictly inside
# observed change +/- its combined 1-sigma uncertainty
accept_ohu = (
    d_ohu.gt(d_ohu_obs - d_ohu_obs_unc)
    & d_ohu.lt(d_ohu_obs + d_ohu_obs_unc)
)

In [16]:
# One boolean column per criterion, plus 'all' where both criteria hold
df_accept = pd.DataFrame({
    'temp': accept_temp,
    'ohu': accept_ohu,
    'all': accept_temp & accept_ohu,
})

In [17]:
# Number of accepted rows per method (index level 1) for each criterion
df_accept.groupby(level=1).sum()

Unnamed: 0,temp,ohu,all
ar6,11521,21843,8528
ar6_orig,13591,17925,8141
mce-2l,7925,20206,6078
s21,11241,21510,8257


In [18]:
# Save the per-row acceptance flags
df_accept.to_csv('./dataout/constraining_ar6_accept.csv')

## Alternative MH independent sampler

In [19]:
def asymmetric_gaussian(central, unc90, size=10000, random_state=1):
    """
    Returns asymmetric gaussian series for a central value and 90% uncertainty range

    Samples are drawn from a normal distribution centred on 1 whose 95th
    percentile matches ``unc90[1] / central``. Draws at or below 1 are then
    rescaled by the ratio of the lower to the upper half-range, so that the
    lower tail matches ``unc90[0]``. Finally everything is multiplied by
    ``central``.

    Parameters
    ----------
    central : float
        Central value of the distribution. Must be non-zero because the
        upper bound is normalised by it.
    unc90 : sequence of two floats
        Lower and upper bounds of the 90% range (5th and 95th percentiles).
    size : int, default 10000
        Number of samples to draw.
    random_state : default 1
        Passed to ``scipy.stats.norm.rvs``; the same value reproduces the
        same samples.

    Returns
    -------
    pandas.Series
        ``size`` samples with a default integer index.
    """
    # Number of standard deviations between the median and the 95th percentile
    NINETY_TO_ONESIGMA = stats.norm.ppf(0.95)
    d1 = stats.norm.rvs(
        size=size,
        loc=np.ones(size),
        scale=np.ones(size) * (unc90[1]/central-1.) / NINETY_TO_ONESIGMA,
        # Honour the caller's seed (previously hard-coded to 1, so the
        # `random_state` argument had no effect)
        random_state=random_state,
    )
    d1 = pd.Series(d1)
    # Keep draws above 1; stretch/shrink the rest by (lower half-range / upper half-range)
    d1 = d1.where(d1 > 1., (d1 - 1.) * ((central-unc90[0])/(unc90[1]-central)) + 1.)
    return d1.mul(central)

In [20]:
# Indicators computed for every (Member, Method) row, and the target
# distribution each indicator is constrained against.
df_ind = []
map_rv = {}

# Based on AR6-assessed GSAT ranges shown in Cross Chapter Box 2.3, Table 1
name = 'GSAT_1995_2014'
df_ind.append(df_gsat.loc[:, 1995:2014].mean(axis=1).rename(name))
map_rv[name] = asymmetric_gaussian(0.85, (0.67, 0.98))

# Difference between the 1995-2014 mean (already computed above) and the 1961-1990 mean
name = 'GSAT_recent_trend'
df_ind.append(
    (df_ind[0] - df_gsat.loc[:, 1961:1990].mean(axis=1)).rename(name)
)
# Normal target whose variance is the sum of the two period sample variances;
# the 1995-2014 samples are the ones stored in map_rv above.
map_rv[name] = stats.norm(
    loc=0.85 - 0.36,
    scale=np.sqrt(
        asymmetric_gaussian(0.36, (0.22, 0.45)).var()
        + map_rv['GSAT_1995_2014'].var()
    ),
)

name = 'OHU_1971_2018'
df_ind.append(d_ohu.rename(name))
map_rv[name] = stats.norm(loc=d_ohu_obs, scale=d_ohu_obs_unc)

# One column per indicator, rows still indexed by (Member, Method)
df_ind = pd.DataFrame(df_ind).T

In [21]:
def wrap_sampling(df, rv, seed):
    """
    Run `sampling` on one method's indicator table and return the chosen members.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator values for a single group, indexed by a two-level index whose
        second level is dropped here. Must carry a ``name`` attribute
        (set by ``groupby(...).apply``), which is printed as progress output.
    rv : mapping
        Indicator name -> object exposing ``mean()`` and ``var()``.
    seed
        Forwarded to `sampling` as ``seed``.

    Returns
    -------
    pandas.Series
        Labels of ``df``'s remaining index level selected by `sampling`
        (its return value is used to index ``df.index``).
    """
    print(df.name)
    df = df.droplevel(1)
    df1 = df.squeeze()

    if df1.ndim != 1:
        targets = [rv[name_ind] for name_ind in df]
        mean = np.array([target.mean() for target in targets])
        sig2 = np.array([target.var() for target in targets])
        # Target covariance is assumed to be scaled from input covariance
        # with the ratio of target variance to input variance
        cov = df.cov().values
        ratio = np.sqrt(sig2 / cov.diagonal())
        ret = sampling(df, mean, cov * np.outer(ratio, ratio), seed=seed)
    else:
        # Single indicator: scalar target mean and variance
        target = rv[df1.name]
        ret = sampling(df1, target.mean(), target.var(), seed=seed)

    return pd.Series(df.index[ret])

In [22]:
# Random seeds: 'mh' for the sampler run, 'mh_post' for the post-processing draw
map_seed = dict(mh=102, mh_post=103)

In [23]:
# Preview the indicator table for the 'ar6' method, indexed by member only
df_ind.xs('ar6', level=1)

Unnamed: 0,GSAT_1995_2014,GSAT_recent_trend,OHU_1971_2018
0,1.070317,0.709512,514.376014
1,1.615536,1.184818,419.908844
2,0.899945,0.539019,444.462915
3,1.231196,0.788284,453.848101
4,1.046899,0.831918,414.474295
...,...,...,...
49995,0.280946,0.533467,176.515285
49996,1.057469,0.884918,457.181894
49997,0.987590,0.617667,439.979369
49998,1.352421,0.741100,589.054299


In [24]:
# Run the sampler once per method (index level 1), all with the same seed
df_mhout = df_ind.groupby(level=1).apply(
    wrap_sampling,
    rv=map_rv,
    seed=map_seed['mh'],
)

ar6


[2024-07-07 14:54:38 src.mh_sampler] INFO:acceptance rate 0.11044


ar6_orig


[2024-07-07 14:55:31 src.mh_sampler] INFO:acceptance rate 0.09638


mce-2l


[2024-07-07 14:56:24 src.mh_sampler] INFO:acceptance rate 0.07396


s21


[2024-07-07 14:57:16 src.mh_sampler] INFO:acceptance rate 0.1096


In [25]:
# Load the YAML mapping that is handed to `df2nc` when the output file is created
with open('./src/variables.yml') as f1:
    var_atts = yaml.safe_load(f1)

In [30]:
# Name the axes, then move 'Method' from the row index into the columns.
# NOTE: this rebinds `df_ind`, so the cell must not be run twice in a row.
df_ind = (
    df_ind
    .rename_axis(index=['Member', 'Method'], columns='Variable')
    .unstack('Method')
)

In [31]:
path_out = './dataout/constraining_ind.nc'
# Only the first row is passed here; the member axis and the variable data
# are written into the file afterwards through the netCDF4 handle.
# NOTE(review): presumably this just creates the file layout — confirm in src.util.df2nc.
df2nc(path_out, df_ind.iloc[:1], var_atts)

[2024-07-07 15:11:04 src.util] INFO:dataout/constraining_ind.nc is created


In [32]:
# Reopen the newly created file in read/write mode ('r+') to fill in the data
ncf = Dataset(path_out, 'r+')

In [36]:
# Write the full list of member labels (the row index of df_ind)
ncf.variables['Member'][:] = df_ind.index.values

In [44]:
# Method order as stored in the output file; used to order columns when writing
method_order_file = ncf.variables['Method'][:].tolist()

In [49]:
# Write each indicator into its file variable, with the method columns
# arranged in the file's own method order.
# The column level is selected directly instead of using
# `groupby(..., axis=1)`, which is deprecated in recent pandas releases.
for k in df_ind.columns.unique(level='Variable'):
    v = df_ind.xs(k, axis=1, level='Variable')
    ncf.variables[k][:] = v[method_order_file].values

In [50]:
# Close the file handle now that all writes are done
ncf.close()

In [51]:
# Save the sampler output transposed: one column per method
(
    df_mhout
    .rename_axis(index='Method', columns='Member')
    .T
    .to_csv('./dataout/constraining_mhout.csv')
)

## Post processing

Reduce each method's constrained ensemble to 600 members.

In [52]:
# For each method, take the member labels of the first 600 rows whose 'all' flag is True
df_member_1 = pd.DataFrame({
    method: d1[d1].index.get_level_values(0)[:600].values
    for method, d1 in df_accept['all'].groupby(level=1)
})

In [53]:
# Display the selected member labels per method
df_member_1

Unnamed: 0,ar6,ar6_orig,mce-2l,s21
0,2,7,11,7
1,13,11,15,12
2,16,15,22,13
3,22,16,37,15
4,24,24,38,16
...,...,...,...,...
595,3707,3460,4942,3444
596,3724,3477,4957,3451
597,3727,3481,4961,3458
598,3736,3483,4983,3462


In [54]:
# Randomize the constrained members and reduce the size to 600
df_member = {}
np.random.seed(map_seed['mh_post'])

# order changed for consistency with the previous run
for method in ['ar6_orig', 'ar6', 's21', 'mce-2l']:
    d1 = df_mhout.loc[method]
    df_member[method] = pd.Series(np.array(d1)[np.random.randint(0, len(d1), 600)])

df_member = pd.DataFrame(df_member)

In [55]:
# Display the randomly drawn member labels per method
df_member

Unnamed: 0,ar6_orig,ar6,s21,mce-2l
0,24312,7796,22342,9649
1,23899,10266,24905,34909
2,46289,12618,38955,17814
3,16155,25845,6674,45096
4,13573,17894,48938,30940
...,...,...,...,...
595,11904,46701,36618,8243
596,32570,44151,4038,46505
597,11207,30470,47393,41679
598,32012,6294,37515,8265


In [56]:
# Number of unique members
df_member.agg(lambda d1: len(pd.unique(d1)))

ar6_orig    515
ar6         512
s21         543
mce-2l      488
dtype: int64

In [57]:
# Put both selections side by side under a two-level column header:
# (constraining approach, method), with methods in `method_order`
df = pd.concat(
    {
        'ar6_chapter7': df_member_1[method_order],
        'mh_sampler': df_member[method_order],
    },
    axis=1,
)

In [58]:
# Label the axes and save the combined member selections
(
    df
    .rename_axis(index='Sequence', columns=['Constraining', 'Sampling'])
    .to_csv('./dataout/constraining_results.csv')
)