# Sampling thermal parameters

In [1]:
import sys
import json
import yaml
import numpy as np
import pandas as pd
import scipy.stats as stats
from netCDF4 import Dataset

In [2]:
# Add ../mce to the module search path so that `mce` can be imported below.
# mce is available at https://github.com/tsutsui1872/mce
sys.path.append('../mce')

In [3]:
from mce.core.forcing import RfAll

In [4]:
from src.util import RetrieveGitHub, df2nc
from src.tlm import ebm_to_irm, add_ecs_tcr

In [5]:
# Forcing object from mce.core.forcing; used further down to evaluate the
# CO2 forcing at 2x and 4x the reference concentration
obj_rf = RfAll()

## Read calibrated parameters

In [6]:
# Calibrated parameters; the first two CSV columns form the row MultiIndex
# (shown as Method and Dataset in the table below)
df_parms_calib = pd.read_csv('./dataout/parms_calib.csv', index_col=[0, 1])
df_parms_calib

Unnamed: 0_level_0,Unnamed: 1_level_0,q4x,lamg,cdeep,cmix,gamma_2l,eff,tau0,tau1,a0,a1,q2x,co2_beta,ecs,tcr
Method,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ar6,ACCESS-CM2,7.733057,0.699488,93.230040,8.705751,0.542905,1.496768,5.653977,378.010245,0.445999,0.554001,3.680935,,5.262326,2.409353
ar6,ACCESS-ESM1-5,6.745166,0.712660,95.361976,8.381012,0.619427,1.604319,4.820713,375.567394,0.402246,0.597754,3.210699,,4.505233,1.921801
ar6,AWI-CM-1-1-MR,8.168461,1.209335,56.493956,8.200244,0.475052,1.448388,4.264331,189.099361,0.620322,0.379678,3.888188,,3.215145,2.071938
ar6,BCC-CSM2-MR,7.217011,1.143931,64.573536,5.936500,0.872355,1.303708,2.556726,150.247677,0.483884,0.516116,3.435297,,3.003064,1.709459
ar6,BCC-ESM1,6.488241,0.892194,97.662585,8.695826,0.529933,1.368489,5.305895,338.532502,0.537132,0.462868,3.088403,,3.461580,1.871806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mce-2l,NorESM2-LM,6.615321,1.276142,117.625608,3.434206,0.871714,,1.591194,228.208766,0.588413,0.411587,2.808846,1.177587,2.201046,1.390751
mce-2l,NorESM2-MM,7.072782,1.389735,112.052479,3.815150,0.825756,,1.713863,217.356912,0.621318,0.378682,2.924660,1.209163,2.104474,1.390466
mce-2l,SAM0-UNICON,7.255909,0.943384,109.087494,5.775929,0.749171,,3.377058,263.990592,0.545765,0.454235,3.608986,1.005256,3.825574,2.197015
mce-2l,TaiESM1,7.272367,0.811419,87.053832,7.131655,0.689128,,4.670360,237.729205,0.521989,0.478011,3.645495,0.997446,4.492738,2.473972


## Sampling core parameters

In [7]:
# Parameters sampled jointly for every method except 'mce-2l'
variables = ['q4x', 'lamg', 'cmix', 'cdeep', 'gamma_2l', 'eff']
# Parameter list for 'mce-2l': the trailing 'eff' is swapped for 'co2_beta'
variables_mce = [*variables[:-1], 'co2_beta']

In [8]:
def gen_sample(df_calib, nsize, seed):
    """Draw positive-valued parameter sets from a Gaussian KDE of calibrated data.

    A multivariate Gaussian kernel density estimate is fitted to the rows of
    ``df_calib`` and ``int(nsize*1.1)`` candidates are drawn from it.
    Candidates containing any non-positive value are discarded and the first
    ``nsize`` survivors are kept.

    Parameters
    ----------
    df_calib : pandas.DataFrame
        Calibrated parameters, one row per dataset and one column per
        parameter.
    nsize : int
        Requested number of members.
    seed
        Seed forwarded to ``gaussian_kde.resample`` for reproducibility.

    Returns
    -------
    pandas.DataFrame
        Sampled parameters with the columns of ``df_calib`` and a 0-based
        index named 'Member'. If the 10% surplus does not cover the rejected
        candidates, fewer than ``nsize`` rows are returned and a
        ``RuntimeWarning`` is issued.
    """
    import warnings

    kernel = stats.gaussian_kde(df_calib.T)
    df = pd.DataFrame(
        # Oversample by 10% to compensate for the rows rejected below
        kernel.resample(size=int(nsize*1.1), seed=seed),
        index=df_calib.columns,
    ).T
    # Keep only rows in which every parameter is strictly positive
    df = df.where(df > 0).dropna()
    if len(df) < nsize:
        # Previously this shortfall was silent; callers that assume exactly
        # nsize members per method would get misaligned data downstream.
        warnings.warn(
            'gen_sample: only {} of {} requested members remain after '
            'discarding non-positive draws'.format(len(df), nsize),
            RuntimeWarning,
            stacklevel=2,
        )
    df = (
        df
        .iloc[:nsize]
        .reset_index(drop=True)
        .rename_axis('Member')
    )
    return df

In [9]:
# Number of members drawn for each method
nsize = 50000
# Seed for the KDE resampling; the same value is passed for every method
seed_sampling = 101

In [10]:
# Sample each calibration method separately and stack the results under a
# 'Method' index level; 'mce-2l' uses its own parameter list
df_parms_sample = pd.concat(
    {
        method: gen_sample(
            df_method[variables_mce if method == 'mce-2l' else variables],
            nsize, seed_sampling,
        )
        for method, df_method in df_parms_calib.groupby('Method')
    },
    names=['Method'],
)

In [11]:
qvals = [0.05, 0.17, 0.5, 0.83, 0.95]
vn = 'lamg'

# Adjust the lower tail of the sampled lamg, method by method.
# factor is the ratio between calibrated and sampled data of the relative
# position of the 17th percentile within the 5th-50th percentile range.
for k, df in df_parms_calib.groupby('Method'):
    in_method = df_parms_sample.index.get_level_values('Method') == k
    d1 = df_parms_sample.loc[in_method, vn]
    q_sample = d1.quantile(qvals)
    q_calib = df[vn].quantile(qvals)
    factor = (
        ((q_calib[0.17] - q_calib[0.05])/(q_calib[0.5] - q_calib[0.05]))
        / ((q_sample[0.17] - q_sample[0.05])/(q_sample[0.5] - q_sample[0.05]))
    )
    # Values above the sampled 17th percentile are kept; for the others the
    # distance below that percentile is multiplied by factor.
    d1 = d1.where(
        d1 > q_sample[0.17],
        q_sample[0.17] - (q_sample[0.17] - d1) * factor,
    )
    # Write back explicitly: an in-place update of a chained selection
    # (df.loc[k][vn]) depends on view semantics and does not modify the
    # parent frame under pandas copy-on-write.
    df_parms_sample.loc[in_method, vn] = d1.to_numpy()

In [12]:
# Parameter correlations for method 'ar6': sampled members (first table)
# against calibrated data (second table)
display(
    df_parms_sample.loc['ar6'][variables].corr(),
    df_parms_calib.loc['ar6'][variables].corr(),
)

Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,eff
q4x,1.0,0.46381,0.013784,0.044157,0.311973,0.451525
lamg,0.46381,1.0,0.341001,0.382583,0.148295,-0.197522
cmix,0.013784,0.341001,1.0,0.168347,-0.437814,-0.433908
cdeep,0.044157,0.382583,0.168347,1.0,0.078769,-0.249702
gamma_2l,0.311973,0.148295,-0.437814,0.078769,1.0,0.290593
eff,0.451525,-0.197522,-0.433908,-0.249702,0.290593,1.0


Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,eff
q4x,1.0,0.464861,0.013387,0.053257,0.315927,0.448694
lamg,0.464861,1.0,0.340022,0.382789,0.156895,-0.198951
cmix,0.013387,0.340022,1.0,0.168562,-0.441082,-0.435512
cdeep,0.053257,0.382789,0.168562,1.0,0.104639,-0.25423
gamma_2l,0.315927,0.156895,-0.441082,0.104639,1.0,0.285662
eff,0.448694,-0.198951,-0.435512,-0.25423,0.285662,1.0


In [13]:
# Parameter correlations for method 's21': sampled members (first table)
# against calibrated data (second table)
display(
    df_parms_sample.loc['s21'][variables].corr(),
    df_parms_calib.loc['s21'][variables].corr(),
)

Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,eff
q4x,1.0,0.393555,-0.021099,0.092979,0.355711,0.414269
lamg,0.393555,1.0,0.197286,0.371227,0.35378,-0.077128
cmix,-0.021099,0.197286,1.0,0.094361,-0.202165,-0.083811
cdeep,0.092979,0.371227,0.094361,1.0,0.315726,-0.371334
gamma_2l,0.355711,0.35378,-0.202165,0.315726,1.0,0.125591
eff,0.414269,-0.077128,-0.083811,-0.371334,0.125591,1.0


Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,eff
q4x,1.0,0.383521,-0.020672,0.109158,0.359013,0.40425
lamg,0.383521,1.0,0.194855,0.367843,0.363113,-0.088703
cmix,-0.020672,0.194855,1.0,0.094203,-0.19912,-0.088135
cdeep,0.109158,0.367843,0.094203,1.0,0.33442,-0.37668
gamma_2l,0.359013,0.363113,-0.19912,0.33442,1.0,0.112967
eff,0.40425,-0.088703,-0.088135,-0.37668,0.112967,1.0


In [14]:
# Parameter correlations for method 'mce-2l' (parameter list with co2_beta):
# sampled members (first table) against calibrated data (second table)
display(
    df_parms_sample.loc['mce-2l'][variables_mce].corr(),
    df_parms_calib.loc['mce-2l'][variables_mce].corr(),
)

Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,co2_beta
q4x,1.0,0.372577,0.073165,0.071667,-0.047589,-0.168766
lamg,0.372577,1.0,-0.274107,0.372106,0.274224,-0.2829
cmix,0.073165,-0.274107,1.0,-0.077182,-0.463574,-0.107128
cdeep,0.071667,0.372106,-0.077182,1.0,0.228693,-0.059959
gamma_2l,-0.047589,0.274224,-0.463574,0.228693,1.0,0.320272
co2_beta,-0.168766,-0.2829,-0.107128,-0.059959,0.320272,1.0


Unnamed: 0,q4x,lamg,cmix,cdeep,gamma_2l,co2_beta
q4x,1.0,0.374984,0.069023,0.092832,-0.031824,-0.164798
lamg,0.374984,1.0,-0.280561,0.373522,0.283242,-0.279922
cmix,0.069023,-0.280561,1.0,-0.080729,-0.466925,-0.112713
cdeep,0.092832,0.373522,-0.080729,1.0,0.251333,-0.045966
gamma_2l,-0.031824,0.283242,-0.466925,0.251333,1.0,0.320451
co2_beta,-0.164798,-0.279922,-0.112713,-0.045966,0.320451,1.0


## Add CO2 doubling forcing

In [15]:
# Reference CO2 concentration taken from parms_ar6_ghg.C0_1750
# (presumably the 1750 level — TODO confirm) and set as `ccref`
cco2_pi = obj_rf.parms_ar6_ghg.C0_1750
obj_rf.parms.update(ccref=cco2_pi)
# CO2 forcing from c2erf_ar6 at 2x and 4x the reference concentration
q2x_ref = obj_rf.c2erf_ar6('CO2', cco2_pi*2.)
q4x_ref = obj_rf.c2erf_ar6('CO2', cco2_pi*4.)
# Show both values and their ratio; the ratio is used below to scale q4x to q2x
q2x_ref, q4x_ref, q2x_ref / q4x_ref

(3.934168323890023, 8.259783657536742, 0.4763040397916769)

In [16]:
df = df_parms_sample

# Default: q2x is q4x scaled by the reference 2x/4x forcing ratio.
# MCE-2l uses a variant amplification factor: rows that have co2_beta
# get q2x = q4x / co2_beta * 0.5 instead.
# The combined result is assigned as a column in one step; an in-place
# `where` on df['q2x'] is a chained assignment that does not update df
# under pandas copy-on-write.
df['q2x'] = (df['q4x'] * (q2x_ref / q4x_ref)).where(
    df['co2_beta'].isna(),
    df['q4x'] / df['co2_beta'] * 0.5,
)

In [17]:
# Recompute q2x per method with the method-specific formula and report
# whether it is identical to the stored column
for k, df in df_parms_sample.groupby('Method'):
    d1 = (
        df['q4x'] / df['co2_beta'] * 0.5
        if k == 'mce-2l'
        else df['q4x'] * (q2x_ref / q4x_ref)
    )
    print(k, np.all(d1 == df['q2x']))

ar6 True
mce-2l True
s21 True


## AR6 original method

In [18]:
# Input files of the AR6 original method are retrieved from the
# IPCC-WG1/Chapter-7 GitHub repository; per the log output below, local
# copies are kept under ./datain
owner = 'IPCC-WG1'
repo = 'Chapter-7'
repo_ch7 = RetrieveGitHub(owner, repo, './datain')

In [19]:
# Random seeds from the Chapter-7 repository; individual entries are
# selected by position (SEEDS[15], SEEDS[16], SEEDS[73]) in the cells below
path = repo_ch7.retrieve('data_input/random_seeds.json')
with path.open() as f1:
    SEEDS = json.load(f1)

[2024-07-05 16:29:19 src.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/random_seeds.json retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/random_seeds.json on 2024-06-12


In [20]:
# Two-layer model tuning parameters from the Chapter-7 repository
path = repo_ch7.retrieve('data_input/tunings/cmip6_twolayer_tuning_params.json')
with path.open() as f1:
    params = json.load(f1)

# Dataset names taken from the 'EBM-epsilon' entry of the q4x parameter
cmip6_models = list(params['q4x']['model_data']['EBM-epsilon'].keys())

[2024-07-05 16:29:21 src.util] INFO:Use local file datain/IPCC-WG1/Chapter-7/data_input/tunings/cmip6_twolayer_tuning_params.json retrieved from https://github.com/IPCC-WG1/Chapter-7/raw/main/data_input/tunings/cmip6_twolayer_tuning_params.json on 2024-06-11


In [21]:
# One frame per parameter (rows: Dataset, columns: Model), stacked under a
# 'Parameter' level, which is then moved to the columns so that
# df_parms[<model>] gives a Dataset x Parameter table
df_parms = pd.concat({
    vn: pd.DataFrame(params[vn]['model_data']).rename_axis('Dataset')
    for vn in params
}, names=['Parameter']).rename_axis(columns='Model').unstack('Parameter')

In [22]:
# Same parameter list as used for the core sampling
variables = ['q4x', 'lamg', 'cmix', 'cdeep', 'gamma_2l', 'eff']
df = df_parms['EBM-epsilon'].loc[cmip6_models, variables]

# Joint Gaussian KDE over the six parameters of the selected datasets
kernel = stats.gaussian_kde(df.T)
nsize_ch7 = 1000000
# Draw 25% more candidates than needed; rows with non-positive values are
# dropped further down
df_sample = pd.DataFrame(
    kernel.resample(size=int(nsize_ch7*1.25), seed=SEEDS[15]),
    index=df.columns,
).T

# The KDE-sampled lamg is not used: it is set to 1 first so that it cannot
# trigger the positivity mask applied to the other parameters
df_sample['lamg'] = 1.
df_sample = df_sample.where(df_sample > 0)
# lamg is drawn independently from a normal distribution with mean -4/3 and
# standard deviation 0.5, truncated at +/-2 standard deviations
df_sample['lamg'] = stats.truncnorm.rvs(
    -2, 2, loc=-4/3, scale=0.5, size=int(nsize_ch7*1.25),
    random_state=SEEDS[16],
)

# Drop the rows masked above, keep the first nsize_ch7 members and label
# the row axis 'Member'
df_sample = (
    df_sample
    .dropna()
    .iloc[:nsize_ch7]
    .reset_index(drop=True)
    .rename_axis(index='Member')
    .rename_axis(columns='')
)

In [23]:
# lamg was drawn around a negative mean (-4/3); flip the sign so that it is
# positive like the calibrated lamg values
df_sample['lamg'] = -df_sample['lamg']

In [24]:
# q2x is drawn independently from a normal distribution with mean 4.00.
# F2XCO2_NINETY is divided by the standard-normal 95th percentile (~1.645)
# to get the standard deviation, i.e. it is treated as the half-width of
# the 5-95% range. Units are presumably W m-2 — TODO confirm.
F2XCO2_MEAN = 4.00
F2XCO2_NINETY = 0.48
NINETY_TO_ONESIGMA = stats.norm.ppf(0.95)
df_sample['q2x'] = stats.norm.rvs(
    loc=F2XCO2_MEAN, scale=F2XCO2_NINETY/NINETY_TO_ONESIGMA,
    size=nsize_ch7, random_state=SEEDS[73],
)

In [25]:
# Prepend the first nsize members of the AR6 original sample under
# Method 'ar6_orig'.
# NOTE: df_parms_sample is rebuilt from itself, so re-running this cell
# would add the 'ar6_orig' rows a second time.
df_parms_sample = pd.concat([
    pd.concat({
        'ar6_orig': df_sample.iloc[:nsize],
    }, names=['Method']),
    df_parms_sample,
])

In [26]:
# NOTE(review): the return value is not used, so add_ecs_tcr presumably adds
# the ecs/tcr columns to df_sample in place; the effect of approx=True is
# defined in src.tlm — confirm there
add_ecs_tcr(df_sample, approx=True)

In [27]:
# Keep a reference to the full-size AR6 original sample (same object as
# df_sample, not a copy); it is written to its own file at the end
df_parms_sample_ar6_orig_large = df_sample

## Add derived parameters

In [28]:
# Append the columns returned by ebm_to_irm to the sampled parameters
# (presumably tau0, tau1, a0, a1, which are summarized below)
df = df_parms_sample
df = pd.concat([df, ebm_to_irm(df)], axis=1)

In [29]:
# NOTE(review): return value unused; presumably adds the ecs and tcr
# columns to df in place — confirm in src.tlm
add_ecs_tcr(df)

In [30]:
# Quantiles (qvals) of the derived parameters for each method
df[['tau0', 'tau1', 'a0', 'a1', 'ecs', 'tcr']].groupby('Method').quantile(qvals)

Unnamed: 0_level_0,Unnamed: 1_level_0,tau0,tau1,a0,a1,ecs,tcr
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ar6,0.05,2.157941,83.696599,0.278021,0.187622,2.023613,1.4235
ar6,0.17,3.134933,171.968336,0.381424,0.290229,2.445673,1.599442
ar6,0.5,4.335408,313.552127,0.54936,0.45064,3.584757,1.98117
ar6,0.83,6.079214,493.435694,0.709771,0.618576,5.745763,2.623421
ar6,0.95,7.829668,726.518541,0.812378,0.721979,6.928937,3.15542
ar6_orig,0.05,1.990796,70.077624,0.341223,0.174736,1.902907,1.273912
ar6_orig,0.17,2.752708,149.997226,0.460221,0.258495,2.224966,1.494768
ar6_orig,0.5,3.801076,282.758632,0.610348,0.389652,2.994257,1.900856
ar6_orig,0.83,5.365113,458.961199,0.741505,0.539779,4.546766,2.530731
ar6_orig,0.95,7.08155,700.352325,0.825264,0.658777,6.737184,3.192598


In [31]:
# From here on df_parms_sample includes the derived columns
df_parms_sample = df

# Save data

In [32]:
# Variable attributes (long_name, units) are given separately
with open('./src/variables.yml') as f1:
    var_atts = yaml.safe_load(f1)

In [33]:
# Data structure is changed for output
bl = df_parms_sample.index.get_level_values('Member') == 0
df1 = df_parms_sample.loc[bl].unstack('Method')
df1

Unnamed: 0_level_0,q4x,q4x,q4x,q4x,lamg,lamg,lamg,lamg,cmix,cmix,...,a1,a1,ecs,ecs,ecs,ecs,tcr,tcr,tcr,tcr
Method,ar6,ar6_orig,mce-2l,s21,ar6,ar6_orig,mce-2l,s21,ar6,ar6_orig,...,mce-2l,s21,ar6,ar6_orig,mce-2l,s21,ar6,ar6_orig,mce-2l,s21
Member,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,5.856909,8.142751,6.328691,7.173368,0.455387,1.156137,0.732814,0.61885,7.810095,9.493752,...,0.092628,0.778703,6.125934,3.569695,4.01861,5.521058,2.00201,2.302507,3.336189,3.598972


In [34]:
# Create the output file from the one-row frame and the variable attributes
path_out = './dataout/parms_sample.nc'
df2nc(path_out, df1, var_atts)

[2024-07-05 16:30:29 src.util] INFO:dataout/parms_sample.nc is created


In [35]:
# Re-open the file and write out data values 
ncf = Dataset(path_out, 'r+')

ncf.variables['Member'][:] = df_parms_sample.index.levels[1].values

method_order = ncf.variables['Method'][:]

for k, v in df_parms_sample.iteritems():
    fill_value = ncf.variables[k]._FillValue
    ncf.variables[k][:] = v.unstack('Method')[method_order].fillna(fill_value)

In [36]:
# Check
for k, v in df_parms_sample.iteritems():
    print(
        k,
        np.array_equal(
            v.unstack('Method')[method_order].fillna(fill_value).values,
            ncf.variables[k][:].filled(),
        ),
    )

q4x True
lamg True
cmix True
cdeep True
gamma_2l True
eff True
q2x True
co2_beta True
tau0 True
tau1 True
a0 True
a1 True
ecs True
tcr True


In [37]:
# Close the dataset that was opened for writing above
ncf.close()

In [38]:
# Separate file for the full-size AR6 original sample
path_out = './dataout/fair_samples_thermal.nc'
df2nc(path_out, df_parms_sample_ar6_orig_large, var_atts)

[2024-07-05 16:33:11 src.util] INFO:dataout/fair_samples_thermal.nc is created


In [39]:
# Re-open the file and write the values of every column
ncf = Dataset(path_out, 'r+')

# DataFrame.iteritems() was removed in pandas 2.0; items() is the
# equivalent and is available in older versions as well
for k, v in df_parms_sample_ar6_orig_large.items():
    ncf.variables[k][:] = v.values

In [40]:
# Close the dataset that was opened for writing above
ncf.close()