# Import RCMIP2 scenarios

In [None]:
%cd ..

In [2]:
import re
import numpy as np
import pandas as pd
from mce.core import ScenarioBase
from mce.core.forcing import RfAll
from mce.util.io import retrieve_url

In [3]:
forcing = RfAll()

## Read RCMIP2 data

In [4]:
# Read the three RCMIP annual-mean tables (emissions, concentrations,
# radiative forcing) and keep only the rows of the 'World' region.
fmt = 'rcmip-{}-annual-means-v5-1-0.csv'.format
dfin = {}
for cat, cat_full in [
    ('emis', 'emissions'),
    ('conc', 'concentrations'),
    ('rf', 'radiative-forcing'),
]:
    fn = fmt(cat_full)
    # retrieve_url is given the local target path and the source URL and
    # returns the path that is read below
    path = retrieve_url(
        f'datain/rcmip/{fn}',
        f'https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/{fn}',
    )
    # The first seven CSV columns form the row index; the remaining column
    # labels are converted to int
    table = pd.read_csv(path, index_col=list(range(7))).rename(columns=int)
    dfin[cat] = table.groupby('Region').get_group('World') # Keep World only

[2025-04-23 14:14:18 mce.util.io] INFO:Use local file datain/rcmip/rcmip-emissions-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-emissions-annual-means-v5-1-0.csv on 2025-02-08
[2025-04-23 14:14:18 mce.util.io] INFO:Use local file datain/rcmip/rcmip-concentrations-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-concentrations-annual-means-v5-1-0.csv on 2025-02-09
[2025-04-23 14:14:19 mce.util.io] INFO:Use local file datain/rcmip/rcmip-radiative-forcing-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-radiative-forcing-annual-means-v5-1-0.csv on 2025-04-09


In [5]:
id_vars = dfin['emis'].index.names
id_vars

FrozenList(['Model', 'Scenario', 'Region', 'Variable', 'Unit', 'Mip_Era', 'Activity_Id'])

## Data processing

### Emissions

In [6]:
# Strip the leading 'Emissions|' from variable names, rewrite '/yr' as ' yr-1'
# in units, and isolate the MAGICC-specific variables in their own group
df = dfin['emis'].rename(
    lambda x: x.replace('Emissions|', ''), level='Variable',
)
df = df.rename(lambda x: x.replace('/yr', ' yr-1'), level='Unit')
dfg = df.groupby([
    'magicc' if 'MAGICC' in x else 'main'
    for x in df.index.get_level_values('Variable')
])

In [7]:
# Rename variables and units

def f_ren(x):
    """Return *x* with RCMIP species names rewritten in hyphenated form.

    Works on variable names as well as on unit strings that embed a
    species name (e.g. 'kt CFC11 yr-1' becomes 'kt CFC-11 yr-1').

    The steps, applied in order, are:

    1. drop the 'F-Gases|' / 'Montreal Gases|' category prefixes;
    2. drop the 'CFC|' / 'HFC|' / 'PFC|' sub-category prefixes;
    3. insert a hyphen between a family name and its number
       ('HFC125' -> 'HFC-125', 'Halon1211' -> 'Halon-1211');
    4. prefix the linear perfluorocarbons with 'n-';
    5. apply a few fixed spellings ('Sulfur' -> 'SO2', 'VOC' -> 'NMVOC', ...).

    The function is idempotent: a name that is already in the target
    spelling is returned unchanged, so re-running a notebook cell on
    already-renamed data does not yield 'CFC--11', 'n-n-C4F10' or
    'NMNMVOC'.

    Parameters
    ----------
    x : str
        Variable name or unit string.

    Returns
    -------
    str
        The renamed string.
    """
    x = re.sub(r'(F-|Montreal )Gases\|', '', x)
    x = re.sub(r'(CFC|HFC|PFC)\|', '', x)
    # Hyphenate only when a digit follows, so bare family names ('HFC')
    # and already hyphenated names ('HFC-125') are left alone
    x = re.sub(r'(HCFC|CFC|HFC|PFC|Halon)(?=\d)', r'\1-', x)
    # Skip names that already carry the 'n-' prefix
    x = re.sub(r'(?<!n-)(C4F10|C5F12|C6F14)', r'n-\1', x)
    x = (
        x
        .replace('HFC-4310mee', 'HFC-43-10mee')
        .replace('cC4F8', 'c-C4F8')
        .replace('Sulfur', 'SO2')
    )
    # Do not touch a 'VOC' that is already part of 'NMVOC'
    x = re.sub(r'(?<!NM)VOC', 'NMVOC', x)
    return x

# Apply f_ren to the non-MAGICC ('main') emissions, on both the variable
# names and the unit strings
df = (
    dfg.get_group('main')
    .rename(f_ren, level='Variable')
    .rename(f_ren, level='Unit')
)

In [8]:
def f_var_unique(df):
    """Return the distinct 'Variable' index labels of *df* as a sorted list.
    """
    labels = df.index.get_level_values('Variable')
    distinct = set(labels)
    return sorted(distinct)

In [9]:
# Check non-GHGs variables
[x for x in f_var_unique(df) if x not in forcing.ghgs]

['BC', 'CO', 'NH3', 'NMVOC', 'NOx', 'OC', 'SO2']

In [10]:
# Split the emissions into two tables, 'emis_ghg' and 'emis_slcf',
# according to whether the variable is listed in forcing.ghgs
dfout = {
    f'emis_{label}': subset
    for label, subset in df.groupby([
        'slcf' if name not in forcing.ghgs else 'ghg'
        for name in df.index.get_level_values('Variable')
    ])
}

In [11]:
# Regroup the MAGICC-specific variables by species, i.e. by the part of the
# variable name before the first '|', and list the species found
df = dfg.get_group('magicc')
species = [
    name.partition('|')[0]
    for name in df.index.get_level_values('Variable')
]
dfg = df.groupby(species)
list(dfg.groups)

['BC', 'CH4', 'CO', 'CO2', 'N2O', 'NH3', 'NOx', 'OC', 'Sulfur', 'VOC']

In [12]:
# Select CO2 and convert from Mt CO2 yr-1 to Gt C yr-1
# (12/44 is the carbon mass fraction of CO2; 1e-3 converts Mt to Gt)
df = (
    dfg.get_group('CO2')
    .mul(12e-3/44.)
    .rename({'Mt CO2 yr-1': 'Gt C yr-1'}, level='Unit')
)
dfg = df.groupby('Variable')
list(dfg.groups)

['CO2|MAGICC AFOLU',
 'CO2|MAGICC Fossil and Industrial',
 'CO2|MAGICC Fossil and Industrial|Aircraft',
 'CO2|MAGICC Fossil and Industrial|Energy Sector',
 'CO2|MAGICC Fossil and Industrial|Industrial Sector',
 'CO2|MAGICC Fossil and Industrial|International Shipping',
 'CO2|MAGICC Fossil and Industrial|Residential Commercial Other',
 'CO2|MAGICC Fossil and Industrial|Solvents Production and Application',
 'CO2|MAGICC Fossil and Industrial|Transportation Sector',
 'CO2|MAGICC Fossil and Industrial|Waste']

In [13]:
# Keep only the aggregated AFOLU and FFI series and give them short names
d_ren_magicc = {
    'CO2|MAGICC AFOLU': 'AFOLU',
    'CO2|MAGICC Fossil and Industrial': 'FFI',
}
dfout['emis_co2'] = pd.concat([
    # Every row of the group carries the label `old`, so a one-entry
    # mapping renames the whole group
    dfg.get_group(old).rename({old: new}, level='Variable')
    for old, new in d_ren_magicc.items()
]).sort_index()

In [14]:
# Check additivity: AFOLU + FFI should reproduce the CO2 series kept in
# emis_ghg once the latter is converted to Gt C yr-1
levels_wo_variable = [x for x in id_vars if x != 'Variable']
df1 = dfout['emis_co2'].groupby(levels_wo_variable).sum()
df2 = (
    dfout['emis_ghg']
    .groupby('Variable').get_group('CO2')
    .droplevel('Variable')
    .mul(12e-3/44.)
    .rename({'Mt CO2 yr-1': 'Gt C yr-1'}, level='Unit')
)
# Missing values in the reference series are treated as zero
np.allclose(df1 - df2.fillna(0.), 0.)

True

In [15]:
# Drop CO2 from emis_ghg
dfout['emis_ghg'] = dfout['emis_ghg'].drop('CO2', level='Variable')

### Concentrations

In [16]:
# Strip the 'Atmospheric Concentrations|' prefix from the variable names,
# then apply the same species renaming as for the emissions
df = dfin['conc'].rename(
    lambda x: x.replace('Atmospheric Concentrations|', ''), level='Variable',
)
df = df.rename(f_ren, level='Variable')

In [17]:
# Ensure all species are supported in MCE
[x for x in set(df.index.get_level_values('Variable')) if x not in forcing.ghgs]

[]

In [18]:
dfout['conc'] = df

### Forcing

In [19]:
# Rename forcing units
df = dfin['rf'].rename({'W/m^2': 'W m-2'}, level='Unit')

In [20]:
# Categorize forcing variables into radiative forcing and effective
# radiative forcing, using the part of the name before the first '|'
dfg = df.groupby([
    name.partition('|')[0]
    for name in df.index.get_level_values('Variable')
])

In [21]:
list(dfg.groups)

['Effective Radiative Forcing', 'Radiative Forcing']

In [22]:
# Map each leading forcing category name to the short key used for it
# in the output
d_ren = {
    'Radiative Forcing': 'rf',
    'Effective Radiative Forcing': 'erf',
}

# Delete redundant part and apply several conversions
# TODO: check the "Other" category, which appears to include BC_on_snow,
# H2O_stratospheric, and halogens
def f_ren_forcing(x):
    """Return the short name for a forcing variable name *x*.

    The substitutions below are applied one after another, each on the
    result of the previous one, so their order matters (for example
    'Other WMGHGs' must be handled before the generic 'Other').
    """
    substitutions = (
        ('Anthropogenic|', ''),
        ('Anthropogenic', 'anthro'),
        ('Natural|', ''),
        ('Natural', 'natural'),
        ('Aerosols|', ''),
        ('Aerosols-cloud Interactions', 'aerosols-cloud_interactions'),
        ('Aerosols-radiation Interactions', 'aerosols-radiation_interactions'),
        ('Tropospheric Ozone', 'O3_trop'),
        ('Stratospheric Ozone', 'O3_strat'),
        ('BC on Snow', 'BC_on_snow'),
        ('Albedo Change', 'land_use'),
        ('Other|Contrails and Contrail-induced Cirrus', 'contrails'),
        ('CH4 Oxidation Stratospheric H2O', 'H2O_stratospheric'),
        ('Solar', 'solar'),
        ('Volcanic', 'volcanic'),
        ('Aerosol', 'aerosol'),
        ('Other WMGHGs', 'halogen'),
        ('Other|', ''),
        ('Other', 'other'),
        # nonco2wmghg,minor
    )
    for old, new in substitutions:
        x = x.replace(old, new)
    return x

In [23]:
# Build one table per forcing kind, keyed 'rf' / 'erf':
# - the bare category name (the aggregate row) becomes 'total'
# - the '<category>|' prefix is removed from all other variable names
# - the remaining names are shortened with f_ren_forcing
# Every rename is restricted to the 'Variable' level; without `level`,
# a function mapper is applied to the labels of all index levels
# (Model, Scenario, Region, Unit, ...), which is not intended here.
dfset = {
    v:
    dfg
    .get_group(k)
    .rename({k: 'total'}, level='Variable')
    .rename(lambda x: x.replace(f'{k}|', ''), level='Variable')
    .rename(f_ren_forcing, level='Variable')
    for k, v in d_ren.items()
}

In [24]:
# The radiative forcing table also gets the species renaming (f_ren);
# the effective radiative forcing table is stored as is
dfout['rf'] = dfset['rf'].rename(f_ren, level='Variable')
dfout['erf'] = dfset['erf']

## Save data for MCE use

In [25]:
list(dfout)

['emis_ghg', 'emis_slcf', 'emis_co2', 'conc', 'rf', 'erf']

In [26]:
def f_var_units(df):
    """Return the sorted list of distinct (variable, unit) pairs in *df*'s index.
    """
    index = df.index
    pairs = {
        (var, unit)
        for var, unit in zip(
            index.get_level_values('Variable'),
            index.get_level_values('Unit'),
        )
    }
    return sorted(pairs)

In [27]:
f_var_units(dfout['emis_co2'])

[('AFOLU', 'Gt C yr-1'), ('FFI', 'Gt C yr-1')]

In [28]:
f_var_units(dfout['emis_ghg'])

[('C2F6', 'kt C2F6 yr-1'),
 ('C3F8', 'kt C3F8 yr-1'),
 ('C7F16', 'kt C7F16 yr-1'),
 ('C8F18', 'kt C8F18 yr-1'),
 ('CCl4', 'kt CCl4 yr-1'),
 ('CF4', 'kt CF4 yr-1'),
 ('CFC-11', 'kt CFC-11 yr-1'),
 ('CFC-113', 'kt CFC-113 yr-1'),
 ('CFC-114', 'kt CFC-114 yr-1'),
 ('CFC-115', 'kt CFC-115 yr-1'),
 ('CFC-12', 'kt CFC-12 yr-1'),
 ('CH2Cl2', 'kt CH2Cl2 yr-1'),
 ('CH3Br', 'kt CH3Br yr-1'),
 ('CH3CCl3', 'kt CH3CCl3 yr-1'),
 ('CH3Cl', 'kt CH3Cl yr-1'),
 ('CH4', 'Mt CH4 yr-1'),
 ('CHCl3', 'kt CHCl3 yr-1'),
 ('HCFC-141b', 'kt HCFC-141b yr-1'),
 ('HCFC-142b', 'kt HCFC-142b yr-1'),
 ('HCFC-22', 'kt HCFC-22 yr-1'),
 ('HFC-125', 'kt HFC-125 yr-1'),
 ('HFC-134a', 'kt HFC-134a yr-1'),
 ('HFC-143a', 'kt HFC-143a yr-1'),
 ('HFC-152a', 'kt HFC-152a yr-1'),
 ('HFC-227ea', 'kt HFC-227ea yr-1'),
 ('HFC-23', 'kt HFC-23 yr-1'),
 ('HFC-236fa', 'kt HFC-236fa yr-1'),
 ('HFC-245fa', 'kt HFC-245fa yr-1'),
 ('HFC-32', 'kt HFC-32 yr-1'),
 ('HFC-365mfc', 'kt HFC-365mfc yr-1'),
 ('HFC-43-10mee', 'kt HFC-43-10mee yr-1'),

In [29]:
len(f_var_units(dfout['emis_ghg']))

43

In [30]:
f_var_units(dfout['emis_slcf'])

[('BC', 'Mt BC yr-1'),
 ('CO', 'Mt CO yr-1'),
 ('NH3', 'Mt NH3 yr-1'),
 ('NMVOC', 'Mt NMVOC yr-1'),
 ('NOx', 'Mt NOx yr-1'),
 ('OC', 'Mt OC yr-1'),
 ('SO2', 'Mt SO2 yr-1')]

In [31]:
f_var_units(dfout['conc'])

[('C2F6', 'ppt'),
 ('C3F8', 'ppt'),
 ('C7F16', 'ppt'),
 ('C8F18', 'ppt'),
 ('CCl4', 'ppt'),
 ('CF4', 'ppt'),
 ('CFC-11', 'ppt'),
 ('CFC-113', 'ppt'),
 ('CFC-114', 'ppt'),
 ('CFC-115', 'ppt'),
 ('CFC-12', 'ppt'),
 ('CH2Cl2', 'ppt'),
 ('CH3Br', 'ppt'),
 ('CH3CCl3', 'ppt'),
 ('CH3Cl', 'ppt'),
 ('CH4', 'ppb'),
 ('CHCl3', 'ppt'),
 ('CO2', 'ppm'),
 ('HCFC-141b', 'ppt'),
 ('HCFC-142b', 'ppt'),
 ('HCFC-22', 'ppt'),
 ('HFC-125', 'ppt'),
 ('HFC-134a', 'ppt'),
 ('HFC-143a', 'ppt'),
 ('HFC-152a', 'ppt'),
 ('HFC-227ea', 'ppt'),
 ('HFC-23', 'ppt'),
 ('HFC-236fa', 'ppt'),
 ('HFC-245fa', 'ppt'),
 ('HFC-32', 'ppt'),
 ('HFC-365mfc', 'ppt'),
 ('HFC-43-10mee', 'ppt'),
 ('Halon-1202', 'ppt'),
 ('Halon-1211', 'ppt'),
 ('Halon-1301', 'ppt'),
 ('Halon-2402', 'ppt'),
 ('N2O', 'ppb'),
 ('NF3', 'ppt'),
 ('SF6', 'ppt'),
 ('SO2F2', 'ppt'),
 ('c-C4F8', 'ppt'),
 ('n-C4F10', 'ppt'),
 ('n-C5F12', 'ppt'),
 ('n-C6F14', 'ppt')]

In [32]:
len(f_var_units(dfout['conc']))

44

In [33]:
f_var_units(dfout['rf'])

[('BC_on_snow', 'W m-2'),
 ('C2F6', 'W m-2'),
 ('CCl4', 'W m-2'),
 ('CF4', 'W m-2'),
 ('CFC', 'W m-2'),
 ('CFC-11', 'W m-2'),
 ('CFC-113', 'W m-2'),
 ('CFC-114', 'W m-2'),
 ('CFC-115', 'W m-2'),
 ('CFC-12', 'W m-2'),
 ('CH3Br', 'W m-2'),
 ('CH3CCl3', 'W m-2'),
 ('CH3Cl', 'W m-2'),
 ('CH4', 'W m-2'),
 ('CO2', 'W m-2'),
 ('F-Gases', 'W m-2'),
 ('H2O_stratospheric', 'W m-2'),
 ('HCFC-141b', 'W m-2'),
 ('HCFC-142b', 'W m-2'),
 ('HCFC-22', 'W m-2'),
 ('HFC', 'W m-2'),
 ('HFC-125', 'W m-2'),
 ('HFC-134a', 'W m-2'),
 ('HFC-143a', 'W m-2'),
 ('HFC-227ea', 'W m-2'),
 ('HFC-23', 'W m-2'),
 ('HFC-245fa', 'W m-2'),
 ('HFC-32', 'W m-2'),
 ('HFC-43-10mee', 'W m-2'),
 ('Halon-1202', 'W m-2'),
 ('Halon-1211', 'W m-2'),
 ('Halon-1301', 'W m-2'),
 ('Halon-2402', 'W m-2'),
 ('Montreal Gases', 'W m-2'),
 ('N2O', 'W m-2'),
 ('O3_strat', 'W m-2'),
 ('O3_trop', 'W m-2'),
 ('PFC', 'W m-2'),
 ('SF6', 'W m-2'),
 ('aerosols', 'W m-2'),
 ('aerosols-cloud_interactions', 'W m-2'),
 ('aerosols-radiation_interactions

In [34]:
f_var_units(dfout['erf'])

[('BC_on_snow', 'W m-2'),
 ('CH4', 'W m-2'),
 ('CO2', 'W m-2'),
 ('H2O_stratospheric', 'W m-2'),
 ('N2O', 'W m-2'),
 ('O3_strat', 'W m-2'),
 ('O3_trop', 'W m-2'),
 ('aerosols', 'W m-2'),
 ('aerosols-cloud_interactions', 'W m-2'),
 ('aerosols-radiation_interactions', 'W m-2'),
 ('anthro', 'W m-2'),
 ('contrails', 'W m-2'),
 ('halogen', 'W m-2'),
 ('land_use', 'W m-2'),
 ('natural', 'W m-2'),
 ('other', 'W m-2'),
 ('solar', 'W m-2'),
 ('total', 'W m-2'),
 ('volcanic', 'W m-2')]

In [35]:
outpath = 'datain/ds_rcmip2.h5'
ds = ScenarioBase(outpath=outpath, mode='w')

[2025-04-23 14:16:26 mce.core] INFO:datain/ds_rcmip2.h5 already exists
[2025-04-23 14:16:26 mce.core] INFO:file datain/ds_rcmip2.h5 opened with mode=w


In [36]:
# Index levels that identify a data set, i.e. every level of id_vars
# except 'Variable' and 'Unit'
id_vars_sub = [x for x in id_vars if x not in ('Variable', 'Unit')]

In [46]:
# Write every table of dfout to the output file: one group per scenario and
# category at '<Scenario>/input/<category>', one dataset per variable.
for cat, df in dfout.items():
    # One sub-table per unique combination of the id_vars_sub levels
    for k, df1 in df.groupby(id_vars_sub):
        attrs = dict(zip(id_vars_sub, k))
        # The scenario name goes into the group path, not into the attributes
        scen = attrs.pop('Scenario')

        # exception raised if a certain scenario has different sets of attributes
        # (presumably because the group path would then be created twice —
        # TODO confirm against the behavior of ds.file.create_group)
        g = ds.file.create_group(f'{scen}/input/{cat}')
        for k1, v1 in attrs.items():
            g.attrs[k1] = v1

        # After dropping the id levels and transposing, rows are years and
        # columns are (Variable, Unit) pairs. reset_index turns the year
        # index into one more column labelled ('time', 'yr'), so the time
        # axis is written like any other variable, with units 'yr'.
        for (vname, units), v1 in (
            df1
            .droplevel(id_vars_sub)
            .T
            .dropna(how='all')
            .rename_axis('time')
            .reset_index(col_fill='yr')
            .items()
        ):
            # NOTE(review): missing values are dropped per variable, so a
            # dataset can be shorter than 'time' when a variable has gaps —
            # confirm that readers of this file expect that.
            d = g.create_dataset(vname, data=v1.dropna().values)
            d.attrs['units'] = units

In [47]:
ds.close()

[2025-04-23 14:24:26 mce.core] INFO:file datain/ds_rcmip2.h5 closed
