# Import RCMIP2 scenarios

In [1]:
%cd ..

/home/rocky/project/mce


In [54]:
import re
import numpy as np
import pandas as pd
from mce.core import ScenarioBase
from mce.core.forcing import RfAll
from mce.util.io import retrieve_url

In [105]:
forcing = RfAll()

## Read RCMIP2 data

In [162]:
# Load the three RCMIP annual-mean CSV files (emissions, concentrations,
# radiative forcing) into `dfin`, keyed by a short category name.
fmt = 'rcmip-{}-annual-means-v5-1-0.csv'.format
dfin = {}
for cat, cat_full in {
    'emis': 'emissions',
    'conc': 'concentrations',
    'rf': 'radiative-forcing',
}.items():
    fn = fmt(cat_full)
    # retrieve_url receives a local path and the source URL; the log output of
    # this cell shows it reusing previously retrieved local files.
    path = retrieve_url(
        f'datain/rcmip/{fn}',
        f'https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/{fn}',
    )
    dfin[cat] = (
        # The first seven CSV columns become the row MultiIndex; the remaining
        # column labels are cast to int (they are later stored as 'time' in 'yr').
        pd.read_csv(path, index_col=list(range(7)))
        .rename(columns=int)
    )

[2025-04-10 16:11:55 mce.util.io] INFO:Use local file datain/rcmip/rcmip-emissions-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-emissions-annual-means-v5-1-0.csv on 2025-02-08
[2025-04-10 16:11:56 mce.util.io] INFO:Use local file datain/rcmip/rcmip-concentrations-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-concentrations-annual-means-v5-1-0.csv on 2025-02-09
[2025-04-10 16:11:57 mce.util.io] INFO:Use local file datain/rcmip/rcmip-radiative-forcing-annual-means-v5-1-0.csv retrieved from https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-radiative-forcing-annual-means-v5-1-0.csv on 2025-04-09


In [163]:
dfin['conc'].index.names

FrozenList(['Model', 'Scenario', 'Region', 'Variable', 'Unit', 'Activity_Id', 'Mip_Era'])

In [164]:
# Index levels used when writing the scenario file:
# id_vars_1 selects one (Scenario, Region) block of rows, and
# id_vars_2 lists the levels that are stored as group attributes.
id_vars_1 = ['Scenario', 'Region']
id_vars_2 = ['Model', 'Activity_Id', 'Mip_Era']

## Rename variables and units

In [169]:
# Remove the 'Emissions|' header from the emission variable names,
# and rewrite '/yr' as ' yr-1' in the emission units
dfin['emis'] = dfin['emis'].rename(
    index=lambda name: name.replace('Emissions|', ''), level='Variable',
).rename(
    index=lambda unit: unit.replace('/yr', ' yr-1'), level='Unit',
)

In [170]:
# Split the emission table by variable name: rows tagged 'MAGICC AFOLU',
# rows tagged 'MAGICC Fossil and Industrial', and rows without any 'MAGICC' tag
df = dfin['emis']
df_afolu = df.loc[['MAGICC AFOLU' in x for x in df.index.get_level_values('Variable')]]
df_ffi = df.loc[['MAGICC Fossil and Industrial' in x for x in df.index.get_level_values('Variable')]]
df = df.loc[['MAGICC' not in x for x in df.index.get_level_values('Variable')]]

# The three parts together must account for every row of the input table
len(dfin['emis']) == len(df_afolu) + len(df_ffi) + len(df)

True

In [171]:
def chk(df1, df2=None, level='Variable'):
    """Unique labels of an index level, or a count comparison of two frames

    With only `df1`, return the distinct labels found in index level `level`,
    in order of first appearance.  With `df2` as well, return whether both
    frames have the same number of distinct labels in that level (the labels
    themselves are not compared).
    """
    labels = pd.unique(df1.index.get_level_values(level))
    if df2 is not None:
        other_labels = pd.unique(df2.index.get_level_values(level))
        return len(labels) == len(other_labels)
    return labels

In [172]:
# Drop the MAGICC category tag from the variable names of both subsets
df_afolu_new = df_afolu.rename(
    index=lambda name: name.replace('|MAGICC AFOLU', ''), level='Variable',
)
df_ffi_new = df_ffi.rename(
    index=lambda name: name.replace('|MAGICC Fossil and Industrial', ''), level='Variable',
)

# The number of distinct variable names must be unchanged by the renaming
chk(df_afolu, df_afolu_new), chk(df_ffi, df_ffi_new)


(True, True)

In [173]:
df_afolu = df_afolu_new
df_ffi = df_ffi_new
del df_afolu_new
del df_ffi_new

In [207]:
def conv_ghg(x):
    """Convert an RCMIP greenhouse-gas label to a hyphenated species name

    Accepts a variable name (e.g. 'F-Gases|HFC|HFC125') or a unit string that
    embeds a species name (e.g. 'kt HFC125 yr-1') and returns the string with
    category headers removed and the species written as 'HFC-125',
    'Halon-1211', 'n-C4F10', 'c-C4F8', and so on.

    The conversion is idempotent: a label that is already converted is
    returned unchanged, so re-running a cell that applies this function to its
    own result does not produce names such as 'HFC--125' or 'n-n-C4F10'.
    """
    # Drop the category headers 'F-Gases|' and 'Montreal Gases|'
    x = re.sub(r'(F-|Montreal )Gases\|', '', x)
    # Drop the family headers 'CFC|', 'HFC|' and 'PFC|'
    x = re.sub(r'(CFC|HFC|PFC)\|', '', x)
    # Hyphenate family name and number; the digit lookahead leaves labels
    # that already contain the hyphen (and bare family names) untouched
    x = re.sub(r'(HCFC|CFC|HFC|PFC|Halon)(?=\d)', r'\1-', x)
    # Add the 'n-' prefix unless it is already present
    x = re.sub(r'(?<!n-)(C4F10|C5F12|C6F14)', r'n-\1', x)
    # Special spellings that the generic rules above do not produce
    x = x.replace('HFC-4310mee', 'HFC-43-10mee').replace('cC4F8', 'c-C4F8')
    return x

In [175]:
# Apply the GHG naming rules to the variable names and to the unit strings
# (this rebinds `df` from itself, so it is meant to run once per fresh `df`)
df = df.rename(conv_ghg, level='Variable').rename(conv_ghg, level='Unit')

In [176]:
# Check non-GHG names
for x in chk(df, level='Variable'):
    if x not in forcing.ghgs:
        print(x)

BC
CO
NH3
NOx
OC
Sulfur
VOC


In [177]:
# Check AFOLU variables
chk(df_afolu).tolist()

['CO2',
 'BC',
 'BC|Agricultural Waste Burning',
 'BC|Agriculture',
 'BC|Forest Burning',
 'BC|Grassland Burning',
 'BC|Peat Burning',
 'CH4',
 'CH4|Agricultural Waste Burning',
 'CH4|Agriculture',
 'CH4|Forest Burning',
 'CH4|Grassland Burning',
 'CH4|Peat Burning',
 'CO',
 'CO|Agricultural Waste Burning',
 'CO|Agriculture',
 'CO|Forest Burning',
 'CO|Grassland Burning',
 'CO|Peat Burning',
 'N2O',
 'NH3',
 'NH3|Agricultural Waste Burning',
 'NH3|Agriculture',
 'NH3|Forest Burning',
 'NH3|Grassland Burning',
 'NH3|Peat Burning',
 'NOx',
 'NOx|Agricultural Waste Burning',
 'NOx|Agriculture',
 'NOx|Forest Burning',
 'NOx|Grassland Burning',
 'NOx|Peat Burning',
 'OC',
 'OC|Agricultural Waste Burning',
 'OC|Agriculture',
 'OC|Forest Burning',
 'OC|Grassland Burning',
 'OC|Peat Burning',
 'Sulfur',
 'Sulfur|Agricultural Waste Burning',
 'Sulfur|Agriculture',
 'Sulfur|Forest Burning',
 'Sulfur|Grassland Burning',
 'Sulfur|Peat Burning',
 'VOC',
 'VOC|Agricultural Waste Burning',
 'VOC|Agri

In [178]:
pd.unique([x.split('|')[0] for x in chk(df_afolu)]).tolist()

['CO2', 'BC', 'CH4', 'CO', 'N2O', 'NH3', 'NOx', 'OC', 'Sulfur', 'VOC']

In [179]:
# Check FFI variables
chk(df_ffi).tolist()

['CO2',
 'BC',
 'BC|Aircraft',
 'BC|Energy Sector',
 'BC|Industrial Sector',
 'BC|International Shipping',
 'BC|Residential Commercial Other',
 'BC|Solvents Production and Application',
 'BC|Transportation Sector',
 'BC|Waste',
 'CH4',
 'CH4|Energy Sector',
 'CH4|Industrial Sector',
 'CH4|International Shipping',
 'CH4|Residential Commercial Other',
 'CH4|Solvents Production and Application',
 'CH4|Transportation Sector',
 'CH4|Waste',
 'CO2|Aircraft',
 'CO2|Energy Sector',
 'CO2|Industrial Sector',
 'CO2|International Shipping',
 'CO2|Residential Commercial Other',
 'CO2|Solvents Production and Application',
 'CO2|Transportation Sector',
 'CO2|Waste',
 'CO',
 'CO|Aircraft',
 'CO|Energy Sector',
 'CO|Industrial Sector',
 'CO|International Shipping',
 'CO|Residential Commercial Other',
 'CO|Solvents Production and Application',
 'CO|Transportation Sector',
 'CO|Waste',
 'N2O',
 'NH3',
 'NH3|Aircraft',
 'NH3|Energy Sector',
 'NH3|Industrial Sector',
 'NH3|International Shipping',
 'NH3|R

In [180]:
pd.unique([x.split('|')[0] for x in chk(df_ffi)]).tolist()

['CO2', 'BC', 'CH4', 'CO', 'N2O', 'NH3', 'NOx', 'OC', 'Sulfur', 'VOC']

In [181]:
# Check variable-units pairs
list(df.groupby(['Variable', 'Unit']).groups)

[('BC', 'Mt BC yr-1'),
 ('C2F6', 'kt C2F6 yr-1'),
 ('C3F8', 'kt C3F8 yr-1'),
 ('C7F16', 'kt C7F16 yr-1'),
 ('C8F18', 'kt C8F18 yr-1'),
 ('CCl4', 'kt CCl4 yr-1'),
 ('CF4', 'kt CF4 yr-1'),
 ('CFC-11', 'kt CFC-11 yr-1'),
 ('CFC-113', 'kt CFC-113 yr-1'),
 ('CFC-114', 'kt CFC-114 yr-1'),
 ('CFC-115', 'kt CFC-115 yr-1'),
 ('CFC-12', 'kt CFC-12 yr-1'),
 ('CH2Cl2', 'kt CH2Cl2 yr-1'),
 ('CH3Br', 'kt CH3Br yr-1'),
 ('CH3CCl3', 'kt CH3CCl3 yr-1'),
 ('CH3Cl', 'kt CH3Cl yr-1'),
 ('CH4', 'Mt CH4 yr-1'),
 ('CHCl3', 'kt CHCl3 yr-1'),
 ('CO', 'Mt CO yr-1'),
 ('CO2', 'Mt CO2 yr-1'),
 ('HCFC-141b', 'kt HCFC-141b yr-1'),
 ('HCFC-142b', 'kt HCFC-142b yr-1'),
 ('HCFC-22', 'kt HCFC-22 yr-1'),
 ('HFC-125', 'kt HFC-125 yr-1'),
 ('HFC-134a', 'kt HFC-134a yr-1'),
 ('HFC-143a', 'kt HFC-143a yr-1'),
 ('HFC-152a', 'kt HFC-152a yr-1'),
 ('HFC-227ea', 'kt HFC-227ea yr-1'),
 ('HFC-23', 'kt HFC-23 yr-1'),
 ('HFC-236fa', 'kt HFC-236fa yr-1'),
 ('HFC-245fa', 'kt HFC-245fa yr-1'),
 ('HFC-32', 'kt HFC-32 yr-1'),
 ('HFC-365

In [182]:
dfin['emis'] = df
dfin['emis_afolu'] = df_afolu
dfin['emis_ffi'] = df_ffi

In [186]:
# Rename the concentration variables as well: remove the
# 'Atmospheric Concentrations|' header, then apply the GHG naming rules
df = dfin['conc'].rename(
    lambda x: conv_ghg(x.replace('Atmospheric Concentrations|', '')),
    level='Variable',
)

In [187]:
# Ensure all species are supported in MCE
[x for x in chk(df) if x not in forcing.ghgs]

[]

In [188]:
list(df.groupby(['Variable', 'Unit']).groups)

[('C2F6', 'ppt'),
 ('C3F8', 'ppt'),
 ('C7F16', 'ppt'),
 ('C8F18', 'ppt'),
 ('CCl4', 'ppt'),
 ('CF4', 'ppt'),
 ('CFC-11', 'ppt'),
 ('CFC-113', 'ppt'),
 ('CFC-114', 'ppt'),
 ('CFC-115', 'ppt'),
 ('CFC-12', 'ppt'),
 ('CH2Cl2', 'ppt'),
 ('CH3Br', 'ppt'),
 ('CH3CCl3', 'ppt'),
 ('CH3Cl', 'ppt'),
 ('CH4', 'ppb'),
 ('CHCl3', 'ppt'),
 ('CO2', 'ppm'),
 ('HCFC-141b', 'ppt'),
 ('HCFC-142b', 'ppt'),
 ('HCFC-22', 'ppt'),
 ('HFC-125', 'ppt'),
 ('HFC-134a', 'ppt'),
 ('HFC-143a', 'ppt'),
 ('HFC-152a', 'ppt'),
 ('HFC-227ea', 'ppt'),
 ('HFC-23', 'ppt'),
 ('HFC-236fa', 'ppt'),
 ('HFC-245fa', 'ppt'),
 ('HFC-32', 'ppt'),
 ('HFC-365mfc', 'ppt'),
 ('HFC-43-10mee', 'ppt'),
 ('Halon-1202', 'ppt'),
 ('Halon-1211', 'ppt'),
 ('Halon-1301', 'ppt'),
 ('Halon-2402', 'ppt'),
 ('N2O', 'ppb'),
 ('NF3', 'ppt'),
 ('SF6', 'ppt'),
 ('SO2F2', 'ppt'),
 ('c-C4F8', 'ppt'),
 ('n-C4F10', 'ppt'),
 ('n-C5F12', 'ppt'),
 ('n-C6F14', 'ppt')]

In [189]:
dfin['conc'] = df

In [191]:
# Rename forcing units
dfin['rf'] = dfin['rf'].rename({'W/m^2': 'W m-2'}, level='Unit')

In [192]:
# Categorize forcing variables into radiative forcing and effective radiative forcing
# Rows are grouped by the text before the first '|' of each 'Variable' label;
# the key list is built row by row, so it lines up with the frame's rows.
dfg = dfin['rf'].groupby([
    x.split('|')[0]
    for x in dfin['rf'].index.get_level_values('Variable')
])

In [212]:
list(dfg.groups)

['Effective Radiative Forcing', 'Radiative Forcing']

In [231]:
# Radiative-forcing rows: the bare 'Radiative Forcing' label becomes 'total',
# and the 'Radiative Forcing|' header is removed from every other label
df_rf = dfg.get_group('Radiative Forcing').rename(
    lambda x: 'total' if x == 'Radiative Forcing' else x.replace('Radiative Forcing|', ''),
    level='Variable',
)

In [232]:
# Effective-radiative-forcing rows: the bare 'Effective Radiative Forcing' label
# becomes 'total', and the 'Effective Radiative Forcing|' header is removed
# from every other label
df_erf = dfg.get_group('Effective Radiative Forcing').rename(
    lambda x: (
        'total' if x == 'Effective Radiative Forcing'
        else x.replace('Effective Radiative Forcing|', '')
    ),
    level='Variable',
)

In [233]:
# Change GHG names
df_rf = df_rf.rename(conv_ghg, level='Variable')

In [247]:
# Delete redundant part and apply several conversions
# Need check for "Other" category that appears to include BC_on_snow, H2O_stratospheric, halogens
def conv_name(x):
    """Shorten an RCMIP forcing variable name

    Applies a fixed sequence of literal substring replacements to `x` and
    returns the result.  The sequence is order-sensitive: headers followed by
    '|' are removed before the bare word is renamed, and the specific 'Other…'
    cases are handled before the generic 'Other|' / 'Other' rules.
    """
    substitutions = (
        ('Anthropogenic|', ''),
        ('Anthropogenic', 'anthro'),
        ('Natural|', ''),
        ('Natural', 'natural'),
        ('Aerosols|', ''),
        ('Aerosols-cloud Interactions', 'aerosols-cloud_interactions'),
        ('Aerosols-radiation Interactions', 'aerosols-radiation_interactions'),
        ('Tropospheric Ozone', 'O3_trop'),
        ('Stratospheric Ozone', 'O3_strat'),
        ('BC on Snow', 'BC_on_snow'),
        ('Albedo Change', 'land_use'),
        ('Other|Contrails and Contrail-induced Cirrus', 'contrails'),
        ('CH4 Oxidation Stratospheric H2O', 'H2O_stratospheric'),
        ('Solar', 'solar'),
        ('Volcanic', 'volcanic'),
        ('Aerosol', 'aerosol'),
        ('Other WMGHGs', 'halogen'),
        ('Other|', ''),
        ('Other', 'other'),
        # original note: nonco2wmghg,minor
    )
    for old, new in substitutions:
        x = x.replace(old, new)
    return x

In [248]:
df_rf = df_rf.rename(conv_name, level='Variable')
df_erf = df_erf.rename(conv_name, level='Variable')

In [249]:
chk(df_rf)

array(['total', 'anthro', 'aerosols', 'aerosols-cloud_interactions',
       'aerosols-radiation_interactions',
       'aerosols-radiation_interactions|Biomass Burning',
       'aerosols-radiation_interactions|Fossil and Industrial',
       'aerosols-radiation_interactions|Fossil and Industrial|BC and OC',
       'aerosols-radiation_interactions|Fossil and Industrial|BC and OC|BC',
       'aerosols-radiation_interactions|Fossil and Industrial|BC and OC|OC',
       'aerosols-radiation_interactions|Fossil and Industrial|Nitrate',
       'aerosols-radiation_interactions|Fossil and Industrial|Sulfate',
       'aerosols-radiation_interactions|Mineral Dust', 'land_use', 'CH4',
       'CO2', 'F-Gases', 'HFC', 'HFC-125', 'HFC-134a', 'HFC-143a',
       'HFC-227ea', 'HFC-23', 'HFC-245fa', 'HFC-32', 'HFC-43-10mee',
       'PFC', 'C2F6', 'n-C6F14', 'CF4', 'SF6', 'Montreal Gases', 'CCl4',
       'CFC', 'CFC-11', 'CFC-113', 'CFC-114', 'CFC-115', 'CFC-12',
       'CH3Br', 'CH3CCl3', 'CH3Cl', 'HCFC-141

In [250]:
chk(df_erf)

array(['total', 'anthro', 'aerosols', 'aerosols-cloud_interactions',
       'aerosols-radiation_interactions', 'land_use', 'CH4', 'CO2', 'N2O',
       'other', 'BC_on_snow', 'H2O_stratospheric', 'contrails', 'halogen',
       'O3_strat', 'O3_trop', 'natural', 'solar', 'volcanic'],
      dtype=object)

In [252]:
dfin['rf'] = df_rf
dfin['erf'] = df_erf

## Data conversion for MCE

In [6]:
ds = ScenarioBase()

[2025-04-09 16:09:21 mce.core] INFO:in-memory file opened


### Concentrations

In [10]:
def conv(x):
    """Variable name conversion

    Turns an RCMIP concentration variable name such as
    'Atmospheric Concentrations|F-Gases|HFC|HFC125' into the species name
    'HFC-125'.

    The conversion is idempotent: a name that is already converted (for
    example 'Halon-1211', 'HCFC-141b' or 'n-C4F10') is returned unchanged.
    This matters because the concentration table may already have been
    renamed by an earlier cell before this function is applied to it.
    """
    # Strip the variable header, then the category and family headers
    x = x.replace('Atmospheric Concentrations|', '')
    x = re.sub(r'(F-|Montreal )Gases\|', '', x)
    x = re.sub(r'(CFC|HFC|PFC)\|', '', x)
    # Hyphenate family name and number; the digit lookahead skips names that
    # already carry the hyphen
    x = re.sub(r'(HCFC|CFC|HFC|Halon)(?=\d)', r'\1-', x)
    # Add the 'n-' prefix unless it is already present
    x = re.sub(r'(?<!n-)(C4F10|C5F12|C6F14)', r'n-\1', x)
    # Special spellings that the generic rules above do not produce
    return x.replace('HFC-4310mee', 'HFC-43-10mee').replace('cC4F8', 'c-C4F8')

In [11]:
# Write the 'World' concentration series of every scenario into the scenario
# file `ds`, one group per scenario at '<Scenario>/input/conc'.
# NOTE(review): dfin['conc'] labels were already normalized with conv_ghg in an
# earlier cell, so conv must leave already-converted names unchanged here --
# verify with Restart & Run All.
cat = 'conc'

for k1, v1 in dfin[cat].groupby(id_vars_1):
    kw1 = dict(zip(id_vars_1, k1))
    # Only the global aggregate is imported
    if kw1['Region'] != 'World':
        continue

    # Take the first (Model, Activity_Id, Mip_Era) group and require that it
    # holds every row of this scenario, i.e. there is exactly one such combination
    k2, v2 = next(v1.droplevel(id_vars_1).groupby(id_vars_2).__iter__())
    if len(v1) != len(v2):
        raise ValueError('unexpected indexes')

    kw2 = dict(zip(id_vars_2, k2))

    # After dropping the id levels the index is (Variable, Unit); transposing
    # puts the years on the rows, and years with no data at all are dropped
    df = (
        v2.droplevel(id_vars_2)
        .rename(conv, level='Variable')
        .T
        .dropna(how='all')
    )

    # Every converted variable name must be a species listed in forcing.ghgs
    for x in df.columns.get_level_values(0):
        if x not in forcing.ghgs:
            raise ValueError('unsupported species: {}'.format(x))

    # presumably an h5py-style file object -- TODO confirm; create_group is
    # called unconditionally for each scenario
    grp = ds.file.create_group('{}/input/{}'.format(kw1['Scenario'], cat))
    
    # Store Model / Activity_Id / Mip_Era as attributes of the group
    for k, v in kw2.items():
        grp.attrs[k] = v
    
    dset = grp.create_dataset('time', data=df.index.values)
    dset.attrs['units'] = 'yr'
    
    # One dataset per species; k is the (Variable, Unit) column label
    for k, v in df.items():
        dset = grp.create_dataset(k[0], data=v.values)
        dset.attrs['units'] = k[1]

## Check an example scenario

In [12]:
grp = ds.file['ssp245/input/conc']
list(grp.attrs.items())

[('Activity_Id', 'input4MIPs'),
 ('Mip_Era', 'CMIP6'),
 ('Model', 'MESSAGE-GLOBIOM')]

In [13]:
pd.DataFrame({k: v for k, v in grp.items()}).set_index('time')

Unnamed: 0_level_0,C2F6,C3F8,C7F16,C8F18,CCl4,CF4,CFC-11,CFC-113,CFC-114,CFC-115,...,Halon-1301,Halon-2402,N2O,NF3,SF6,SO2F2,c-C4F8,n-C4F10,n-C5F12,n-C6F14
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1700,0.00000,0.00000,0.000000,0.000000,0.025,34.049999,0.000000,0.000000,0.00000,0.00000,...,0.000000,0.0,272.212049,0.00000,0.0000,0.000000,0.00000,0.000000,0.000000,0.000000
1701,0.00000,0.00000,0.000000,0.000000,0.025,34.049999,0.000000,0.000000,0.00000,0.00000,...,0.000000,0.0,272.243047,0.00000,0.0000,0.000000,0.00000,0.000000,0.000000,0.000000
1702,0.00000,0.00000,0.000000,0.000000,0.025,34.049999,0.000000,0.000000,0.00000,0.00000,...,0.000000,0.0,272.266047,0.00000,0.0000,0.000000,0.00000,0.000000,0.000000,0.000000
1703,0.00000,0.00000,0.000000,0.000000,0.025,34.049999,0.000000,0.000000,0.00000,0.00000,...,0.000000,0.0,272.298047,0.00000,0.0000,0.000000,0.00000,0.000000,0.000000,0.000000
1704,0.00000,0.00000,0.000000,0.000000,0.025,34.049999,0.000000,0.000000,0.00000,0.00000,...,0.000000,0.0,272.331052,0.00000,0.0000,0.000000,0.00000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496,9.59912,1.67188,0.366361,0.176951,0.000,140.825001,0.015106,0.251401,1.05305,3.93032,...,0.003763,0.0,359.840983,2.46639,18.1191,0.000344,3.90381,0.353267,0.179582,0.685107
2497,9.59816,1.67164,0.366239,0.176892,0.000,140.822000,0.014794,0.248462,1.04701,3.92252,...,0.003707,0.0,359.825979,2.46177,18.1134,0.000334,3.90259,0.353196,0.179538,0.684886
2498,9.59720,1.67140,0.366117,0.176833,0.000,140.818998,0.014489,0.245556,1.04100,3.91474,...,0.003651,0.0,359.811981,2.45715,18.1077,0.000325,3.90137,0.353126,0.179494,0.684665
2499,9.59624,1.67116,0.365995,0.176774,0.000,140.815999,0.014190,0.242685,1.03502,3.90697,...,0.003597,0.0,359.797984,2.45254,18.1021,0.000316,3.90015,0.353055,0.179450,0.684444


In [14]:
{k: v.attrs['units'] for k, v in grp.items()}

{'C2F6': 'ppt',
 'C3F8': 'ppt',
 'C7F16': 'ppt',
 'C8F18': 'ppt',
 'CCl4': 'ppt',
 'CF4': 'ppt',
 'CFC-11': 'ppt',
 'CFC-113': 'ppt',
 'CFC-114': 'ppt',
 'CFC-115': 'ppt',
 'CFC-12': 'ppt',
 'CH2Cl2': 'ppt',
 'CH3Br': 'ppt',
 'CH3CCl3': 'ppt',
 'CH3Cl': 'ppt',
 'CH4': 'ppb',
 'CHCl3': 'ppt',
 'CO2': 'ppm',
 'HCFC-141b': 'ppt',
 'HCFC-142b': 'ppt',
 'HCFC-22': 'ppt',
 'HFC-125': 'ppt',
 'HFC-134a': 'ppt',
 'HFC-143a': 'ppt',
 'HFC-152a': 'ppt',
 'HFC-227ea': 'ppt',
 'HFC-23': 'ppt',
 'HFC-236fa': 'ppt',
 'HFC-245fa': 'ppt',
 'HFC-32': 'ppt',
 'HFC-365mfc': 'ppt',
 'HFC-43-10mee': 'ppt',
 'Halon-1211': 'ppt',
 'Halon-1301': 'ppt',
 'Halon-2402': 'ppt',
 'N2O': 'ppb',
 'NF3': 'ppt',
 'SF6': 'ppt',
 'SO2F2': 'ppt',
 'c-C4F8': 'ppt',
 'n-C4F10': 'ppt',
 'n-C5F12': 'ppt',
 'n-C6F14': 'ppt',
 'time': 'yr'}

In [15]:
ds.close()

[2025-04-09 16:16:15 mce.core] INFO:in-memory file closed
