# WG3 AR6 scenario database

In [None]:
# Move the working directory up one level; the relative paths used below
# (e.g. '../iamc/datain/ar6', 'datain/ds_ar6db_imp.h5') are resolved from there.
# NOTE: not idempotent -- re-running this cell moves up one more level.
%cd ..

In [184]:
import sys
import warnings
import pathlib
import json
import numpy as np
import pandas as pd
import openpyxl
import pyam
from mce.core import ScenarioBase

In [185]:
# Suppress FutureWarning messages for the remainder of the session
warnings.simplefilter('ignore', FutureWarning)

In [3]:
# Add the climate-assessment source tree to the import path so that the
# `climate_assessment` imports in the next cell resolve
sys.path.append('../climate-assessment/src')

In [4]:
from climate_assessment.utils import columns_to_basic
from climate_assessment.harmonization_and_infilling import harmonization_and_infilling
from climate_assessment.checks import perform_input_checks

  import tqdm.autonotebook as tqdman


## Input data

In [5]:
# Directory holding the AR6 scenario database input files (relative to the
# working directory set at the top of the notebook)
path_ar6db = pathlib.Path('../iamc/datain/ar6')

In [6]:
# Show every entry found directly inside the input directory
list(path_ar6db.glob('*'))

[PosixPath('../iamc/datain/ar6/AR6_Scenarios_Database_World_v1.1.csv'),
 PosixPath('../iamc/datain/ar6/README_historical.txt'),
 PosixPath('../iamc/datain/ar6/AR6_Scenarios_Database_metadata_indicators_v1.1.xlsx'),
 PosixPath('../iamc/datain/ar6/License_sheet.xlsx'),
 PosixPath('../iamc/datain/ar6/AR6_historical_emissions.csv'),
 PosixPath('../iamc/datain/ar6/reference.json')]

### Reference

In [7]:
# Parse the reference tables (models, scenarios, regions, variables) from the
# JSON file that accompanies the database
refs = json.loads(path_ar6db.joinpath('reference.json').read_text())

list(refs)

['models', 'scenarios', 'regions', 'variables']

In [8]:
# Each reference table is a list of rows whose first row is the header;
# show the header and the number of model records that follow it
ref = refs['models']
ref[0], len(ref[1:])

(['ID', 'Name'], 188)

In [9]:
# Header row and number of scenario records
ref = refs['scenarios']
ref[0], len(ref[1:])

(['ID', 'Name'], 1389)

In [10]:
# Header row and number of variable records; `ref` keeps pointing at the
# variable table for the duplicate check in the next cell
ref = refs['variables']
ref[0], len(ref[1:])

(['ID', 'Name', 'Unit ID', 'Unit'], 1799)

In [11]:
# The variable table appears to contain repeated names: compare the number
# of entries with the number of distinct names (column 1 holds the name)
names_listed = [row[1] for row in ref[1:]]
len(names_listed), len(set(names_listed))

(1799, 1664)

### Metadata

In [12]:
# Open the metadata workbook in read-only mode; with data_only=True openpyxl
# returns stored cell values rather than formulas. The workbook is closed
# explicitly a few cells further down.
path = path_ar6db.joinpath('AR6_Scenarios_Database_metadata_indicators_v1.1.xlsx')
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
wb.sheetnames

['README',
 'meta_Ch3vetted_withclimate',
 'meta',
 'categories_indicators_doc',
 'Illustrative_Pathways']

In [13]:
# Read every worksheet except 'README' into a DataFrame keyed by sheet name
meta = {}
data_sheets = [name for name in wb.sheetnames if name != 'README']
for sheet in data_sheets:
    row_iter = wb[sheet].iter_rows(values_only=True)
    header = next(row_iter)  # the first row of a sheet supplies the column labels
    meta[sheet] = pd.DataFrame(list(row_iter), columns=header)

In [14]:
# All sheets have been copied into `meta`; release the workbook opened above
wb.close()

In [15]:
# Sheet 'meta': the rows displayed include scenarios flagged as
# failed-vetting or no-climate-assessment
meta['meta']

Unnamed: 0,Model,Scenario,Category,Category_name,Category_subset,Subset_Ch4,Category_Vetting_historical,IMP_marker,Literature Reference (if applicable),Policy_category,...,P67 peak warming (FaIRv1.6.2),Median warming in 2100 (FaIRv1.6.2),Median year of peak warming (FaIRv1.6.2),Exceedance Probability 1.5C (FaIRv1.6.2),Exceedance Probability 2.0C (FaIRv1.6.2),Exceedance Probability 3.0C (FaIRv1.6.2),IMP_color_rgb,IMP_color_hex,Category_color_rgb,Category_color_hex
0,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,failed-vetting,failed-vetting,C1a_NZGHGs,Limit to 1.5C (>50%) no or low OS immediate 20...,failed_Vetting_historical_failed-vetting,non-IMP,https://doi.org/10.1038/s41558-018-0198-6,P2a,...,1.501157,1.056861,2037,0.332588,0.009388,0,,,"151, 206, 228",97CEE4
1,AIM/CGE 2.0,ADVANCE_2020_Med2C,failed-vetting,failed-vetting,C4,,failed_Vetting_historical_failed-vetting,non-IMP,https://doi.org/10.1038/s41558-018-0198-6,P2a,...,1.92688,1.772975,2100,0.820742,0.26017,0.004023,,,"167, 198, 130",A7C682
2,AIM/CGE 2.0,ADVANCE_2020_WB2C,failed-vetting,failed-vetting,C1b_+veGHGs,Limit to 1.5C (>50%) no or low OS immediate 20...,failed_Vetting_historical_failed-vetting,non-IMP,https://doi.org/10.1038/s41558-018-0198-6,P2a,...,1.565292,1.384028,2050,0.426017,0.035762,0.000447,,,"151, 206, 228",97CEE4
3,AIM/CGE 2.0,ADVANCE_2030_Med2C,failed-vetting,failed-vetting,C4,,failed_Vetting_historical_failed-vetting,non-IMP,https://doi.org/10.1038/s41558-018-0198-6,P3b,...,2.043335,1.882453,2100,0.907912,0.374162,0.008494,,,"167, 198, 130",A7C682
4,AIM/CGE 2.0,ADVANCE_2030_Price1.5C,failed-vetting,failed-vetting,C2,Limit 2C (>67%) or return to 1.5C (>50%) after...,failed_Vetting_historical_failed-vetting,non-IMP,https://doi.org/10.1038/s41558-018-0198-6,P3b,...,1.705307,1.37244,2039,0.646401,0.071524,0.000447,,,"119, 134, 99",778663
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2299,WEGDYN 1.0,WEO_NDCplus_ASIA,no-climate-assessment,no-climate-assessment,no-climate-assessment,,no-climate-assessment,non-IMP,,,...,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,,,,
2300,WEGDYN 1.0,WEO_NDCplus_EUR_CHN,no-climate-assessment,no-climate-assessment,no-climate-assessment,,no-climate-assessment,non-IMP,,,...,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,,,,
2301,WEGDYN 1.0,WEO_NDCplus_GLOBAL,no-climate-assessment,no-climate-assessment,no-climate-assessment,,no-climate-assessment,non-IMP,,,...,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,,,,
2302,WEGDYN 1.0,WEO_NDCplus_PARTIAL,no-climate-assessment,no-climate-assessment,no-climate-assessment,,no-climate-assessment,non-IMP,,,...,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,no-climate-assessment,,,,


In [16]:
# Sheet 'meta_Ch3vetted_withclimate': same columns as 'meta'; the rows
# displayed all carry a C-category (C1 ... C7)
meta['meta_Ch3vetted_withclimate']

Unnamed: 0,Model,Scenario,Category,Category_name,Category_subset,Subset_Ch4,Category_Vetting_historical,IMP_marker,Literature Reference (if applicable),Policy_category,...,P67 peak warming (FaIRv1.6.2),Median warming in 2100 (FaIRv1.6.2),Median year of peak warming (FaIRv1.6.2),Exceedance Probability 1.5C (FaIRv1.6.2),Exceedance Probability 2.0C (FaIRv1.6.2),Exceedance Probability 3.0C (FaIRv1.6.2),IMP_color_rgb,IMP_color_hex,Category_color_rgb,Category_color_hex
0,AIM/CGE 2.0,SSP1-26,C3,C3: limit warming to 2°C (>67%),C3y_+veGHGs,Limit to 2C (>67%) immediate 2020 action,C3,non-IMP,https://doi.org/10.1016/j.gloenvcha.2016.05.009,P2a,...,1.717124,1.536621,2070,0.599911,0.121591,0.001788,,,"111, 120, 153",6F7899
1,AIM/CGE 2.0,SSP1-34,C5,C5: limit warming to 2.5°C (>50%),C5,,C5,non-IMP,https://doi.org/10.1016/j.gloenvcha.2016.05.009,P2a,...,2.144655,1.962484,2100,0.931158,0.463120,0.022351,,,"140, 167, 208",8CA7D0
2,AIM/CGE 2.0,SSP1-45,C6,C6: limit warming to 3°C (>50%),C6,,C6,non-IMP,https://doi.org/10.1016/j.gloenvcha.2016.05.009,P2a,...,2.629060,2.405440,2100,0.996424,0.836388,0.144837,,,"250, 193, 130",FAC182
3,AIM/CGE 2.0,SSP1-Baseline,C7,C7: limit warming to 4°C (>50%),C7,,C7,non-IMP,https://doi.org/10.1016/j.gloenvcha.2016.05.009,P1a,...,3.270344,3.002760,2100,1.000000,0.990612,0.502012,,,"241, 136, 114",F18872
4,AIM/CGE 2.0,SSP4-26,C3,C3: limit warming to 2°C (>67%),C3y_+veGHGs,Limit to 2C (>67%) immediate 2020 action,C3,non-IMP,https://doi.org/10.1016/j.gloenvcha.2016.05.009,P2a,...,1.678820,1.496835,2070,0.580688,0.082700,0.000447,,,"111, 120, 153",6F7899
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197,WITCH-GLOBIOM 4.4,CD-LINKS_NPi,C7,C7: limit warming to 4°C (>50%),C7,,C7,non-IMP,https://doi.org/10.1007/s10584-020-02837-9,P1b,...,3.954603,3.639530,2100,1.000000,0.999553,0.863210,,,"241, 136, 114",F18872
1198,WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_1000,C1,C1: limit warming to 1.5°C (>50%) with no or l...,C1a_NZGHGs,Limit to 1.5C (>50%) no or low OS immediate 20...,C1,non-IMP,https://doi.org/10.1038/s41560-018-0179-z; htt...,P2a,...,1.582366,1.377366,2070,0.446133,0.060796,0.000894,,,"151, 206, 228",97CEE4
1199,WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_1600,C3,C3: limit warming to 2°C (>67%),C3y_+veGHGs,Limit to 2C (>67%) immediate 2020 action,C3,non-IMP,https://doi.org/10.1038/s41560-018-0179-z; htt...,P2a,...,1.840110,1.662531,2095,0.718373,0.204738,0.005364,,,"111, 120, 153",6F7899
1200,WITCH-GLOBIOM 4.4,CD-LINKS_NPi2020_400,C1,C1: limit warming to 1.5°C (>50%) with no or l...,C1a_NZGHGs,Limit to 1.5C (>50%) no or low OS immediate 20...,C1,non-IMP,https://doi.org/10.1038/s41560-018-0179-z; htt...,P2a,...,1.492339,1.090956,2028,0.314707,0.017434,0.000000,,,"151, 206, 228",97CEE4


### Scenario data

In [22]:
# Load the scenario table in wide format (one column per year) and report
# the number of time series rows
df = pd.read_csv(path_ar6db.joinpath('AR6_Scenarios_Database_World_v1.1.csv'))
len(df)

693299

In [23]:
# Preview: five identifier columns followed by one column per year
# (the year labels are converted to int in the next cell)
df.head()

Unnamed: 0,Model,Scenario,Region,Variable,Unit,1995,1996,1997,1998,1999,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
0,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,World,AR6 climate diagnostics|Effective Radiative Fo...,W/m^2,1.606191,1.706876,1.66803,1.762945,1.963232,...,1.976137,1.994174,1.980347,1.957527,1.938977,1.910443,1.893993,1.879013,1.878013,1.889258
1,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,World,AR6 climate diagnostics|Effective Radiative Fo...,W/m^2,1.310639,1.416508,1.273504,1.369464,1.653079,...,1.774826,1.801188,1.787406,1.762791,1.742435,1.71305,1.697114,1.679779,1.6817,1.696405
2,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,World,AR6 climate diagnostics|Effective Radiative Fo...,W/m^2,1.726261,1.82928,1.810633,1.908043,2.083303,...,2.046819,2.066905,2.050494,2.024065,2.004448,1.973908,1.957375,1.941416,1.940207,1.951514
3,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,World,AR6 climate diagnostics|Effective Radiative Fo...,W/m^2,1.862109,1.961965,1.968039,2.053398,2.218931,...,2.116304,2.135642,2.120652,2.094222,2.073362,2.041366,2.024292,2.007639,2.007148,2.019429
4,AIM/CGE 2.0,ADVANCE_2020_1.5C-2100,World,AR6 climate diagnostics|Effective Radiative Fo...,W/m^2,2.242113,2.33129,2.37042,2.454901,2.584046,...,2.327459,2.342879,2.322864,2.292776,2.270133,2.237653,2.220305,2.203234,2.199014,2.210547


In [24]:
# The first five columns (Model, Scenario, Region, Variable, Unit) identify a
# time series; move them into the index and turn the year labels into ints.
# NOTE: this rebinds `df`, so the cell cannot be re-run without reloading the CSV.
idx_vars = list(df.columns[:5])
df = df.set_index(idx_vars)
df = df.rename(columns=int)
df.shape

(693299, 106)

In [25]:
# Keep a handle on the complete table; `df` is rebound to per-scenario
# subsets later in the notebook
dfall = df

In [26]:
# Ensure World data
pd.unique(dfall.index.get_level_values('Region'))

array(['World'], dtype=object)

## Select Illustrative Pathways

Five illustrative mitigation pathways (IMPs):
- IMP-SP: mitigation in the context of broader sustainable development;
- IMP-LD: strong emphasis on energy demand reductions;
- IMP-Ren: heavy reliance on renewables;
- IMP-Neg: extensive use of carbon dioxide removal (CDR) in the energy and the industry sectors to achieve net negative emissions;
- IMP-GS: the implications of a less rapid, more gradual strengthening of near-term mitigation actions.

Two reference pathways illustrating the consequences of current policies and pledges:
- ModAct: Moderate action, explores the impact of implementing the Nationally Determined Contributions (NDCs) as formulated in 2020 and some further strengthening after that;
- CurPol: explores the consequences of continuing along the path of implemented climate policies in 2020 and only a gradual strengthening after that.

In [27]:
# One row per illustrative pathway marker with the model/scenario pair it refers to
meta['Illustrative_Pathways']

Unnamed: 0,model,scenario,IMP_marker,IMP_notes,IMP_reference
0,AIM/CGE 2.2,EN_NPi2020_900f,Neg-2.0,"""2.0C sensitivity case for Neg""",https://doi.org/10.1038/s41558-021-01215-2; ht...
1,COFFEE 1.1,EN_NPi2020_400f_lowBECCS,Neg,,https://doi.org/10.1038/s41558-021-01215-2; ht...
2,GCAM 5.3,NGFS2_Current Policies,CurPol,,NGFS Climate Scenarios for central banks and s...
3,IMAGE 3.0,EN_INDCi2030_3000f,ModAct,,https://doi.org/10.1038/s41558-021-01215-2; ht...
4,MESSAGEix-GLOBIOM 1.0,LowEnergyDemand_1.3_IPCC,LD,,https://doi.org/10.1038/s41560-018-0172-6
5,MESSAGEix-GLOBIOM_GEI 1.0,SSP2_openres_lc_50,Ren-2.0,"""2.0C sensitivity case for Ren""","Guo, F., van Ruijven, B., Zakeri, B., Krey, V...."
6,REMIND-MAgPIE 2.1-4.2,SusDev_SDP-PkBudg1000,SP,,https://doi.org/10.1038/s41558-021-01098-3
7,REMIND-MAgPIE 2.1-4.3,DeepElec_SSP2_ HighRE_Budg900,Ren,,https://doi.org/10.1038/s41560-021-00937-z
8,WITCH 5.0,CO_Bridge,GS,,https://doi.org/10.1038/s41467-021-26595-z


In [28]:
# Map each IMP marker to its (model, scenario) pair, in sheet order
imp_table = meta['Illustrative_Pathways']
map_imp = dict(zip(
    imp_table['IMP_marker'],
    zip(imp_table['model'], imp_table['scenario']),
))
map_imp

{'Neg-2.0': ('AIM/CGE 2.2', 'EN_NPi2020_900f'),
 'Neg': ('COFFEE 1.1', 'EN_NPi2020_400f_lowBECCS'),
 'CurPol': ('GCAM 5.3', 'NGFS2_Current Policies'),
 'ModAct': ('IMAGE 3.0', 'EN_INDCi2030_3000f'),
 'LD': ('MESSAGEix-GLOBIOM 1.0', 'LowEnergyDemand_1.3_IPCC'),
 'Ren-2.0': ('MESSAGEix-GLOBIOM_GEI 1.0', 'SSP2_openres_lc_50'),
 'SP': ('REMIND-MAgPIE 2.1-4.2', 'SusDev_SDP-PkBudg1000'),
 'Ren': ('REMIND-MAgPIE 2.1-4.3', 'DeepElec_SSP2_ HighRE_Budg900'),
 'GS': ('WITCH 5.0', 'CO_Bridge')}

In [29]:
# Output HDF5 file for the selected pathways. The log line printed by this
# cell reports that the file is opened with mode=w.
# NOTE(review): mode=w presumably discards an existing file -- confirm in mce.core.
outpath = 'datain/ds_ar6db_imp.h5'
ds = ScenarioBase(outpath=outpath)

[2025-04-17 16:06:17 mce.core] INFO:file datain/ds_ar6db_imp.h5 opened with mode=w


In [30]:
# Save variable reference data
grp = ds.file.create_group('source/reference')

ref = refs['variables']
records = ref[1:]  # skip the header row (ID, Name, Unit ID, Unit)

# Variable ids and names are stored as parallel arrays in table order
for k, j in zip(['var_id', 'var_name'], [0, 1]):
    grp.create_dataset(k, data=[x[j] for x in records])

# Distinct (unit id, unit name) pairs. dict.fromkeys() removes duplicates
# while keeping first-seen order, so the stored arrays come out the same on
# every run; a set of tuples containing strings iterates in an order that
# depends on the per-process hash seed.
unit_pairs = list(dict.fromkeys((x[2], x[3]) for x in records))

for k, j in zip(['units_id', 'units'], [0, 1]):
    grp.create_dataset(k, data=[x[j] for x in unit_pairs])

In [31]:
# Make dictionaries for variable and units ids (name -> id), reading each
# stored array back from the reference group in one go
stored_var_names = grp['var_name'].asstr()[:]
stored_var_ids = grp['var_id'][:]
map_var = dict(zip(stored_var_names, stored_var_ids))

stored_unit_names = grp['units'].asstr()[:]
stored_unit_ids = grp['units_id'][:]
map_units = dict(zip(stored_unit_names, stored_unit_ids))

len(map_var), len(map_units)

(1664, 92)

In [32]:
# Save selected scenario data

by_model_scenario = dfall.groupby(['Model', 'Scenario'])

for imp_marker, (model, scenario) in map_imp.items():
    df_imp = by_model_scenario.get_group((model, scenario))

    # One HDF5 group per pathway marker, tagged with the model/scenario it came from
    grp = ds.file.create_group(f'source/data/{imp_marker}')
    grp.attrs['model'] = model
    grp.attrs['scenario'] = scenario

    # Create datasets by scenario category: the category is the part of the
    # variable name before the first '|'
    categories = [
        name.split('|', 1)[0]
        for name in df_imp.index.get_level_values('Variable')
    ]
    for cat, df_cat in df_imp.groupby(categories):
        # Drop years without any data in this category, keep only the
        # Variable/Unit index levels and replace both labels with their ids
        df_cat = df_cat.dropna(how='all', axis=1)
        df_cat = df_cat.droplevel(['Model', 'Scenario', 'Region'])
        df_cat = df_cat.rename(map_var, level='Variable')
        df_cat = df_cat.rename(map_units, level='Unit')
        row_index = df_cat.index

        g = grp.create_group(cat)
        g.create_dataset('value', data=df_cat.values)
        g.create_dataset('year', data=df_cat.columns.values)
        g.create_dataset('variable_id', data=row_index.get_level_values('Variable').values)
        g.create_dataset('units_id', data=row_index.get_level_values('Unit').values)

### Data retrieval example

In [33]:
# Make dictionaries for variables and units (id -> name), the inverse of the
# lookups used when writing the data
g = ds.file['source/reference']
map_var_id = dict(zip(g['var_id'][:], g['var_name'].asstr()[:]))
map_units_id = dict(zip(g['units_id'][:], g['units'].asstr()[:]))

In [34]:
# Pick one stored pathway and show the attributes recorded on its group
imp_marker = 'GS'
grp = ds.file[f'source/data/{imp_marker}']
dict(grp.attrs.items())

{'model': 'WITCH 5.0', 'scenario': 'CO_Bridge'}

In [37]:
# Rebuild a labelled table for one category: rows are (variable, unit) pairs,
# columns are years, and the stored ids are translated back to names
cat = 'Emissions'
g = grp[cat]
row_index = pd.MultiIndex.from_tuples(zip(g['variable_id'], g['units_id']))
df = pd.DataFrame(g['value'], index=row_index, columns=g['year'])
df = df.rename(map_var_id, level=0).rename(map_units_id, level=1)
df

Unnamed: 0,Unnamed: 1,2005,2010,2015,2020,2025,2030,2035,2040,2045,2050,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
Emissions|BC,Mt BC/yr,8.065985,8.876729,7.641252,6.342710,5.390290,4.411215,3.564201,2.550160,1.849921,1.276129,1.143114,1.053432,0.966429,0.884493,0.811315,0.750801,0.703484,0.657683,0.615944,0.574120
Emissions|CH4,Mt CH4/yr,359.240089,368.869178,372.663636,383.266607,297.564517,251.249534,199.836874,168.228838,149.758945,140.424796,136.524665,133.452676,129.243335,125.479958,122.141371,119.512598,116.134127,112.932110,110.610983,108.686114
Emissions|CH4|AFOLU,Mt CH4/yr,152.728256,151.130191,145.620904,156.442344,150.624185,145.705793,136.868327,130.531797,124.558657,119.118578,116.696483,114.300986,111.043731,107.986890,105.058342,103.033802,100.941728,98.721600,96.868592,95.373987
Emissions|CH4|Energy|Supply,Mt CH4/yr,129.970592,147.357825,154.609307,150.172120,79.147548,60.535408,22.469637,8.401068,4.164351,1.867729,0.973795,0.931750,0.869629,0.817136,0.982278,1.012828,0.432771,0.080646,0.000000,0.000000
Emissions|CO,Mt CO/yr,982.406000,908.387907,848.753317,750.117627,663.968306,580.469894,492.708924,402.665460,329.654768,265.665608,235.388401,228.090780,220.103319,214.661260,209.803917,206.035191,202.773095,199.839214,197.151913,194.373130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Emissions|Sulfur|Other,Mt SO2/yr,0.054000,0.048851,0.045815,0.042780,0.039745,0.036709,0.032756,0.028802,0.024848,0.021649,0.021068,0.020487,0.019906,0.019325,0.018746,0.018182,0.017655,0.017136,0.016617,0.016139
Emissions|CO2|Energy|Demand|Transportation|Freight,Mt CO2/yr,1716.584261,2002.737335,2017.421870,2030.490252,1934.791503,1851.342655,1909.702873,1677.096416,1438.903979,1264.704677,1037.520223,849.384607,590.825175,404.480280,154.984330,82.596538,49.330746,33.200711,25.397271,18.952986
Emissions|CO2|Energy|Demand|Transportation|Passenger,Mt CO2/yr,2546.582058,2767.549548,2848.611906,2496.178191,2365.313828,2280.375060,2108.108344,1838.523772,1613.871713,1362.466389,1082.936304,848.592596,519.415659,400.105888,170.058880,133.689002,79.497556,51.929565,43.913245,33.333680
Emissions|CO2|Energy|Demand|Other Sector,Mt CO2/yr,164.047254,182.724708,176.135488,809.151662,758.898579,790.712959,692.612168,569.585059,452.767177,354.442795,317.059982,306.723253,300.400174,287.739904,280.187689,278.265338,261.195111,238.868001,223.329291,205.015009


In [37]:
# NOTE(review): this cell repeats the preceding cell verbatim (same code,
# same execution count, same output); it looks like a leftover copy --
# confirm whether it can be removed.
cat = 'Emissions'
g = grp[cat]
df = pd.DataFrame(
    g['value'],
    index=pd.MultiIndex.from_tuples(zip(g['variable_id'], g['units_id'])),
    columns=g['year'],
).rename(map_var_id, level=0).rename(map_units_id, level=1)
df

Unnamed: 0,Unnamed: 1,2005,2010,2015,2020,2025,2030,2035,2040,2045,2050,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
Emissions|BC,Mt BC/yr,8.065985,8.876729,7.641252,6.342710,5.390290,4.411215,3.564201,2.550160,1.849921,1.276129,1.143114,1.053432,0.966429,0.884493,0.811315,0.750801,0.703484,0.657683,0.615944,0.574120
Emissions|CH4,Mt CH4/yr,359.240089,368.869178,372.663636,383.266607,297.564517,251.249534,199.836874,168.228838,149.758945,140.424796,136.524665,133.452676,129.243335,125.479958,122.141371,119.512598,116.134127,112.932110,110.610983,108.686114
Emissions|CH4|AFOLU,Mt CH4/yr,152.728256,151.130191,145.620904,156.442344,150.624185,145.705793,136.868327,130.531797,124.558657,119.118578,116.696483,114.300986,111.043731,107.986890,105.058342,103.033802,100.941728,98.721600,96.868592,95.373987
Emissions|CH4|Energy|Supply,Mt CH4/yr,129.970592,147.357825,154.609307,150.172120,79.147548,60.535408,22.469637,8.401068,4.164351,1.867729,0.973795,0.931750,0.869629,0.817136,0.982278,1.012828,0.432771,0.080646,0.000000,0.000000
Emissions|CO,Mt CO/yr,982.406000,908.387907,848.753317,750.117627,663.968306,580.469894,492.708924,402.665460,329.654768,265.665608,235.388401,228.090780,220.103319,214.661260,209.803917,206.035191,202.773095,199.839214,197.151913,194.373130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Emissions|Sulfur|Other,Mt SO2/yr,0.054000,0.048851,0.045815,0.042780,0.039745,0.036709,0.032756,0.028802,0.024848,0.021649,0.021068,0.020487,0.019906,0.019325,0.018746,0.018182,0.017655,0.017136,0.016617,0.016139
Emissions|CO2|Energy|Demand|Transportation|Freight,Mt CO2/yr,1716.584261,2002.737335,2017.421870,2030.490252,1934.791503,1851.342655,1909.702873,1677.096416,1438.903979,1264.704677,1037.520223,849.384607,590.825175,404.480280,154.984330,82.596538,49.330746,33.200711,25.397271,18.952986
Emissions|CO2|Energy|Demand|Transportation|Passenger,Mt CO2/yr,2546.582058,2767.549548,2848.611906,2496.178191,2365.313828,2280.375060,2108.108344,1838.523772,1613.871713,1362.466389,1082.936304,848.592596,519.415659,400.105888,170.058880,133.689002,79.497556,51.929565,43.913245,33.333680
Emissions|CO2|Energy|Demand|Other Sector,Mt CO2/yr,164.047254,182.724708,176.135488,809.151662,758.898579,790.712959,692.612168,569.585059,452.767177,354.442795,317.059982,306.723253,300.400174,287.739904,280.187689,278.265338,261.195111,238.868001,223.329291,205.015009


## Harmonization and infilling

In [153]:
# Lower-case index level names assigned to the frame that is handed to pyam below
id_vars = ['model', 'scenario', 'region', 'variable', 'unit']

In [157]:
cat = 'Emissions'

# Collect one wide frame per stored pathway, keyed by (model, scenario, region);
# rows are still identified by (variable id, units id) at this point
frames = {}
for v in ds.file['source/data'].values():
    key = (v.attrs['model'], v.attrs['scenario'], 'World')
    row_index = pd.MultiIndex.from_tuples(
        zip(v[f'{cat}/variable_id'], v[f'{cat}/units_id'])
    )
    frames[key] = pd.DataFrame(
        v[f'{cat}/value'],
        columns=v[f'{cat}/year'],
        index=row_index,
    )

# Stack the frames, name the five index levels and translate ids back to
# variable and unit names before wrapping the result in an IamDataFrame
input_df = pyam.IamDataFrame(
    pd.concat(frames)
    .rename_axis(id_vars)
    .rename(map_var_id, level='variable')
    .rename(map_units_id, level='unit')
)

In [158]:
# Pass the frame through climate-assessment's columns_to_basic helper before
# running the input checks.
# NOTE(review): the helper's exact effect is not visible in this notebook.
input_df = columns_to_basic(input_df)

In [159]:
# Location of the HDF5 output file; its directory and stem are reused below
# for the climate-assessment output files
path = pathlib.Path(ds.outpath)

In [160]:
# Directory passed as `outdir` and file-name key passed to the
# climate-assessment calls below
outdir = path.parent
key_string = path.stem

In [186]:
# Run climate-assessment's input checks on the selected pathways. The returned
# object `df` is what the harmonization loop below filters per model.
# Here `df` is rebound again (it previously held the retrieval example table).
df = perform_input_checks(
    input_df,
    output_csv_files=True,
    output_filename=key_string,
    lead_variable_check=True,
    outdir=outdir,
)

In [188]:
# Infiller database shipped with the climate-assessment checkout
infilling_database = '../climate-assessment/data/1652361598937-ar6_emissions_vetted_infillerdatabase_10.5281-zenodo.6390768.csv'

harmonize = True
prefix = 'AR6 climate diagnostics'
harmonization_instance = 'ar6'
output = []

# Workbook that is read back after every run. The name does not depend on the
# model, so it is built once outside the loop.
# NOTE(review): assumes harmonization_and_infilling writes
# '<key_string>_harmonized_infilled.xlsx' into `outdir` on each call -- confirm.
path_infilled = path.with_name(f'{path.stem}_harmonized_infilled.xlsx')

# Process one model at a time and collect the result of each run
for model in df.model:
    print(model)
    assessable = harmonization_and_infilling(
        df.filter(model=model),
        key_string,
        infilling_database,
        outdir=outdir,
        do_harmonization=harmonize,
        prefix=prefix,
        instance=harmonization_instance,
    )
    if not assessable:
        # Same exception type as before, now naming the model that failed
        raise RuntimeError(
            f'harmonization_and_infilling returned a falsy result for model {model!r}'
        )

    output.append(pyam.IamDataFrame(path_infilled))

AIM/CGE 2.2


Harmonisation: 1it [00:02,  2.49s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.73s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 22.90it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.13it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 10.75it/s]


COFFEE 1.1


Harmonisation: 1it [00:01,  1.88s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.78s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.11it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.70it/s]


GCAM 5.3


Harmonisation: 1it [00:02,  2.17s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.77s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.42it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.20it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.29it/s]


IMAGE 3.0


Harmonisation: 1it [00:02,  2.25s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.71s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.20it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.21it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00,  9.85it/s]


MESSAGEix-GLOBIOM 1.0


Harmonisation: 1it [00:02,  2.15s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.83s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.72it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:10<00:00,  1.09it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.10it/s]


MESSAGEix-GLOBIOM_GEI 1.0


Harmonisation: 1it [00:02,  2.22s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.73s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.69it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.16it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.06it/s]


REMIND-MAgPIE 2.1-4.2


Harmonisation: 1it [00:02,  2.15s/it]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.80s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.18it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.12it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.07it/s]


REMIND-MAgPIE 2.1-4.3


Harmonisation: 1it [00:00,  2.09it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.83s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.42it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:10<00:00,  1.10it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.61it/s]


WITCH 5.0


Harmonisation: 1it [00:00,  2.14it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 10/10 [00:17<00:00,  1.80s/it]
Filling required variables: 100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.52it/s]


Note that the lead variable ['Emissions|CO2|Energy and Industrial Processes'] goes negative.


Filling required variables: 100%|███████████████████████████████████████████████████████| 11/11 [00:09<00:00,  1.20it/s]
Filling required variables: 100%|███████████████████████████████████████████████████████| 29/29 [00:02<00:00, 11.73it/s]


In [192]:
# Combine the per-model results collected in `output` into a single IamDataFrame
dfout = pyam.concat(output)
dfout

<class 'pyam.core.IamDataFrame'>
Index:
 * model    : AIM/CGE 2.2, COFFEE 1.1, GCAM 5.3, IMAGE 3.0, ... WITCH 5.0 (9)
 * scenario : CO_Bridge, DeepElec_SSP2_ HighRE_Budg900, ... SusDev_SDP-PkBudg1000 (9)
Timeseries data coordinates:
   region   : World (1)
   variable : AR6 climate diagnostics|Harmonized|Emissions|BC, ... (80)
   unit     : Mt BC/yr, Mt CH4/yr, Mt CO/yr, Mt CO2/yr, Mt NH3/yr, ... kt cC4F8/yr (53)
   year     : 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, ... 2100 (86)
Meta indicators:
   assessment-tools (object) ... (1)

In [213]:
# Store the variable and unit names of the harmonized/infilled data next to
# the source reference tables. The position of a name in these arrays is used
# as its id further below.
g = ds.file['source/reference']
for name, labels in [
    ('var_name_infilled', dfout.variable),
    ('units_infilled', dfout.unit),
]:
    # create_dataset() fails when the name already exists; drop a dataset left
    # over from an earlier execution so the cell can be re-run
    if name in g:
        del g[name]
    g.create_dataset(name, data=labels)

g['units_infilled']

<HDF5 dataset "units_infilled": shape (53,), type "|O">

In [214]:
# Names of the datasets now present in the reference group
list(g)

['units',
 'units_id',
 'units_infilled',
 'var_id',
 'var_name',
 'var_name_infilled']

In [216]:
# Name -> id lookups for the infilled data; the id of a name is its position
# in the corresponding stored array
map_var_infilled = {
    name: pos for pos, name in enumerate(g['var_name_infilled'].asstr())
}
map_units_infilled = {
    name: pos for pos, name in enumerate(g['units_infilled'].asstr())
}

In [241]:
# Write the harmonized/infilled emissions of each pathway into an
# 'Infilled Emissions' group beside the source categories
for v in ds.file['source/data'].values():
    # Wide table for this pathway with variable and unit names replaced by
    # their positions in the '*_infilled' reference arrays
    df1 = (
        dfout
        .filter(model=v.attrs['model'], scenario=v.attrs['scenario'])
        .timeseries()
        .droplevel(['model', 'scenario', 'region'])
        .rename(map_var_infilled, level='variable')
        .rename(map_units_infilled, level='unit')
    )
    # create_group() fails on an existing name; remove the result of an
    # earlier execution so the cell can be re-run
    if 'Infilled Emissions' in v:
        del v['Infilled Emissions']
    g = v.create_group('Infilled Emissions')
    g.create_dataset('value', data=df1.values)
    g.create_dataset('variable_id', data=df1.index.get_level_values('variable'))
    g.create_dataset('units_id', data=df1.index.get_level_values('unit'))
    g.create_dataset('year', data=df1.columns)

In [242]:
# Close the output file (the log line below confirms it)
ds.close()

[2025-04-18 09:09:31 mce.core] INFO:file datain/ds_ar6db_imp.h5 closed
