In [1]:
import yaml
import pandas as pd
import numpy as np
from message_ix.utils import make_df
from message_ix.models import MESSAGE_ITEMS
from collections.abc import Mapping
from itertools import repeat

<IPython.core.display.Javascript object>

In [2]:
# Model sets used when building the parameter tables.
years = list(range(700, 730, 10))
regions = ['Westeros', 'Westerlands']
modes = ['standard']
emissions = ['CO2', 'CH4']
time = ['year']

# Technology definitions; the 'model_data' entry is read separately below
# and is skipped wherever the technologies are iterated.
with open('tech_data_02.yaml', 'r') as yaml_file:
    tech_data = yaml.safe_load(yaml_file)

# None when the YAML file does not define 'first_active_year'.
first_active_year = tech_data['model_data'].get('first_active_year')

In [3]:
# Map each vintage year (from the first active year onward) to the model
# years in which that vintage can be active: itself and every later year.
years_dict = {
    yv: [ya for ya in years if ya >= yv]
    for yv in years
    if yv >= first_active_year
}

# Row-aligned flat lists with one entry per valid (vintage, activity) pair.
# `vtg` repeats each vintage year once per activity year it maps to.
vtg = [yv for yv, active_years in years_dict.items() for _ in active_years]
# `act` holds the activity year of each pair (it must not be a second copy
# of the vintage years).
act = [ya for active_years in years_dict.values() for ya in active_years]

years_vtg_act = pd.DataFrame({'year_vtg': vtg, 'year_act': act})

years_vtg_act

Unnamed: 0,year_vtg,year_act
0,700,700
1,700,710
2,700,720
3,710,710
4,710,720
5,720,720


In [4]:
# Index (dimension) names of every parameter that appears under at least one
# technology in `tech_data`.
#
# The loaded mappings are iterated directly rather than through set(): the
# iteration order of a set of strings changes between interpreter sessions
# (hash randomization), which made the key order of `parameters` and `data`
# differ from run to run.
parameters = {}
for tech, par_dict in tech_data.items():
    if tech == 'model_data':
        continue
    for par in par_dict:
        parameters[par] = list(MESSAGE_ITEMS[par]['idx_names'])

# One (initially empty) list of DataFrames per parameter.
data = {par: [] for par in parameters}

data

{'capacity_factor': [],
 'var_cost': [],
 'technical_lifetime': [],
 'inv_cost': [],
 'growth_new_capacity_up': [],
 'fix_cost': [],
 'emission_factor': [],
 'initial_new_capacity_up': []}

In [5]:
# Show the index names of the parameters of interest, grouped by how many
# dimensions they carry; a blank line separates consecutive groups.
idx_name_groups = (
    ('technical_lifetime', 'growth_new_capacity_up', 'initial_new_capacity_up', 'inv_cost'),
    ('fix_cost', 'capacity_factor'),
    ('var_cost', 'emission_factor'),
    ('input',),
)
for group_no, par_names in enumerate(idx_name_groups):
    if group_no > 0:
        print('')
    for par_name in par_names:
        print(MESSAGE_ITEMS[par_name]['idx_names'], '-- ' + par_name)


('node_loc', 'technology', 'year_vtg') -- technical_lifetime
('node_loc', 'technology', 'year_vtg') -- growth_new_capacity_up
('node_loc', 'technology', 'year_vtg') -- initial_new_capacity_up
('node_loc', 'technology', 'year_vtg') -- inv_cost

('node_loc', 'technology', 'year_vtg', 'year_act') -- fix_cost
('node_loc', 'technology', 'year_vtg', 'year_act', 'time') -- capacity_factor

('node_loc', 'technology', 'year_vtg', 'year_act', 'mode', 'time') -- var_cost
('node_loc', 'technology', 'year_vtg', 'year_act', 'mode', 'emission') -- emission_factor

('node_loc', 'technology', 'year_vtg', 'year_act', 'mode', 'node_origin', 'commodity', 'level', 'time', 'time_origin') -- input


In [6]:
# Basic DataFrame
# Build one DataFrame per parameter from the per-technology entries in
# `tech_data`, filling the year columns (and, where present, the emission
# column). The remaining index columns are not filled in this cell.
group1 = ['inv_cost','technical_lifetime','growth_new_capacity_up','initial_new_capacity_up']
group2 = ['fix_cost','capacity_factor']

# Year columns for parameters indexed by vintage only ...
vtg_only_years = {'year_vtg': sorted(set(years_vtg_act['year_vtg']))}
# ... and for parameters indexed by (vintage, activity) pairs.
vtg_act_years = {'year_vtg': years_vtg_act['year_vtg'],
                 'year_act': years_vtg_act['year_act']}

# Frames are collected in a fresh dict so the cell can be re-run without
# first re-running the cell that initialises `data`.
frames = {par: [] for par in parameters}
for tech, par_dict in tech_data.items():
    if tech == 'model_data':
        continue
    for par, par_data in par_dict.items():
        print(par)
        # A bare value in the YAML file is shorthand for a unit-less entry.
        if not isinstance(par_data, Mapping):
            par_data = {'value': par_data, 'unit': '-'}

        # Choose the year columns from the parameter's own dimensions. The
        # selection is made for every parameter, so none of them silently
        # reuses the year columns picked for the previous parameter.
        dims = parameters[par]
        if 'year_act' in dims:
            kwargs = vtg_act_years
        elif 'year_vtg' in dims:
            kwargs = vtg_only_years
        else:
            kwargs = {}

        base = make_df(
            par,
            technology=tech,
            value=par_data['value'],
            unit=par_data.get('unit', '-'),
            **kwargs,
        )

        if 'emission' in dims:
            # Dimensional expansion: one copy of this technology's frame per
            # emission species. Only the new frame is expanded; frames that
            # were collected for other technologies are left untouched.
            frames[par].extend(base.assign(emission=e) for e in emissions)
        else:
            frames[par].append(base)
        # TODO: expand 'mode' and 'time' the same way as 'emission'
        #       (look the values up in 'model_data'), and do the same for
        #       the extra dimensions of input and output.

# ignore_index gives every combined frame unique row labels 0..n-1.
data = {par: pd.concat(par_frames, ignore_index=True)
        for par, par_frames in frames.items()}

data

inv_cost
fix_cost
var_cost
emission_factor
capacity_factor
technical_lifetime
growth_new_capacity_up
initial_new_capacity_up


{'capacity_factor':   node_loc technology  year_vtg  year_act  time  value unit
 0     None      DACCS       700       700  None      1    -
 1     None      DACCS       700       710  None      1    -
 2     None      DACCS       700       720  None      1    -
 3     None      DACCS       710       710  None      1    -
 4     None      DACCS       710       720  None      1    -
 5     None      DACCS       720       720  None      1    -,
 'var_cost':   node_loc technology  year_vtg  year_act  mode  time  value   unit
 0     None      DACCS       700       700  None  None    100  $/kWa
 1     None      DACCS       700       710  None  None    100  $/kWa
 2     None      DACCS       700       720  None  None    100  $/kWa
 3     None      DACCS       710       710  None  None    100  $/kWa
 4     None      DACCS       710       720  None  None    100  $/kWa
 5     None      DACCS       720       720  None  None    100  $/kWa,
 'technical_lifetime':   node_loc technology  year_vtg  v

In [7]:
# Expanded DataFrame
# For every parameter in group1/group2, create one copy of each technology's
# rows per region and scale `value` by
#     regional factor
#     * (1 + vintage rate)  ** (year_vtg - first_active_year)
#     * (1 + activity rate) ** (year_act - year_vtg)
# The factors are read from tech_data['model_data'][<technology>][<parameter>];
# a missing entry means "no adjustment" (factor 1 / rate 0).
# NOTE(review): assumes the keys of 'model_data' other than
# 'first_active_year' are technology names -- confirm against the YAML file.
expandable = group1 + group2
model_data = tech_data['model_data']

data_expand = {par: [] for par in parameters}
for par in parameters:
    if par not in expandable:
        continue
    par_df = data[par]
    for tech in par_df['technology'].unique():
        # Apply a technology's modifiers to that technology's rows only.
        tech_df = par_df[par_df['technology'] == tech]
        par_diffs = (model_data.get(tech) or {}).get(par) or {}
        rate_vtg = (par_diffs.get('year_vtg') or {}).get('rate', 0)
        rate_act = (par_diffs.get('year_act') or {}).get('rate', 0)
        reg_factors = par_diffs.get('node_loc') or {}

        # Positional arrays, so the arithmetic does not depend on row labels.
        year_vtg = tech_df['year_vtg'].to_numpy()
        # Without a year_act dimension the activity year is taken equal to
        # the vintage year, so the exponent is 0 and the factor is 1.
        if 'year_act' in tech_df.columns:
            year_act = tech_df['year_act'].to_numpy()
        else:
            year_act = year_vtg
        vtg_factor = (1 + rate_vtg) ** (year_vtg - first_active_year)
        act_factor = (1 + rate_act) ** (year_act - year_vtg)

        for reg in regions:
            multiplier = reg_factors.get(reg, 1) * vtg_factor * act_factor
            value = (tech_df['value'] * multiplier).to_numpy()
            data_expand[par].append(
                tech_df.assign(node_loc=reg, value=value)
            )

data_expand

{'capacity_factor': [   node_loc technology  year_vtg  year_act  time  value unit
  0  Westeros      DACCS       700       700  None      1    -
  1  Westeros      DACCS       700       710  None      1    -
  2  Westeros      DACCS       700       720  None      1    -
  3  Westeros      DACCS       710       710  None      1    -
  4  Westeros      DACCS       710       720  None      1    -
  5  Westeros      DACCS       720       720  None      1    -,
        node_loc technology  year_vtg  year_act  time  value unit
  0  Westerlands      DACCS       700       700  None      1    -
  1  Westerlands      DACCS       700       710  None      1    -
  2  Westerlands      DACCS       700       720  None      1    -
  3  Westerlands      DACCS       710       710  None      1    -
  4  Westerlands      DACCS       710       720  None      1    -
  5  Westerlands      DACCS       720       720  None      1    -],
 'var_cost': [],
 'technical_lifetime': [   node_loc technology  year_vtg  

In [16]:
# Spot check: activity year stored under row label 3 of the fix_cost table.
fix_cost_frame = data['fix_cost']
fix_cost_frame['year_act'].get(3)

710

In [None]:
# Collapse the per-region frames of each group1/group2 parameter into a
# single table per parameter.
expanded_frames = {}
for par, par_frames in data_expand.items():
    if par not in group2 + group1:
        continue
    if isinstance(par_frames, pd.DataFrame):
        # Already combined (this cell was run before): keep the table as is.
        expanded_frames[par] = par_frames
    elif par_frames:
        expanded_frames[par] = pd.concat(par_frames, ignore_index=True)
    # Parameters without any frame are left out; pd.concat raises ValueError
    # when given an empty list.
data_expand = expanded_frames

In [None]:
# Export every expanded parameter table to its own worksheet, named after the
# parameter, without writing the row index.
with pd.ExcelWriter('tech_data.xlsx', engine='xlsxwriter', mode='w') as workbook:
    for par_name in data_expand:
        data_expand[par_name].to_excel(workbook, sheet_name=par_name, index=False)