In [14]:
import pandas as pd

In [23]:
# Load data. The first two CSV columns form the MultiIndex (one of its levels is
# named 'spatial_unit'). squeeze('columns') collapses a single value column into
# a Series and leaves the row axis alone, so `data` keeps its index even when the
# file holds only one row (a bare .squeeze() would return a scalar in that case).
data = pd.read_csv('sector_structure_by_work_SWE_2019.csv', index_col=[0, 1]).squeeze('columns')

# Define desired format: one row per (spatial unit, single-letter activity A..T)
# with two still-empty columns, 'abs' (absolute values) and 'rel' (share within
# the spatial unit).
spatial_units = data.index.get_level_values('spatial_unit').unique()
economic_activities = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
                       'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T']
iterables = [spatial_units, economic_activities]
out = pd.DataFrame(index=pd.MultiIndex.from_product(iterables, names=data.index.names),
                   columns=['abs', 'rel'])

# Define conversion factors for aggregated sectors
# computed in Excel using the total number of employees at the NACE 64 level
# available in `~/data/interim/eco/national_accounts/SWE/other_accounts_SWE.csv`
# Each key joins the letters of the sectors in the aggregate with '_'; the list
# holds one share per letter, in the same order, and every list sums to one.
conversion_factors = {'B_C': [0.024, 0.976], 'D_E': [0.478, 0.522], 'M_N': [0.557, 0.443],
                      'R_S_T': [0.447, 0.536, 0.017]}

In [25]:
# Label under which each aggregated group of sectors appears in `data`.
# The R/S/T group is reported together with U; its three shares sum to one,
# so nothing is attributed to U.
aggregate_labels = {'B_C': 'B, C', 'D_E': 'D, E', 'M_N': 'M, N', 'R_S_T': 'R, S, T, U'}

# sector -> (aggregate label in `data`, share of that aggregate).
# Built from `conversion_factors` so the shares are written down in one place
# only instead of being re-typed for every sector.
split_rules = {}
for group, shares in conversion_factors.items():
    members = group.split('_')
    assert len(members) == len(shares), f'{group}: expected one share per sector'
    for member, share in zip(members, shares):
        split_rules[member] = (aggregate_labels[group], share)

# Fill the 'abs' column sector by sector.
# NOTE: `.values` drops the index, so the assignment is positional; it relies on
# `data` listing the spatial units in the same order as `out` does.
for sector in economic_activities:
    rows = (slice(None), sector)
    if sector in split_rules:
        # Sector only exists inside an aggregate: take its share of the aggregate.
        source, share = split_rules[sector]
        out.loc[rows, 'abs'] = share * data.loc[slice(None), source].values
    else:
        # Sector is reported on its own: copy what we have.
        out.loc[rows, 'abs'] = data.loc[slice(None), sector].values

In [29]:
# Turn the absolute values into shares: within every spatial unit, divide each
# sector's 'abs' value by the unit's total so that 'rel' sums to one per unit.
for unit in spatial_units:
    unit_abs = out.loc[unit, 'abs']
    unit_total = unit_abs.sum()
    out.loc[unit, 'rel'] = unit_abs.values / unit_total

In [31]:
# Write the disaggregated table (columns 'abs' and 'rel') to disk.
output_path = 'sector_structure_by_work_NACE_21_SWE_2019.csv'
out.to_csv(output_path)