<div class="alert alert-block alert-info">
This notebook imports the WIOD world input-output table for exploration only; it does not export any outputs.<br>  
    <hr> 
    Note: <br>
    <i><b>Not a part of the pipeline</b></i>
</div>

# Imports

In [None]:
%config IPCompleter.use_jedi = False # disable jedi autocompleter (https://stackoverflow.com/a/65734178/14485040)

import project_path  # makes possible the access to `src` directory using relative path
from src.utils import explore_dir
from src.utils import read_excel_to_pandas as r_excel

%run init_nb.ipynb

# INPUTS: Identify file(s) and read data to df

In [None]:
# Explore the directory to find the file(s)

# --------------- WorldInputOutputTable (WIOT) file ----------------
# `explore_dir` (from src.utils) is asked for the "xlsb" files under the WIOD
# data folder; its two return values are kept as `inputs_dir_1` and
# `files_list_1` (presumably the searched directory and the files found —
# confirm against the helper).
# NOTE(review): the path is relative and written with Windows separators;
# verify it resolves when the notebook is run from another OS or directory.
inputs_dir_1, files_list_1 = explore_dir(
    path_to_dir=r"..\data\external\WIOD",
    file_extension="xlsb",
    print_files_list=True,
)

# report the directory value and the number of entries in the returned list
print("\nnº of found files in", inputs_dir_1, ": ", len(files_list_1), "\n")

In [None]:
%%time
# Process raw data

# IO data for a specific year
# Sheet "2014" of the WIOT workbook is read through the project's
# `read_excel_to_pandas` helper (imported as `r_excel`) with the pyxlsb engine.
# Four index columns and four header rows are requested, so both axes are
# presumably 4-level MultiIndexes — this assumes the helper forwards these
# options to pandas; TODO confirm.
df_IO_raw = r_excel(
    inputs_dir_1,
    "WIOT2014_Nov16_ROW.xlsb",
    sheets="2014",
    engine="pyxlsb",
    index_col=[0, 1, 2, 3],
    header=[2, 3, 4, 5],
#     skiprows = [0, 1]
)
# one-line summary of the loaded table's shape, padded with dots
print("df of WIOT (raw) ".ljust(40, "."), f" {df_IO_raw.shape}".rjust(13, "."))
# preview of the first five rows (displayed as the cell output)
df_IO_raw.head(5)

# Operations

 - aggregate WIOD by sector for one GLO country (aggregate rows and columns)
 - put rows/columns in order (e.g. r9 comes before r10)
 - separate WIOD into T, Y, X, VA matrices
 - perform operations

In [None]:
# Blank out the level names on both axes of the raw table, then take a working copy.
# The names are cleared on `df_IO_raw` itself, so the copy starts without them too.
for _axis in (df_IO_raw.columns, df_IO_raw.index):
    _axis.names = [None] * len(_axis.names)
df_io = df_IO_raw.copy()

In [None]:
# One record per column of the IO table; the four header levels become four fields.
_col_fields = ["col_code", "sector", "country", "col_num"]
io_cols = pd.DataFrame(data=df_io.columns.to_list(), columns=_col_fields)
io_cols

In [None]:
# One record per row of the IO table; the four index levels become four fields.
_row_fields = ["row_code", "sector", "country", "row_num"]
io_rows = pd.DataFrame(data=df_io.index.to_list(), columns=_row_fields)
io_rows

In [None]:
# Lookup: column number -> [column code, sector name].
# Iterating a groupby yields (key, sub-frame) pairs; only the key tuples are used.
# When a column number occurs under several keys, the last one iterated wins.
map_col_num_to_code = {
    num: [code, sector]
    for (code, sector, num), _ in io_cols.groupby(["col_code", "sector", "col_num"])
}
# map_col_num_to_code

In [None]:
# Tabulate the column lookup: one row per column number, holding its code and name.
# `index` is a flat list of labels; a nested list would make pandas build a
# one-level MultiIndex, which would then become the column labels after `.T`.
df_col_num_to_code = pd.DataFrame(
    map_col_num_to_code, index=["col_code", "col_name"]
).T
# order the rows by the integer that follows the "c" prefix (so c9 precedes c10)
df_col_num_to_code = df_col_num_to_code.loc[
    sorted(df_col_num_to_code.index, key=lambda label: int(label.partition("c")[2]))
]
df_col_num_to_code.tail(10)

In [None]:
# Lookup: row number -> [row code, sector name].
# Iterating a groupby yields (key, sub-frame) pairs; only the key tuples are used.
# When a row number occurs under several keys, the last one iterated wins.
map_row_num_to_code = {
    num: [code, sector]
    for (code, sector, num), _ in io_rows.groupby(["row_code", "sector", "row_num"])
}
# map_row_num_to_code

In [None]:
# Tabulate the row lookup: one row per row number, holding its code and name.
# `index` is a flat list of labels; a nested list would make pandas build a
# one-level MultiIndex, which would then become the column labels after `.T`.
df_row_num_to_code = pd.DataFrame(
    map_row_num_to_code, index=["row_code", "row_name"]
).T
# order the rows by the integer that follows the "r" prefix (so r9 precedes r10)
df_row_num_to_code = df_row_num_to_code.loc[
    sorted(df_row_num_to_code.index, key=lambda label: int(label.partition("r")[2]))
]
df_row_num_to_code.tail(10)

In [None]:
# io_rows[~io_rows.row_code.isin(io_cols.col_code)]

In [None]:
# io_cols[~io_cols.col_code.isin(io_rows.row_code)]
# pd.DataFrame(io_cols[~io_cols.col_code.isin(io_rows.row_code)].groupby(by="col_code"))

In [None]:
# number of distinct row numbers collected in the row lookup
len(map_row_num_to_code)

In [None]:
# number of distinct column numbers collected in the column lookup
len(map_col_num_to_code)

In [None]:
# Aggregate columns by col_num and rows by row_num
# This will create a GLO IO table industry-by-industry
# `DataFrame.groupby(axis=1)` is deprecated in recent pandas, so the column-wise
# aggregation is done on the transposed frame, which is then transposed back.
df_io_GLO_raw = (
    df_io.T.groupby(level=3).sum()  # sum the columns that share a col_num
    .T.groupby(level=3).sum()  # then sum the rows that share a row_num
)

# sort columns/rows by col_num/row_num in ascending order
# (numeric sort on the part after the "r"/"c" prefix, so r9 comes before r10)
df_io_GLO_raw = df_io_GLO_raw.loc[
    sorted(df_io_GLO_raw.index.to_list(), key=lambda x: int(x.partition("r")[2])),
    sorted(df_io_GLO_raw.columns.to_list(), key=lambda x: int(x.partition("c")[2])),
]
# df_io_GLO_raw

# aggregated IO table with names of sectors
df_io_GLO_name = df_io_GLO_raw.rename(
    index={k: v[1] for k, v in map_row_num_to_code.items()},
    columns={k: v[1] for k, v in map_col_num_to_code.items()},
)

# aggregated IO table with codes of sectors
df_io_GLO_code = df_io_GLO_raw.rename(
    index={k: v[0] for k, v in map_row_num_to_code.items()},
    columns={k: v[0] for k, v in map_col_num_to_code.items()},
)

In [None]:
# matrix T of domestic transactions
# keep every row except the listed ones (which include the value-added row r70)
# and every column except the final-demand columns c57-c61 and the output column c62
_keep_rows = ~df_io_GLO_raw.index.isin(
    ["r65", "r66", "r67", "r68", "r69", "r70", "r71", "r73"]
)
_keep_cols = ~df_io_GLO_raw.columns.isin(["c57", "c58", "c59", "c60", "c61", "c62"])
df_T = df_io_GLO_raw.loc[_keep_rows, _keep_cols]

In [None]:
# matrix Y of Final Demands
# same row selection as for T; columns c57-c61 only ("c62" is deliberately left out)
_keep_rows = ~df_io_GLO_raw.index.isin(
    ["r65", "r66", "r67", "r68", "r69", "r70", "r71", "r73"]
)
_fd_cols = df_io_GLO_raw.columns.isin(["c57", "c58", "c59", "c60", "c61"])
df_FDs = df_io_GLO_raw.loc[_keep_rows, _fd_cols]

# collapse all final-demand columns into a single column vector named "Y"
df_Y_temp = df_FDs.sum(axis=1)
df_Y = pd.DataFrame(df_Y_temp, columns=["Y"])

In [None]:
# matrix X of gross output
# same row selection as for T; the single column c62
_keep_rows = ~df_io_GLO_raw.index.isin(
    ["r65", "r66", "r67", "r68", "r69", "r70", "r71", "r73"]
)
_output_col = df_io_GLO_raw.columns.isin(["c62"])
df_X = df_io_GLO_raw.loc[_keep_rows, _output_col]
# df_X

In [None]:
# vector VA of value added
# the single row r70, restricted to the same columns as T
_va_row = df_io_GLO_raw.index.isin(["r70"])
_keep_cols = ~df_io_GLO_raw.columns.isin(["c57", "c58", "c59", "c60", "c61", "c62"])
df_VA = df_io_GLO_raw.loc[_va_row, _keep_cols]
# df_VA

In [None]:
# total GLO value added at basic prices
# (sum across the columns of df_VA; the result has one entry per row of df_VA)
totGVA = df_VA.sum(axis="columns")

In [None]:
# share allocated to a sector : VAsector/totVA
# `totGVA` is labelled by the value-added row, not by integers, so its first
# entry is read by position with `.iloc` (integer keys on a labelled Series
# rely on a deprecated positional fallback).
df_share_sectors = df_VA.div(totGVA.iloc[0])

In [None]:
# map_col_num_to_code["c11"]

In [None]:
# code and name recorded for column number c11
df_col_num_to_code.loc["c11"]

In [None]:
# candidate sectors by column number, c10 .. c13  (sectors to include?)
list_sectors = [f"c{num}" for num in range(10, 14)]
df_col_num_to_code.loc[list_sectors]

In [None]:
# the same candidate sectors by row number, r10 .. r13  (sectors to include?)
list_sectors_rows = [f"r{num}" for num in range(10, 14)]
df_row_num_to_code.loc[list_sectors_rows]

## Gonzalo's calculations

In [None]:
# df_T[list_sectors].sum(axis=1)

In [None]:
# For every supplying sector: what it delivers to sector c11 divided by its own
# gross output (the first column of df_X).
_to_c11 = df_T["c11"].to_numpy()
_gross_output = df_X.iloc[:, 0].to_numpy()
frac_spr_to_s = pd.DataFrame(
    _to_c11 / _gross_output,
    columns=["s_out_per_X"],
    index=df_T.index,
)
# frac_spr_to_s

# weight each sector's direct share by that fraction (element-wise, as a single row)
delta_share_spr_to_s = pd.DataFrame(
    data=df_share_sectors.to_numpy() * frac_spr_to_s.to_numpy().T,
    columns=df_share_sectors.columns,
    index=["delta_share"],
)
delta_share_spr_to_s["c11"] = 0 # sector doesn't give a share to itself
delta_share_spr_to_s

In [None]:
# Direct share of c11 plus the share passed on to it by the other sectors.
# Both operands are single-entry Series with string labels, so the scalar is
# taken by position with `.iloc[0]` rather than the deprecated integer key `[0]`.
_direct_share_c11 = df_share_sectors["c11"].iloc[0]
_delta_share_c11 = delta_share_spr_to_s.sum(axis=1).iloc[0]
print("direct share =", _direct_share_c11)
print("deltashare = ", _delta_share_c11)
share_c11 = df_share_sectors["c11"] + _delta_share_c11
share_c11

#### aggregated sectors c10-13

In [None]:
# df_io_GLO_raw.loc[list_sectors_rows].sum(axis=0)

In [None]:
# Merge the sectors in list_sectors / list_sectors_rows of the GLO table into one.
df_io_GLO_raw_aggsectors = df_io_GLO_raw.copy()

# The merged column is added first and the merged row second, so the new row
# also contains the summed value for the merged column.
df_io_GLO_raw_aggsectors["c10-13"] = df_io_GLO_raw_aggsectors[list_sectors].sum(axis=1)
df_io_GLO_raw_aggsectors.loc["r10-13"] = df_io_GLO_raw_aggsectors.loc[
    list_sectors_rows
].sum(axis=0)

# Remove the individual sectors, then relabel the merged one as c10 / r10 so the
# numeric sort below can parse its label.
df_io_GLO_raw_aggsectors = df_io_GLO_raw_aggsectors.drop(
    index=list_sectors_rows, columns=list_sectors
).rename(index={"r10-13": "r10"}, columns={"c10-13": "c10"})

# restore ascending numeric order on both axes
_row_order = sorted(
    df_io_GLO_raw_aggsectors.index.to_list(), key=lambda x: int(x.partition("r")[2])
)
_col_order = sorted(
    df_io_GLO_raw_aggsectors.columns.to_list(), key=lambda x: int(x.partition("c")[2])
)
df_io_GLO_raw_aggsectors = df_io_GLO_raw_aggsectors.loc[_row_order, _col_order]
# df_io_GLO_raw_aggsectors

In [None]:
# transactions matrix T of the table with merged sectors:
# drop the listed rows and the final-demand / output columns
_keep_rows = ~df_io_GLO_raw_aggsectors.index.isin(
    ["r65", "r66", "r67", "r68", "r69", "r70", "r71", "r73"]
)
_keep_cols = ~df_io_GLO_raw_aggsectors.columns.isin(
    ["c57", "c58", "c59", "c60", "c61", "c62"]
)
df_T_aggsectors = df_io_GLO_raw_aggsectors.loc[_keep_rows, _keep_cols]
# df_T_aggsectors

In [None]:
# check...
# gross output of the rows in list_sectors_rows added together
# (for comparison with the merged sector's output)
df_X.loc[list_sectors_rows].sum(axis=0)

In [None]:
# gross output X of the table with merged sectors: same rows as T, column c62 only
_keep_rows = ~df_io_GLO_raw_aggsectors.index.isin(
    ["r65", "r66", "r67", "r68", "r69", "r70", "r71", "r73"]
)
_output_col = df_io_GLO_raw_aggsectors.columns.isin(["c62"])
df_X_aggsectors = df_io_GLO_raw_aggsectors.loc[_keep_rows, _output_col]
# df_X_aggsectors #.loc["r10"]

In [None]:
# check...
# value added of the columns in list_sectors added together
# (for comparison with the merged sector's value added)
df_VA[list_sectors].sum(axis=1)

In [None]:
# vector VA of value added (table with merged sectors)
# the single row r70, restricted to the same columns as T
_va_row = df_io_GLO_raw_aggsectors.index.isin(["r70"])
_keep_cols = ~df_io_GLO_raw_aggsectors.columns.isin(
    ["c57", "c58", "c59", "c60", "c61", "c62"]
)
df_VA_aggsectors = df_io_GLO_raw_aggsectors.loc[_va_row, _keep_cols]
# df_VA_aggsectors # ["c10"]

In [None]:
# combined direct share of the sectors in list_sectors
df_share_sectors[list_sectors].sum(axis=1)

In [None]:
# total value added and per-sector shares for the table with merged sectors
totGVA_aggsectors = df_VA_aggsectors.sum(axis=1)
# the total is a single-entry Series labelled by the value-added row, so it is
# read by position with `.iloc[0]` instead of the deprecated integer key `[0]`
df_share_sectors_aggsectors = df_VA_aggsectors.div(totGVA_aggsectors.iloc[0])
# df_share_sectors_aggsectors # ["c10"]

In [None]:
# For every supplying sector: what it delivers to the merged sector c10 (c10-13)
# divided by its own gross output (the first column of df_X_aggsectors).
_to_merged = df_T_aggsectors["c10"].to_numpy()
_gross_output_agg = df_X_aggsectors.iloc[:, 0].to_numpy()
frac_spr_to_s_aggsectors = pd.DataFrame(
    data=_to_merged / _gross_output_agg.T,
    columns=["s_out_per_X"],
    index=df_T_aggsectors.index,
)
# frac_spr_to_s_aggsectors

# weight each sector's direct share by that fraction (element-wise, as a single row)
delta_share_spr_to_s_aggsectors = pd.DataFrame(
    data=df_share_sectors_aggsectors.to_numpy() * frac_spr_to_s_aggsectors.to_numpy().T,
    columns=df_share_sectors_aggsectors.columns,
    index=["delta_share"],
)
delta_share_spr_to_s_aggsectors["c10"] = 0  # sector doesn't give a share to itself
delta_share_spr_to_s_aggsectors

In [None]:
# Direct share of the merged sector c10 plus the share passed on to it by the
# other sectors. Both operands are single-entry Series with string labels, so the
# scalar is taken by position with `.iloc[0]` rather than the deprecated key `[0]`.
_direct_share_agg = df_share_sectors_aggsectors["c10"].iloc[0]
_delta_share_agg = delta_share_spr_to_s_aggsectors.sum(axis=1).iloc[0]
print("direct share =", _direct_share_agg)
print("deltashare = ", _delta_share_agg)
share_aggsectors = df_share_sectors_aggsectors["c10"] + _delta_share_agg
share_aggsectors

## Calculations using L inverse

In [None]:
# check that T + Y = X
# row totals of T plus final demand, divided by gross output, element by element
# (a ratio of 1 means the row balances)
_row_total_T = df_T.sum(axis=1)
_row_total_Y = df_Y.sum(axis=1)
a = _row_total_T + _row_total_Y
a.to_numpy() / df_X.to_numpy().T[0]

In [None]:
# diagonalize X
_gross_output = df_X.to_numpy().T[0]
diagX_raw = np.diag(_gross_output)
# Zero outputs are replaced by a tiny non-zero value *before* building the
# diagonal matrix, so the matrix can be inverted while every off-diagonal entry
# stays exactly zero. Patching the full matrix instead would fill all
# off-diagonal zeros too, and make it singular once two outputs are zero.
diagX = np.diag(np.where(_gross_output == 0, 1e-100, _gross_output))

In [None]:
# calculate technical coefficients A = T @ inv(diagX)
_inv_diagX = np.linalg.inv(diagX)
_A_values = df_T.to_numpy() @ _inv_diagX
df_A = pd.DataFrame(_A_values, columns=df_T.columns, index=df_T.index)

In [None]:
# calculate Leontief's inverse matrix, L = inv(I - A)
_n_sectors = df_A.shape[0]
IminusA = np.eye(_n_sectors) - df_A
df_L = pd.DataFrame(
    data=np.linalg.inv(IminusA.to_numpy()),
    columns=df_A.columns,
    index=df_A.index,
)
# df_L

In [None]:
# alternative method!

# share intensities : df_share_sectors / X
# (first row of the shares divided, position by position, by the first column of X)
_direct_shares = df_share_sectors.iloc[0, :].to_numpy()
_gross_output = df_X.iloc[:, 0].to_numpy()
share_intensity = pd.DataFrame(
    data=_direct_shares / _gross_output,
    columns=["share_intensity"],
    index=df_share_sectors.columns,
).T
share_intensity

In [None]:
# check, production based intensities @ gross output should be equal to the total share (which is 1)
# (the single row of intensities times the single gross-output column gives one value)
share_intensity.to_numpy() @ df_X.to_numpy()
# this has to be equal to the shares consumption based (below)

In [None]:
# calculate the share (consumption based) : share_intensity * L * df_Y
# share_consumption_based = share_intensity.to_numpy() @ df_L.to_numpy() @ df_Y.to_numpy()
# `@` and `*` bind equally and evaluate left to right: the matrix product comes
# first, and the result is then multiplied element-wise by the transposed Y,
# which keeps one value per sector instead of a single total.
_intensity_times_L = share_intensity.to_numpy() @ df_L.to_numpy()
share_consumption_based = pd.DataFrame(
    data=_intensity_times_L * df_Y.to_numpy().T,
    columns=share_intensity.columns,
    index=["share_cons_base"],
)
share_consumption_based #.sum(axis=1)

In [None]:
# consumption-based share of sector c11
share_consumption_based["c11"]

In [None]:
# combined consumption-based share of the sectors in list_sectors
# (the row sum is a single-entry Series labelled "share_cons_base", so the scalar
# is read by position with `.iloc[0]` rather than the deprecated integer key `[0]`)
print("share of sectors = ", share_consumption_based[list_sectors].sum(axis=1).iloc[0])
share_consumption_based[list_sectors].sum(axis=1)

In [None]:
# GVA_chemsector = 9.71e11
# GVA_world = 7.38e13
# deltaSOS_purchases = 0.0234371088889808
# GVA_chemsector / GVA_world + deltaSOS_purchases

In [None]:
# direct (value-added) shares of the sectors in list_sectors
df_share_sectors[list_sectors]

In [None]:
# consumption-based shares of the sectors in list_sectors
share_consumption_based[list_sectors]

In [None]:
# element-wise ratio of consumption-based to direct shares for the sectors in list_sectors
share_consumption_based[list_sectors].to_numpy()/df_share_sectors[list_sectors].to_numpy()

In [None]:
# total of final-demand column c57 over all rows of df_FDs
df_FDs["c57"].sum(axis=0)

In [None]:
# total final demand: column sum of the aggregated Y vector
df_Y.sum(axis=0)

In [None]:
# fraction of total final demand accounted for by column c57
df_FDs["c57"].sum(axis=0)/df_Y.sum(axis=0)

# OUTPUTS: Export data to excel