# Double ML

In [1]:
import pandas as pd
import numpy as np

## 1. Data Wrangling

In [6]:

# Load the merged CSV into `df`. The path is relative, so it resolves against
# the kernel's current working directory.
eora_csv_path = '../../data/eora_merged.csv'
df = pd.read_csv(eora_csv_path)

In [7]:
# Display the column labels of the loaded frame so the available variables
# can be checked before a subset is selected.
df.columns

Index(['country', 'sect', 't', 'source', 'outp', 'outpd', 'outptt', 'gvco',
       'gvcobp', 'gvcofp', 'gvcomix', 'outpd%', 'outptt%', 'gvco%', 'gvcobp%',
       'gvcofp%', 'gvcomix%', 'outpd_diff', 'outptt_diff', 'gvco_diff',
       'gvcobp_diff', 'gvcofp_diff', 'gvcomix_diff', 'Unnamed: 0',
       'onset2COWCS', 'd2incidenceU', 'd3_6incidenceU', 'onsetUCS', 'coup',
       'periregular', 'milexp_pergdpSIPRI', 'decade', 'ecgrowth', 'logpop_M',
       'logpopdens', 'logoutreg', 'democracy', 'logmountain',
       'ethnic_fractionalization', 'religion_fractionalization',
       'language_fractionalization', 'leg_british', 'opec',
       'milexp_pergdpSIPRI_diff', 'logpop_M_diff', 'logpopdens_diff',
       'logoutreg_diff', 'ecgrowth_demeaned'],
      dtype='object')

In [8]:
# Narrow `df` to the columns used from here on. The selection is assembled
# from named groups; the resulting column order is the concatenation order.
key_cols = ['country', 'sect', 't']
pct_cols = ['gvcobp%', 'gvcofp%', 'gvcomix%']
diff_cols = ['gvcobp_diff', 'gvcofp_diff', 'gvcomix_diff']
onset_cols = ['onset2COWCS']
# NOTE(review): presumably country-level controls -- confirm against the data dictionary.
remaining_cols = [
    'decade', 'ecgrowth', 'logpop_M', 'logpopdens', 'logoutreg',
    'democracy', 'logmountain',
    'ethnic_fractionalization', 'religion_fractionalization',
    'language_fractionalization',
    'leg_british', 'opec',
    'milexp_pergdpSIPRI_diff', 'logpop_M_diff', 'logpopdens_diff',
    'logoutreg_diff', 'ecgrowth_demeaned',
]
df = df[key_cols + pct_cols + diff_cols + onset_cols + remaining_cols]

In [None]:
# Reshape `gvcomix%` from long to wide: one row per (country, t) and one column
# per distinct `sect` value. When several rows share the same
# (country, t, sect), aggfunc='first' keeps the first non-null value.
df_pivot = (
    df.pivot_table(index=['country', 't'], columns='sect', values='gvcomix%', aggfunc='first')
    .reset_index()
    # `values` is a single column, so the pivoted columns are already flat
    # (no MultiIndex to flatten); only the leftover axis name 'sect' is cleared.
    .rename_axis(columns=None)
)

# Columns carried over unchanged from `df` and attached to each (country, t) row.
additional_columns = ['onset2COWCS', 'decade', 'ecgrowth', 'logpop_M', 'logpopdens', 'logoutreg', 'democracy', 'logmountain', 'ethnic_fractionalization', 'religion_fractionalization', 'language_fractionalization', 'leg_british', 'opec']

# Collapse `df` to the distinct combinations of 'country', 't' and the
# additional columns. This yields one row per (country, t) only if those
# columns do not vary across the rows of a given (country, t).
df_unique = df[['country', 't'] + additional_columns].drop_duplicates()

# Attach the additional columns to the wide frame. validate='one_to_one' makes
# pandas raise MergeError if (country, t) is not unique on either side, instead
# of silently duplicating country-year rows in the output.
df_final = pd.merge(df_pivot, df_unique, on=['country', 't'], how='left', validate='one_to_one')

# Write the result to the kernel's current working directory, without the index.
df_final.to_csv('gvcomix_transformed.csv', index=False)