# In [14]:
import pandas as pd

# Reshape the long panel in 'regression_data.csv' (one row per country, time
# period 't' and sector) into a wide table with one row per (country, t) and
# one 'gvcomix_diff' column per sector, re-attach the country-period
# covariates, and write the result to 'gvcomix_transformed.csv'.

# Columns that identify one row of the wide output.
KEY_COLUMNS = ['country', 't']

# List of additional columns to include in the final output
additional_columns = ['onset2COWCS', 'decade', 'ecgrowth', 'logpop_M', 'logpopdens', 'logoutreg', 'democracy', 'logmountain', 'ethnic_fractionalization', 'religion_fractionalization', 'language_fractionalization', 'leg_british', 'opec']


def pivot_sectors_wide(data):
    """Return one row per (country, t) with one 'gvcomix_diff' column per sector.

    Args:
        data: Long-format frame with 'country', 't', 'sect' and
            'gvcomix_diff' columns.

    Returns:
        A frame whose first columns are 'country' and 't', followed by one
        column per distinct value of 'sect'. When a (country, t, sect)
        combination occurs more than once, ``aggfunc='first'`` keeps a single
        value for it.
    """
    wide = data.pivot_table(
        index=KEY_COLUMNS,
        columns='sect',
        values='gvcomix_diff',
        aggfunc='first',
    ).reset_index()

    # With a single `values` column the pivoted labels are already flat, so
    # this normally leaves every label as it is; it only joins tuple labels
    # should any appear. Assigning a plain list also drops the 'sect' name
    # from the column index.
    wide.columns = [
        col if not isinstance(col, tuple) else f"{col[0]} {col[1]}"
        for col in wide.columns
    ]
    return wide


def unique_covariates(data, covariate_columns):
    """Return the covariates with exactly one row per (country, t).

    Args:
        data: Long-format frame containing 'country', 't' and every column
            named in ``covariate_columns``.
        covariate_columns: Names of the columns to carry over.

    Returns:
        A frame with 'country', 't' and the covariate columns, de-duplicated
        on the key columns only (first occurrence kept).
    """
    # De-duplicate on the keys alone. De-duplicating on keys + covariates
    # leaves several rows for one (country, t) whenever a covariate differs
    # between that key's sector rows, and the left merge below would then
    # repeat the pivoted row. Keeping the first row mirrors aggfunc='first'.
    return data[KEY_COLUMNS + list(covariate_columns)].drop_duplicates(subset=KEY_COLUMNS)


# A notebook cell and a direct script run both have __name__ == "__main__", so
# the pipeline still executes there; the guard only keeps an import of this
# file from touching the filesystem.
if __name__ == "__main__":
    # Read the CSV file
    df = pd.read_csv('regression_data.csv')

    # Pivot the dataframe to have one row per country and 't' with columns for each sector
    df_pivot = pivot_sectors_wide(df)

    # One row of covariates per ('country', 't')
    df_unique = unique_covariates(df, additional_columns)

    # Merge the pivoted dataframe with the unique dataframe
    df_final = pd.merge(df_pivot, df_unique, on=KEY_COLUMNS, how='left')

    # Save the transformed dataframe to a new CSV file
    df_final.to_csv('gvcomix_transformed.csv', index=False)