# CS 328 Writing Submission

---

## Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import io
import os
from tqdm import tqdm
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
from plotly.subplots import make_subplots
import matplotlib as mpl
from linearmodels.panel import PanelOLS, RandomEffects, compare
import warnings

# Suppress every warning so the rendered notebook output stays uncluttered.
# NOTE(review): this is a blanket filter, so warnings raised by the libraries
# used further down are hidden as well.
warnings.filterwarnings(action='ignore')

# Widen pandas' console rendering: no cap on displayed columns, 1000-char lines.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

# Plot appearance. The style sheet is applied first so that the explicit
# rcParams below are set after it and are not replaced by the style's values.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("colorblind")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# Make sure the folders for figures and other outputs exist.
for directory in ('figures', 'output'):
    os.makedirs(directory, exist_ok=True)

---

## Data

In [None]:
# Locations of the WID exports, one CSV per indicator (adjust as needed).
# NOTE(review): `file_income_wealth` points at the "average national wealth per
# adult" export although the variable name mentions income — confirm that this
# is the intended file.
file_income_wealth = "WID_data/WID_Data_average national wealth per adult.csv"
file_wealth_inequality = "WID_data/WID_Data_Wealth_Gini_coeff.csv"
file_income_inequality = "WID_data/WID_Data_Income_Gini_coeff.csv"
file_wealth_to_income = "WID_data/WID_Data_Net national wealth to Net National Income Ratio.csv"

# All four files are parsed the same way: semicolon-separated, with the column
# names taken from the row at index 1 (the line before it is skipped).
wid_read_options = dict(header=1, sep=';')

df_income_wealth = pd.read_csv(file_income_wealth, **wid_read_options)
df_wealth_inequality = pd.read_csv(file_wealth_inequality, **wid_read_options)
df_income_inequality = pd.read_csv(file_income_inequality, **wid_read_options)
df_wealth_to_income = pd.read_csv(file_wealth_to_income, **wid_read_options)

def reshape_wid(df: pd.DataFrame, value_column_name: str) -> pd.DataFrame:
    """Melt a wide WID table into long format.

    The 'Percentile' and 'Year' columns are kept as identifiers. Every other
    column is unpivoted: its label is stored in a new 'Country' column and its
    cell values in a column named ``value_column_name``.

    Args:
        df: Wide table containing 'Percentile' and 'Year' columns.
        value_column_name: Name to give the column holding the melted values.

    Returns:
        A new long-format DataFrame with the columns
        'Percentile', 'Year', 'Country' and ``value_column_name``.
    """
    identifier_columns = ['Percentile', 'Year']
    long_df = pd.melt(
        df,
        id_vars=identifier_columns,
        var_name='Country',
        value_name=value_column_name,
    )
    return long_df

# Convert the four wide tables to long format in one pass; each pair below is
# (wide table, name of the value column produced by the melt).
(
    df_income_long,
    df_wealth_ineq_long,
    df_income_ineq_long,
    df_wealth_to_income_long,
) = (
    reshape_wid(wide_frame, value_label)
    for wide_frame, value_label in (
        (df_income_wealth, 'PerAdultIncome'),
        (df_wealth_inequality, 'WealthInequality'),
        (df_income_inequality, 'IncomeInequality'),
        (df_wealth_to_income, 'WealthToIncomeRatio'),
    )
)

In [None]:
def _clean_wid_long(df):
    """Return a cleaned copy of one long-format WID table.

    Steps, applied in order:
      1. Convert 'Year' to numeric; unparseable values become NaN.
      2. Drop rows whose 'Year' is NaN. pandas merges match NaN keys against
         each other, so leaving them in would multiply rows in the joins below.
      3. Keep only the 'pall' percentile rows, then drop the 'Percentile'
         column, which is constant after the filter.
      4. Strip surrounding whitespace from the country names.

    The input frame is not modified.
    """
    return (
        df.assign(Year=pd.to_numeric(df['Year'], errors='coerce'))
        .dropna(subset=['Year'])
        .loc[lambda frame: frame['Percentile'] == 'pall']
        .drop(columns=['Percentile'])
        .assign(Country=lambda frame: frame['Country'].str.strip())
    )


# Apply the same clean-up to every indicator so the tables share consistent
# (Country, Year) keys. Per the original notes, "pall" is the only percentile
# available for per-adult income/wealth and wealth-to-income, and it is the one
# chosen for both inequality series.
df_income_long = _clean_wid_long(df_income_long)
df_wealth_to_income_long = _clean_wid_long(df_wealth_to_income_long)
df_income_ineq_long = _clean_wid_long(df_income_ineq_long)
df_wealth_ineq_long = _clean_wid_long(df_wealth_ineq_long)

# Inner joins keep only the (Country, Year) pairs present in all four tables.
# Rows whose indicator value is NaN are still kept: only the keys are matched.
merge_keys = ['Country', 'Year']
df_merged = (
    df_income_long
    .merge(df_income_ineq_long, on=merge_keys, how='inner')
    .merge(df_wealth_ineq_long, on=merge_keys, how='inner')
    .merge(df_wealth_to_income_long, on=merge_keys, how='inner')
)

In [None]:
# Panel layout: a (Country, Year) MultiIndex, i.e. entity first, time second.
df_panel = df_merged.set_index(["Country", "Year"])

# Both panel models share the same dependent variable and the same regressors:
# per-adult income plus a constant column.
dependent = df_panel["IncomeInequality"]
exog = sm.add_constant(df_panel["PerAdultIncome"])

# a) Entity (country) fixed effects, fitted with the robust covariance estimator
fe_model = PanelOLS(dependent, exog, entity_effects=True)
fe_results = fe_model.fit(cov_type='robust')
print("Fixed Effects Model (PanelOLS):", fe_results, sep="\n")

# b) Random effects, fitted with the estimator's default settings
re_model = RandomEffects(dependent, exog)
re_results = re_model.fit()
print("\nRandom Effects Model:", re_results, sep="\n")

# c) Summary of the two fitted models under the labels 'FE' and 'RE'
# NOTE(review): compare() appears to tabulate the results rather than run a
# formal Hausman test — confirm before describing this output as one.
comparison = compare(dict(FE=fe_results, RE=re_results))
print("\nModel Comparison:", comparison, sep="\n")


# Cross-sectional check: regress income inequality on per-adult income using
# only the 2020 observations (copied so the slice is independent of df_merged).
df_2020 = df_merged[df_merged["Year"] == 2020].copy()

# Melting the wide tables yields a row for every country-year cell, including
# empty ones, and the inner merges match on keys only, so NaN values can reach
# this point. statsmodels' default (missing='none') performs no NaN handling;
# missing='drop' discards rows where either variable is NaN before fitting.
ols_model = sm.OLS(
    df_2020["IncomeInequality"],
    sm.add_constant(df_2020["PerAdultIncome"]),
    missing='drop',
)
ols_results = ols_model.fit()
print("\nCross-sectional OLS (2020) results:")
print(ols_results.summary())