In [10]:
import pandas as pd

# Read the input CSV (relative path, resolved against the working directory).
df = pd.read_csv("CES_RF_SMT_clean.csv")

# Columns reported in the summary table, in display order.
# The grouping below is by column name only; it does not affect the result.
vars_to_summarize = [
    # education
    "Highschool_Educated", "Tertiary_Educated",
    # respondent / household characteristics
    "Age", "Household_Size", "Log_Household_Net_Income", "Male",
    # expectation-related columns
    # (pr2010D / pr2110D: meaning not visible in this notebook -- TODO confirm)
    "Growth_Uncertainty_Probability", "pr2010D", "pr2110D",
    "First_Moment_Expectation_Prior", "Second_Moment_Prior",
    # nationality columns
    "Belgian", "Spanish", "French", "Italian", "Dutch", "Danish",
]

# Function to compute mean and std
def summarize(df_subset, columns=None):
    """Return the per-column mean and standard deviation of ``df_subset``.

    Parameters
    ----------
    df_subset : pandas.DataFrame
        Rows to summarise.
    columns : sequence of str, optional
        Columns to include. When omitted, the notebook-level
        ``vars_to_summarize`` list is used, which is the behaviour this
        function had before the parameter was added. Passing the columns
        explicitly removes the dependency on that global, so the helper can
        be reused (and tested) without it.

    Returns
    -------
    pandas.DataFrame
        One row per requested column, with the columns ``Mean`` and
        ``Standard_Deviation`` (pandas' default ``std``, i.e. ``ddof=1``).

    Raises
    ------
    KeyError
        If a requested column is not present in ``df_subset``.
    """
    if columns is None:
        # Fall back to the notebook-level list, resolved at call time.
        columns = vars_to_summarize
    stats = df_subset[list(columns)].agg(['mean', 'std'])
    # agg() yields one row per statistic; flip so each variable is a row.
    return stats.transpose().rename(
        columns={'mean': 'Mean', 'std': 'Standard_Deviation'}
    )

# One summary per segment, each tagged with the segment it describes
summary_all = summarize(df).assign(Group='All')
summary_control = summarize(
    df.loc[df['Control_Group'].eq(1)]
).assign(Group='Control')
summary_treatment = summarize(
    df.loc[df['Second_Moment_Treatment'].eq(1)]
).assign(Group='Treatment')

# Stack the three segments and turn the variable names (the index)
# into an ordinary 'Variable' column
final_summary = (
    pd.concat([summary_all, summary_control, summary_treatment])
    .rename_axis('Variable')
    .reset_index()
)

# Display results
print(final_summary)



                          Variable       Mean  Standard_Deviation      Group
0              Highschool_Educated   0.306496            0.461128        All
1                Tertiary_Educated   0.557760            0.496749        All
2                              Age  47.600156           14.026780        All
3                   Household_Size   2.644496            1.241480        All
4         Log_Household_Net_Income  10.150065            1.050817        All
5                             Male   0.495527            0.500077        All
6   Growth_Uncertainty_Probability   0.375336            0.259027        All
7                          pr2010D   0.449242            0.497514        All
8                          pr2110D   0.173473            0.378730        All
9   First_Moment_Expectation_Prior   0.386426           11.186591        All
10             Second_Moment_Prior   1.877609            2.036983        All
11                         Belgian   0.111630            0.314972        All

In [3]:
import pandas as pd

# Load dataset (relative path: resolved against the working directory).
# This rebinds the notebook-level name `df`.
df = pd.read_csv("CES_RF_SMT_clean.csv")

def income_summary(data, label):
    """Print mean log income for the bottom tail, top tail and all of ``data``.

    The tails are the rows whose ``Log_Household_Net_Income`` is at or below
    the 10th percentile, and at or above the 90th percentile, of that column
    within ``data`` itself.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Log_Household_Net_Income`` column.
    label : str
        Heading printed (after a blank line) above the three figures.

    Returns
    -------
    None
        The figures are written to standard output only.
    """
    log_income = data["Log_Household_Net_Income"]
    low_cut = log_income.quantile(0.10)
    high_cut = log_income.quantile(0.90)

    # Thresholds are inclusive on both sides, so ties at a cut-off are kept.
    low_mean = log_income[log_income <= low_cut].mean()
    high_mean = log_income[log_income >= high_cut].mean()
    all_mean = log_income.mean()

    print(
        f"\n{label}",
        f"Bottom 10% mean (log income): {low_mean:.3f}",
        f"Top 10% mean (log income):    {high_mean:.3f}",
        f"Overall mean (log income):    {all_mean:.3f}",
        sep="\n",
    )


In [5]:
import pandas as pd
import numpy as np

# NOTE(review): this cell reads "CES_RF_SMT_Dummies.csv", whereas the other
# cells in this notebook read "CES_RF_SMT_clean.csv" -- confirm that the
# different input file is intended. This also rebinds the notebook-level `df`.
df = pd.read_csv("CES_RF_SMT_Dummies.csv")

def income_summary(data, label):
    """Print log and back-transformed income means for both tails and overall.

    The tails are the rows whose ``Log_Household_Net_Income`` is at or below
    the 10th percentile, and at or above the 90th percentile, of that column
    within ``data`` itself.

    For each of the three groups two figures are printed: the mean of the log
    column, and ``np.exp`` of that mean. The second figure is the exponential
    of the mean log, not the arithmetic mean of income levels (assuming the
    column holds natural logs, it is a geometric-mean-style figure).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Log_Household_Net_Income`` column.
    label : str
        Heading printed (after a blank line) above the figures.

    Returns
    -------
    None
        The figures are written to standard output only.
    """
    log_income = data["Log_Household_Net_Income"]
    low_cut = log_income.quantile(0.10)
    high_cut = log_income.quantile(0.90)

    # (heading, mean of the log column) for each reported group, in print order.
    # Thresholds are inclusive on both sides, so ties at a cut-off are kept.
    sections = (
        ("Bottom 10% mean:", log_income[log_income <= low_cut].mean()),
        ("Top 10% mean:", log_income[log_income >= high_cut].mean()),
        ("Overall mean:", log_income.mean()),
    )

    print(f"\n{label}")
    for heading, mean_log in sections:
        print(heading)
        print(f"  Log income:   {mean_log:.3f}")
        print(f"  Income lvl:  {np.exp(mean_log):,.0f}")

# Report the control group, the treatment group and the entire population,
# in that order. Group membership is read from Second_Moment_Treatment (0 / 1).
for subset, label in (
    (df[df["Second_Moment_Treatment"] == 0], "Control Group"),
    (df[df["Second_Moment_Treatment"] == 1], "Treatment Group"),
    (df, "Entire Population"),
):
    income_summary(subset, label)



Control Group
Bottom 10% mean:
  Log income:   7.926
  Income lvl:  2,767
Top 10% mean:
  Log income:   11.309
  Income lvl:  81,555
Overall mean:
  Log income:   10.156
  Income lvl:  25,755

Treatment Group
Bottom 10% mean:
  Log income:   7.678
  Income lvl:  2,160
Top 10% mean:
  Log income:   11.430
  Income lvl:  92,010
Overall mean:
  Log income:   10.144
  Income lvl:  25,432

Entire Population
Bottom 10% mean:
  Log income:   7.779
  Income lvl:  2,389
Top 10% mean:
  Log income:   11.401
  Income lvl:  89,422
Overall mean:
  Log income:   10.150
  Income lvl:  25,593


In [9]:
import pandas as pd

# Read the input CSV (relative path, resolved against the working directory).
df = pd.read_csv("CES_RF_SMT_clean.csv")

# Columns whose means are compared across the two income tails, in row order.
# The grouping below is by column name only; it does not affect the result.
vars_to_summarize = [
    # education
    "Highschool_Educated", "Tertiary_Educated",
    # respondent / household characteristics
    "Age", "Household_Size", "Log_Household_Net_Income", "Male",
    # expectation-related columns
    # (pr2010D / pr2110D: meaning not visible in this notebook -- TODO confirm)
    "Growth_Uncertainty_Probability", "pr2010D", "pr2110D",
    "First_Moment_Expectation_Prior", "Second_Moment_Prior",
    # nationality columns
    "Belgian", "Spanish", "French", "Italian", "Dutch", "Danish",
]

# Cut-offs on log income: 10th and 90th percentile of the full sample
bottom_10_thresh = df['Log_Household_Net_Income'].quantile(q=0.1)
top_10_thresh = df['Log_Household_Net_Income'].quantile(q=0.9)

# Rows at or below the lower cut-off / at or above the upper cut-off
bottom_10 = df.loc[df['Log_Household_Net_Income'].le(bottom_10_thresh)]
top_10 = df.loc[df['Log_Household_Net_Income'].ge(top_10_thresh)]

# Column means within each tail
bottom_10_means = bottom_10.loc[:, vars_to_summarize].mean()
top_10_means = top_10.loc[:, vars_to_summarize].mean()

# Side-by-side table: one row per variable, one column per tail
summary = pd.concat(
    [
        bottom_10_means.rename('Bottom 10% Mean'),
        top_10_means.rename('Top 10% Mean'),
    ],
    axis=1,
)

print(summary)


                                Bottom 10% Mean  Top 10% Mean
Highschool_Educated                    0.399225      0.178295
Tertiary_Educated                      0.387597      0.794574
Age                                   46.263566     48.406977
Household_Size                         2.410853      3.042636
Log_Household_Net_Income               7.778813     11.401125
Male                                   0.395349      0.608527
Growth_Uncertainty_Probability         0.354341      0.408721
pr2010D                                0.480620      0.538760
pr2110D                                0.120155      0.170543
First_Moment_Expectation_Prior        -0.292636     -0.044574
Second_Moment_Prior                    1.847403      2.048362
Belgian                                0.127907      0.143411
Spanish                                0.151163      0.093023
French                                 0.135659      0.174419
Italian                                0.259690      0.135659
Dutch   

In [11]:
import pandas as pd

# Read the input CSV (relative path, resolved against the working directory).
df = pd.read_csv("CES_RF_SMT_clean.csv")

# Columns whose standard deviation is compared across the two income tails
vars_to_sd = ["Log_Household_Net_Income", "Household_Size", "Age"]

# Cut-offs on log income: 10th and 90th percentile of the full sample
bottom_10_thresh = df['Log_Household_Net_Income'].quantile(q=0.1)
top_10_thresh = df['Log_Household_Net_Income'].quantile(q=0.9)

# Rows at or below the lower cut-off / at or above the upper cut-off
bottom_10 = df.loc[df['Log_Household_Net_Income'].le(bottom_10_thresh)]
top_10 = df.loc[df['Log_Household_Net_Income'].ge(top_10_thresh)]

# Standard deviation of each selected column within each tail
bottom_10_sd = bottom_10.loc[:, vars_to_sd].std()
top_10_sd = top_10.loc[:, vars_to_sd].std()

# Side-by-side table: one row per variable, one column per tail
sd_summary = bottom_10_sd.to_frame('Bottom 10% SD').join(
    top_10_sd.rename('Top 10% SD')
)

print(sd_summary)


                          Bottom 10% SD  Top 10% SD
Log_Household_Net_Income       1.362668    0.370745
Household_Size                 1.312095    1.102757
Age                           13.949335   12.507471
