### Import necessary libraries, set options

In [None]:
import numpy as np
import os
import pandas as pd
import warnings

# Show up to 125 columns when a DataFrame is displayed, and silence all
# warnings for the rest of the session.
pd.options.display.max_columns = 125
warnings.simplefilter("ignore")

### Read in pickled datasets

In [None]:
# Directory that holds the processed data files read below.
# NOTE(review): "path/to/data/..." looks like a placeholder — point it at the
# local copy of the processed data before running.
path_to_data = os.path.join("path/to/data/processed-data")

##### Between-individual

In [None]:
# Load the between-individual dataset; the counts printed further down treat
# each row as one participant.
# The path is assembled with os.path.join (instead of string concatenation) so
# it stays consistent with how `path_to_data` is built.
# NOTE: unpickling can execute arbitrary code — only load pickle files that
# come from a trusted source.
data_pid = pd.read_pickle(os.path.join(path_to_data, "pkl", "data_pid.pkl"))
print(len(data_pid))
data_pid.head()

### Sample size

##### Number of participants

In [None]:
# Boolean masks reused by the counts below.
# A non-missing `country` value is used as the marker for having finished the
# task.
finished_task = data_pid['country'].notna()
# NOTE(review): `country == 'yes'` is presumably a self-reported "are you in
# the US" answer — confirm against the survey definition.
reported_us = data_pid['country'] == 'yes'
ip_in_us = data_pid['ip_to_country'] == 'US'

print("Total number of participants (including those who got dropped along the way):")
print(data_pid.shape[0])
print()

print("Number of participants who finished the task in its entirety:")
print(int(finished_task.sum()))
print()

# Three counts: by `country`, by IP-derived country, and by both together.
print("Number of participants from the US:")
for us_mask in (reported_us, ip_in_us, reported_us & ip_in_us):
    print(int(us_mask.sum()))
print()

##### Number of participants per condition

In [None]:
# The four columns that jointly identify an experimental condition.
condition_cols = ['earned1', 'equal1', 'earned2', 'equal2']

print("Including participants who got dropped along the way:")
print(data_pid.groupby(condition_cols)['pid'].count())
print()

# Keep only rows with a non-missing `country`, the marker used for
# participants who were not dropped.
print("Excluding participants who got dropped along the way:")
finished = data_pid[data_pid['country'].notna()]
print(finished.groupby(condition_cols)['pid'].count())

### Demographics

In [None]:
def get_percentage(row, n_id_cols = 4):
    """Append the percentage share of every count entry to a row.

    The first `n_id_cols` entries of `row` are treated as identifier values
    and left untouched. Every remaining entry is treated as a count and is
    converted to its percentage of the sum of those counts.

    Parameters
    ----------
    row : pd.Series
        Identifier entries followed by numeric count entries.
    n_id_cols : int, default 4
        Number of leading entries that are identifiers rather than counts.

    Returns
    -------
    pd.Series
        A copy of `row` with one extra entry per count, labelled
        'temp_col_0', 'temp_col_1', ... in the order of the counts. If the
        counts sum to zero the percentages are NaN.
    """
    # .iloc makes the positional slice explicit, independent of the labels.
    counts = np.asarray(row.iloc[n_id_cols:], dtype = float)
    percentages = counts / counts.sum() * 100

    # Work on a copy so the caller's Series is not enlarged in place.
    result = row.copy()
    for i, percentage in enumerate(percentages):
        result['temp_col_' + str(i)] = percentage
    return result

##### Experience

In [None]:
# Overall mean and median of `num_other` (missing values are ignored).
num_other = data_pid['num_other']
print(num_other.mean())
print(num_other.median())

# Mean and median of `num_other` within each condition, one row per condition.
experience = (
    data_pid
    .groupby(['earned1', 'equal1', 'earned2', 'equal2'])['num_other']
    .agg(['mean', 'median'])
    .reset_index()
    .rename(columns = {'mean': 'mean_num_other', 'median': 'median_num_other'})
)

experience

##### Age

In [None]:
# Overall mean and median of `age` (missing values are ignored).
age_values = data_pid['age']
print(age_values.mean())
print(age_values.median())

# Mean and median of `age` within each condition, one row per condition.
age = (
    data_pid
    .groupby(['earned1', 'equal1', 'earned2', 'equal2'])['age']
    .agg(['mean', 'median'])
    .reset_index()
    .rename(columns = {'mean': 'mean_age', 'median': 'median_age'})
)

age

##### Gender

In [None]:
# Participant counts for every (condition, gender) combination.
gender = data_pid.groupby(['earned1', 'equal1',
                           'earned2', 'equal2',
                           'gender'])['gender'].agg(['count']).reset_index()

# One row per condition, one count column per gender value.
# `axis` is passed by keyword: current pandas accepts only `mapper`
# positionally, so the former `rename_axis(None, 1)` raises a TypeError.
gender = gender.pivot_table(index = ['earned1', 'equal1', 'earned2', 'equal2'],
                            columns = 'gender',
                            values = 'count').reset_index().rename_axis(None, axis = 1)

# A value that never occurs in a condition has no count; treat it as 0.
gender = gender.fillna(0)

# Append one percentage column per count column (share within the condition).
gender = gender.apply(get_percentage, axis = 1)

# NOTE(review): this positional renaming assumes the pivoted count columns come
# out in the order female, male, other — confirm against the raw `gender`
# values in the data.
gender.columns = ['earned1', 'equal1', 'earned2', 'equal2',
                  'female', 'male', 'other',
                  'female_perc', 'male_perc', 'other_perc']

# Percentages first, raw counts last.
gender = gender[['earned1', 'equal1', 'earned2', 'equal2', 
                 'male_perc', 
                 'female_perc', 
                 'other_perc',
                 'male', 'female', 'other']]

# Overall distribution across all conditions, in percent.
gender_totals = gender[['male', 'female', 'other']].sum()
print(
    pd.DataFrame(
        data = [gender_totals.values / gender_totals.sum() * 100],
        columns = ['male_perc', 'female_perc', 'other_perc']
    )
)

gender

##### Race

In [None]:
# Participant counts for every (condition, race) combination.
race = data_pid.groupby(['earned1', 'equal1',
                         'earned2', 'equal2',
                         'race'])['race'].agg(['count']).reset_index()

# One row per condition, one count column per race value.
# `axis` is passed by keyword: current pandas accepts only `mapper`
# positionally, so the former `rename_axis(None, 1)` raises a TypeError.
race = race.pivot_table(index = ['earned1', 'equal1', 'earned2', 'equal2'],
                        columns = 'race',
                        values = 'count').reset_index().rename_axis(None, axis = 1)

# A value that never occurs in a condition has no count; treat it as 0.
race = race.fillna(0)

# Append one percentage column per count column (share within the condition).
race = race.apply(get_percentage, axis = 1)

# NOTE(review): this positional renaming assumes the pivoted count columns come
# out in the order asian, black, hispanic, other, white — confirm against the
# raw `race` values in the data.
race.columns = ['earned1', 'equal1', 'earned2', 'equal2',
                'asian', 'black', 'hispanic', 'other', 'white',
                'asian_perc', 'black_perc', 'hispanic_perc', 'other_perc', 'white_perc']

# Percentages first, raw counts last.
race = race[['earned1', 'equal1', 'earned2', 'equal2',
             'white_perc', 
             'black_perc', 
             'asian_perc', 
             'hispanic_perc', 
             'other_perc',
             'white', 'black', 'asian', 'hispanic', 'other']]

# Overall distribution across all conditions, in percent.
race_totals = race[['white', 'black', 'asian', 'hispanic', 'other']].sum()
print(
    pd.DataFrame(
        data = [race_totals.values / race_totals.sum() * 100],
        columns = ['white_perc', 'black_perc', 'asian_perc', 'hispanic_perc', 'other_perc']
    )
)

race

##### Education

In [None]:
# Participant counts for every (condition, education) combination.
education = data_pid.groupby(['earned1', 'equal1',
                              'earned2', 'equal2',
                              'education'])['education'].agg(['count']).reset_index()

# One row per condition, one count column per education value.
# `axis` is passed by keyword: current pandas accepts only `mapper`
# positionally, so the former `rename_axis(None, 1)` raises a TypeError.
education = education.pivot_table(index = ['earned1', 'equal1', 'earned2', 'equal2'],
                                  columns = 'education',
                                  values = 'count').reset_index().rename_axis(None, axis = 1)

# A value that never occurs in a condition has no count; treat it as 0.
education = education.fillna(0)

# Append one percentage column per count column (share within the condition).
education = education.apply(get_percentage, axis = 1)

# NOTE(review): this positional renaming assumes the pivoted count columns come
# out in the order college_degree, graduate_degree, hs, less_than_hs, other,
# some_college — confirm against the raw `education` values in the data.
education.columns = ['earned1', 'equal1', 'earned2', 'equal2',
                     'college_degree', 'graduate_degree', 'hs', 'less_than_hs', 'other', 'some_college',
                     'college_degree_perc', 'graduate_degree_perc', 'hs_perc', 
                     'less_than_hs_perc', 'other_perc', 'some_college_perc']

# Percentages first (ordered by attainment), raw counts last.
education = education[['earned1', 'equal1', 'earned2', 'equal2',
                       'less_than_hs_perc', 
                       'hs_perc', 
                       'some_college_perc', 
                       'college_degree_perc',
                       'graduate_degree_perc', 
                       'other_perc',
                       'less_than_hs', 'hs', 'some_college', 'college_degree', 'graduate_degree', 'other']]

# Overall distribution across all conditions, in percent.
education_totals = education[['less_than_hs', 'hs', 'some_college',
                              'college_degree', 'graduate_degree', 'other']].sum()
print(
    pd.DataFrame(
        data = [education_totals.values / education_totals.sum() * 100],
        columns = ['less_than_hs_perc', 'hs_perc', 'some_college_perc',
                   'college_degree_perc', 'graduate_degree_perc', 'other_perc']
    )
)

education

##### Income

In [None]:
# Participant counts for every (condition, income) combination.
income = data_pid.groupby(['earned1', 'equal1', 
                           'earned2', 'equal2',
                           'income'])['income'].agg(['count']).reset_index()

# One row per condition, one count column per income value.
# `axis` is passed by keyword: current pandas accepts only `mapper`
# positionally, so the former `rename_axis(None, 1)` raises a TypeError.
income = income.pivot_table(index = ['earned1', 'equal1', 'earned2', 'equal2'],
                            columns = 'income', 
                            values = 'count').reset_index().rename_axis(None, axis = 1)

# A value that never occurs in a condition has no count; treat it as 0.
income = income.fillna(0)

# Append one percentage column per count column (share within the condition).
income = income.apply(get_percentage, axis = 1)

# NOTE(review): this positional renaming assumes the pivoted count columns come
# out in the order 20k_to_40k, 40k_to_60k, 60k_to_80k, 80k_to_100k,
# less_than_20k, more_than_100k — confirm against the raw `income` values in
# the data.
income.columns = ['earned1', 'equal1', 'earned2', 'equal2',
                  '20k_to_40k', '40k_to_60k', '60k_to_80k', '80k_to_100k', 'less_than_20k', 'more_than_100k',
                  '20k_to_40k_perc', '40k_to_60k_perc', '60k_to_80k_perc', 
                  '80k_to_100k_perc', 'less_than_20k_perc', 'more_than_100k_perc']

# Percentages first (ordered from lowest to highest bracket), raw counts last.
income = income[['earned1', 'equal1', 'earned2', 'equal2',
                 'less_than_20k_perc',
                 '20k_to_40k_perc',
                 '40k_to_60k_perc',
                 '60k_to_80k_perc',
                 '80k_to_100k_perc',
                 'more_than_100k_perc',
                 'less_than_20k', '20k_to_40k', '40k_to_60k', '60k_to_80k', '80k_to_100k', 'more_than_100k']]

# Overall distribution across all conditions, in percent.
income_totals = income[['less_than_20k', '20k_to_40k', '40k_to_60k',
                        '60k_to_80k', '80k_to_100k', 'more_than_100k']].sum()
print(
    pd.DataFrame(
        data = [income_totals.values / income_totals.sum() * 100],
        columns = ['less_than_20k_perc', '20k_to_40k_perc', '40k_to_60k_perc',
                   '60k_to_80k_perc', '80k_to_100k_perc', 'more_than_100k_perc']
    )
)

income

##### Politics

In [None]:
# Participant counts for every (condition, politics) combination.
politics = data_pid.groupby(['earned1', 'equal1', 
                             'earned2', 'equal2',
                             'politics'])['politics'].agg(['count']).reset_index()

# One row per condition, one count column per politics value.
# `axis` is passed by keyword: current pandas accepts only `mapper`
# positionally, so the former `rename_axis(None, 1)` raises a TypeError.
politics = politics.pivot_table(index = ['earned1', 'equal1', 'earned2', 'equal2'],
                                columns = 'politics', 
                                values = 'count').reset_index().rename_axis(None, axis = 1)

# A value that never occurs in a condition has no count; treat it as 0.
politics = politics.fillna(0)

# Append one percentage column per count column (share within the condition).
politics = politics.apply(get_percentage, axis = 1)

# NOTE(review): this positional renaming assumes the pivoted count columns come
# out in the order conservative, liberal, middle, very_conservative,
# very_liberal — confirm against the raw `politics` values in the data.
politics.columns = ['earned1', 'equal1', 'earned2', 'equal2',
                    'conservative', 'liberal', 'middle', 'very_conservative', 'very_liberal',
                    'conservative_perc', 'liberal_perc', 'middle_perc', 
                    'very_conservative_perc', 'very_liberal_perc']

# Percentages first (ordered from very liberal to very conservative), raw
# counts last.
politics = politics[['earned1', 'equal1', 'earned2', 'equal2',
                     'very_liberal_perc',
                     'liberal_perc',
                     'middle_perc',
                     'conservative_perc',
                     'very_conservative_perc',
                     'very_liberal', 'liberal', 'middle', 'conservative', 'very_conservative']]

# Overall distribution across all conditions, in percent.
politics_totals = politics[['very_liberal', 'liberal', 'middle',
                            'conservative', 'very_conservative']].sum()
print(
    pd.DataFrame(
        data = [politics_totals.values / politics_totals.sum() * 100],
        columns = ['very_liberal_perc', 'liberal_perc', 'middle_perc', 
                   'conservative_perc', 'very_conservative_perc']
    )
)

politics