In [1]:
import numpy as np
import pandas as pd

# pandas configuration for this notebook
# Chained assignment raises an error rather than only warning.
pd.options.mode.chained_assignment = "raise"
# Show up to 100 characters per cell and up to 100 rows per frame.
pd.options.display.max_colwidth = 100
pd.options.display.max_rows = 100
# Render floats with exactly one decimal place.
pd.options.display.float_format = "{:.1f}".format

# summary stats: one-child sample


In [None]:
# Load the one-child sample used by the summary tables below.
# NOTE(review): unpickling can execute arbitrary code, so this file should
# only ever come from a trusted source.
df = pd.read_pickle(filepath_or_buffer="./data/vars_onlychild.pkl")

In [3]:
# Variables to summarise; this order is the row order of the summary table.
clist = [
    # custody outcome
    "custody_m", "custody_f",
    # caregiver variables
    "care_mother", "care_mother_p_grand", "care_mother_m_grand",
    "care_p_grand", "care_m_grand", "care_father",
    # remaining case characteristics
    "malehome2", "childage", "p_female", "consent", "female_nojob",
    "violence", "ill_m", "ill_f", "addchild_m", "addchild_f", "rural",
]

# Display label for every variable in clist (listed in the same order).
newindex = dict(
    custody_m="Mother custody",
    custody_f="Father custody",
    care_mother="Mother",
    care_mother_p_grand="Mother + Paternal grandparents",
    care_mother_m_grand="Mother + Maternal grandparents",
    care_p_grand="Paternal grandparents",
    care_m_grand="Maternal grandparents",
    care_father="Father",
    malehome2="Father is sole homeowner",
    childage="Child age",
    p_female="Female plaintiff",
    consent="Defendant's consent to divorce",
    female_nojob="Unemployed mother",
    violence="Claim of domestic violence",
    ill_m="Mother has serious illness",
    ill_f="Father has serious illness",
    addchild_m="Mother has additional children",
    addchild_f="Father has additional children",
    rural="Rural areas",
)

In [4]:
# Frequency distribution (%): full sample and by child sex.
# Every variable in clist is reported as mean * 100, except childage, which
# is left as a plain mean.
# Row selector per output column; slice(None) keeps every row.
sex_rows = {
    'All Children': slice(None),
    'Girl': df['child_boy'] == 0,
    'Boy': df['child_boy'] == 1,
}
# .mean() gives the same column means as describe().loc[['mean']] without
# also computing count/std/quantiles for each subgroup. numeric_only=True
# keeps describe()'s behaviour of leaving non-numeric columns out.
sum_sex = pd.DataFrame({
    label: df.loc[rows, clist].mean(numeric_only=True)
    for label, rows in sex_rows.items()
})
# Scale only the percentage rows, so childage is never multiplied by 100
# and divided back.
is_pct = ~sum_sex.index.isin(['childage'])
sum_sex.loc[is_pct] = sum_sex.loc[is_pct] * 100
sum_sex = sum_sex.rename(index=newindex)

In [5]:
# Frequency distribution (%): urban vs rural.
# Every variable in clist is reported as mean * 100, except childage, which
# is left as a plain mean.
rural_rows = {
    'Urban': df['rural'] == 0,
    'Rural': df['rural'] == 1,
}
# .mean() gives the same column means as describe().loc[['mean']] without
# also computing count/std/quantiles for each subgroup. numeric_only=True
# keeps describe()'s behaviour of leaving non-numeric columns out.
sum_rural = pd.DataFrame({
    label: df.loc[rows, clist].mean(numeric_only=True)
    for label, rows in rural_rows.items()
})
# Scale only the percentage rows, so childage is never multiplied by 100
# and divided back.
is_pct = ~sum_rural.index.isin(['childage'])
sum_rural.loc[is_pct] = sum_rural.loc[is_pct] * 100
sum_rural = sum_rural.rename(index=newindex)

In [6]:
# Row selector for each column of the final table; slice(None) keeps every row.
n_rows = {
    'All Children': slice(None),
    'Girl': df['child_boy'] == 0,
    'Boy': df['child_boy'] == 1,
    'Urban': df['rural'] == 0,
    'Rural': df['rural'] == 1,
}

# 'N' row: number of non-missing 'rid' values per column, formatted as a
# string with thousands separators.
newrow = pd.DataFrame(
    {label: f"{df.loc[rows, 'rid'].count():,}" for label, rows in n_rows.items()},
    index=['N'],
)

# By-sex and urban/rural columns side by side, with the 'N' row on top.
sum_onechild = pd.concat([sum_sex, sum_rural], axis=1)
sum_onechild = pd.concat([newrow, sum_onechild])
sum_onechild

Unnamed: 0,All Children,Girl,Boy,Urban,Rural
N,156270.0,70542.0,85728.0,67087.0,89183.0
Mother custody,47.2,54.5,41.3,55.3,41.2
Father custody,52.8,45.5,58.7,44.7,58.8
Mother,52.5,55.9,49.7,52.6,52.4
Mother + Paternal grandparents,14.0,13.3,14.5,15.7,12.7
Mother + Maternal grandparents,5.4,5.9,4.9,7.0,4.2
Paternal grandparents,21.9,18.8,24.4,18.2,24.6
Maternal grandparents,3.2,3.4,3.1,4.0,2.7
Father,3.0,2.7,3.3,2.5,3.4
Father is sole homeowner,80.5,79.0,81.8,54.7,100.0


# summary stats: multi-child sample


In [None]:
# Load the multi-child sample used by the summary tables below.
# NOTE(review): unpickling can execute arbitrary code, so this file should
# only ever come from a trusted source.
df_m = pd.read_pickle(filepath_or_buffer="./data/vars_multichild.pkl")

In [None]:
# Variables to summarise for the multi-child sample; this order is the row
# order of the summary table.
# NOTE(review): sib_com is compared against string labels when the sample is
# split by sibling composition, so it is presumably non-numeric and yields no
# row in a numeric summary -- hence it has no display label. Confirm.
clist = [
    # custody outcome
    'custody_outcome_mother', 'custody_outcome_father', 'custody_outcome_split',
    # caregiver variables
    'care_mother', 'care_mother_p_grand', 'care_mother_m_grand',
    'care_p_grand', 'care_m_grand', 'care_father',
    # remaining case characteristics
    'malehome2', 'age_gap', 'average_age', 'p_female', 'consent',
    'female_nojob', 'violence', 'ill_m', 'ill_f', 'addchild_m', 'addchild_f',
    'sib_com', 'rural',
]

# Display label for each summarised variable.
newindex = {
    'custody_outcome_mother': 'Mother custody',
    'custody_outcome_father': 'Father custody',
    'custody_outcome_split': 'Split custody',
    'care_mother': 'Mother',
    'care_mother_m_grand': 'Mother + Maternal grandparents',
    'care_mother_p_grand': 'Mother + Paternal grandparents',
    'care_p_grand': 'Paternal grandparents',
    'care_m_grand': 'Maternal grandparents',
    'care_father': 'Father',
    'malehome2': 'Father is sole homeowner',
    'age_gap': 'Sibling age gap',
    'average_age': 'Average age of siblings',
    'p_female': 'Female plaintiff',
    'consent': "Defendant's consent to divorce",
    'female_nojob': 'Unemployed mother',
    'violence': 'Claim of domestic violence',
    'ill_m': 'Mother has serious illness',
    'ill_f': 'Father has serious illness',
    'addchild_m': 'Mother has additional children',
    'addchild_f': 'Father has additional children',
    # Previously unlabeled, so the table showed a raw 'rural' row; same label
    # as the one-child table.
    'rural': 'Rural areas',
}

In [9]:
# Frequency distribution (%): full sample and by sibling sex composition.
# Every variable in clist is reported as mean * 100, except average_age and
# age_gap, which are left as plain means.
# Row selector per output column; slice(None) keeps every row.
sib_rows = {
    'All Children': slice(None),
    'Girls and Boys': df_m['sib_com'] == 'Girls and Boys',
    'Girls': df_m['sib_com'] == 'Girls',
    'Boys': df_m['sib_com'] == 'Boys',
}
# .mean() gives the same column means as describe().loc[['mean']] without
# also computing count/std/quantiles for each subgroup. numeric_only=True
# keeps describe()'s behaviour of leaving non-numeric columns out.
sum_sex = pd.DataFrame({
    label: df_m.loc[rows, clist].mean(numeric_only=True)
    for label, rows in sib_rows.items()
})
# Scale only the percentage rows, so the two age rows are never multiplied
# by 100 and divided back.
is_pct = ~sum_sex.index.isin(['average_age', 'age_gap'])
sum_sex.loc[is_pct] = sum_sex.loc[is_pct] * 100
sum_sex = sum_sex.rename(index=newindex)

In [10]:
# Frequency distribution (%): urban vs rural.
# Every variable in clist is reported as mean * 100, except average_age and
# age_gap, which are left as plain means.
rural_rows = {
    'Urban': df_m['rural'] == 0,
    'Rural': df_m['rural'] == 1,
}
# .mean() gives the same column means as describe().loc[['mean']] without
# also computing count/std/quantiles for each subgroup. numeric_only=True
# keeps describe()'s behaviour of leaving non-numeric columns out.
sum_rural = pd.DataFrame({
    label: df_m.loc[rows, clist].mean(numeric_only=True)
    for label, rows in rural_rows.items()
})
# Scale only the percentage rows, so the two age rows are never multiplied
# by 100 and divided back.
is_pct = ~sum_rural.index.isin(['average_age', 'age_gap'])
sum_rural.loc[is_pct] = sum_rural.loc[is_pct] * 100
sum_rural = sum_rural.rename(index=newindex)

In [11]:
# Row selector for each column of the final table; slice(None) keeps every row.
n_rows = {
    'All Children': slice(None),
    'Girls and Boys': df_m['sib_com'] == 'Girls and Boys',
    'Girls': df_m['sib_com'] == 'Girls',
    'Boys': df_m['sib_com'] == 'Boys',
    'Urban': df_m['rural'] == 0,
    'Rural': df_m['rural'] == 1,
}

# 'N' row: number of non-missing 'rid' values per column, formatted as a
# string with thousands separators.
newrow = pd.DataFrame(
    {label: f"{df_m.loc[rows, 'rid'].count():,}" for label, rows in n_rows.items()},
    index=['N'],
)

# Sibling-composition and urban/rural columns side by side, with the 'N' row on top.
sum_mchild = pd.concat([sum_sex, sum_rural], axis=1)
sum_mchild = pd.concat([newrow, sum_mchild])

sum_mchild

Unnamed: 0,All Children,Girls and Boys,Girls,Boys,Urban,Rural
N,64353.0,40669.0,15290.0,8394.0,18669.0,45684.0
Mother custody,21.6,21.0,24.8,18.8,24.2,20.6
Father custody,27.0,27.6,22.6,32.2,23.2,28.6
Split custody,51.3,51.3,52.7,49.0,52.6,50.8
Mother,58.2,57.9,59.9,56.5,53.5,60.2
Mother + Paternal grandparents,15.9,16.2,15.2,15.8,19.3,14.5
Mother + Maternal grandparents,4.3,4.2,4.6,4.0,6.9,3.2
Paternal grandparents,15.6,15.8,14.0,17.3,11.2,17.3
Maternal grandparents,3.0,2.9,3.3,3.0,5.2,2.1
Father,3.0,2.9,3.0,3.3,3.9,2.7
