<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Helper-functions" data-toc-modified-id="Helper-functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Helper functions</a></span></li><li><span><a href="#Coefficient-plots" data-toc-modified-id="Coefficient-plots-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Coefficient plots</a></span><ul class="toc-item"><li><span><a href="#Actions" data-toc-modified-id="Actions-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Actions</a></span></li><li><span><a href="#Nutrients" data-toc-modified-id="Nutrients-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Nutrients</a></span></li><li><span><a href="#Interaction-with-liquidity-constraints" data-toc-modified-id="Interaction-with-liquidity-constraints-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Interaction with liquidity constraints</a></span></li></ul></li><li><span><a href="#Coefficient-tables" data-toc-modified-id="Coefficient-tables-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Coefficient tables</a></span><ul class="toc-item"><li><span><a href="#Behaviors" data-toc-modified-id="Behaviors-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Behaviors</a></span></li><li><span><a href="#Nutrients" data-toc-modified-id="Nutrients-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Nutrients</a></span></li><li><span><a href="#Behaviors,-interaction" data-toc-modified-id="Behaviors,-interaction-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Behaviors, interaction</a></span></li><li><span><a href="#Nutrients,-interaction" data-toc-modified-id="Nutrients,-interaction-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Nutrients, interaction</a></span></li></ul></li><li><span><a href="#Summary-tables" data-toc-modified-id="Summary-tables-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Summary tables</a></span></li></ul></div>

# assemble_tables

Assembles tables for Goodman (2021)

Last updated: June 2021

In [None]:
import pandas as pd
import numpy as np
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = 100
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from matplotlib import cycler
import re

from typing import Optional, List

In [None]:
# matplotlib style

# Colour palette; also indexed directly by the plotting helper below.
color_list = ['#3388BB', '#EE6666', '#9988DD',
              '#EECC55', '#88BB44', '#FFBBBB']
colors = cycler('color', color_list)

# rc settings keyed by rc group, applied in one pass below.
rc_groups = {
    'axes': dict(edgecolor='none', axisbelow=True, grid=True, prop_cycle=colors),
    'grid': dict(color='k', linestyle='solid', alpha=0.15),
    'xtick': dict(direction='out', color='k'),
    'ytick': dict(direction='out', color='k'),
    'patch': dict(edgecolor='#E6E6E6'),
    'lines': dict(linewidth=2),
}
for rc_group, rc_settings in rc_groups.items():
    plt.rc(rc_group, **rc_settings)

plt.rcParams['axes.titlesize'] = 16
# plt.rcParams['font.family'] = ['Lucida Grande']

## Helper functions

In [None]:
def get_coeffs(outcome:str, 
               sample:str = 'all',
               twfe:bool = True,
               interact:bool = False) -> pd.DataFrame:
    """
    Returns dataframe with the coefficients and standard errors
    for `outcome` estimated using a two-way FE estimator, if
    twfe = True, otherwise using the Sun & Abraham estimator.

    The estimates are read from the tab-separated file
    `../data/temp_data/{method}/{sample}/{outcome}.txt`, where
    `method` is 'twfe' or 'sa' and, if `interact` is True, the file
    name is prefixed with 'interact_'. The file must contain 'coeff'
    and 'se' columns; its first column holds the coefficient labels
    from which the week range is parsed.
    
    Params
    ------
    outcome : str
        Outcome var of interest, e.g. 'total_spent'
        
    sample : str
        Can take values 'all', 'dd', or 'paper'
        
    twfe : bool
        If True, uses TWFE method, otherwise S&A
        
    interact : bool
        If True, includes interaction term for liquidity constraints

    Returns
    -------
    pd.DataFrame
        Columns 'name' (week relative to the event), 'coeff' and 'se',
        sorted by week and restricted to weeks <= 10. Rows with
        coeff = se = 0 are appended for weeks that have no row in the
        file (presumably the omitted reference periods -- TODO confirm).

    Raises
    ------
    AssertionError
        If `sample` is not one of 'all', 'dd', 'paper'.
    """
    assert sample in ['all', 'dd', 'paper']
    cols = ['week', 'coeff', 'se']
    
    # import raw
    method = 'sa'
    if twfe:
        method = 'twfe'
    if interact:
        outcome = 'interact_' + outcome
    df = pd.read_csv(f'../data/temp_data/{method}/{sample}/{outcome}.txt', sep='\t')
    
    # get min and max weeks
    # first row's label: the text after the first 'n' is read as the number of
    # weeks before the event (assumes labels like '...n<k>' -- TODO confirm
    # against the estimation output)
    min_week = df.iloc[0, 0]
    min_week = -1 * int(min_week[min_week.find('n')+1:])
    # last row's label: the week number follows the first '_'; with interact
    # the label carries a trailing '_...' suffix that is cut off at the last '_'
    max_week = df.iloc[-1, 0]
    end_idx = max_week.rfind('_') if interact else len(max_week)
    max_week = int(max_week[max_week.find('_')+1:end_idx])
    
    # handle interaction and add week var
    if interact:
        # keep only the second half of the rows (presumably the interaction
        # terms -- verify against the file layout)
        df = df.iloc[int((len(df)-1)/2):].copy().reset_index(drop=True)
        df['week'] = list(range(min_week, max_week+1))
        # append a zero row for the week just before the first estimated one
        df.loc[len(df), cols] = [min_week-1, 0, 0]
    else:
        # week -2 has no row in the file, so the week labels skip it
        df['week'] = list(range(min_week, -2)) + list(range(-1, max_week+1))
        # append zero rows for week -2 and for the week before the first one
        for week in [-2, min_week-1]:
            df.loc[len(df), cols] = [week, 0, 0]
        
    # sort by week
    df = df.loc[df.week <= 10, cols].sort_values('week').reset_index(drop=True)

    # rename columns
    # 'name' is the column name coef_plot expects for the x-axis variable
    df = df.rename(columns={'week': 'name'})
    
    # return
    return df

# Smoke test: interaction estimates for total spending in the 'dd' sample.
df = get_coeffs(outcome='total_spent', sample='dd', interact=True)
df

In [None]:
def coef_plot(dfx: pd.DataFrame,
              ax: Optional[plt.Axes] = None,
              title: str = '',
              xlabel: str = '',
              ylabel: str = '',
              ylim: Optional[tuple] = None,
              legend: Optional[bool] = False) -> None:
    """
    Plots a coefficient plot using coefficients stored in
    `dfx`. `dfx` should have columns 'name', 'coeff', and 'se'.

    Only rows with 'name' between -10 and 3 are drawn, each as a point
    with a +/- 1.96 * se error bar. If `dfx` also has an 'interact'
    column, rows with interact == 0 and interact == 1 are drawn as two
    separate, slightly offset series.

    Params
    ------
    dfx : pd.DataFrame
        Coefficients to plot; not modified (a filtered copy is used).

    ax : matplotlib Axes, optional
        Axes to draw on. If None, a new 8x5 figure is created.

    title, xlabel, ylabel : str
        Axes title and axis labels.

    ylim : tuple, optional
        y-axis limits; left to matplotlib if None (or empty).

    legend : bool
        Whether the legend is visible.

    Raises
    ------
    AssertionError
        If `dfx` lacks any of the columns 'name', 'coeff', 'se'.
    """
    assert all(c in dfx.columns for c in ['name', 'coeff', 'se'])
    interact = 'interact' in dfx.columns  # whether to include two series

    # init figure (explicit None check rather than relying on Axes truthiness)
    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 5))

    # restrict sample; copy so the caller's frame is never mutated
    dfx = dfx.loc[dfx['name'].between(-10, 3), :].copy()

    # 95% confidence interval half-width
    dfx['ci'] = dfx.se * 1.96

    if interact:
        # offset the two series by -0.05 / +0.05 so their error bars
        # do not overlap at the same week
        dfx['name'] += -0.05
        dfx.loc[dfx.interact == 1, 'name'] += 0.1
        dfx.loc[dfx.interact == 0].plot('name', 'coeff',
                                        kind='scatter',
                                        ax=ax, yerr='ci',
                                        label='Has liquidity',
                                        color=color_list[0])
        dfx.loc[dfx.interact == 1].plot('name', 'coeff',
                                        kind='scatter',
                                        ax=ax, yerr='ci',
                                        label='Liquidity constrained',
                                        color=color_list[4])
    else:
        dfx.plot('name', 'coeff', kind='scatter', ax=ax, yerr='ci', label='Coefficient')

    # plot attributes
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if ylim:
        ax.set_ylim(ylim)
    # the legend is always built, then shown or hidden per `legend`
    ax.legend(loc='best', frameon=False, fontsize=12).set_visible(legend)

    # percent and integer axes (coefficients are shown as fractions of 1)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
    ax.xaxis.set_major_locator(mtick.MaxNLocator(integer=True))

    # solid y = 0
    ax.axhline(0, color='k', zorder=-1)

    # dashed x = -0.5, i.e. between week -1 and week 0
    ax.axvline(-0.5, linestyle='--', color='red', alpha=0.7)
    

# Smoke test of the plotting helper on the frame loaded above.
# get_coeffs already returns the week column as 'name', so no rename is needed.
dftmp = df.reset_index()
coef_plot(dftmp, legend=False)

In [None]:
# Display titles for each outcome variable, used for panel titles and table rows.

varnames = dict(
    # spending
    total_spent='Total spent',
    final_price_paid='Total spent, scanned items',
    final_price_paid_food='Total spent, scanned food',
    # shopping activity
    trips='Trips',
    items_scanned='Scanned items',
    items_food='Scanned food items',
    # nutrients
    calories='Calories',
    carbsgrams='Carbohydrates',
    fatgrams='Fats',
    proteingrams='Protein',
    sugargrams='Sugar',
    fibergrams='Fiber',
    sodiumgrams='Sodium',
    transfatgrams='Trans fat',
    satfatgrams='Saturated fat',
    cholesterolgrams='Cholesterol',
    # beverages
    oz_alcohol='Alcohol',
    oz_soda_regular='Regular soda',
)

## Coefficient plots

### Actions

Event-study plots for shopping behaviors: trips, items purchased, dollars spent, etc.

In [None]:
# Outcomes to plot: one inner list per figure column, top to bottom.
varlist = [['total_spent', 'final_price_paid', 'final_price_paid_food'],
           ['trips', 'items_scanned', 'items_food']]

# np.shape(varlist) is (figure columns, panels per column).
shape = np.shape(varlist)
n_cols, n_rows = shape
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 10))

# Extra vertical room so panel titles do not collide with the x labels above.
fig.subplots_adjust(hspace=0.35)

for j, colvars in enumerate(varlist):
    for i, outcome in enumerate(colvars):
        print(i, outcome)

        # Label only the bottom row (x) and the left column (y).
        if i == n_rows - 1:
            xlabel = 'Weeks since ESP received'
        else:
            xlabel = ''
        if j == 0:
            ylabel = 'Percent change'
        else:
            ylabel = ''

        # Load the estimates and draw them in panel (row i, column j).
        dftmp = get_coeffs(outcome)
        coef_plot(dftmp,
                  ax=ax[i, j],
                  title=varnames.get(outcome),
                  xlabel=xlabel,
                  ylabel=ylabel,
                  ylim=(-0.1, 0.15))

fig.savefig("../tex/figures/es_behaviors_all.pdf", bbox_inches="tight")

### Nutrients

In [None]:
# Outcomes to plot: one inner list per figure column, top to bottom.
varlist = [['calories', 'fatgrams', 'satfatgrams', 'transfatgrams'],
           ['carbsgrams', 'sugargrams', 'fibergrams', 'sodiumgrams']]

# np.shape(varlist) is (figure columns, panels per column).
shape = np.shape(varlist)
n_cols, n_rows = shape
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 13))

# Extra vertical room so panel titles do not collide with the x labels above.
fig.subplots_adjust(hspace=0.35)

for j, colvars in enumerate(varlist):
    for i, outcome in enumerate(colvars):
        print(i, outcome)

        # Label only the bottom row (x) and the left column (y).
        if i == n_rows - 1:
            xlabel = 'Weeks since ESP received'
        else:
            xlabel = ''
        if j == 0:
            ylabel = 'Percent change'
        else:
            ylabel = ''

        # Load the estimates and draw them in panel (row i, column j).
        dftmp = get_coeffs(outcome)
        coef_plot(dftmp,
                  ax=ax[i, j],
                  title=varnames.get(outcome),
                  xlabel=xlabel,
                  ylabel=ylabel,
                  ylim=(-0.2, 0.2))

fig.savefig("../tex/figures/es_nutrients_all.pdf", bbox_inches="tight")

### Interaction with liquidity constraints

In [None]:
# Outcomes to plot: behaviors in the left column, nutrients in the right.
varlist = [['trips', 'total_spent', 'final_price_paid_food', 'items_food'],
           ['calories', 'carbsgrams', 'fatgrams', 'proteingrams']]

# np.shape(varlist) is (figure columns, panels per column);
# the figure size scales with the grid.
shape = np.shape(varlist)
n_cols, n_rows = shape
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols,
                       figsize=(7.5*n_cols, 4*n_rows))

# Extra vertical room so panel titles do not collide with the x labels above.
fig.subplots_adjust(hspace=0.35)

for j, colvars in enumerate(varlist):
    for i, outcome in enumerate(colvars):
        print(i, outcome)

        # The left column uses a tighter y-range than the right one.
        ylim = (-0.20, 0.30) if j == 0 else (-0.30, 0.35)

        # Label only the bottom row (x) and the left column (y).
        if i == n_rows - 1:
            xlabel = 'Weeks since ESP received'
        else:
            xlabel = ''
        if j == 0:
            ylabel = 'Percent change'
        else:
            ylabel = ''

        # NOTE(review): get_coeffs returns only name/coeff/se, so coef_plot's
        # two-series branch (which needs an 'interact' column) is not reached
        # from here -- confirm that a single series per panel is intended.
        dftmp = get_coeffs(outcome, interact=True)
        coef_plot(dftmp,
                  ax=ax[i, j],
                  title=varnames.get(outcome),
                  xlabel=xlabel,
                  ylabel=ylabel,
                  ylim=ylim,
                  legend=False)

fig.savefig("../tex/figures/es_interaction.pdf", bbox_inches="tight")

## Coefficient tables

In [None]:
# combine table for actions
# one column each: combined, just dd, just paper
# first table: effect during first week

def get_table(varlist: List[str],
              interaction: bool = False,
              week: bool = True) -> pd.DataFrame:
    """
    Returns dataframe formatted for easy latex translation.

    For every outcome in `varlist` and every sample ('all', 'dd',
    'paper') the coefficients are loaded with `get_coeffs` and reduced
    to one estimate per outcome/sample. The result has two rows per
    outcome (coefficient with significance stars, then the standard
    error in parentheses), one column per sample, and a final
    'Households' row with the sample sizes.

    Params
    ------
    varlist : List[str]
        Outcome variables, in the order they should appear in the table.
        Each must be a key of `varnames`.

    interaction : bool
        Passed to `get_coeffs` as `interact`.

    week : bool
        If True, report the week-0 estimate. If False, report the simple
        mean of the week-0 and week-1 coefficients and of their standard
        errors.
        NOTE(review): the mean of two s.e.'s is not the s.e. of the mean
        coefficient -- confirm this is the intended statistic.

    Returns
    -------
    pd.DataFrame
        Columns 'Outcome', 'All', 'Dir. Dep.', 'Check'; all values are
        strings.
    """
    cols = ['var', 'sample', 'coeff', 'se']
    samples = ['all', 'dd', 'paper']

    # collect one small frame per (outcome, sample) and concatenate once
    frames = []
    for v in varlist:
        for m in samples:
            df = get_coeffs(v, m, interact=interaction)
            df['var'] = v
            df['sample'] = m
            # TODO make next line more general
            if week:
                frames.append(df.loc[df['name'] == 0, cols])
            else:
                # copy before adding a column to avoid writing into a slice
                dftmp = df.loc[df['name'].between(0, 1), cols].copy()
                dftmp['name'] = 0
                dftmp = dftmp.groupby(['var', 'sample', 'name']).agg('mean').reset_index()
                frames.append(dftmp)
    if frames:
        dfall = pd.concat(frames, axis=0, ignore_index=True)
    else:
        dfall = pd.DataFrame(columns=cols)

    # get stars: 1/2/3 stars when |coeff| exceeds se times the two-sided
    # 10% / 5% / 1% normal critical values
    dfall['stars'] = 0
    for tcrit, stars in zip([1.645, 1.960, 2.576], [1, 2, 3]):
        dfall.loc[(dfall['coeff'].abs() - dfall['se'] * tcrit) > 0, 'stars'] = stars

    # string numeric vars (3 decimals; stars appended to the coefficient)
    star_str = dfall['stars'].apply(lambda n: '*' * n)
    dfall['coeff'] = dfall['coeff'].apply(lambda x: f'{x:.3f}') + star_str
    dfall['se'] = dfall['se'].apply(lambda x: f'({x:.3f})')
    dfall = dfall.drop(columns='stars')

    # reshape data: long over {coeff, se}, then one column per sample
    dfall = dfall.melt(['var', 'sample'], value_vars=['coeff', 'se'], var_name='est')
    dfall = dfall.pivot(index=['var', 'est'], columns='sample', values='value')
    dfall = dfall.reindex(columns=samples).reset_index()
    dfall.columns = ['var', 'est'] + samples

    # order by varlist ('coeff' sorts before 'se' within each outcome)
    sorterIndex = dict(zip(varlist, range(len(varlist))))
    dfall['rank'] = dfall['var'].map(sorterIndex)
    dfall = dfall.sort_values(['rank', 'est']).reset_index(drop=True)

    # replace vars with actual names
    dfall['var'] = dfall['var'].apply(lambda x: varnames.get(x))

    # one 'var': blank the outcome label on the standard-error row
    dfall.loc[dfall.est != 'coeff', 'var'] = ''

    # drop unnecessary cols and rename
    dfall = dfall.drop(columns=['rank', 'est'])
    dfall.columns = ['Outcome', 'All', 'Dir. Dep.', 'Check']

    # add observations to bottom (hard-coded household counts per sample)
    dfall.loc[len(dfall), :] = ['Households', '19,961', '9,190', '10,744']

    return dfall

In [None]:
# convert dataframe to latex table

def convert_to_latex(dft: pd.DataFrame, 
                     column_format: str, 
                     caption: str, 
                     label: str, 
                     note: str = '', 
                     observations: bool = True, 
                     stars: bool = True,
                     resize: bool = True) -> str:
    """
    Render `dft` as a complete LaTeX table and return it as a string.

    The tabular produced by `DataFrame.to_latex` is wrapped in
    spacing / table / threeparttable environments with a caption, a
    label and a `\\Fignote{}` note, then optionally post-processed.

    Params
    ------
    dft : pd.DataFrame
        dataframe ready to become a tex table. Column names are correct.

    column_format : str
        tabular column specification (in tex)

    caption : str
        title of the table

    label : str
        tex ref to add to the table

    note : str
        note placed at the bottom of the table

    observations : bool
        if True, inserts a \\midrule before every 'Households &' row

    stars : bool
        if True, wraps runs of * that are followed by a space in \\sym{}

    resize : bool
        if True, wraps the threeparttable in a \\resizebox set to line width

    Raises
    ------
    AssertionError
        If `dft` is not a DataFrame, if any of column_format / caption /
        label / note is not a str, or if observations / stars is not a bool.
    """
    # argument validation
    assert isinstance(dft, pd.core.frame.DataFrame)
    assert all(isinstance(text, str) for text in (column_format, caption, label, note))
    assert all(isinstance(flag, bool) for flag in (observations, stars))

    # tabular body
    tabular = dft.to_latex(index=False, escape=False,
                           column_format=column_format)

    # environments, caption and label in front; note and closers behind
    opening = ''.join([
        '\\begin{spacing}{1.0} \n',
        '\\begin{table} \\centering \\caption{' + caption + '} \n',
        '\\label{' + label + '} \n',
        '\\begin{threeparttable} \n',
    ])
    closing = '\\Fignote{' + note + '} \n\\end{threeparttable}\n\\end{table} \n\\end{spacing}'
    t = opening + tabular + closing

    # insert \sym{} around stars; longest run first so that shorter
    # patterns cannot match inside an already wrapped run
    if stars:
        for run in ('***', '**', '*'):
            t = t.replace(run + ' ', '\\sym{' + run + '} ')

    # rule above every household-count row
    if observations:
        t = t.replace('Households &', '\n\\midrule \n' + 'Households &')

    # scale the table to the line width
    if resize:
        t = t.replace('\\begin{threeparttable}',
                      '\\resizebox{\\linewidth}{!}{%\n\\begin{threeparttable}')
        t = t.replace('\\end{threeparttable}', '\\end{threeparttable}}')

    return t

### Behaviors

In [None]:
# behaviors

varlist = ['total_spent', 'final_price_paid', 'final_price_paid_food', 
           'trips', 'items_scanned', 'items_food']

# column_format and note are also reused by the table cells further down
column_format='m{0.30\\linewidth} *{6}{>{\\centering\\arraybackslash}m{0.10\\linewidth}}'
caption = 'Effects of ESP receipt on behaviors'
name = 'behaviors_all'
label = 'table_' + name
note = '\\FEnote \\Regnote'

# first-week estimates next to the two-week averages; the second block drops
# its 'Outcome' column so the labels appear only once
dfweek = get_table(varlist, False, True)
dfmonth = get_table(varlist, False, False)
# axis must be passed by keyword: positional axis was removed in pandas 2.0
dfall = pd.concat([dfweek, dfmonth.iloc[:,1:]], axis=1)
display(dfall)
t = convert_to_latex(dfall, column_format, caption, label, note, resize=True)

# add column groupings
t = t.replace('\\toprule', '\\toprule\n& \\multicolumn{3}{c}{First week} & \\multicolumn{3}{c}{Two weeks}\\\\\n \\cmidrule(l{.75em}){2-4} \\cmidrule(l{.75em}){5-7}')

print(t)

# write to tex file
with open(f'../tex/tables/{name}.tex', 'w') as tf:
     tf.write(t)

### Nutrients

In [None]:
# foods

varlist = ['calories', 'fatgrams', 'satfatgrams', 'transfatgrams', 
           'carbsgrams', 'sugargrams', 'fibergrams', 'sodiumgrams',
           'proteingrams', 'oz_alcohol', 'cholesterolgrams']

caption = 'Effects of ESP receipt on nutrients'
name = 'nutrients_all'
label = 'table_' + name

# first-week estimates next to the two-week averages; the second block drops
# its 'Outcome' column so the labels appear only once
dfweek = get_table(varlist, False, True)
dfmonth = get_table(varlist, False, False)
# axis must be passed by keyword: positional axis was removed in pandas 2.0
dfall = pd.concat([dfweek, dfmonth.iloc[:,1:]], axis=1)
display(dfall)
# NOTE: column_format and note are the values left in scope by the most
# recently run table cell above
t = convert_to_latex(dfall, column_format, caption, label, note)

# add column groupings

t = t.replace('\\toprule', '\\toprule\n& \\multicolumn{3}{c}{First week} & \\multicolumn{3}{c}{Two weeks}\\\\\n \\cmidrule(l{.75em}){2-4} \\cmidrule(l{.75em}){5-7}')

# write to tex file
with open(f'../tex/tables/{name}.tex', 'w') as tf:
     tf.write(t)

### Behaviors, interaction

In [None]:
# behaviors

varlist = ['total_spent', 'final_price_paid', 'final_price_paid_food', 
           'trips', 'items_scanned', 'items_food']

caption = 'Effects of ESP receipt on behaviors, low liquidity households'
name = 'behaviors_inter'
label = 'table_' + name
note = '''The first set of columns is $\\beta_1$ from Equation \\ref{spec_het} while 
the second set includes the average of $\\beta_1$ and $\\beta_2$. \\FEnote \\Regnote'''

# first-week estimates next to the two-week averages; the second block drops
# its 'Outcome' column so the labels appear only once
dfweek = get_table(varlist, True, True)
dfmonth = get_table(varlist, True, False)
# axis must be passed by keyword: positional axis was removed in pandas 2.0
dfall = pd.concat([dfweek, dfmonth.iloc[:,1:]], axis=1)
display(dfall)
# NOTE: column_format is the value left in scope by an earlier table cell
t = convert_to_latex(dfall, column_format, caption, label, note)

# add column groupings

t = t.replace('\\toprule', '\\toprule\n& \\multicolumn{3}{c}{First week} & \\multicolumn{3}{c}{Two weeks}\\\\\n \\cmidrule(l{.75em}){2-4} \\cmidrule(l{.75em}){5-7}')

# write to tex file
with open(f'../tex/tables/{name}.tex', 'w') as tf:
     tf.write(t)

### Nutrients, interaction

In [None]:
# foods

varlist = ['calories', 'fatgrams', 'satfatgrams', 'transfatgrams', 
           'carbsgrams', 'sugargrams', 'fibergrams', 'sodiumgrams',
           'proteingrams', 'oz_alcohol', 'cholesterolgrams']


caption = 'Effects of ESP receipt on nutrients, low liquidity households'
name = 'nutrients_inter'
label = 'table_' + name

# first-week estimates next to the two-week averages; the second block drops
# its 'Outcome' column so the labels appear only once
dfweek = get_table(varlist, True, True)
dfmonth = get_table(varlist, True, False)
# axis must be passed by keyword: positional axis was removed in pandas 2.0
dfall = pd.concat([dfweek, dfmonth.iloc[:,1:]], axis=1)
display(dfall)
# NOTE: column_format and note are the values left in scope by the most
# recently run table cell above
t = convert_to_latex(dfall, column_format, caption, label, note)

# add column groupings

t = t.replace('\\toprule', '\\toprule\n& \\multicolumn{3}{c}{First week} & \\multicolumn{3}{c}{Two weeks}\\\\\n \\cmidrule(l{.75em}){2-4} \\cmidrule(l{.75em}){5-7}')

# write to tex file
with open(f'../tex/tables/{name}.tex', 'w') as tf:
     tf.write(t)

## Summary tables

Post-process the summary-statistics tables stored in `../tex/tables/`: escape special characters and add captions and labels.

In [None]:
# open file and make changes
fname = '../tex/tables/summary_cat.tex'
with open(fname, 'r') as file:
    data = file.read()

# The steps below are not idempotent (a second pass would escape '<' / '>'
# again and nest another table environment), so skip files that already
# carry the label this cell adds.
if '\\label{table_summary_cat}' not in data:
    # only the first occurrence of 'female' is relabelled
    # NOTE(review): presumably fixes a mislabelled row -- confirm
    data = data.replace('female', 'male', 1)
    data = data.replace('N&', 'Households &')

    # put comparison operators in math mode; '>=' must be handled before '>'
    data = data.replace('<', '$<$')
    data = data.replace('>=', '$\\geq$')
    data = data.replace('>', '$>$')
    # undo the '>' replacement where it hit a '{>{' sequence
    data = data.replace('{$>${', '{>{')

    data = data.replace('\\scriptsize{\\emph{Source: }string}', '\\scriptsize{}')
    data = data.replace('BA+', 'Bachelor\'s+')

    # wrap the table in a table environment with caption and label
    replace_str = """\\begin{table} \\begin{center}
                    \\caption{Summary statistics, categorical variables}
                    \\label{table_summary_cat}"""
    data = data.replace('\\begin{center}', replace_str)
    data = data + '\\end{table}'

    # save file
    with open(fname, 'w') as file:
        file.write(data)

In [None]:
fname = '../tex/tables/summary_num.tex'
with open(fname, 'r') as file:
    data = file.read()

# un-escape the underscores in the label
data = data.replace('table\\_summary\\_num', 'table_summary_num')

# scale the table to the line width; skipped if a previous run already
# added the \resizebox wrapper, so re-running does not nest it
if '\\resizebox{\\linewidth}' not in data:
    data = data.replace('\\begin{threeparttable}', '\\resizebox{\\linewidth}{!}{%\n\\begin{threeparttable}')
    data = data.replace('\\end{threeparttable}', '\\end{threeparttable}}')

# replace everything from 'Observations' up to the following \bottomrule with
# a household-count row; str.find returns -1 when a marker is missing (e.g.
# on a second run), in which case the text is left untouched instead of
# being sliced at the wrong position
idx0 = data.find('Observations')
idx1 = data.find('\\bottomrule', idx0) if idx0 >= 0 else -1
if idx1 >= 0:
    data = data[:idx0] + 'Households & 7,136 & & 12,825 & & 19,961 & \\\\\n' + data[idx1:]

# save file
with open(fname, 'w') as file:
    file.write(data)