In [1]:
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import plotnine as pn
from sspipe import p, px

# Set up paths
derived_path = os.path.abspath('..') | p(os.path.join, 'data', 'derived')

# Read data
sample = pd.read_csv(os.path.join(derived_path, 'sample_simulate_retire.csv'))

In [2]:
# Any newly diagnosed disease
disease_list = ['angina', 'heart_attack', 'diabetes', 'stroke', 'arthritis', 'cancer', 'psych']
sample['any_post'] = (sample[[disease + '_post' for disease in disease_list]] == 1).any(axis=1)

In [3]:
vars = ['any_post'] + [disease + '_post' for disease in disease_list] + ['poor_health_3', 'limit_3', 'adl_3']

In [4]:
df = sample.groupby(['treatment'])[vars].mean().round(3).T
df.rename(columns={1: 'Retired', 0: 'Employed'}, inplace=True)
df = df[['Employed', 'Retired']]
df.index = ['Any newly diagnosed disease', 
            'Newly diagnosed angina', 'New heart attack', 'New stroke', 'Newly diagnosed diabetes',
            'Newly diagnosed arthritis', 'Newly diagnosed cancer', 'Newly diagnosed psychiatric problem',
            'Poor self-assessed health', 'Limiting long-standing illness', 'Number of difficulties in ADL']

In [5]:
print(df.to_latex(float_format="%.2f"))

\begin{tabular}{lrr}
\toprule
treatment & Employed & Retired \\
\midrule
Any newly diagnosed disease & 0.03 & 0.17 \\
Newly diagnosed angina & 0.00 & 0.01 \\
New heart attack & 0.00 & 0.02 \\
New stroke & 0.00 & 0.03 \\
Newly diagnosed diabetes & 0.00 & 0.01 \\
Newly diagnosed arthritis & 0.02 & 0.10 \\
Newly diagnosed cancer & 0.00 & 0.03 \\
Newly diagnosed psychiatric problem & 0.01 & 0.02 \\
Poor self-assessed health & 0.01 & 0.05 \\
Limiting long-standing illness & 0.15 & 0.27 \\
Number of difficulties in ADL & 0.79 & 1.30 \\
\bottomrule
\end{tabular}



**Difference**
Numbers for retired individuals should be identical, since their retirement dates are not imputed.

Different items:
- Newly diagnosed angina
- New heart attack (not much)
- New stroke (not much)
- Newly diagnosed arthritis (not much)
- Newly diagnosed cancer (not much)
- Newly diagnosed psych (not much)
- Limiting long-standing illness
- Number of difficulties in ADL

