In [12]:
import pandas as pd
import numpy as np
import json

In [13]:
# read in the data
# Loads the CSV into the DataFrame `data`. Later cells use its 'marital'
# column as the target of the propensity model and its 'mntlhlth' column as
# the outcome of the weighted regression; every other column is a covariate.
data = pd.read_csv('data_2018_mntlhlth.csv')

In [14]:
# Tuned random-forest settings saved by an earlier search
fname = '2018_all_marital_PS.json'
with open(fname, mode='r') as infile:
    # Parse the whole file content in one go
    param_dict = json.loads(infile.read())

# Echo the loaded settings as the cell output
param_dict

{'bootstrap': True,
 'max_depth': 20,
 'max_features': 'log2',
 'min_samples_leaf': 6,
 'min_samples_split': 2,
 'n_estimators': 400,
 'test_score': 0.5909,
 'train_score': 0.6569}

In [15]:
from sklearn.ensemble import RandomForestClassifier

# Rebuild the forest from the stored hyperparameters only: param_dict also
# carries 'test_score' and 'train_score', which are not constructor arguments,
# so the relevant keys are picked out by name before unpacking.
best_rf = RandomForestClassifier(
    random_state=99,
    **{
        name: param_dict[name]
        for name in (
            'bootstrap',
            'max_depth',
            'max_features',
            'n_estimators',
            'min_samples_leaf',
            'min_samples_split',
        )
    }
)

In [16]:
# Fit the propensity model: predict 'marital' from the covariates.
# Later cells add 'propensity' and 'weighting' to `data` in place; they are
# dropped here as well (errors='ignore' makes that a no-op on a fresh run) so
# that re-running this cell never trains the model on its own output.
best_rf.fit(
    data.drop(columns=['mntlhlth', 'marital', 'propensity', 'weighting'], errors='ignore'),
    y=data['marital'],
)

In [17]:
# Score every row with the fitted propensity model and keep the probability
# in column 1 of predict_proba.
# NOTE(review): column 1 is taken to be P(marital == 1); this assumes
# best_rf.classes_ is [0, 1] - confirm.
# The derived 'propensity' / 'weighting' columns are excluded from the feature
# frame (errors='ignore' makes that a no-op on a fresh run) so this cell can be
# re-run without feeding its own output back in as an extra feature.
data['propensity'] = best_rf.predict_proba(
    data.drop(columns=['mntlhlth', 'marital', 'propensity', 'weighting'], errors='ignore')
)[:, 1]
data.shape

(1393, 10)

In [18]:
# Keep only rows whose propensity score is neither exactly 0 nor exactly 1;
# such scores would make one of the inverse-propensity weights divide by zero.
is_not_zero = data['propensity'].ne(0)
is_not_one = data['propensity'].ne(1)
data = data.loc[is_not_zero & is_not_one]
data.shape

(1393, 10)

In [19]:
def ipw_cal(propensity_score, marital_status):
    '''
    Calculates the inverse propensity weight (IPW) for a single observation.

    Observations with marital_status == 1 are weighted by 1 / propensity_score;
    observations with marital_status == 0 are weighted by
    1 / (1 - propensity_score).

    :param propensity_score: propensity score; a value of exactly 0 or 1 makes
        one of the two branches divide by zero, so such rows should be removed
        before calling this function
    :param marital_status: marital status indicator, must compare equal to 1 or 0
    :return: the inverse propensity score weight
    :raises ValueError: if marital_status is neither 1 nor 0 (including NaN)
    '''
    if marital_status == 1:
        return 1 / propensity_score

    if marital_status == 0:
        return 1 / (1 - propensity_score)

    # Previously this path fell through to an unbound local variable and failed
    # with an opaque UnboundLocalError; fail loudly with the offending value.
    raise ValueError(
        'marital_status must be 0 or 1, got {!r}'.format(marital_status)
    )

In [20]:
# Row-wise IPW: pair each propensity score with the matching marital indicator
data['weighting'] = list(map(ipw_cal, data['propensity'], data['marital']))

In [21]:
# outcome model
import statsmodels.api as sm

# Regressors are every column except the outcome and the two derived IPW columns.
# No explicit constant column is added here.
# NOTE(review): the race_is_* indicator columns may already span an intercept -
# confirm before adding one.
design = data.drop(columns=['mntlhlth', 'propensity', 'weighting'])

Y = np.array(data['mntlhlth'], dtype=float)
X = np.array(design, dtype=float)
weights = np.array(data['weighting'], dtype=float)

# Weighted least squares with the inverse-propensity weights
wls_model = sm.WLS(Y, X, weights=weights)
# X is a bare array, so restore the column labels for the summary table
wls_model.exog_names[:] = list(design.columns)
results = wls_model.fit()
results.params

array([-1.4082841 , -0.05191778,  1.73367885,  0.07715127,  0.36769357,
        0.31987207, -1.43443821, -1.3653786 ])

In [22]:
# Display the full regression table for the fitted WLS outcome model
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.06
Model:,WLS,Adj. R-squared:,0.055
Method:,Least Squares,F-statistic:,12.6
Date:,"Sat, 11 May 2024",Prob (F-statistic):,9.67e-16
Time:,15:38:55,Log-Likelihood:,-4618.9
No. Observations:,1393,AIC:,9254.0
Df Residuals:,1385,BIC:,9296.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
marital,-1.4083,0.349,-4.029,0.000,-2.094,-0.723
degree,-0.0519,0.150,-0.347,0.729,-0.346,0.242
satfin,1.7337,0.250,6.943,0.000,1.244,2.223
neisafe,0.0772,0.278,0.278,0.781,-0.468,0.622
relpersn,0.3677,0.176,2.091,0.037,0.023,0.713
race_is_white,0.3199,0.917,0.349,0.727,-1.479,2.119
race_is_black,-1.4344,1.036,-1.385,0.166,-3.466,0.597
race_is_other,-1.3654,1.049,-1.301,0.193,-3.423,0.693

0,1,2,3
Omnibus:,725.348,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4071.849
Skew:,2.469,Prob(JB):,0.0
Kurtosis:,9.765,Cond. No.,40.1
