In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Load the analysis dataset from its CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data_2018_mntlhlth_marital_trim.csv')

In [3]:
# Read the stored model settings (a JSON object) into a plain dict.
fname = '2018_marital_trim_PS.json'
with open(fname, mode='r') as infile:
    param_dict = json.loads(infile.read())

# Echo the loaded settings as the cell output.
param_dict

{'bootstrap': True,
 'max_depth': 60,
 'max_features': 'log2',
 'min_samples_leaf': 6,
 'min_samples_split': 3,
 'n_estimators': 200,
 'test_score': 0.6809,
 'train_score': 0.728}

In [4]:
from sklearn.ensemble import RandomForestClassifier

# Only these entries of param_dict are estimator arguments; any other keys
# it may hold are not forwarded.
rf_kwargs = {
    name: param_dict[name]
    for name in (
        'bootstrap',
        'max_depth',
        'max_features',
        'n_estimators',
        'min_samples_leaf',
        'min_samples_split',
    )
}

# Fixed random_state so the fitted forest is reproducible between runs.
best_rf = RandomForestClassifier(random_state=99, **rf_kwargs)

In [5]:
# Fit the propensity model: predict `marital` from the remaining covariates.
# Besides the outcome ('mntlhlth') and the target ('marital'), the columns this
# notebook derives later ('propensity', 'weighting') are excluded as well.
# errors='ignore' makes the drop valid on a fresh run (columns absent) and on a
# re-run (columns present), so the model never trains on its own output.
best_rf.fit(
    data.drop(columns=['mntlhlth', 'marital', 'propensity', 'weighting'], errors='ignore'),
    y=data['marital'],
)

In [6]:
# Propensity score = column 1 of predict_proba, i.e. the probability of the
# class stored at best_rf.classes_[1] (presumably marital == 1 — TODO confirm).
# The derived columns are dropped with errors='ignore' so that re-running this
# cell does not pass an existing 'propensity' column to the model, which would
# no longer match the features used for fitting.
data['propensity'] = best_rf.predict_proba(
    data.drop(columns=['mntlhlth', 'marital', 'propensity', 'weighting'], errors='ignore')
)[:, 1]
data.shape

(1057, 10)

In [7]:
# Remove rows whose propensity score is exactly 0 or 1: the inverse-propensity
# weights 1/p and 1/(1-p) are undefined at those values.
# .copy() makes the filtered frame independent of the original, so adding
# columns to it afterwards does not trigger pandas' SettingWithCopyWarning.
data = data.loc[~data['propensity'].isin([1, 0])].copy()
data.shape

(1057, 10)

In [8]:
def ipw_cal(propensity_score, marital_status):
    '''
    Calculates the inverse propensity weight (IPW) for a single observation.

    :param propensity_score: propensity score, used as the probability that
        marital_status equals 1
    :param marital_status: marital status indicator; must equal 1 or 0
    :return: 1/propensity_score when marital_status is 1,
        1/(1 - propensity_score) when marital_status is 0
    :raises ValueError: if marital_status is neither 1 nor 0 (e.g. NaN or
        another code); previously this fell through to an unbound local
    '''
    if marital_status == 1:
        return 1/propensity_score

    if marital_status == 0:
        return 1/(1-propensity_score)

    # Neither branch matched: fail loudly with a descriptive error instead of
    # the UnboundLocalError the missing assignment used to produce.
    raise ValueError(
        'marital_status must be 0 or 1, got {!r}'.format(marital_status)
    )

In [9]:
# One weight per row: ipw_cal is applied to each (propensity, marital) pair in row order.
data['weighting'] = list(map(ipw_cal, data['propensity'], data['marital']))

In [10]:
# outcome model
import statsmodels.api as sm

# Regressors are every column except the outcome and the two derived columns.
# NOTE(review): no explicit constant is added to the design matrix; presumably
# the race_is_* indicator columns together absorb the intercept — confirm.
regressors = data.drop(columns=['mntlhlth', 'propensity', 'weighting'])

Y = np.array(data['mntlhlth'], dtype=float)
X = np.array(regressors, dtype=float)
weights = np.array(data['weighting'], dtype=float)

# Weighted least squares of the outcome on the regressors, one weight per row.
wls_model = sm.WLS(Y, X, weights=weights)
# X is a bare array, so write the column names into the model's name list
# in place to get labelled coefficients in the summary.
wls_model.exog_names[:] = list(regressors.columns)
results = wls_model.fit()
results.params

array([-1.33120495,  0.13373808,  1.49964554,  0.14071653,  0.59637346,
       -0.34836475, -2.13839959, -1.90428323])

In [11]:
# Full regression table; alpha=0.05 (the default) gives the 95% confidence intervals.
results.summary(alpha=0.05)

0,1,2,3
Dep. Variable:,y,R-squared:,0.064
Model:,WLS,Adj. R-squared:,0.058
Method:,Least Squares,F-statistic:,10.21
Date:,"Sat, 11 May 2024",Prob (F-statistic):,2.03e-12
Time:,16:05:18,Log-Likelihood:,-3452.8
No. Observations:,1057,AIC:,6922.0
Df Residuals:,1049,BIC:,6961.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
marital,-1.3312,0.377,-3.532,0.000,-2.071,-0.592
degree,0.1337,0.160,0.838,0.402,-0.180,0.447
satfin,1.4996,0.273,5.492,0.000,0.964,2.035
neisafe,0.1407,0.300,0.469,0.639,-0.448,0.730
relpersn,0.5964,0.189,3.160,0.002,0.226,0.967
race_is_white,-0.3484,0.993,-0.351,0.726,-2.296,1.599
race_is_black,-2.1384,1.106,-1.934,0.053,-4.308,0.031
race_is_other,-1.9043,1.114,-1.709,0.088,-4.090,0.282

0,1,2,3
Omnibus:,579.808,Durbin-Watson:,1.972
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3660.362
Skew:,2.546,Prob(JB):,0.0
Kurtosis:,10.562,Cond. No.,40.0
