In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [8]:
# Load the trimmed 2018 mental-health / marital-status dataset into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data_2018_mntlhlth_marital_trim.csv')

In [9]:
# JSON file holding stored random-forest hyperparameters and their scores.
fname = '2018_marital_trim_PS.json'
with open(fname) as infile:  # text read mode is the default
    param_dict = json.load(infile)

# Show the loaded parameters.
param_dict

{'bootstrap': True,
 'max_depth': 50,
 'max_features': 'log2',
 'min_samples_leaf': 4,
 'min_samples_split': 8,
 'n_estimators': 200,
 'test_score': 0.5986,
 'train_score': 0.6903}

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Build the classifier from the hyperparameters stored in `param_dict`.
# Only the six constructor arguments are forwarded; the score entries in the
# dict are not estimator parameters. The seed is fixed for reproducibility.
best_rf = RandomForestClassifier(
    random_state=99,
    **{
        hyperparam: param_dict[hyperparam]
        for hyperparam in (
            'bootstrap',
            'max_depth',
            'max_features',
            'n_estimators',
            'min_samples_leaf',
            'min_samples_split',
        )
    },
)

In [11]:
# Fit the classifier to predict 'marital' from the remaining covariates.
# 'mntlhlth' and the target 'marital' are always excluded from the features.
# 'propensity' and 'weighting' are columns that later cells add to `data`;
# they are dropped here when present (errors='ignore') so that re-running this
# cell after those cells does not feed the model's own output back in as a feature.
best_rf.fit(
    data.drop(columns=['mntlhlth', 'marital'])
        .drop(columns=['propensity', 'weighting'], errors='ignore'),
    y=data['marital'],
)

In [12]:
# Score every row with the fitted classifier and store the result as 'propensity'.
# 'mntlhlth' and 'marital' are always excluded from the features. 'propensity'
# and 'weighting' are dropped only when present (errors='ignore'), so the cell
# can be re-run after those columns have been added without changing the feature set.
# Column 1 of predict_proba is kept — presumably P(marital == 1); this assumes
# best_rf.classes_ is ordered [0, 1] — TODO confirm.
data['propensity'] = best_rf.predict_proba(
    data.drop(columns=['mntlhlth', 'marital'])
        .drop(columns=['propensity', 'weighting'], errors='ignore')
)[:, 1]
data.shape

(1393, 10)

In [13]:
# Keep only rows whose propensity score is neither exactly 0 nor exactly 1;
# for those values 1/p or 1/(1-p) cannot be computed.
data = data[~data['propensity'].isin([0, 1])]
data.shape

(1393, 10)

In [14]:
def ipw_cal(propensity_score, marital_status):
    '''
    Calculates the inverse propensity weight (IPW) for one observation.

    The weight is 1/p when marital_status is 1 and 1/(1-p) when it is 0,
    where p is the propensity score.

    :param propensity_score: propensity score p of the observation
    :param marital_status: marital status indicator, must be 0 or 1
    :return: the inverse propensity score weight
    :raises ValueError: if marital_status is neither 0 nor 1
    '''
    if marital_status == 1:
        weighting = 1/propensity_score
    elif marital_status == 0:
        weighting = 1/(1-propensity_score)
    else:
        # Fail with a clear message instead of reaching the return with
        # `weighting` unbound (e.g. for NaN or any other status code).
        raise ValueError(
            f"marital_status must be 0 or 1, got {marital_status!r}"
        )

    return weighting

In [15]:
# Row-wise inverse propensity weight: ipw_cal is applied to each
# (propensity, marital) pair and the results stored as a new column.
data['weighting'] = list(map(ipw_cal, data['propensity'], data['marital']))

In [16]:
# outcome model
import statsmodels.api as sm
Y = np.array(data['mntlhlth'],dtype=float)
X = np.array(data.drop(columns=['mntlhlth', 'propensity', 'weighting']),dtype=float)
weights = np.array(data['weighting'],dtype=float)
wls_model = sm.WLS(Y,X, weights=weights)
wls_model.exog_names[:] = list(data.drop(columns=['mntlhlth', 'propensity', 'weighting']).columns)
results = wls_model.fit()
results.params

array([-1.41827459, -0.05441329,  1.72808152,  0.07090803,  0.35467173,
        0.39489503, -1.42431977, -1.27120903])

In [17]:
# Display the full regression summary table of the fitted WLS results.
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.059
Model:,WLS,Adj. R-squared:,0.055
Method:,Least Squares,F-statistic:,12.47
Date:,"Sat, 11 May 2024",Prob (F-statistic):,1.44e-15
Time:,11:20:37,Log-Likelihood:,-4625.6
No. Observations:,1393,AIC:,9267.0
Df Residuals:,1385,BIC:,9309.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
marital,-1.4183,0.350,-4.050,0.000,-2.105,-0.731
degree,-0.0544,0.150,-0.362,0.717,-0.349,0.240
satfin,1.7281,0.250,6.905,0.000,1.237,2.219
neisafe,0.0709,0.279,0.254,0.800,-0.477,0.619
relpersn,0.3547,0.176,2.012,0.044,0.009,0.700
race_is_white,0.3949,0.919,0.430,0.668,-1.408,2.198
race_is_black,-1.4243,1.038,-1.372,0.170,-3.461,0.612
race_is_other,-1.2712,1.053,-1.207,0.227,-3.337,0.794

0,1,2,3
Omnibus:,736.224,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4315.406
Skew:,2.495,Prob(JB):,0.0
Kurtosis:,10.033,Cond. No.,40.1
