In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split

from pgmpy.estimators import HillClimbSearch, ExhaustiveSearch, BayesianEstimator
from pgmpy.estimators import ConstraintBasedEstimator, K2Score, BicScore, BDeuScore
from pgmpy.models import BayesianModel


In [102]:
# Load the hospital / Medicare table and, in the same chained expression, drop
# the household-count and race-total columns so that only the per-bracket and
# per-group columns remain. Reading and dropping together keeps the cell
# idempotent: re-running it always starts again from the CSV on disk.
medidata = (
    pd.read_csv('../data/medicare_part_b/comprehensive_hospital_medicare_data.csv')
    .drop(columns=[
        'household_income_acs2018_Households (HH) Count',
        'tract_race_acs2018_Total',
    ])
)

# Show the remaining column labels as the cell's output.
medidata.columns

Index(['System', 'catchment_area_sqmi', 'num_beds', 'tract_race_acs2018_White',
       'tract_race_acs2018_Black', 'tract_race_acs2018_Native',
       'tract_race_acs2018_Asian', 'tract_race_acs2018_Pacific Islander',
       'tract_race_acs2018_Other', 'tract_race_acs2018_Two or More',
       'household_income_acs2018_HH 0-10k',
       'household_income_acs2018_HH 10k-15k',
       'household_income_acs2018_HH 15k-25k',
       'household_income_acs2018_HH 25k-35k',
       'household_income_acs2018_HH 35k-50k',
       'household_income_acs2018_HH 50k-75k',
       'household_income_acs2018_HH 75k-100k',
       'household_income_acs2018_HH 100k-150k',
       'household_income_acs2018_HH 150k-200k',
       'household_income_acs2018_HH 200k-UP', '291_2015_Discharges',
       '291_2015_Medicare_Payments', '291_2015_Total_Payments',
       '291_2015_Non_Medicare_Payments', '291_2016_Discharges',
       '291_2016_Medicare_Payments', '291_2016_Total_Payments',
       '291_2016_Non_Medicare_Payme

In [85]:
# Build the income-oriented modelling table from medidata:
#   * the four 871_2017_* columns (Discharges .. Non_Medicare_Payments),
#   * num_beds,
#   * four coarse income bands, each the row-wise sum of a contiguous run of
#     household_income_acs2018 columns.
# Every band is described as (new column, first source label, last source label);
# .loc label slices include both endpoints and rely on medidata's column order.
# .assign works on a copy, so medidata itself is left untouched.
medi_income = medidata.loc[:, '871_2017_Discharges':'871_2017_Non_Medicare_Payments'].assign(
    num_beds=medidata['num_beds'],
    **{
        band: medidata.loc[:, first:last].sum(axis=1)
        for band, first, last in [
            ('0-35k', 'household_income_acs2018_HH 0-10k', 'household_income_acs2018_HH 25k-35k'),
            ('35k-75k', 'household_income_acs2018_HH 35k-50k', 'household_income_acs2018_HH 50k-75k'),
            ('75k-150k', 'household_income_acs2018_HH 75k-100k', 'household_income_acs2018_HH 100k-150k'),
            ('150k-UP', 'household_income_acs2018_HH 150k-200k', 'household_income_acs2018_HH 200k-UP'),
        ]
    }
)

medi_income.head()

Unnamed: 0,871_2017_Discharges,871_2017_Medicare_Payments,871_2017_Total_Payments,871_2017_Non_Medicare_Payments,num_beds,0-35k,35k-75k,75k-150k,150k-UP
0,36.3,2.6,4.2,14.8,324,31.353763,27.166667,26.943011,10.221505
1,-17.3,20.1,22.3,32.9,1109,32.784783,25.77029,23.926812,15.344928
2,4.0,-0.2,-0.6,-2.4,257,24.553488,24.832558,28.660465,14.972093
3,27.1,-2.4,-6.4,-29.8,422,54.547458,28.333898,12.740678,2.688136
4,20.4,9.2,8.1,2.2,1658,19.820408,17.92551,26.837755,32.37551


In [92]:
# Build the race-oriented modelling table: the contiguous run of columns from
# num_beds through the last tract_race_acs2018 column, with the four 871_2017_*
# columns joined on the shared row index. Both pieces are inclusive label
# slices of medidata, so the result depends on medidata's column order.
medi_race = (
    medidata
    .loc[:, 'num_beds':'tract_race_acs2018_Two or More']
    .join(medidata.loc[:, '871_2017_Discharges':'871_2017_Non_Medicare_Payments'])
)

medi_race.head()

Unnamed: 0,num_beds,tract_race_acs2018_White,tract_race_acs2018_Black,tract_race_acs2018_Native,tract_race_acs2018_Asian,tract_race_acs2018_Pacific Islander,tract_race_acs2018_Other,tract_race_acs2018_Two or More,871_2017_Discharges,871_2017_Medicare_Payments,871_2017_Total_Payments,871_2017_Non_Medicare_Payments
0,324,159610,66408,1358,19051,76,52791,9288,36.3,2.6,4.2,14.8
1,1109,150508,261084,1556,36239,76,35886,17045,-17.3,20.1,22.3,32.9
2,257,57223,109528,829,5870,141,11693,4341,4.0,-0.2,-0.6,-2.4
3,422,41321,72878,1835,6183,179,135676,8291,27.1,-2.4,-6.4,-29.8
4,1658,268848,21853,1338,53804,483,15933,14785,20.4,9.2,8.1,2.2


In [74]:
# Split the rows of medidata 60/40 into train and test; the fixed random_state
# makes the partition repeatable across runs.
# NOTE(review): neither train nor test is referenced by the other cells visible
# here — confirm whether the structure search was meant to use train.
train, test = train_test_split(
    medidata,
    test_size=0.4,
    random_state=6969,
)

# Number of rows that ended up in the training partition.
len(train)

In [100]:
# Hill-climbing structure search over the income table, with candidate graphs
# scored by a K2Score built from the same data.
# NOTE(review): medi_income.head() shows continuous float columns, while pgmpy's
# K2 score is, as far as I can tell, intended for discrete variables — consider
# binning the columns before structure learning; confirm against the pgmpy docs.
hc = HillClimbSearch(
    medi_income,
    scoring_method=K2Score(medi_income),
)
best_model = hc.estimate()

# Print the learned directed edges, followed by a blank line.
print(best_model.edges(), "\n")

[('871_2017_Discharges', '75k-150k'), ('871_2017_Medicare_Payments', '150k-UP'), ('871_2017_Total_Payments', '871_2017_Medicare_Payments'), ('871_2017_Non_Medicare_Payments', '871_2017_Total_Payments'), ('num_beds', '35k-75k'), ('num_beds', '871_2017_Non_Medicare_Payments'), ('35k-75k', '0-35k'), ('75k-150k', 'num_beds')] 



In [101]:
# Hill-climbing structure search over the race table, with candidate graphs
# scored by a K2Score built from the same data. This rebinds hc and best_model,
# replacing whatever model an earlier cell stored under those names.
# NOTE(review): medi_race.head() shows large integer counts and float columns,
# while pgmpy's K2 score is, as far as I can tell, intended for discrete
# variables — consider binning before structure learning; confirm against the
# pgmpy docs.
hc = HillClimbSearch(
    medi_race,
    scoring_method=K2Score(medi_race),
)
best_model = hc.estimate()

# Print the learned directed edges, followed by a blank line.
print(best_model.edges(), "\n")

[('num_beds', 'tract_race_acs2018_Two or More'), ('tract_race_acs2018_White', 'tract_race_acs2018_Black'), ('tract_race_acs2018_White', 'num_beds'), ('tract_race_acs2018_Black', '871_2017_Non_Medicare_Payments'), ('tract_race_acs2018_Native', '871_2017_Discharges'), ('tract_race_acs2018_Asian', 'tract_race_acs2018_White'), ('tract_race_acs2018_Pacific Islander', 'tract_race_acs2018_Asian'), ('tract_race_acs2018_Two or More', '871_2017_Medicare_Payments'), ('871_2017_Non_Medicare_Payments', 'tract_race_acs2018_Other'), ('871_2017_Non_Medicare_Payments', '871_2017_Total_Payments'), ('871_2017_Non_Medicare_Payments', 'tract_race_acs2018_Native')] 



In [None]:
<class 'pgmpy.estimators.BDeuScore.BDeuScore'>
[('num_beds', 'tract_race_acs2018_Two or More'), 
 ('tract_race_acs2018_White', 'tract_race_acs2018_Black'), 
 ('tract_race_acs2018_White', 'num_beds'), 
 ('tract_race_acs2018_Black', '871_2017_Non_Medicare_Payments'), 
 ('tract_race_acs2018_Native', '871_2017_Discharges'), 
 ('tract_race_acs2018_Asian', 'tract_race_acs2018_White'), 
 ('tract_race_acs2018_Two or More', '871_2017_Medicare_Payments'), 
 ('871_2017_Total_Payments', 'tract_race_acs2018_Pacific Islander'),
 ('871_2017_Non_Medicare_Payments', 'tract_race_acs2018_Other'), 
 ('871_2017_Non_Medicare_Payments', '871_2017_Total_Payments'), 
 ('871_2017_Non_Medicare_Payments', 'tract_race_acs2018_Native')
] 