In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.stats import proportion
from scipy import stats
%matplotlib inline
# Directory that holds the .dta file. Set the LOYOLA_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell;
# the fallback is the original macOS location.
# (Windows example: C:\Users\YuChen\Desktop\loyola)
DATA_DIR = os.environ.get('LOYOLA_DATA_DIR', '/Users/YuChen/Desktop/loyola')
os.chdir(DATA_DIR)

In [2]:
# Full read of the Stata file. NOTE(review): the next cell re-reads the same
# file with columns=varLst and rebinds CAData, so this result is overwritten
# before it is used.
CAData = pd.read_stata('SPINE_ILEUS_CA_REVISED.dta')

## Reading the data

In [3]:
# Columns needed for the analysis, in the order they should appear in CAData.
varLst = [
    # procedure indicators and length of stay
    'ACDF', 'CLAM', 'SHORT', 'LONG', 'ALIF', 'PLIF', 'LOS',
    'COMBINED', 'COMBINED_SHORT', 'COMBINED_LONG',
    # ileus indicators
    'ileus_NPOA', 'SHORT_ILEUS', 'LONG_ILEUS', 'ALIF_ILEUS', 'PLIF_ILEUS',
    # outcomes
    'DIED', 'MI_NPOA', 'Sepsis_NPOA', 'PE_NPOA',
    # remaining ileus indicators
    'ACDF_ILEUS', 'CLAM_ILEUS', 'COMBINED_ILEUS',
    'COMBINED_SHORT_ILEUS', 'COMBINED_LONG_ILEUS',
]

# Load only the selected columns from the Stata file.
CAData = pd.read_stata('SPINE_ILEUS_CA_REVISED.dta', columns=varLst)

In [4]:
# Summary statistics for every loaded column; the minima are worth checking for
# invalid negative codes (e.g. in LOS and DIED).
CAData.describe()

Unnamed: 0,ACDF,CLAM,SHORT,LONG,ALIF,PLIF,LOS,COMBINED,COMBINED_SHORT,COMBINED_LONG,...,PLIF_ILEUS,DIED,MI_NPOA,Sepsis_NPOA,PE_NPOA,ACDF_ILEUS,CLAM_ILEUS,COMBINED_ILEUS,COMBINED_SHORT_ILEUS,COMBINED_LONG_ILEUS
count,345067.0,345067.0,345067.0,345067.0,345067.0,345067.0,345010.0,345067.0,345067.0,345067.0,...,345067.0,345045.0,345067.0,345067.0,345067.0,345067.0,345067.0,345067.0,345067.0,345067.0
mean,0.195733,0.043823,0.459282,0.12173,0.08494,0.061374,7.709695,0.010923,0.074049,0.014551,...,0.002292,0.033816,0.008416,0.044522,0.0,0.000606,0.000484,0.001078,0.005373,0.001837
std,0.397012,0.204741,0.498372,0.327466,0.279424,0.240172,79.797306,0.103546,0.262318,0.119164,...,0.047785,0.19398,0.091449,0.206195,0.0,0.024598,0.021989,0.032803,0.072964,0.042796
min,0.0,0.0,0.0,0.0,0.0,0.0,-6666.0,0.0,0.0,0.0,...,0.0,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,358.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [5]:
# Preview the first five rows (five is the default for head()).
CAData.head()

Unnamed: 0,ACDF,CLAM,SHORT,LONG,ALIF,PLIF,LOS,COMBINED,COMBINED_SHORT,COMBINED_LONG,...,PLIF_ILEUS,DIED,MI_NPOA,Sepsis_NPOA,PE_NPOA,ACDF_ILEUS,CLAM_ILEUS,COMBINED_ILEUS,COMBINED_SHORT_ILEUS,COMBINED_LONG_ILEUS
0,0.0,1.0,1.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,1.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Processing the data:

### 1. Finding NA values

In [6]:
# Number of missing values per column, as a plain array in column order.
CAData.isnull().sum(axis=0).values

array([ 0,  0,  0,  0,  0,  0, 57,  0,  0,  0,  0,  0,  0,  0,  0, 22,  0,
        0,  0,  0,  0,  0,  0,  0])

### 2. Finding negative values

In [7]:
# Number of negative values per column, as a plain array in column order.
(CAData < 0).sum(axis=0).values

array([ 0,  0,  0,  0,  0,  0, 48,  0,  0,  0,  0,  0,  0,  0,  0, 19,  0,
        0,  0,  0,  0,  0,  0,  0])

### 3. Finding patients whose length of stay (LOS) is 1 day or less

In [8]:
# Number of rows with LOS <= 1. Negative LOS codes also satisfy this condition,
# so they are included in the count.
(CAData['LOS'] <= 1).sum()

36850

## Cleaning data:

In [9]:
# Keep only the rows in which every column is >= 0. Rows containing a negative
# value or a missing value are dropped, because NaN >= 0 evaluates to False.
CAData = CAData[(CAData >= 0).all(axis=1)]

In [10]:
# Keep only stays longer than one day.
CAData = CAData.loc[CAData.LOS > 1]

In [11]:
# (rows, columns) remaining after the two cleaning filters above.
CAData.shape

(308119, 24)

## Answer to Questions:

### 1) Comparing ACDF_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo ACDF_ILEUS.


In [12]:
# Counts of MI_NPOA (rows) by ACDF_ILEUS (columns).
ct_ACDF_MI = pd.crosstab(CAData['MI_NPOA'], CAData['ACDF_ILEUS'])
ct_ACDF_MI

ACDF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,305052,203
1.0,2862,2


In [13]:
# Share of each MI_NPOA outcome within each ACDF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['ACDF_ILEUS'], normalize='columns')


ACDF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990705,0.990244
1.0,0.009295,0.009756


In [14]:
# Fisher's exact test returns the odds ratio and an exact p-value. Report both:
# only 2 MI cases fall in the ACDF_ILEUS = 1 column, so an exact test is a safer
# basis for judging significance than a chi-squared approximation.
odds_ratio, p_value = stats.fisher_exact(ct_ACDF_MI)
print('Odds ratio:', odds_ratio)
print('Fisher exact p-value:', p_value)


Odds ratio: 1.05011824726


In [15]:
#Implemented the hypothesis test in R
'''
prop.test(c(2862,2),c(307914,205))

2-sample test for equality of proportions with continuity correction

data:  c(2862, 2) out of c(307914, 205)
X-squared = 7.7793e-28, df = 1, p-value = 1
alternative hypothesis: two.sided
95 percent confidence interval:
-0.01438175  0.01345916
sample estimates:
    prop 1      prop 2 
0.009294803 0.009756098 

Warning message:
In prop.test(c(2862, 2), c(307914, 205)) :
 Chi-squared approximation may be incorrect
'''
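# Not significant (p = 1): MI rates are 0.93% without vs 0.98% with ACDF_ILEUS.
# R warns that the chi-squared approximation may be incorrect because only
# 2 MI cases fall in the ACDF_ILEUS = 1 group.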



### 2) Comparing CLAM_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo CLAM_ILEUS.


In [16]:
# Counts of MI_NPOA (rows) by CLAM_ILEUS (columns).
ct_CLAM_MI = pd.crosstab(CAData['MI_NPOA'], CAData['CLAM_ILEUS'])
ct_CLAM_MI

CLAM_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,305091,164
1.0,2861,3


In [17]:
# Share of each MI_NPOA outcome within each CLAM_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['CLAM_ILEUS'], normalize='columns')


CLAM_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.99071,0.982036
1.0,0.00929,0.017964


In [18]:
# Fisher's exact test returns the odds ratio and an exact p-value. Report both:
# only 3 MI cases fall in the CLAM_ILEUS = 1 column, so an exact test is a safer
# basis for judging significance than a chi-squared approximation.
odds_ratio, p_value = stats.fisher_exact(ct_CLAM_MI)
print('Odds ratio:', odds_ratio)
print('Fisher exact p-value:', p_value)


Odds ratio: 1.95069308872


In [19]:
#Implemented the hypothesis test in R
'''
prop.test(c(2861,3),c(307952,167))

2-sample test for equality of proportions with continuity correction

data:  c(2861, 3) out of c(307952, 167)
X-squared = 0.58436, df = 1, p-value = 0.4446
alternative hypothesis: two.sided
95 percent confidence interval:
-0.03181662  0.01446930
sample estimates:
    prop 1      prop 2 
0.009290409 0.017964072 

Warning message:
In prop.test(c(2861, 3), c(307952, 167)) :
 Chi-squared approximation may be incorrect

'''
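# Not significant (p = 0.4446): MI rates are 0.93% without vs 1.80% with CLAM_ILEUS.
# R warns that the chi-squared approximation may be incorrect because only
# 3 MI cases fall in the CLAM_ILEUS = 1 group.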



### 3) Comparing LONG_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo LONG_ILEUS.


In [20]:
# Counts of MI_NPOA (rows) by LONG_ILEUS (columns).
ct_LONG_MI = pd.crosstab(CAData['MI_NPOA'], CAData['LONG_ILEUS'])
ct_LONG_MI

LONG_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,303595,1660
1.0,2853,11


In [21]:
# Share of each MI_NPOA outcome within each LONG_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['LONG_ILEUS'], normalize='columns')


LONG_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.99069,0.993417
1.0,0.00931,0.006583


In [22]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_LONG_MI)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.705143391653


In [23]:
#Implemented the hypothesis test in R
'''
prop.test(c(2853,11),c(306448,1671))

2-sample test for equality of proportions with continuity correction

data:  c(2853, 11) out of c(306448, 1671)
X-squared = 1.0623, df = 1, p-value = 0.3027
alternative hypothesis: two.sided
95 percent confidence interval:
-0.001466059  0.006920088
sample estimates:
    prop 1      prop 2 
0.009309899 0.006582885 
'''
# Not significant (p = 0.3027): MI rates are 0.93% without vs 0.66% with LONG_ILEUS.

'\nprop.test(c(2853,11),c(306448,1671))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2853, 11) out of c(306448, 1671)\nX-squared = 1.0623, df = 1, p-value = 0.3027\nalternative hypothesis: two.sided\n95 percent confidence interval:\n-0.001466059  0.006920088\nsample estimates:\n    prop 1      prop 2 \n0.009309899 0.006582885 \n'

### 4) Comparing SHORT_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo SHORT_ILEUS.


In [24]:
# Counts of MI_NPOA (rows) by SHORT_ILEUS (columns).
ct_SHORT_MI = pd.crosstab(CAData['MI_NPOA'], CAData['SHORT_ILEUS'])
ct_SHORT_MI

SHORT_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,301494,3761
1.0,2843,21


In [25]:
# Share of each MI_NPOA outcome within each SHORT_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['SHORT_ILEUS'], normalize='columns')


SHORT_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990658,0.994447
1.0,0.009342,0.005553


In [26]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_SHORT_MI)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.592130968528


In [27]:
#Implemented the hypothesis test in R
'''
prop.test(c(2843,21),c(304337,3781))

2-sample test for equality of proportions with continuity correction

data:  c(2843, 21) out of c(304337, 3781)
X-squared = 5.4137, df = 1, p-value = 0.01998
alternative hypothesis: two.sided
95 percent confidence interval:
0.001260248 0.006314816
sample estimates:
    prop 1      prop 2 
0.009341618 0.005554086 
'''
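# Significant (p = 0.01998), but in the opposite direction to the question:
# the MI rate is LOWER with SHORT_ILEUS (0.56% vs 0.93%).
# NOTE(review): the crosstab above gives 3761 + 21 = 3782 patients in the
# SHORT_ILEUS = 1 group, but this prop.test call used 3781; re-run with 3782.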

'\nprop.test(c(2843,21),c(304337,3781))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2843, 21) out of c(304337, 3781)\nX-squared = 5.4137, df = 1, p-value = 0.01998\nalternative hypothesis: two.sided\n95 percent confidence interval:\n0.001260248 0.006314816\nsample estimates:\n    prop 1      prop 2 \n0.009341618 0.005554086 \n'

### 5). Comparing ALIF_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo ALIF_ILEUS.

In [28]:
# Counts of MI_NPOA (rows) by ALIF_ILEUS (columns).
ct_ALIF_MI = pd.crosstab(CAData['MI_NPOA'], CAData['ALIF_ILEUS'])
ct_ALIF_MI

ALIF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,302936,2319
1.0,2855,9


In [29]:
# Share of each MI_NPOA outcome within each ALIF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['ALIF_ILEUS'], normalize='columns')


ALIF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990664,0.996134
1.0,0.009336,0.003866


In [30]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_ALIF_MI)[0]
print('Odds ratio:', odds_ratio)

Odds ratio: 0.411800182608


In [31]:
#Implemented the hypothesis test in R
'''
prop.test(c(2855,9),c(305791,2328))

2-sample test for equality of proportions with continuity correction

data:  c(2855, 9) out of c(305791, 2328)
X-squared = 6.9259, df = 1, p-value = 0.008495
alternative hypothesis: two.sided
95 percent confidence interval:
0.002710269 0.008230656
sample estimates:
    prop 1      prop 2 
0.009336442 0.003865979
'''
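# Significant (p = 0.008495), but in the opposite direction to the question:
# the MI rate is LOWER with ALIF_ILEUS (0.39% vs 0.93%).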

'\nprop.test(c(2855,9),c(305791,2328))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2855, 9) out of c(305791, 2328)\nX-squared = 6.9259, df = 1, p-value = 0.008495\nalternative hypothesis: two.sided\n95 percent confidence interval:\n0.002710269 0.008230656\nsample estimates:\n    prop 1      prop 2 \n0.009336442 0.003865979\n'

### 6). Comparing PLIF_ILEUS vs MI_NPOA to see if there is a higher rate of MI for patients that undergo PLIF_ILEUS.

In [32]:
# Counts of MI_NPOA (rows) by PLIF_ILEUS (columns).
ct_PLIF_MI = pd.crosstab(CAData['MI_NPOA'], CAData['PLIF_ILEUS'])
ct_PLIF_MI

PLIF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,304465,790
1.0,2863,1


In [33]:
# Share of each MI_NPOA outcome within each PLIF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['PLIF_ILEUS'], normalize='columns')


PLIF_ILEUS,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990684,0.998736
1.0,0.009316,0.001264


In [34]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_PLIF_MI)[0]
print('Odds ratio:', odds_ratio)

Odds ratio: 0.134613599084


In [35]:
#Implemented the hypothesis test in R
'''
prop.test(c(2863,1),c(307328,791))

2-sample test for equality of proportions with continuity correction

data:  c(2863, 1) out of c(307328, 791)
X-squared = 4.7143, df = 1, p-value = 0.02991
alternative hypothesis: two.sided
95 percent confidence interval:
0.004918371 0.011184744
sample estimates:
    prop 1      prop 2 
0.009315780 0.001264223 
'''
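# Significant (p = 0.02991), but in the opposite direction to the question:
# the MI rate is LOWER with PLIF_ILEUS (0.13% vs 0.93%).
# NOTE(review): this rests on a single MI case in the PLIF_ILEUS = 1 group;
# consider confirming with Fisher's exact test.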

'\nprop.test(c(2863,1),c(307328,791))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2863, 1) out of c(307328, 791)\nX-squared = 4.7143, df = 1, p-value = 0.02991\nalternative hypothesis: two.sided\n95 percent confidence interval:\n0.004918371 0.011184744\nsample estimates:\n    prop 1      prop 2 \n0.009315780 0.001264223 \n'

### CERVICAL_COMBINED(ACDF and CLAM)

In [36]:
def cervical_combined(x, y):
    """Flag cases where both indicators are set.

    Adds the two inputs and floor-divides the sum by 2, so for 0/1 indicators
    the result is 1 only when both x and y are 1, and 0 otherwise. Works
    element-wise on pandas Series as well as on plain numbers.
    """
    total = x + y
    return total // 2

In [37]:
# Store the indicator as a real DataFrame column. Plain attribute assignment
# (CAData.Combined = ...) only attaches a Python attribute to the object: the
# Series stays unnamed (crosstabs label it "col_0") and it is not carried along
# when the frame is filtered or copied. assign() returns a new frame, so this
# cell can be re-run safely; CAData.Combined keeps working for later cells.
CAData = CAData.assign(Combined=cervical_combined(CAData.ACDF, CAData.CLAM))

### 7). Comparing CERVICAL_COMBINED vs MI_NPOA to see if there is a higher rate of MI for patients that undergo CERVICAL_COMBINED.

In [38]:
# Counts of MI_NPOA (rows) by the cervical Combined indicator (columns).
ct_Combined_MI = pd.crosstab(index=CAData.MI_NPOA, columns=CAData.Combined)
ct_Combined_MI

col_0,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,301647,3608
1.0,2840,24


In [39]:
# Share of each MI_NPOA outcome within each Combined group (columns sum to 1).
pd.crosstab(index=CAData.MI_NPOA, columns=CAData.Combined, normalize='columns')


col_0,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990673,0.993392
1.0,0.009327,0.006608


In [40]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_Combined_MI)[0]
print('Odds ratio:', odds_ratio)

Odds ratio: 0.706521501515


In [41]:
#Implemented the hypothesis test in R
'''
prop.test(c(2840,24),c(304487,3632))

2-sample test for equality of proportions with continuity correction

data:  c(2840, 24) out of c(304487, 3632)
X-squared = 2.5943, df = 1, p-value = 0.1073
alternative hypothesis: two.sided
95 percent confidence interval:
-7.702731e-05  5.515495e-03
sample estimates:
    prop 1      prop 2 
0.009327163 0.006607930 
'''
# Not significant (p = 0.1073): MI rates are 0.93% without vs 0.66% with the
# cervical Combined indicator.

'\nprop.test(c(2840,24),c(304487,3632))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2840, 24) out of c(304487, 3632)\nX-squared = 2.5943, df = 1, p-value = 0.1073\nalternative hypothesis: two.sided\n95 percent confidence interval:\n-7.702731e-05  5.515495e-03\nsample estimates:\n    prop 1      prop 2 \n0.009327163 0.006607930 \n'

### 8). Comparing COMBINED(Anterior/posterior thoracic/lumbar fusion) vs MI_NPOA to see if there is a higher rate of MI for patients that undergo COMBINED(Anterior/posterior thoracic/lumbar fusion).

In [42]:
# Counts of MI_NPOA (rows) by COMBINED (columns).
ct_COMBINED_MI = pd.crosstab(CAData['MI_NPOA'], CAData['COMBINED'])
ct_COMBINED_MI

COMBINED,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,301540,3715
1.0,2853,11


In [43]:
# Share of each MI_NPOA outcome within each COMBINED group (columns sum to 1).
pd.crosstab(CAData['MI_NPOA'], CAData['COMBINED'], normalize='columns')


COMBINED,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.990627,0.997048
1.0,0.009373,0.002952


In [44]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_COMBINED_MI)[0]
print('Odds ratio:', odds_ratio)

Odds ratio: 0.312951491641


In [45]:
#Implemented the hypothesis test in R
'''
prop.test(c(2853,11),c(304393,3726))

2-sample test for equality of proportions with continuity correction

data:  c(2853, 11) out of c(304393, 3726)
X-squared = 15.788, df = 1, p-value = 7.085e-05
alternative hypothesis: two.sided
95 percent confidence interval:
0.004509330 0.008331718
sample estimates:
    prop 1      prop 2 
0.009372752 0.002952228 
'''
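# Significant (p = 7.085e-05), but in the opposite direction to the question:
# the MI rate is LOWER with COMBINED (0.30% vs 0.94%).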

'\nprop.test(c(2853,11),c(304393,3726))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(2853, 11) out of c(304393, 3726)\nX-squared = 15.788, df = 1, p-value = 7.085e-05\nalternative hypothesis: two.sided\n95 percent confidence interval:\n0.004509330 0.008331718\nsample estimates:\n    prop 1      prop 2 \n0.009372752 0.002952228 \n'

### 9). Comparing ACDF_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo ACDF_ILEUS.

In [46]:
# Counts of Sepsis_NPOA (rows) by ACDF_ILEUS (columns).
ct_ACDF_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['ACDF_ILEUS'])
ct_ACDF_Sepsis

ACDF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,292716,182
1.0,15198,23


In [47]:
# Share of each Sepsis_NPOA outcome within each ACDF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['ACDF_ILEUS'], normalize='columns')


ACDF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950642,0.887805
1.0,0.049358,0.112195


In [48]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_ACDF_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 2.43397699813


In [49]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15198,23),c(307914,205))

2-sample test for equality of proportions with continuity correction

data:  c(15198, 23) out of c(307914, 205)
X-squared = 15.914, df = 1, p-value = 6.63e-05
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.10848791 -0.01718646
sample estimates:
    prop 1     prop 2 
0.04935794 0.11219512 
'''
# Significant (p = 6.63e-05): the sepsis rate is HIGHER with ACDF_ILEUS
# (11.2% vs 4.9%).

'\n> prop.test(c(15198,23),c(307914,205))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15198, 23) out of c(307914, 205)\nX-squared = 15.914, df = 1, p-value = 6.63e-05\nalternative hypothesis: two.sided\n95 percent confidence interval:\n -0.10848791 -0.01718646\nsample estimates:\n    prop 1     prop 2 \n0.04935794 0.11219512 \n'

### 10). Comparing CLAM_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo CLAM_ILEUS.

In [50]:
# Counts of Sepsis_NPOA (rows) by CLAM_ILEUS (columns).
ct_CLAM_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['CLAM_ILEUS'])
ct_CLAM_Sepsis


CLAM_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,292752,146
1.0,15200,21


In [51]:
# Share of each Sepsis_NPOA outcome within each CLAM_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['CLAM_ILEUS'], normalize='columns')


CLAM_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950642,0.874251
1.0,0.049358,0.125749


In [52]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_CLAM_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 2.7702739726


In [53]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15200,21),c(307952,167))

2-sample test for equality of proportions with continuity correction

data:  c(15200, 21) out of c(307952, 167)
X-squared = 19.146, df = 1, p-value = 1.211e-05
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.12967905 -0.02310127
sample estimates:
    prop 1     prop 2 
0.04935834 0.12574850 
'''
# Significant (p = 1.211e-05): the sepsis rate is HIGHER with CLAM_ILEUS
# (12.6% vs 4.9%).



'\n> prop.test(c(15200,21),c(307952,167))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15200, 21) out of c(307952, 167)\nX-squared = 19.146, df = 1, p-value = 1.211e-05\nalternative hypothesis: two.sided\n95 percent confidence interval:\n -0.12967905 -0.02310127\nsample estimates:\n    prop 1     prop 2 \n0.04935834 0.12574850 \n'

### 11). Comparing LONG_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo LONG_ILEUS.

In [54]:
# Counts of Sepsis_NPOA (rows) by LONG_ILEUS (columns).
ct_LONG_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['LONG_ILEUS'])
ct_LONG_Sepsis


LONG_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,291284,1614
1.0,15164,57


In [55]:
# Share of each Sepsis_NPOA outcome within each LONG_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['LONG_ILEUS'], normalize='columns')


LONG_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950517,0.965889
1.0,0.049483,0.034111


In [56]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_LONG_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.678381786642


In [57]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15164,57),c(306448,1671))

2-sample test for equality of proportions with continuity correction

data:  c(15164, 57) out of c(306448, 1671)
X-squared = 8.0385, df = 1, p-value = 0.004579
alternative hypothesis: two.sided
95 percent confidence interval:
 0.006334065 0.024409533
sample estimates:
    prop 1     prop 2 
0.04948311 0.03411131 
'''
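# Significant (p = 0.004579), but in the opposite direction to the question:
# the sepsis rate is LOWER with LONG_ILEUS (3.4% vs 4.9%).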

'\n> prop.test(c(15164,57),c(306448,1671))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15164, 57) out of c(306448, 1671)\nX-squared = 8.0385, df = 1, p-value = 0.004579\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.006334065 0.024409533\nsample estimates:\n    prop 1     prop 2 \n0.04948311 0.03411131 \n'

### 12). Comparing SHORT_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo SHORT_ILEUS.

In [58]:
# Counts of Sepsis_NPOA (rows) by SHORT_ILEUS (columns).
ct_SHORT_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['SHORT_ILEUS'])
ct_SHORT_Sepsis


SHORT_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,289211,3687
1.0,15126,95


In [59]:
# Share of each Sepsis_NPOA outcome within each SHORT_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['SHORT_ILEUS'], normalize='columns')


SHORT_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950299,0.974881
1.0,0.049701,0.025119


In [60]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_SHORT_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.492653053291


In [61]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15126,95),c(304337,3782))

2-sample test for equality of proportions with continuity correction

data:  c(15126, 95) out of c(304337, 3782)
X-squared = 47.549, df = 1, p-value = 5.364e-12
alternative hypothesis: two.sided
95 percent confidence interval:
 0.01940195 0.02976305
sample estimates:
    prop 1     prop 2 
0.04970148 0.02511898 
'''
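# Significant (p = 5.364e-12), but in the opposite direction to the question:
# the sepsis rate is LOWER with SHORT_ILEUS (2.5% vs 5.0%).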



'\n> prop.test(c(15126,95),c(304337,3782))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15126, 95) out of c(304337, 3782)\nX-squared = 47.549, df = 1, p-value = 5.364e-12\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.01940195 0.02976305\nsample estimates:\n    prop 1     prop 2 \n0.04970148 0.02511898 \n'

### 13). Comparing ALIF_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo ALIF_ILEUS.

In [62]:
# Counts of Sepsis_NPOA (rows) by ALIF_ILEUS (columns).
ct_ALIF_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['ALIF_ILEUS'])
ct_ALIF_Sepsis


ALIF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,290610,2288
1.0,15181,40


In [63]:
# Share of each Sepsis_NPOA outcome within each ALIF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['ALIF_ILEUS'], normalize='columns')


ALIF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950355,0.982818
1.0,0.049645,0.017182


In [64]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_ALIF_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.334667966906


In [65]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15181,40),c(305791,2328))

2-sample test for equality of proportions with continuity correction

data:  c(15181, 40) out of c(305791, 2328)
X-squared = 51.16, df = 1, p-value = 8.513e-13
alternative hypothesis: two.sided
95 percent confidence interval:
 0.02691188 0.03801390
sample estimates:
    prop 1     prop 2 
0.04964502 0.01718213 
'''
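# Significant (p = 8.513e-13), but in the opposite direction to the question:
# the sepsis rate is LOWER with ALIF_ILEUS (1.7% vs 5.0%).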

'\n> prop.test(c(15181,40),c(305791,2328))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15181, 40) out of c(305791, 2328)\nX-squared = 51.16, df = 1, p-value = 8.513e-13\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.02691188 0.03801390\nsample estimates:\n    prop 1     prop 2 \n0.04964502 0.01718213 \n'

### 14). Comparing PLIF_ILEUS vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo PLIF_ILEUS.

In [66]:
# Counts of Sepsis_NPOA (rows) by PLIF_ILEUS (columns).
ct_PLIF_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['PLIF_ILEUS'])
ct_PLIF_Sepsis


PLIF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,292125,773
1.0,15203,18


In [67]:
# Share of each Sepsis_NPOA outcome within each PLIF_ILEUS group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['PLIF_ILEUS'], normalize='columns')


PLIF_ILEUS,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950532,0.977244
1.0,0.049468,0.022756


In [68]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_PLIF_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.447437563176


In [69]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15203,18),c(307328,791))

	2-sample test for equality of proportions with continuity correction

data:  c(15203, 18) out of c(307328, 791)
X-squared = 11.426, df = 1, p-value = 0.0007241
alternative hypothesis: two.sided
95 percent confidence interval:
 0.01565809 0.03776654
sample estimates:
    prop 1     prop 2 
0.04946832 0.02275601 

'''
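# Significant (p = 0.0007241), but in the opposite direction to the question:
# the sepsis rate is LOWER with PLIF_ILEUS (2.3% vs 4.9%).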

'\n> prop.test(c(15203,18),c(307328,791))\n\n\t2-sample test for equality of proportions with continuity correction\n\ndata:  c(15203, 18) out of c(307328, 791)\nX-squared = 11.426, df = 1, p-value = 0.0007241\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.01565809 0.03776654\nsample estimates:\n    prop 1     prop 2 \n0.04946832 0.02275601 \n\n'

### 15). Comparing CERVICAL_Combined vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo CERVICAL_Combined.

In [70]:
# Counts of Sepsis_NPOA (rows) by the cervical Combined indicator (columns).
ct_Combined_Sepsis = pd.crosstab(index=CAData.Sepsis_NPOA, columns=CAData.Combined)
ct_Combined_Sepsis

col_0,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,289360,3538
1.0,15127,94


In [71]:
# Share of each Sepsis_NPOA outcome within each Combined group (columns sum to 1).
pd.crosstab(index=CAData.Sepsis_NPOA, columns=CAData.Combined, normalize='columns')


col_0,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.95032,0.974119
1.0,0.04968,0.025881


In [72]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_Combined_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.508224636461


In [73]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15127,94),c(304487,3632))

2-sample test for equality of proportions with continuity correction

data:  c(15127, 94) out of c(304487, 3632)
X-squared = 42.786, df = 1, p-value = 6.107e-11
alternative hypothesis: two.sided
95 percent confidence interval:
 0.01843873 0.02915972
sample estimates:
    prop 1     prop 2 
0.04968028 0.02588106 

'''
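# Significant (p = 6.107e-11), but in the opposite direction to the question:
# the sepsis rate is LOWER with the cervical Combined indicator (2.6% vs 5.0%).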

'\n> prop.test(c(15127,94),c(304487,3632))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15127, 94) out of c(304487, 3632)\nX-squared = 42.786, df = 1, p-value = 6.107e-11\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.01843873 0.02915972\nsample estimates:\n    prop 1     prop 2 \n0.04968028 0.02588106 \n\n'

### 16). Comparing COMBINED (Anterior/posterior thoracic/lumbar fusion) vs Sepsis_NPOA to see if there is a higher rate of Sepsis for patients that undergo COMBINED (Anterior/posterior thoracic/lumbar fusion).

In [74]:
# Counts of Sepsis_NPOA (rows) by COMBINED (columns).
ct_COMBINED_Sepsis = pd.crosstab(CAData['Sepsis_NPOA'], CAData['COMBINED'])
ct_COMBINED_Sepsis

COMBINED,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,289200,3698
1.0,15193,28


In [75]:
# Share of each Sepsis_NPOA outcome within each COMBINED group (columns sum to 1).
pd.crosstab(CAData['Sepsis_NPOA'], CAData['COMBINED'], normalize='columns')


COMBINED,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.950088,0.992485
1.0,0.049912,0.007515


In [76]:
# Odds ratio of the 2x2 table (first element of the Fisher exact test result).
odds_ratio = stats.fisher_exact(ct_COMBINED_Sepsis)[0]
print('Odds ratio:', odds_ratio)


Odds ratio: 0.144127175359


In [77]:
#Implemented the hypothesis test in R
'''
> prop.test(c(15193,28),c(304393,3726))

2-sample test for equality of proportions with continuity correction

data:  c(15193, 28) out of c(304393, 3726)
X-squared = 140, df = 1, p-value < 2.2e-16
alternative hypothesis: two.sided
95 percent confidence interval:
 0.03938299 0.04541239
sample estimates:
     prop 1      prop 2 
0.049912449 0.007514761

'''
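# Significant (p < 2.2e-16), but in the opposite direction to the question:
# the sepsis rate is LOWER with COMBINED (0.75% vs 5.0%).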

'\n> prop.test(c(15193,28),c(304393,3726))\n\n2-sample test for equality of proportions with continuity correction\n\ndata:  c(15193, 28) out of c(304393, 3726)\nX-squared = 140, df = 1, p-value < 2.2e-16\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.03938299 0.04541239\nsample estimates:\n     prop 1      prop 2 \n0.049912449 0.007514761\n\n'