In [62]:
'''
Author: Sriram Yarlagadda + George Nakhleh
Date Created: 9/12/2016
Note: Please use Python 2.7
'''
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.stats import proportion
from scipy import stats
%matplotlib inline
# Working directory that contains SPINE_ILEUS_CA.dta.
# Set the ILEUS_DATA_DIR environment variable to point somewhere else without editing the
# notebook; when it is not set, the original hard-coded location is used.
# Locations previously used on other machines:
#os.chdir('C:\\Users\\gnakhleh\\Documents\\Loyola\\Spine Ileus') #Set location of home dir here.
#os.chdir('/Users/Sriram/Desktop/DePaul/Loyola-ILEUS-Analysis')#location in Mac
DATA_DIR = os.environ.get('ILEUS_DATA_DIR', 'C:/Users/syarlag1/Desktop/Loyola-ILEUS-Analysis')
os.chdir(DATA_DIR)

### Reading the Data

In [30]:
# The 22 columns read from the Stata file (order here is the column order of CAData).
varLst = [
    'KEY', 'SCOLI', 'LOS',
    'SHORT', 'LONG', 'ALIF', 'PLIF',
    'ileus_NPOA',
    'SCOLI_ILEUS', 'SHORT_ILEUS', 'LONG_ILEUS', 'ALIF_ILEUS', 'PLIF_ILEUS',
    'DIED',
    'MI_POA', 'MI_NPOA',
    'DVT_POA', 'DVT_NPOA',
    'Sepsis_POA', 'Sepsis_NPOA',
    'PE_POA', 'PE_NPOA',
]

# Load only those columns from the extract in the current working directory.
CAData = pd.read_stata('SPINE_ILEUS_CA.dta', columns=varLst)

In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) for every loaded column.
# Note that there are no patients with PE: PE_POA and PE_NPOA have min = max = 0.
# LOS and DIED have negative minima and counts below the other columns, which is what the
# preprocessing section below cleans up.
CAData.describe()

Unnamed: 0,KEY,SCOLI,LOS,SHORT,LONG,ALIF,PLIF,ileus_NPOA,SCOLI_ILEUS,SHORT_ILEUS,...,PLIF_ILEUS,DIED,MI_POA,MI_NPOA,DVT_POA,DVT_NPOA,Sepsis_POA,Sepsis_NPOA,PE_POA,PE_NPOA
count,198557.0,198557.0,198553.0,198557.0,198557.0,198557.0,198557.0,198557.0,198557.0,198557.0,...,198557.0,198544.0,198557.0,198557.0,198557.0,198557.0,198557.0,198557.0,198557.0,198557.0
mean,6200873000000.0,0.870642,4.831481,0.798174,0.211551,0.149327,0.10666,0.026169,0.023409,0.019073,...,0.003984,0.003752,0.000579,0.00277,0.001501,0.003616,0.00209,0.005293,0.0,0.0
std,169988100.0,0.336071,22.35319,0.401497,0.408499,0.355765,0.309005,0.159243,0.151016,0.136434,...,0.062903,0.068153,0.024052,0.052513,0.038685,0.059937,0.045644,0.072438,0.0,0.0
min,6200610000000.0,0.0,-6666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6200713000000.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,6200911000000.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6201012000000.0,1.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,6201114000000.0,1.0,358.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0


### Data Preprocessing

In [36]:
# Number of negative entries in each column, in column order.
(CAData < 0).values.sum(axis=0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

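As we can see, there are only 4 negative values that need to be removed. (Note: the all-zero array displayed above presumably comes from re-running the cell after the data had already been cleaned; the negative values themselves are visible in the `describe()` minima for LOS and DIED.) In addition to this, we also scan for any NaNs: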

In [37]:
# Number of missing (NaN) entries in each column, in column order.
CAData.isnull().values.sum(axis=0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

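There are 17 NaNs (consistent with the `describe()` counts above: 4 missing in LOS and 13 in DIED; the all-zero array shown here presumably reflects a re-run after cleaning). We proceed to clean the data by dropping every row that contains a negative or missing value: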

In [38]:
# Keep only the rows in which every column is >= 0.
# A comparison with NaN evaluates to False, so rows with missing values are dropped
# together with rows that contain a negative value.
CAData = CAData[(CAData >= 0).all(axis=1)]

In [39]:
# (rows, columns) of CAData after the row filter in the previous cell.
CAData.shape

(198536, 22)

### Answers to Questions

#### 1) Is there a higher rate of ILEUS for patients who undergo ALIF vs PLIF (as past studies have shown)?

In [97]:
# Keep only the patients whose PLIF and ALIF flags sum to exactly 1;
# all rows where the two flags do not sum to 1 are removed.
CADataSub = CAData[CAData.PLIF.add(CAData.ALIF).eq(1)]
CADataSub.shape

(43287, 22)

In [98]:
# Contingency table of raw counts: rows = ileus_NPOA, columns = (PLIF, ALIF).
# Because CADataSub only holds rows with PLIF + ALIF == 1, PLIF == 0 implies ALIF == 1,
# so comparing on one of the two column levels is enough.
ct_PvA = pd.crosstab(
    index=CADataSub['ileus_NPOA'],
    columns=[CADataSub['PLIF'], CADataSub['ALIF']],
)
ct_PvA

PLIF,0.0,1.0
ALIF,1.0,0.0
ileus_NPOA,Unnamed: 1_level_2,Unnamed: 2_level_2
0.0,23921,16990
1.0,1957,419


In [96]:
# Column-wise proportions of the ALIF/PLIF contingency table: each column is divided by
# its own total, so each column sums to 1.
ct_PvA.div(ct_PvA.sum(axis=0), axis='columns')

PLIF,0.0,1.0
ALIF,1.0,0.0
ileus_NPOA,Unnamed: 1_level_2,Unnamed: 2_level_2
0.0,0.924376,0.975932
1.0,0.075624,0.024068


In [82]:
# Two-sample test for equality of proportions (ileus rate, ALIF vs PLIF).
# The test was run in R and its console output is pasted below as a string literal;
# the string is simply displayed as this cell's output, nothing is computed here.
# prop 1 is the ALIF group (1957 of 25878), prop 2 the PLIF group (419 of 17409).
# NOTE(review): the original reason given was that there doesn't seem to be a reliable
# package in Python. The same statistic could presumably be obtained with
# scipy.stats.chi2_contingency(ct_PvA, correction=False) -- TODO confirm and replace the
# pasted output so the notebook is reproducible.
'''
prop.test(c(1957,419),c(25878,17409),correct=FALSE)

	2-sample test for equality of proportions without
	continuity correction

data:  c(1957, 419) out of c(25878, 17409) 
X-squared = 533.26, df = 1, p-value < 2.2e-16 
alternative hypothesis: two.sided
95 percent confidence interval:
 0.04761145 0.05550070
sample estimates:
    prop 1     prop 2 
0.07562408 0.02406801 
'''
# SIGNIFICANT: ileus rate is about 7.6% for ALIF vs about 2.4% for PLIF (p-value < 2.2e-16).

'\n\t2-sample test for equality of proportions without\n\tcontinuity correction\n\ndata:  c(1957, 419) out of c(25878, 17409) \\\nX-squared = 533.26, df = 1, p-value < 2.2e-16 \\\nalternative hypothesis: two.sided\n95 percent confidence interval:\n 0.04761145 0.05550070\nsample estimates:\n    prop 1     prop 2 \n0.07562408 0.02406801 \n'

#### 2) Does length of Fusion (Long vs Short) have an impact on ILEUS?

In [102]:
# Keep only the patients whose LONG and SHORT flags sum to exactly 1.
# Note that this re-uses (overwrites) the CADataSub name from question 1.
CADataSub = CAData[CAData.LONG.add(CAData.SHORT).eq(1)]
CADataSub.shape

(190922, 22)

In [103]:
# Contingency table of raw counts: rows = ileus_NPOA, columns = (LONG, SHORT).
ct_LvS = pd.crosstab(
    index=CADataSub['ileus_NPOA'],
    columns=[CADataSub['LONG'], CADataSub['SHORT']],
)
ct_LvS

LONG,0.0,1.0
SHORT,1.0,0.0
ileus_NPOA,Unnamed: 1_level_2,Unnamed: 2_level_2
0.0,150233,35873
1.0,3465,1351


In [104]:
# Column-wise proportions of the LONG/SHORT contingency table: each column is divided by
# its own total, so each column sums to 1.
ct_LvS.div(ct_LvS.sum(axis=0), axis='columns')

LONG,0.0,1.0
SHORT,1.0,0.0
ileus_NPOA,Unnamed: 1_level_2,Unnamed: 2_level_2
0.0,0.977456,0.963706
1.0,0.022544,0.036294


In [None]:
# Two-sample test for equality of proportions (ileus rate, short vs long fusion).
# As before, the test was run in R and its console output is pasted below as a string
# literal; nothing is computed in this cell.
# prop 1 is the SHORT group (3465 of 153698), prop 2 the LONG group (1351 of 37224).
# NOTE(review): the same statistic could presumably be obtained in Python with
# scipy.stats.chi2_contingency(ct_LvS, correction=False) -- TODO confirm and replace the
# pasted output so the notebook is reproducible.
'''
prop.test(c(3465,1351),c(153698,37224),correct=FALSE)

	2-sample test for equality of proportions without
	continuity correction

data:  c(3465, 1351) out of c(153698, 37224)
X-squared = 230.4, df = 1, p-value < 2.2e-16
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.01578926 -0.01170990
sample estimates:
    prop 1     prop 2 
0.02254421 0.03629379 
'''
# SIGNIFICANT: ileus rate is about 2.3% for short vs about 3.6% for long fusions (p-value < 2.2e-16).

#### 3) Does ILEUS affect LOS?

#### 4) Does ILEUS affect death?

In [101]:
# Contingency table of raw counts: rows = DIED, columns = ileus_NPOA.
ct_DIED = pd.crosstab(index=CAData['DIED'], columns=CAData['ileus_NPOA'])
ct_DIED

ileus_NPOA,0.0,1.0
DIED,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,192640,5137
1.0,701,58


In [105]:
# Column-wise proportions: share of patients who died, within the no-ileus and ileus groups.
ct_DIED.div(ct_DIED.sum(axis=0), axis='columns')

ileus_NPOA,0.0,1.0
DIED,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.996374,0.988835
1.0,0.003626,0.011165


In [None]:
# Two-sample test for equality of proportions (death rate, no ileus vs ileus).
# As before, the test was run in R and its console output is pasted below as a string
# literal; nothing is computed in this cell.
# prop 1 is the no-ileus group (701 of 193341), prop 2 the ileus group (58 of 5195).
# NOTE(review): the same statistic could presumably be obtained in Python with
# scipy.stats.chi2_contingency(ct_DIED, correction=False) -- TODO confirm and replace the
# pasted output so the notebook is reproducible.
'''
prop.test(c(701,58),c(193341,5195),correct=FALSE)

	2-sample test for equality of proportions without
	continuity correction

data:  c(701, 58) out of c(193341, 5195)
X-squared = 75.499, df = 1, p-value < 2.2e-16
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.010408583 -0.004669143
sample estimates:
     prop 1      prop 2 
0.003625718 0.011164581 
'''
# SIGNIFICANT: death rate is about 0.36% without ileus vs about 1.12% with ileus (p-value < 2.2e-16).

#### 5) Does ILEUS affect DVT?

In [106]:
# Contingency table of raw counts: rows = DVT_NPOA, columns = ileus_NPOA.
ct_dvt = pd.crosstab(index=CAData['DVT_NPOA'], columns=CAData['ileus_NPOA'])
ct_dvt

ileus_NPOA,0.0,1.0
DVT_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,192721,5098
1.0,620,97


In [107]:
# Column-wise proportions: among patients without / with ileus, what share also developed DVT?
ct_dvt.div(ct_dvt.sum(axis=0), axis='columns')

ileus_NPOA,0.0,1.0
DVT_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.996793,0.981328
1.0,0.003207,0.018672


In [None]:
# Two-sample test for equality of proportions (DVT rate, no ileus vs ileus).
#
# The R output that used to be pasted here was produced with
#     prop.test(c(620,97),c(193341, 5292),correct=FALSE)
# but the ileus group in ct_dvt has 5098 + 97 = 5195 patients, not 5292. The pasted
# estimate for prop 2 (0.018330) therefore disagreed with the proportion table (0.018672),
# and the X-squared value and confidence interval were computed from the wrong denominator.
# The test is now computed directly from ct_dvt so it cannot drift from the data.


def two_proportion_test(ct, conf_level=0.95):
    """Two-sample test for equality of proportions, without continuity correction.

    Computes the same quantities that R's ``prop.test(x, n, correct=FALSE)`` reports
    for two groups: the Pearson chi-square statistic of the 2x2 table, its p-value,
    the two sample proportions, and the Wald confidence interval for their difference.

    Parameters
    ----------
    ct : 2x2 table of counts (DataFrame or array-like)
        Columns are the two groups being compared; the last row holds the number of
        events in each group and the first row the number of non-events.
    conf_level : float, default 0.95
        Confidence level of the interval for (prop 1 - prop 2).

    Returns
    -------
    pandas.Series
        'X-squared', 'df', 'p-value', 'prop 1', 'prop 2', 'CI lower', 'CI upper'.
    """
    counts = np.asarray(ct, dtype=float)
    events = counts[-1]            # events in group 1 and group 2
    totals = counts.sum(axis=0)    # group sizes, taken from the table itself
    props = events / totals

    # correction=False disables the Yates continuity correction (R: correct=FALSE).
    chi2, p_value, dof = stats.chi2_contingency(counts, correction=False)[:3]

    # Wald interval for the difference, clipped to [-1, 1] as prop.test does.
    z_crit = stats.norm.ppf(0.5 + conf_level / 2.0)
    half_width = z_crit * np.sqrt(np.sum(props * (1.0 - props) / totals))
    diff = props[0] - props[1]

    return pd.Series(
        [chi2, dof, p_value, props[0], props[1],
         max(diff - half_width, -1.0), min(diff + half_width, 1.0)],
        index=['X-squared', 'df', 'p-value', 'prop 1', 'prop 2', 'CI lower', 'CI upper'],
    )


# prop 1 = DVT rate without ileus (620 of 193341), prop 2 = DVT rate with ileus (97 of 5195).
two_proportion_test(ct_dvt)
#SIGNIFICANT (expected to be unchanged by the corrected denominator -- confirm from the p-value above)

#### 6) Does ILEUS affect MI (heart attack)?

In [108]:
# Contingency table of raw counts: rows = MI_NPOA, columns = ileus_NPOA.
ct_mi = pd.crosstab(index=CAData['MI_NPOA'], columns=CAData['ileus_NPOA'])
ct_mi

ileus_NPOA,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,192823,5164
1.0,518,31


In [109]:
# Column-wise proportions: share of patients with MI, within the no-ileus and ileus groups.
ct_mi.div(ct_mi.sum(axis=0), axis='columns')

ileus_NPOA,0.0,1.0
MI_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.997321,0.994033
1.0,0.002679,0.005967


In [110]:
# Two-sample test for equality of proportions (MI rate, no ileus vs ileus).
# As before, the test was run in R and its console output is pasted below as a string
# literal; nothing is computed in this cell.
# prop 1 is the no-ileus group (518 of 193341), prop 2 the ileus group (31 of 5195).
# NOTE(review): the same statistic could presumably be obtained in Python with
# scipy.stats.chi2_contingency(ct_mi, correction=False) -- TODO confirm and replace the
# pasted output so the notebook is reproducible.
'''
prop.test(c(518,31),c(193341, 5195),correct=FALSE)

	2-sample test for equality of proportions without
	continuity correction

data:  c(518, 31) out of c(193341, 5195)
X-squared = 19.835, df = 1, p-value = 8.444e-06
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.005395032 -0.001181112
sample estimates:
     prop 1      prop 2 
0.002679204 0.005967276 
'''
# IMPORTANT: even though the result is statistically significant, the absolute difference
# is very small (about 0.27% vs 0.60%), so it is better to be conservative and report
# this result as not useful.


'\nprop.test(c(518,31),c(193341, 5195),correct=FALSE)\n\n\t2-sample test for equality of proportions without\n\tcontinuity correction\n\ndata:  c(518, 31) out of c(193341, 5195)\nX-squared = 19.835, df = 1, p-value = 8.444e-06\nalternative hypothesis: two.sided\n95 percent confidence interval:\n -0.005395032 -0.001181112\nsample estimates:\n     prop 1      prop 2 \n0.002679204 0.005967276 \n'

#### 7) Does ILEUS affect sepsis?

In [111]:
# Contingency table of raw counts: rows = Sepsis_NPOA, columns = ileus_NPOA.
ct_sepsis = pd.crosstab(index=CAData['Sepsis_NPOA'], columns=CAData['ileus_NPOA'])
ct_sepsis

ileus_NPOA,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,192435,5054
1.0,906,141


In [112]:
# Column-wise proportions: share of patients with sepsis, within the no-ileus and ileus groups.
ct_sepsis.div(ct_sepsis.sum(axis=0), axis='columns')

ileus_NPOA,0.0,1.0
Sepsis_NPOA,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0.995314,0.972859
1.0,0.004686,0.027141


In [None]:
# Two-sample test for equality of proportions (sepsis rate, no ileus vs ileus).
# As before, the test was run in R and its console output is pasted below as a string
# literal; nothing is computed in this cell.
# prop 1 is the no-ileus group (906 of 193341), prop 2 the ileus group (141 of 5195).
# NOTE(review): the same statistic could presumably be obtained in Python with
# scipy.stats.chi2_contingency(ct_sepsis, correction=False) -- TODO confirm and replace the
# pasted output so the notebook is reproducible.
'''
prop.test(c(906,141),c(193341, 5195),correct=FALSE)

	2-sample test for equality of proportions without
	continuity correction

data:  c(906, 141) out of c(193341, 5195)
X-squared = 486.3, df = 1, p-value < 2.2e-16
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.02688466 -0.01802626
sample estimates:
     prop 1      prop 2 
0.004686021 0.027141482 
'''
# SIGNIFICANT: sepsis rate is about 0.47% without ileus vs about 2.71% with ileus (p-value < 2.2e-16).