### Choose the first entry of each patient. Extract body Glasgow coma scale data into ***Glasgow***, create ***Glasgow_index*** for each patient. 
Contains patient Glasgow coma scale data from both nurseCharting.csv

**1. Extract sub-categories patient id from cardiovascular**

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# import data_toolbox

# change to your folder path
# os.chdir('C:/Users/xiao-zy19/Desktop/Johns Hopkins/Biomedical Data Design/EICU Database/eicu-collaborative-research-database-demo-2.0.1')
os.chdir('/Users/xiao-zy19/Desktop/Johns Hopkins/Biomedical Data Design/EICU/eicu-collaborative-research-database-2.0')


# import diagnosis.csv
df_diagnosis = pd.read_csv('diagnosis.csv')
df_diagnosis.sort_values(by=['patientunitstayid', 'diagnosisoffset'], inplace=True)

# select cardiovascular patients
df_cardiovascular = df_diagnosis[df_diagnosis['diagnosisstring'].str.contains('cardiovascular')]

# get shock patient 
shock_patient = df_cardiovascular[df_cardiovascular['diagnosisstring'].str.contains('shock')]

# get ventricular patient 
ventricular_patient = df_cardiovascular[df_cardiovascular['diagnosisstring'].str.contains('ventricular')]

# get chest pain patient 
chest_pain_patient = df_cardiovascular[df_cardiovascular['diagnosisstring'].str.contains('chest pain')]

# get arrhythmias patient 
arrhythmias_patient = df_cardiovascular[df_cardiovascular['diagnosisstring'].str.contains('arrhythmias')]

# put id together
df_wanted = pd.concat([shock_patient, ventricular_patient, chest_pain_patient, arrhythmias_patient])

# Get the patient ids from df_wanted & sort the patient id
# patient_id_all multiple entry patient's stayid
patient_id_all = df_wanted['patientunitstayid'].unique()
patient_id_all.sort()
print(patient_id_all)

[ 141168  141203  141227 ... 3353216 3353235 3353251]


**2. Exclude patient whose unitvisitnumbe>1**

In [3]:
# import patient.csv
df_patient = pd.read_csv('patient.csv')
df_patient.sort_values(by=['patientunitstayid'], inplace=True)
df_patient_buf = df_patient[df_patient['patientunitstayid'].isin(patient_id_all)]
df_1time_patient = df_patient_buf[df_patient_buf['unitvisitnumber']==1]
# print(df_1time_patient)

# select the patient id from df_1time_patient
patient_id = df_1time_patient['patientunitstayid'].unique()
print(f'Total number of patients: {len(patient_id)}')

Total number of patients: 71353


**3. Extract data of patients within the id list**

In [4]:
# import nurseCharting.csv
df_nurseCharting = pd.read_csv('nurseCharting.csv')
df_nurseCharting.sort_values(by=['patientunitstayid', 'nursingchartoffset'], inplace=True)

# select the wanted patient
df_nurseCharting = df_nurseCharting[df_nurseCharting['patientunitstayid'].isin(patient_id)]

# print the shape of the wanted file
print(f'nurseCharting shape: {df_nurseCharting.shape}')

nurseCharting shape: (61929777, 8)


**4. Extract Glasgow data & create index**

In [5]:
# nursingchartcelltypevallabel Temperature
df_nurseCharting = df_nurseCharting[df_nurseCharting['nursingchartcelltypevallabel']=='Glasgow coma score']
df_nurseCharting = df_nurseCharting.rename(columns={'nursingchartoffset': 'observationoffset', 'nursingchartvalue':'Glasgow score'})
print(df_nurseCharting.head())
print(df_nurseCharting.shape)

           nursingchartid  patientunitstayid  observationoffset  \
151475922       247094422             141227              -2230   
151475370       283246555             141227               -663   
151475782       174895080             141227               -132   
151475638       265182694             141227                 57   
151475246       174895188             141227                322   

           nursingchartentryoffset nursingchartcelltypecat  \
151475922                    -2230                  Scores   
151475370                     -663                  Scores   
151475782                     -132                  Scores   
151475638                       57                  Scores   
151475246                      322                  Scores   

          nursingchartcelltypevallabel nursingchartcelltypevalname  \
151475922           Glasgow coma score                   GCS Total   
151475370           Glasgow coma score                   GCS Total   
151475782     

In [6]:
# extract glasgow score from df_nurseCharting
Glasgow = df_nurseCharting[['patientunitstayid', 'observationoffset', 'Glasgow score']].copy()
Glasgow.sort_values(by=['patientunitstayid', 'observationoffset'], inplace=True)
print(f'First 5 rows of Temp: \n{Glasgow.head()} \n')

value_position_dict = {}
first_occurrences = []
for idx, value in enumerate(Glasgow['patientunitstayid']):
    # if the value is not in the dictionary, add it and create index
    if value not in value_position_dict:
        value_position_dict[value] = idx
        first_occurrences.append(idx)

first_occurrences.append(len(Glasgow))
# create first occurrence index for every patient
Glasgow_index = pd.Series(first_occurrences)
print(f'First 5 rows of Temp_index: \n{Glasgow_index.head()}')

First 5 rows of Temp: 
           patientunitstayid  observationoffset Glasgow score
151475922             141227              -2230            14
151475370             141227               -663            14
151475782             141227               -132            14
151475638             141227                 57            11
151475246             141227                322            11 

First 5 rows of Temp_index: 
0     0
1     8
2    11
3    33
4    46
dtype: int64


**Example: how to use Glasgow & Glasgow_index**

In [7]:
# if we want the i th patient's data (i starts from 0)
# use Glasgow.iloc[Glasgow_index[i]:Glasgow_index[i+1]]
i = 0
print(f'Glasgow score data for patient {i+1}: \n{Glasgow.iloc[Glasgow_index[i]:Glasgow_index[i+1]]}')

Glasgow score data for patient 1: 
           patientunitstayid  observationoffset Glasgow score
151475922             141227              -2230            14
151475370             141227               -663            14
151475782             141227               -132            14
151475638             141227                 57            11
151475246             141227                322            11
151475882             141227                537            11
151475511             141227                805            10
151475805             141227               1617             4


In [14]:
# 第一次进入ICU的心血管疾病患者ID集合
first_time_icu_patients = set(patient_id)
print(f'First time ICU patients: {len(first_time_icu_patients)}')

# 有格拉斯哥昏迷评分数据的患者ID集合
patients_with_glasgow = set(Glasgow['patientunitstayid'].unique())
print(f'Patients with glasgow: {len(patients_with_glasgow)}')

# 没有格拉斯哥昏迷评分数据的患者ID集合
patients_without_glasgow = first_time_icu_patients - patients_with_glasgow
print(f'Patients without glasgow: {len(patients_without_glasgow)}')
print(patients_without_glasgow)


First time ICU patients: 71353
Patients with glasgow: 52429
Patients without glasgow: 18924
{2850816, 3014657, 1572868, 3014662, 2621450, 1310731, 1572875, 2785291, 1277970, 2424851, 1310742, 229399, 1605656, 1146904, 2687002, 1146907, 229412, 2818086, 2818087, 2687018, 2883628, 2883631, 1310768, 2916401, 2611680, 3014707, 3014708, 2818101, 2588724, 1179703, 524344, 3014713, 1179706, 3113013, 1081404, 2752576, 2883649, 2359363, 2621508, 2752580, 3014725, 2785350, 3342405, 229450, 3014731, 2621516, 2752594, 2850900, 3014742, 2818134, 2719830, 1179741, 1146974, 3014759, 2883688, 2719848, 3211370, 163947, 1343595, 2818157, 2785390, 2883695, 2818158, 1572977, 2719853, 2719863, 3014776, 1605753, 3014778, 524411, 1310844, 2588793, 2752638, 1572990, 2687106, 2818179, 2752645, 163974, 2785416, 229513, 2752650, 2785418, 2687115, 2719887, 2719888, 1147025, 229524, 1179797, 393371, 2785436, 2359453, 393374, 1704094, 2883744, 1573025, 3014816, 3145888, 1212573, 327843, 2850977, 2490535, 2916520, 1

In [16]:
print(Glasgow['Glasgow score'].unique())

['14' '11' '10' '4' '15' '13' '8' '7' '3' '6' '9' '12' '5' '2' '1'
 'Unable to score due to medication' nan]
