### Choose the first entry of each patient. Extract blood pressure data into ***Pressure***, create ***Pressure_index*** for each patient. 
Contains patient blood pressure data from vitalPeriodic.csv, vitalAperiodic.csv & nurseCharting.csv

Including: Noninvasivesystolic, Non-Invasive BP Systolic, Invasive BP Systolic, Systemicsystolic

**1. Extract the patient IDs of the selected cardiovascular sub-categories (shock, ventricular, chest pain, arrhythmias)**

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data directory. Defaults to the original author's local path, but can be
# overridden without editing the notebook by setting the EICU_DATA_DIR
# environment variable. The CSV files below are read by relative name, so the
# working directory still has to be switched here.
os.chdir(
    os.environ.get(
        "EICU_DATA_DIR",
        '/Users/xiao-zy19/Desktop/Johns Hopkins/Biomedical Data Design/EICU/eicu-collaborative-research-database-2.0',
    )
)


def filter_diagnosis(df, keyword):
    """Return the rows of ``df`` whose ``diagnosisstring`` contains ``keyword``.

    The match is a case-sensitive, literal substring test (``regex=False``).
    Rows with a missing ``diagnosisstring`` are treated as non-matching
    (``na=False``) instead of producing a NaN mask that cannot be used for
    boolean indexing.
    """
    mask = df["diagnosisstring"].str.contains(keyword, regex=False, na=False)
    return df[mask]


# import diagnosis.csv, ordered by unit stay and then by diagnosis offset
df_diagnosis = pd.read_csv("diagnosis.csv").sort_values(
    by=["patientunitstayid", "diagnosisoffset"]
)

# select cardiovascular patients
df_cardiovascular = filter_diagnosis(df_diagnosis, "cardiovascular")

# sub-categories of interest within the cardiovascular diagnoses
shock_patient = filter_diagnosis(df_cardiovascular, "shock")
ventricular_patient = filter_diagnosis(df_cardiovascular, "ventricular")
chest_pain_patient = filter_diagnosis(df_cardiovascular, "chest pain")
arrhythmias_patient = filter_diagnosis(df_cardiovascular, "arrhythmias")

# put the sub-categories together (a diagnosis row matching several keywords
# appears once per matching keyword; the ids are de-duplicated below)
df_wanted = pd.concat(
    [shock_patient, ventricular_patient, chest_pain_patient, arrhythmias_patient]
)

# Distinct unit stay ids found in df_wanted, in ascending order.
# A patient with several unit stays contributes one id per stay.
patient_id_all = np.sort(df_wanted["patientunitstayid"].unique())
print(patient_id_all)

[ 141168  141203  141227 ... 3353216 3353235 3353251]


**2. Exclude patients whose unitvisitnumber > 1**

In [3]:
# Load the patient table and order it by unit stay id
df_patient = pd.read_csv("patient.csv")
df_patient = df_patient.sort_values(by=["patientunitstayid"])

# Restrict to the stays selected from the diagnosis table ...
is_selected_stay = df_patient["patientunitstayid"].isin(patient_id_all)
df_patient_buf = df_patient.loc[is_selected_stay]

# ... and keep only the rows whose unitvisitnumber equals 1
is_first_visit = df_patient_buf["unitvisitnumber"] == 1
df_1time_patient = df_patient_buf.loc[is_first_visit]

# Distinct unit stay ids that survived both filters
patient_id = df_1time_patient["patientunitstayid"].unique()
print(f"Total number of patients: {len(patient_id)}")

Total number of patients: 71353


**3. Extract data of patients within the id list**

In [4]:
# Read vitalPeriodic.csv, nurseCharting.csv and vitalAperiodic.csv; each table
# is ordered by unit stay id and then by its own time-offset column.
df_vitalPeriodic = pd.read_csv("vitalPeriodic.csv").sort_values(
    by=["patientunitstayid", "observationoffset"]
)
df_nurseCharting = pd.read_csv("nurseCharting.csv").sort_values(
    by=["patientunitstayid", "nursingchartoffset"]
)
df_vitalAPeriodic = pd.read_csv("vitalAperiodic.csv").sort_values(
    by=["patientunitstayid", "observationoffset"]
)

# Keep only the rows that belong to the selected patients
keep_periodic = df_vitalPeriodic["patientunitstayid"].isin(patient_id)
df_vitalPeriodic = df_vitalPeriodic.loc[keep_periodic]

keep_charting = df_nurseCharting["patientunitstayid"].isin(patient_id)
df_nurseCharting = df_nurseCharting.loc[keep_charting]

keep_aperiodic = df_vitalAPeriodic["patientunitstayid"].isin(patient_id)
df_vitalAPeriodic = df_vitalAPeriodic.loc[keep_aperiodic]

# Optional: persist the filtered tables (uncomment to save)
# df_vitalPeriodic.to_csv('vitalPeriodic_wanted.csv', index=False)
# df_nurseCharting.to_csv('nurseCharting_wanted.csv', index=False)
# df_vitalAPeriodic.to_csv('vitalAPeriodic_wanted.csv', index=False)

# Report (rows, columns) of each filtered table
print(f"vitalperiodic shape: {df_vitalPeriodic.shape}")
print(f"nurseCharting shape: {df_nurseCharting.shape}")
print(f"vitalAPeriodic shape: {df_vitalAPeriodic.shape}")

vitalperiodic shape: (63195275, 19)
nurseCharting shape: (61929777, 8)
vitalAPeriodic shape: (11017182, 13)


**4. Extract Blood Pressure data & create index**


In [5]:
# Nurse-charting rows whose nursingchartcelltypevalname is
# "Non-Invasive BP Systolic". The offset column is renamed to match the
# vital-sign tables and the value column is renamed after the measurement.
is_noninvasive_row = (
    df_nurseCharting["nursingchartcelltypevalname"] == "Non-Invasive BP Systolic"
)
df_nurseCharting_noninvasive = df_nurseCharting.loc[is_noninvasive_row].rename(
    columns={
        "nursingchartoffset": "observationoffset",
        "nursingchartvalue": "Non-Invasive BP Systolic",
    }
)

# Same treatment for rows whose nursingchartcelltypevalname is
# "Invasive BP Systolic".
is_invasive_row = (
    df_nurseCharting["nursingchartcelltypevalname"] == "Invasive BP Systolic"
)
df_nurseCharting_invasive = df_nurseCharting.loc[is_invasive_row].rename(
    columns={
        "nursingchartoffset": "observationoffset",
        "nursingchartvalue": "Invasive BP Systolic",
    }
)

In [6]:
# Reduce every source table to (stay id, time offset, systolic value).
# The two key columns are shared by all four tables.
_bp_key_columns = ["patientunitstayid", "observationoffset"]

# from vitalPeriodic
systemicsystolic = df_vitalPeriodic[_bp_key_columns + ["systemicsystolic"]]

# from nurseCharting (non-invasive and invasive subsets)
non_invasive_BP_Systolic = df_nurseCharting_noninvasive[
    _bp_key_columns + ["Non-Invasive BP Systolic"]
]
invasive_BP_Systolic = df_nurseCharting_invasive[
    _bp_key_columns + ["Invasive BP Systolic"]
]

# from vitalAperiodic
Noninvasivesystolic = df_vitalAPeriodic[_bp_key_columns + ["noninvasivesystolic"]]


def create_index(df, column_name="patientunitstayid"):
    """Return the row positions at which each distinct value first appears.

    Positions are 0-based and positional (they ignore the DataFrame's index
    labels). ``len(df)`` is appended as a final sentinel, so when the rows of
    each value are contiguous, consecutive entries ``[i]`` and ``[i + 1]``
    delimit the block of rows for the i-th distinct value.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to index.
    column_name : str, default "patientunitstayid"
        Column whose distinct values are located.

    Returns
    -------
    pandas.Series
        First-occurrence positions in ascending order, followed by
        ``len(df)``. For an empty frame the result is ``[0]``.
    """
    # Vectorised first-occurrence detection; avoids a Python-level loop over
    # every row, which is very slow on tables with tens of millions of rows.
    is_first = ~df[column_name].duplicated(keep="first").to_numpy()
    first_occurrences = np.flatnonzero(is_first).tolist()

    # Sentinel marking the end of the last block.
    first_occurrences.append(len(df))
    return pd.Series(first_occurrences)


# Build the first-occurrence index (via create_index) for each systolic table
(
    systemicsystolic_index,
    non_invasive_BP_Systolic_index,
    invasive_BP_Systolic_index,
    Noninvasivesystolic_index,
) = (
    create_index(systemicsystolic),
    create_index(non_invasive_BP_Systolic),
    create_index(invasive_BP_Systolic),
    create_index(Noninvasivesystolic),
)

In [8]:
# Set of ids of the cardiovascular patients kept after the first-unit-visit filter
first_time_icu_patients = set(patient_id)
print(f'First time ICU patients: {len(first_time_icu_patients)}')

# Sets of patient ids that have at least one row in each systolic table:
# systemicsystolic, Non-Invasive BP Systolic, Invasive BP Systolic and
# noninvasivesystolic.
# NOTE(review): membership is by row presence only; the measurement values
# themselves are not checked for missing data here.
patients_with_systolic = set(systemicsystolic["patientunitstayid"].unique())
patients_with_noninvasive = set(non_invasive_BP_Systolic["patientunitstayid"].unique())
patients_with_invasive = set(invasive_BP_Systolic["patientunitstayid"].unique())
patients_with_Noninvasive = set(Noninvasivesystolic["patientunitstayid"].unique())

# Union of the four sets: patients present in at least one of the tables
patients_with_bp = (
    patients_with_systolic
    | patients_with_noninvasive
    | patients_with_invasive
    | patients_with_Noninvasive
)

# Size of the union.
# NOTE(review): the printed label says "all systolic", but the number is the
# count of patients found in ANY of the four sources, not in all of them.
print(f'Patients with all systolic: {len(patients_with_bp)}')

First time ICU patients: 71353
Patients with all systolic: 71238
