In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupShuffleSplit

In [2]:
static_dynamic = pd.read_csv('../data/static_dynamic.csv')
notes = pd.read_csv('../data/notes_embedded.csv')

In [3]:
# Convert text embeddings from csv string to np array
def convert_to_np(string):
    ss = string.strip('[]').split()
    return np.array(ss, dtype=float)
    
notes['text_embeddings'] = notes['text_embeddings'].map(lambda x: convert_to_np(x))

In [4]:
# Explore csv files imported
display(static_dynamic.head())
display(notes.head())

print(f'\nDimensions of static_dynamic: {static_dynamic.shape}')
print(f'Dimensions of notes: {notes.shape}\n')
print(f'Number of IDs in static_dynamic: {static_dynamic.id.nunique()}')
print(f'Number of IDs in notes: {notes.id.nunique()}')

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,sodium_bg,lactate_bg,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,


Unnamed: 0,id,text,los_icu,icu_death,text_embeddings
0,20001305,INDICATION: ___ with copd in resp distress in...,2.78,1,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
1,20001361,CHEST X-RAY DATED ___.\n\nCOMPARISON: None.\n...,6.05,0,"[0.0776797682, -0.31048429, -0.00207543001, 0...."
2,20001770,EXAMINATION: LIVER OR GALLBLADDER US (SINGLE ...,2.87,0,"[0.132888228, -0.191068441, -0.0299163219, 0.1..."
3,20002506,EXAMINATION: CTA HEAD AND CTA NECK Q16 CT NEC...,6.56,0,"[0.00452079531, -0.232567102, 0.0254237894, 0...."
4,20003425,ADDENDUM Findings were communicated to the EN...,4.0,0,"[0.122243397, -0.215054199, -0.0655379891, 0.0..."



Dimensions of static_dynamic: (510075, 110)
Dimensions of notes: (20403, 5)

Number of IDs in static_dynamic: 20403
Number of IDs in notes: 20403


In [5]:
# Subset notes with only text_embeddings and ID for merger
notes_subset = notes[['id', 'text_embeddings']]
display(notes_subset.head())

Unnamed: 0,id,text_embeddings
0,20001305,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
1,20001361,"[0.0776797682, -0.31048429, -0.00207543001, 0...."
2,20001770,"[0.132888228, -0.191068441, -0.0299163219, 0.1..."
3,20002506,"[0.00452079531, -0.232567102, 0.0254237894, 0...."
4,20003425,"[0.122243397, -0.215054199, -0.0655379891, 0.0..."


In [6]:
# Merge text embeddings with static_dynamic to create dataset
dataset = static_dynamic.merge(notes_subset, how='inner', on='id')
display(dataset.head())
print(f'Dimensions of dataset: {dataset.shape}')
print(f'Number of IDs in dataset: {dataset.id.nunique()}')

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,lactate_bg,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0..."
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1.0,1.0,84.22776,...,,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0..."


Dimensions of dataset: (510075, 111)
Number of IDs in dataset: 20403


In [7]:
# Create prediction classes
# icu_death 
dataset['icu_death'] = dataset['icu_death'].astype(int)
# los_icu <3 days vs los_icu >=3 days
dataset['los_icu_class'] = dataset['los_icu'].apply(lambda x: 'greater than or equal to 3 days' if x >= 3 else 'less than 3 days')

In [8]:
# Check dataset
display(dataset.head())
print(f'Dimensions of dataset: {dataset.shape}')

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days


Dimensions of dataset: (510075, 112)


In [9]:
groups = dataset['id']
group_shuffle_split = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=122)

for train_test_index, validation_index in group_shuffle_split.split(dataset, groups=groups):
    train_test = dataset.iloc[train_test_index]
    validation = dataset.iloc[validation_index]

In [10]:
# Explore train_test and validation dataframes

display(train_test.head())
display(validation.head())

print('\n=====dataset=====')
print(f'Dimensions of dataset: {dataset.shape}')
print(f'Number of ID in dataset: {dataset.id.nunique()}')
print(f'icu_deaths in dataset: {dataset.icu_death.value_counts()}')
print(f'los_ic_class in dataset: {dataset.los_icu_class.value_counts()}')

print('\n=====train_test=====')
print(f'Dimensions of train_test: {train_test.shape}')
print(f'Number of ID in train_test: {train_test.id.nunique()}')
print(f'icu_deaths in train_test: {train_test.icu_death.value_counts()}')
print(f'los_ic_class in train_test: {train_test.los_icu_class.value_counts()}')

print('\n=====validation=====')
print(f'Dimensions of validation: {validation.shape}')
print(f'Number of ID in validation: {validation.id.nunique()}')
print(f'icu_deaths in validation: {validation.icu_death.value_counts()}')
print(f'los_ic_class in validation: {validation.los_icu_class.value_counts()}')

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days


Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
100,20003425,2055-07-22 17:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",greater than or equal to 3 days
101,20003425,2055-07-22 18:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",greater than or equal to 3 days
102,20003425,2055-07-22 19:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",greater than or equal to 3 days
103,20003425,2055-07-22 20:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",greater than or equal to 3 days
104,20003425,2055-07-22 21:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",greater than or equal to 3 days



=====dataset=====
Dimensions of dataset: (510075, 112)
Number of ID in dataset: 20403
icu_deaths in dataset: icu_death
0    456700
1     53375
Name: count, dtype: int64
los_ic_class in dataset: los_icu_class
less than 3 days                   268350
greater than or equal to 3 days    241725
Name: count, dtype: int64

=====train_test=====
Dimensions of train_test: (408050, 112)
Number of ID in train_test: 16322
icu_deaths in train_test: icu_death
0    365875
1     42175
Name: count, dtype: int64
los_ic_class in train_test: los_icu_class
less than 3 days                   216025
greater than or equal to 3 days    192025
Name: count, dtype: int64

=====validation=====
Dimensions of validation: (102025, 112)
Number of ID in validation: 4081
icu_deaths in validation: icu_death
0    90825
1    11200
Name: count, dtype: int64
los_ic_class in validation: los_icu_class
less than 3 days                   52325
greater than or equal to 3 days    49700
Name: count, dtype: int64


In [11]:
dataset.to_csv('../data/dataset.csv', index=False)
train_test.to_csv('../data/train_test.csv', index=False)
validation.to_csv('../data/validation.csv', index=False)

#### Confirming right table format in csv file

In [12]:
d = pd.read_csv('../data/dataset.csv')
t = pd.read_csv('../data/train_test.csv')
v = pd.read_csv('../data/validation.csv')
display(d.head())
display(t.head())
display(v.head())

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days


Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,[ 4.95544821e-02 -3.71760167e-02 -1.27426326e-...,less than 3 days


Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20003425,2055-07-22 17:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
1,20003425,2055-07-22 18:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
2,20003425,2055-07-22 19:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
3,20003425,2055-07-22 20:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
4,20003425,2055-07-22 21:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days


In [13]:
# Convert text embeddings from csv string to np array
def convert_to_np(string):
    ss = string.strip('[]').split()
    return np.array(ss, dtype=float)
    
d['text_embeddings'] = d['text_embeddings'].map(lambda x: convert_to_np(x))
t['text_embeddings'] = t['text_embeddings'].map(lambda x: convert_to_np(x))
t['text_embeddings'] = v['text_embeddings'].map(lambda x: convert_to_np(x))

In [14]:
display(d.head())
display(t.head())
display(v.head())

Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.0495544821, -0.0371760167, -0.127426326, -0...",less than 3 days


Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20001305,1978-03-25 02:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",less than 3 days
1,20001305,1978-03-25 03:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",less than 3 days
2,20001305,1978-03-25 04:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",less than 3 days
3,20001305,1978-03-25 05:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",less than 3 days
4,20001305,1978-03-25 06:00:00,1978-03-25 02:58:00,1978-03-27 19:23:00,1978-03-25 02:59:00,1978-03-27 21:46:00,2.78,1,1.0,84.22776,...,,,,,,,,,"[0.122243397, -0.215054199, -0.0655379891, 0.0...",less than 3 days


Unnamed: 0,id,charttime,hosp_admittime,hosp_dischtime,icu_intime,icu_outtime,los_icu,icu_death,gender,admission_age,...,glucose_bg,d_dimer,fibrinogen,thrombin,inr,pt,ptt,urineoutput,text_embeddings,los_icu_class
0,20003425,2055-07-22 17:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
1,20003425,2055-07-22 18:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
2,20003425,2055-07-22 19:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
3,20003425,2055-07-22 20:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days
4,20003425,2055-07-22 21:00:00,2055-07-21 10:00:00,2055-07-29 14:40:00,2055-07-22 17:13:00,2055-07-26 17:11:00,4.0,0,0.0,76.551461,...,,,455.0,,1.4,14.7,35.6,,[ 1.22243397e-01 -2.15054199e-01 -6.55379891e-...,greater than or equal to 3 days


In [16]:
display(d.shape)
display(t.shape)
display(v.shape)

(510075, 112)

(408050, 112)

(102025, 112)