O notebook abaixo foi criado para a extração dos exames médicos e informaçoes importantes dos pacientes sépticos

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


In [2]:
def aggregate_events(df):
    itemid_map = {
        'Albumine': [52022, 53138, 50862, 53085],
        'Lactate': [50813, 52442, 53154],
        'Bicarbonate': [50813, 52442, 53154],
        'Chloride': [50902, 52535],
        'Hemoglobin': [50811, 51222, 51640],
        'Sodium': [50983, 52623],
        'Glucose': [50809, 50931, 52569],
        'Troponin': [51002, 51003, 52642],
        'Platelets': [51240,51704,52159],
        'INR': [51237, 51675],
        'Creatinine': [50813, 52442, 53154],
        'Bun': [51842,52647,51006],
        'Bilirubin': [50885, 53089],
        'AST': [50878, 53088],
        'ALT': [50861, 53084],
        'C_reactive': [50889],
        'ESR': [51288],
        'Bands': [51144],
        'WBC': [51301,53134,51300],
        'Oxygen_saturation': [50817],
        'Temperature': [223762],
        'Systolic_pressure': [227242, 227243],
        'Respiratory_rate': [220210],
        'Heart_rate': [220045]
    }
    
    for key, itemids in itemid_map.items():
        new_col = df['itemid'].isin(itemids) * df["value"]
        new_col[new_col == ""] = np.NaN
        new_col[new_col == "___"] = np.NaN
        df[key] = new_col
    return df


A base de dados "exams" pode ser gerada a partir do notebook extract_data, as outras bases são originais da MIMIC_IV

In [3]:
exams = pd.read_csv("exams.csv")

  exams = pd.read_csv("exams.csv")


In [4]:
sepsis3 = pd.read_csv('/scratch/haniel.botelho/physionet.org/files/mimiciv/2.2/sepsis3.csv')

In [5]:
icu_stays = pd.read_csv('/scratch/haniel.botelho/physionet.org/files/mimiciv/2.2/icu/icustays.csv')

In [6]:
sepsis3 = pd.merge(sepsis3,icu_stays[['subject_id','hadm_id','stay_id']],on =['subject_id','stay_id'],how='left' )

In [7]:
sepsis3_patients = sepsis3[['subject_id','hadm_id']]
sepsis3_patients

Unnamed: 0,subject_id,hadm_id
0,18421337,22413411
1,12207593,22795209
2,16513856,24463832
3,10656173,25778760
4,17921898,28841024
...,...,...
32965,19046950,24352151
32966,15954569,25851401
32967,15669140,29818488
32968,13651601,22584645


In [8]:
exams = exams.dropna(subset=['subject_id', 'hadm_id'])
exams['subject_id'] = exams['subject_id'].astype(int)
exams['hadm_id'] = exams['hadm_id'].astype(int)

In [9]:
exams['key'] = exams['subject_id'].astype(int).astype(str) + '_' + exams['hadm_id'].astype(int).astype(str)
sepsis3_patients['key'] = sepsis3_patients['subject_id'].astype(str) + '_' + sepsis3_patients['hadm_id'].astype(str)

# Marcar as linhas em exams que têm a combinação de subject_id e hadm_id presente em sepsis3
exams['sepsis3'] = exams['key'].isin(sepsis3_patients['key'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sepsis3_patients['key'] = sepsis3_patients['subject_id'].astype(str) + '_' + sepsis3_patients['hadm_id'].astype(str)


#### Extração dos sépticos 

In [10]:
diagnostics = pd.read_csv("diagnostics.csv")


In [11]:
diagnostics = diagnostics[['subject_id','hadm_id','sepsis']].drop_duplicates()
exams = pd.merge(diagnostics[['subject_id','hadm_id','sepsis']],exams,on = ['subject_id','hadm_id'],how = 'left')


In [12]:
exams['Sepsis'] = exams.apply(lambda row: 'diagnostico' if row['sepsis'] == True 
                                  else 'sepsis3' if row['sepsis3'] == True 
                                  else False, axis=1)


In [13]:
exams = exams.drop(['sepsis','key','sepsis3'],axis = 1)
exams = exams[exams["Sepsis"]!=False]
exams

Unnamed: 0,subject_id,hadm_id,charttime,itemid,value,Sepsis
405,10000826,21086876,2146-12-18 21:20:00,51237.0,1.9,diagnostico
406,10000826,21086876,2146-12-19 05:15:00,51222.0,10.7,diagnostico
407,10000826,21086876,2146-12-19 05:15:00,51301.0,20.0,diagnostico
408,10000826,21086876,2146-12-19 05:15:00,50861.0,33,diagnostico
409,10000826,21086876,2146-12-19 05:15:00,50862.0,3.1,diagnostico
...,...,...,...,...,...,...
36341507,19999840,21033226,2164-09-17 13:34:00,50813.0,4.0,sepsis3
36341508,19999840,21033226,2164-09-17 13:34:00,50817.0,25,sepsis3
36341509,19999840,21033226,2164-09-17 13:39:00,50809.0,369,sepsis3
36341510,19999840,21033226,2164-09-17 13:39:00,50813.0,___,sepsis3


#### Calculo dos offsets

In [14]:
admissions = pd.read_csv("/scratch/haniel.botelho/physionet.org/files/mimiciv/2.2/hosp/admissions.csv")

In [15]:
admissions = admissions[['subject_id','hadm_id','admittime','dischtime','discharge_location']]
admissions

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,discharge_location
0,10000032,22595853,2180-05-06 22:23:00,2180-05-07 17:15:00,HOME
1,10000032,22841357,2180-06-26 18:27:00,2180-06-27 18:49:00,HOME
2,10000032,25742920,2180-08-05 23:44:00,2180-08-07 17:50:00,HOSPICE
3,10000032,29079034,2180-07-23 12:35:00,2180-07-25 17:55:00,HOME
4,10000068,25022803,2160-03-03 23:16:00,2160-03-04 06:26:00,
...,...,...,...,...,...
431226,19999828,25744818,2149-01-08 16:44:00,2149-01-18 17:00:00,HOME HEALTH CARE
431227,19999828,29734428,2147-07-18 16:23:00,2147-08-04 18:10:00,HOME HEALTH CARE
431228,19999840,21033226,2164-09-10 13:47:00,2164-09-17 13:42:00,DIED
431229,19999840,26071774,2164-07-25 00:27:00,2164-07-28 12:15:00,HOME


In [16]:
exams = pd.merge(exams,admissions,on = ['subject_id','hadm_id'],how = 'left')

In [17]:
# Convertendo colunas para datetime
exams['charttime'] = pd.to_datetime(exams['charttime'])
exams['admittime'] = pd.to_datetime(exams['admittime'])
exams['dischtime'] = pd.to_datetime(exams['dischtime'])
# Calculando os offsets
exams['offsettime'] = exams['charttime'] - exams['admittime']
exams['offsettime_disch'] = exams['dischtime'] - exams['admittime']

#### Salvando a base

In [18]:
exams.to_csv("exams_all_sepsis.csv",index=False)

In [19]:
exams

Unnamed: 0,subject_id,hadm_id,charttime,itemid,value,Sepsis,admittime,dischtime,discharge_location,offsettime,offsettime_disch
0,10000826,21086876,2146-12-18 21:20:00,51237.0,1.9,diagnostico,2146-12-18 17:39:00,2146-12-24 19:55:00,HOME,0 days 03:41:00,6 days 02:16:00
1,10000826,21086876,2146-12-19 05:15:00,51222.0,10.7,diagnostico,2146-12-18 17:39:00,2146-12-24 19:55:00,HOME,0 days 11:36:00,6 days 02:16:00
2,10000826,21086876,2146-12-19 05:15:00,51301.0,20.0,diagnostico,2146-12-18 17:39:00,2146-12-24 19:55:00,HOME,0 days 11:36:00,6 days 02:16:00
3,10000826,21086876,2146-12-19 05:15:00,50861.0,33,diagnostico,2146-12-18 17:39:00,2146-12-24 19:55:00,HOME,0 days 11:36:00,6 days 02:16:00
4,10000826,21086876,2146-12-19 05:15:00,50862.0,3.1,diagnostico,2146-12-18 17:39:00,2146-12-24 19:55:00,HOME,0 days 11:36:00,6 days 02:16:00
...,...,...,...,...,...,...,...,...,...,...,...
20692723,19999840,21033226,2164-09-17 13:34:00,50813.0,4.0,sepsis3,2164-09-10 13:47:00,2164-09-17 13:42:00,DIED,6 days 23:47:00,6 days 23:55:00
20692724,19999840,21033226,2164-09-17 13:34:00,50817.0,25,sepsis3,2164-09-10 13:47:00,2164-09-17 13:42:00,DIED,6 days 23:47:00,6 days 23:55:00
20692725,19999840,21033226,2164-09-17 13:39:00,50809.0,369,sepsis3,2164-09-10 13:47:00,2164-09-17 13:42:00,DIED,6 days 23:52:00,6 days 23:55:00
20692726,19999840,21033226,2164-09-17 13:39:00,50813.0,___,sepsis3,2164-09-10 13:47:00,2164-09-17 13:42:00,DIED,6 days 23:52:00,6 days 23:55:00
