In [1]:
import numpy as np
import pandas as pd
import psycopg2

import matplotlib.pyplot as plt
import seaborn as sns

# Apply the 'ggplot' stylesheet to every figure drawn in this notebook.
import matplotlib.style
matplotlib.style.use('ggplot')
# Show figures inline, directly below the cell that produces them.
%matplotlib inline

In [2]:
# Settings for the PostgreSQL connection that the next cell opens.
sqluser, dbname = 'mimic', 'mimic'
schema_name = 'mimiciii'

# No cursor exists yet; the connection cell tests this before closing anything.
cur = None

In [16]:
# When this cell is re-run, release the previous cursor and connection first.
if cur:
    cur.close()
    con.close()

# Open a fresh connection and cursor using the settings from the config cell.
con = psycopg2.connect(user=sqluser, dbname=dbname)
cur = con.cursor()
# Point the session's search_path at the configured schema.
cur.execute(f'SET search_path to {schema_name}')

In [4]:
# Average charted height per (subject_id, hadm_id, icustay_id).
# The CASE expression scales the inch-based itemids by 2.54 so every value is
# in centimetres; only values strictly between 0 and 500 are kept.
query = """
SELECT subject_id, hadm_id, icustay_id
    , avg(
        case
            when itemid in (920, 1394, 4187, 3486, 226707)
                THEN valuenum*2.54 --convert to cm
            else valuenum -- already in cm
        end
    ) as valuenum
from chartevents
where itemid in (920, 1394, 4187, 3486, 3485, 4188, 226707)
AND valuenum IS NOT NULL
AND valuenum > 0
AND valuenum < 500
GROUP BY subject_id, hadm_id, icustay_id
"""
res = pd.read_sql_query(sql=query, con=con)
res

Unnamed: 0,subject_id,hadm_id,icustay_id,valuenum
0,3,145834,211552.0,179.070000
1,8,159514,262299.0,48.999951
2,9,150750,220597.0,182.880000
3,10,184167,288409.0,39.191450
4,13,143045,263738.0,144.780000
5,17,161087,257980.0,167.640000
6,17,194023,277042.0,166.370000
7,20,157681,264490.0,165.100000
8,21,109451,217847.0,175.260000
9,23,152223,227807.0,170.180000


In [7]:
# Count how often each icustay_id appears in the result; a count above 1
# would mean one ICU stay produced more than one aggregated row.
(
    res['icustay_id']
    .value_counts()
    .sort_values(ascending=False)
    .head()
)

294912.0    1
294941.0    1
229392.0    1
294943.0    1
262158.0    1
Name: icustay_id, dtype: int64

In [8]:
# Average charted weight, in kilograms, per (subject_id, hadm_id, icustay_id).
#
# Only two of the selected itemids need a unit conversion:
#   3581 -> pounds  (x 0.45359237)
#   3582 -> ounces  (x 0.0283495231)
# Item 226512 is labelled "Admission Weight (Kg)" in d_items, so it is already
# in kilograms and must pass through unscaled. The earlier version applied the
# pound factor to it, which understated those weights by roughly 55%.
query = \
"""
select subject_id, hadm_id, icustay_id
    , avg(
        case
            when itemid = 3581
               then valuenum*0.45359237 -- lb to kg
            when itemid = 3582
               then valuenum*0.0283495231 -- oz to kg
            else valuenum -- already in kg (762, 763, 3723, 3580, 226512)
        end
    ) as valuenum
from chartevents
where itemid in (762, 763, 3723, 3580, 3581, 3582, 226512)
AND valuenum IS NOT NULL
AND valuenum > 0
GROUP BY subject_id, hadm_id, icustay_id
"""
res = pd.read_sql_query(query, con)
res

Unnamed: 0,subject_id,hadm_id,icustay_id,valuenum
0,3,145834,211552.0,101.900002
1,4,185777,294638.0,53.599998
2,8,159514,262299.0,3.160000
3,9,150750,220597.0,102.920001
4,10,184167,288409.0,1.045368
5,12,112213,232669.0,83.966667
6,13,143045,263738.0,73.649999
7,17,161087,257980.0,71.666667
8,17,194023,277042.0,67.900002
9,20,157681,264490.0,93.300003


In [9]:
# Tally rows per icustay_id, largest counts first, to spot ICU stays that
# appear in more than one aggregated row.
res.icustay_id.value_counts().sort_values(ascending=False).head(5)

207928.0    2
219572.0    2
298739.0    2
228181.0    2
291546.0    2
Name: icustay_id, dtype: int64

In [10]:
# Show the rows behind icustay_id 207928, one of the stays counted twice.
res.loc[res['icustay_id'].eq(207928.0)]

Unnamed: 0,subject_id,hadm_id,icustay_id,valuenum
454,417,102633,207928.0,53.700001
455,417,178013,207928.0,53.700001


In [12]:
# Fetch every admissions row for subject 417 to compare that patient's hadm_ids.
query = """
select * from admissions where subject_id = 417;
"""
res = pd.read_sql_query(sql=query, con=con)
res

Unnamed: 0,row_id,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_chartevents_data
0,533,417,178013,2177-03-22 22:24:00,2177-03-23 07:20:00,2177-03-23 07:20:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Private,,UNOBTAINABLE,MARRIED,WHITE,2177-03-22 22:01:00,2177-03-23 00:20:00,SUBARACHNOID HEMORRHAGE,1,1
1,534,417,102633,2177-03-23 16:17:00,2177-03-23 07:20:00,2177-03-23 07:20:00,URGENT,PHYS REFERRAL/NORMAL DELI,DEAD/EXPIRED,Private,,UNOBTAINABLE,MARRIED,WHITE,NaT,NaT,ORGAN DONOR ACCOUNT,1,1


# Get first echo of each icustay

In [37]:
# Select one echo per ICU stay: the earliest echo charted between 8 hours
# before ICU admission and ICU discharge.
#
# DENSE_RANK gives every echo sharing the earliest charttime a rank of 1, so a
# stay can still have several candidates. DISTINCT ON (icustay_id) then keeps
# the candidate with the highest technical-quality score
# (Good = 3, Adequate = 2, Suboptimal = 1, anything else = 0).
#
# row_id is added as the final sort key so the choice is deterministic when
# candidates also tie on quality; without it PostgreSQL may return any of them.
query = \
"""
with icustay_ed AS (
    select ed.*, ic.icustay_id,
        DENSE_RANK() OVER (PARTITION BY ic.icustay_id ORDER BY ed.charttime) as first_echo
        ,case
          when technicalquality = 'Adequate' then 2
          when technicalquality = 'Suboptimal' then 1
          when technicalquality = 'Good' then 3
          else 0
        end as technicalquality_
    from echodata ed
    inner join icustays ic
    on ic.hadm_id = ed.hadm_id
    where (ed.charttime > (ic.intime - INTERVAL '8 hours')) AND (ed.charttime < ic.outtime)
)
select distinct on (icustay_id) * from icustay_ed
WHERE first_echo = 1
order by icustay_id, technicalquality_ desc, row_id
"""
res = pd.read_sql_query(query, con)
res.head()

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,indication,height,weight,bsa,bp,...,bpdias,hr,status,test,doppler,contrast,technicalquality,icustay_id,first_echo,technicalquality_
0,83481,27513,163557,2199-08-03,2199-08-03 12:40:00,ARDS vs CHF,70.0,160.0,1.9,105/71,...,71.0,93.0,Inpatient,Portable TTE (Complete),Full Doppler and color Doppler,,Adequate,200003,1,2
1,99984,29904,129607,2189-12-01,2189-12-01 08:34:00,"Intra-op TEE for Re-exploration of chest, s/p ...",64.0,190.0,1.92,95/45,...,45.0,105.0,Inpatient,TEE (Complete),Full Doppler and color Doppler,,Adequate,200009,1,2
2,71333,9514,127229,2105-02-18,2105-02-18 16:51:00,Chronic lung disease. Left ventricular function.,,,,,...,,,Inpatient,Portable TTE(Complete),Complete pulse and color flow,,Suboptimal,200014,1,1
3,77556,21789,112486,2178-07-08,2178-07-08 12:15:00,Cerebrovascular event/TIA. Hypertension.,,,,172/46,...,46.0,55.0,Inpatient,Portable TTE (Complete),Complete pulse and color flow,,Adequate,200019,1,2
4,82662,19167,164161,2113-08-25,2113-08-25 12:17:00,Intra-operative TEE for CABG,78.0,300.0,2.68,,...,,,Inpatient,TEE (Complete),Full Doppler and color Doppler,,Adequate,200025,1,2


In [32]:
# Count rows per icustay_id; any count above 1 means an ICU stay still has
# more than one echo row in the result.
res.icustay_id.value_counts().head(5)

256761    2
268205    2
217826    2
298972    2
273235    2
Name: icustay_id, dtype: int64

In [36]:
# Show every echo row currently held for ICU stay 268205.
res.loc[res['icustay_id'].eq(268205)]

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,indication,height,weight,bsa,bp,...,bpdias,hr,status,test,doppler,contrast,technicalquality,icustay_id,first_echo,technicalquality_
13508,90167,53280,148878,2113-01-31,2113-01-31 12:08:00,Pulmonary embolus. Right ventricular function.,64.0,113.0,1.54,134/65,...,65.0,86.0,Inpatient,Portable TTE (Complete),Full Doppler and color Doppler,,Suboptimal,268205,1,1
13509,90168,53280,148878,2113-01-31,2113-01-31 12:08:00,Pulmonary embolus. Right ventricular function.,64.0,113.0,1.54,134/65,...,65.0,85.0,Inpatient,TTE (Complete),Full Doppler and color Doppler,,Adequate,268205,1,2


In [38]:
# Look up ICU stay 268205 again in the current result.
res[res.icustay_id == 268205]

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,indication,height,weight,bsa,bp,...,bpdias,hr,status,test,doppler,contrast,technicalquality,icustay_id,first_echo,technicalquality_
13506,90168,53280,148878,2113-01-31,2113-01-31 12:08:00,Pulmonary embolus. Right ventricular function.,64.0,113.0,1.54,134/65,...,65.0,85.0,Inpatient,TTE (Complete),Full Doppler and color Doppler,,Adequate,268205,1,2


In [25]:
# Distribution of the numeric technical-quality score across the result rows.
res.loc[:, 'technicalquality_'].value_counts()

1    15109
2     4607
3       40
0        1
Name: technicalquality_, dtype: int64