# Exploratory data analysis

In [1]:
# Notebook-wide setup: plotting defaults, pandas display limits, warning policy.

# Render matplotlib figures inline in the cell output.
%matplotlib inline
import matplotlib
# Use the ggplot style sheet and a default figure size of 10 x 8 inches (width, height).
matplotlib.style.use('ggplot')
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)

import pandas as pd
# Truncate displayed DataFrames to at most 10 columns and 10 rows.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 10

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
from chatto_transform.sessions.mimic import mimic_common

### You can also enter arbitrary SQL queries:

### Type in the following SQL query in the text area that appears:

````
SELECT * FROM MIMICIII.ADMISSIONS LIMIT 3
````

### Then click *Execute*.

Pull out all heart patients:
<pre>
SELECT subject_id, hadm_id, transfertime
FROM MIMICIII.SERVICES
WHERE curr_service = 'CMED'
</pre>

Find item IDs for potassium labs:
<pre>
SELECT *
FROM MIMICIII.D_LABITEMS
WHERE label ILIKE '%potassium%'
</pre>

Pull all potassium labs:
<pre>
SELECT * 
FROM MIMICIII.LABEVENTS
WHERE itemid = 50971
LIMIT 10
</pre>

In [3]:
# Opens the SQL text-area widget described above. After a query is executed,
# the result is stored in mimic_common.loaded_sql (see the "Loaded ... and
# stored in loaded_sql[0]" message below).
# NOTE(review): the result depends on what is typed into the widget, so this
# step is not reproducible from code alone.
mimic_common.sql()

Loaded SELECT * 
FROM MIMICIII.LABEVENTS
WHERE itemid = 50971
LIMIT 10
 and stored in loaded_sql[0]


### And access the results like this:

Find K+ labs for cardiac medicine patients:
* find cmed pts
* join with K+ labs

<pre>
SELECT cmed.subject_id, cmed.hadm_id AS cmed_hadm_id, cmed.transfertime AS cmed_transfertime,
    lab.charttime AS k_charttime, lab.value AS k_value, lab.valuenum AS k_valuenum, lab.uom AS k_uom, lab.hadm_id AS k_hadm_id, lab.itemid AS k_itemid
FROM
(SELECT subject_id, hadm_id, transfertime
    FROM 
    mimiciii.services
    WHERE curr_service = 'CMED') AS cmed
LEFT JOIN mimiciii.labevents AS lab
    ON lab.subject_id = cmed.subject_id
    AND lab.itemid = 50971
</pre>

In [4]:
# Show the result of the first widget query (stored at index 0): the 10-row
# sample of potassium LABEVENTS rows (itemid = 50971).
mimic_common.loaded_sql[0]

Unnamed: 0,row_id,subject_id,hadm_id,itemid,charttime,value,valuenum,uom,flag
0,1740,3,,50971,2102-01-28 05:40:00,4.5,4.5,mEq/L,
1,1753,3,,50971,2102-01-29 05:50:00,4.6,4.6,mEq/L,
2,1780,4,,50971,2191-03-15 14:12:00,3.1,3.1,mEq/L,abnormal
3,1195,3,145834.0,50971,2101-10-24 04:15:00,3.5,3.5,mEq/L,
4,1285,3,145834.0,50971,2101-10-24 17:30:00,4.4,4.4,mEq/L,
5,1294,3,145834.0,50971,2101-10-25 04:00:00,3.9,3.9,mEq/L,
6,1313,3,145834.0,50971,2101-10-25 18:52:00,3.8,3.8,mEq/L,
7,1330,3,145834.0,50971,2101-10-26 04:00:00,3.4,3.4,mEq/L,abnormal
8,1380,3,145834.0,50971,2101-10-27 08:56:00,4.4,4.4,mEq/L,
9,1407,3,145834.0,50971,2101-10-28 06:10:00,3.9,3.9,mEq/L,


In [6]:
# NOTE(review): index 1 presumably holds the result of the CMED services query
# run through the widget; the index depends on the order in which queries were
# executed in this session — confirm before re-running.
cmed_pts = mimic_common.loaded_sql[1]

In [7]:
# Display the CMED rows (subject_id, hadm_id, transfertime); pandas truncates
# the view according to the display limits set in the setup cell.
cmed_pts

Unnamed: 0,subject_id,hadm_id,transfertime
0,154,102354,2127-12-23 18:47:00
1,154,102354,2127-12-24 00:47:40
2,160,161672,2174-11-06 08:38:58
3,163,138528,2146-06-21 17:42:45
4,164,182743,2116-12-28 15:34:05
...,...,...,...
9130,92287,133462,2171-02-16 18:12:20
9131,92292,153237,2163-12-28 08:00:16
9132,92295,111545,2181-06-08 20:22:24
9133,92316,158581,2187-05-05 18:02:51


In [17]:
# Count the distinct subject_id values among the CMED service rows
# (a patient can appear in several rows).
cmed_pts['subject_id'].nunique()

7949

In [21]:
# Potassium lab codes: the D_LABITEMS rows whose label matches '%potassium%'.
# NOTE(review): index 6 presumably refers to the seventh query executed through
# the widget in the original session; it will differ if queries are run in
# another order — confirm before re-running.
potassium_labs = mimic_common.loaded_sql[6]
potassium_labs

Unnamed: 0,row_id,itemid,label,fluid,category,loinc_code
0,264,51064,"POTASSIUM, STOOL",STOOL,CHEMISTRY,15202-5
1,297,51097,"POTASSIUM, URINE",URINE,CHEMISTRY,2828-2
2,23,50822,"POTASSIUM, WHOLE BLOOD",BLOOD,BLOOD GAS,6298-4
3,34,50833,POTASSIUM,OTHER BODY FLUID,BLOOD GAS,2821-7
4,48,50847,"POTASSIUM, ASCITES",ASCITES,CHEMISTRY,49789-1
5,172,50971,POTASSIUM,BLOOD,CHEMISTRY,2823-3
6,241,51041,"POTASSIUM, BODY FLUID",OTHER BODY FLUID,CHEMISTRY,2821-7
7,257,51057,"POTASSIUM, PLEURAL",PLEURAL,CHEMISTRY,


We want itemid = 50971 (blood chemistry).

### Let's get into doing stuff with code. You can load tables and queries without using the visual widgets above:

In [None]:
from chatto_transform.schema.mimic import mimic_schema

In [None]:
# Load the `admissions` table by passing its schema object to load_table.
df = mimic_common.load_table(mimic_schema.admissions_schema)

In [None]:
# Display the loaded admissions table (truncated by the pandas display limits).
df

### We just loaded the `admissions` table.

### We did this by calling `mimic_common.load_table` and passing it a `schema` object.

### Let's take a closer look at the `schema` we used, `mimic_schema.admissions_schema`:

In [None]:
# Inspect the schema object used above to see the admissions table's columns and their types.
mimic_schema.admissions_schema

### Here we see information about the `admissions` table. We see the names and types of the different columns.

### We can pass any of the schema objects in `mimic_schema` to `load_table`, and the table will be loaded. Feel free to take a look at the different schemas:

In [None]:
# List every name defined in mimic_schema, which includes the available schema objects.
dir(mimic_schema)

### Say we want to download the table we just loaded. We can do it like this:

In [None]:
# Export the admissions table loaded above under the file name 'admissions.csv'.
# NOTE(review): where the file ends up (server path vs. browser download) is not
# visible here — confirm against df_to_csv.
mimic_common.df_to_csv('admissions.csv', df)

# First pass: Select potassium labs  

In [None]:
from chatto_transform.sessions.mimic import cohorts

In [None]:
# Create a new cohort object with default settings.
# NOTE(review): what a default Cohort contains is not visible in this notebook — confirm.
my_cohort = cohorts.Cohort()

In [None]:
# NOTE(review): presumably restricts the cohort by lab events (this section is
# about selecting potassium labs), but no item id is passed here — confirm
# how the filter criteria are supplied.
my_cohort.filter_labevents()

In [None]:
# Inspect the cohort's summary_fields attribute.
# NOTE(review): presumably the fields reported by summary() — confirm.
my_cohort.summary_fields

In [None]:
# Display the cohort summary after the filter step above.
my_cohort.summary()

In [None]:
# NOTE(review): appears to be leftover scratch code — `x` is not used anywhere
# in the visible notebook; consider removing this cell.
x = 3