# API

## Basics
For reading ventilator data:

In [None]:
from io import open

from ventmap.raw_utils import extract_raw

# extract_raw creates a generator that iterates through the file. Passing
# False ensures that breaths without BS/BE markers are dropped; passing True
# keeps them.
filepath = "/data/aim2/experiment1/all_data/raw/0700RPI21/0700RPI2120190219-rpi21-2019-02-19-01-01-01.749347.csv"
# Use a context manager so the file handle is closed after the last breath
# has been read.
with open(filepath) as vent_file:
    generator = extract_raw(vent_file, False)
    for breath in generator:
        # breath data is output in dictionary format
        flow, pressure = breath['flow'], breath['pressure']


If you want to preprocess a breath file for later use, or if you intend to process it more than once, use the `process_breath_file` function:

In [2]:
from io import open

from ventmap.raw_utils import process_breath_file, read_processed_file

# process_breath_file outputs 2 files. The first contains just the raw breath
# data, the other contains higher level processed data.
# The context manager closes the input file handle once processing returns.
with open(filepath) as vent_file:
    process_breath_file(vent_file, False, 'new_filename')
raw_filepath_name = 'new_filename.raw.npy'
processed_filepath_name = 'new_filename.processed.npy'

for breath in read_processed_file(raw_filepath_name, processed_filepath_name):
    # breath data is output in dictionary format
    flow, pressure = breath['flow'], breath['pressure']


In [6]:
breath

{'rel_bn': 1916,
 'vent_bn': 4128,
 'flow': [1.36,
  5.92,
  17.08,
  30.73,
  41.89,
  49.57,
  52.28,
  46.58,
  41.9,
  37.84,
  35.71,
  33.0,
  31.05,
  29.51,
  27.98,
  26.51,
  25.3,
  24.23,
  23.45,
  23.63,
  22.67,
  21.84,
  20.93,
  20.24,
  17.77,
  17.33,
  15.32,
  14.65,
  13.64,
  13.11,
  11.57,
  11.37,
  11.15,
  10.74,
  10.02,
  9.1,
  7.34,
  5.92,
  6.45,
  5.35,
  4.93,
  4.25,
  4.44,
  3.56,
  4.16,
  4.55,
  -36.86,
  -56.03,
  -49.85,
  -28.58,
  -23.35,
  -25.81,
  -31.52,
  -29.56,
  -26.77,
  -25.6,
  -25.3,
  -23.77,
  -23.76,
  -22.38,
  -22.66,
  -21.52,
  -21.51,
  -19.78,
  -20.16,
  -18.53,
  -17.52,
  -16.1,
  -15.31,
  -14.43,
  -14.55,
  -14.99,
  -13.55,
  -13.67,
  -12.7,
  -12.29,
  -11.75,
  -12.19,
  -11.04,
  -10.06,
  -9.22,
  -9.13,
  -8.31,
  -7.91,
  -7.94,
  -6.76,
  -6.73,
  -6.5,
  -6.06,
  -5.43,
  -5.64,
  -4.65,
  -4.33,
  -4.06,
  -3.9,
  -3.8,
  -3.88,
  -4.29,
  -4.15,
  -4.01,
  -3.99,
  -3.57,
  -2.88,
  -1.95,
  -0.71,
  

In [3]:
breath.keys()

dict_keys(['rel_bn', 'vent_bn', 'flow', 'pressure', 'abs_bs', 'bs_time', 'frame_dur', 'dt'])

For extracting metadata (I-Time, TVe, TVi) from files.

In [1]:
from ventmap.breath_meta import get_file_breath_meta

# By default the breath metadata is returned in list format; the column
# ordering can be found in ventmap.constants.META_HEADER:
#     breath_meta = get_file_breath_meta(filepath)
# Setting the optional argument to_data_frame=True returns a pandas DataFrame
# instead, which is what this cell does.
filepath ="/data/aim2/experiment1/all_data/raw/0700RPI21/0700RPI2120190219-rpi21-2019-02-19-01-01-01.749347.csv"


breath_meta = get_file_breath_meta(filepath, to_data_frame=True)


file is a str name, opening it using open
['Patient Id', 'BN', 'ventBN', 'BS', 'IEnd', 'BE', 'I:E ratio', 'iTime', 'eTime', 'inst_RR', 'tvi', 'tve', 'tve:tvi ratio', 'maxF', 'minF', 'maxP', 'PIP', 'Maw', 'PEEP', 'ipAUC', 'epAUC', ' ', 'BS.1', 'x01', 'tvi1', 'tve1', 'x02', 'tvi2', 'tve2', 'x0_index', 'abs_time_at_BS', 'abs_time_at_x0', 'abs_time_at_BE', 'rel_time_at_BS', 'rel_time_at_x0', 'rel_time_at_BE', 'min_pressure']
[['BN', 'ventBN', 'BS', 'IEnd', 'BE', 'I:E ratio', 'iTime', 'eTime', 'inst_RR', 'tvi', 'tve', 'tve:tvi ratio', 'maxF', 'minF', 'maxP', 'PIP', 'Maw', 'PEEP', 'ipAUC', 'epAUC', ' ', 'BS.1', 'x01', 'tvi1', 'tve1', 'x02', 'tvi2', 'tve2', 'x0_index', 'abs_time_at_BS', 'abs_time_at_x0', 'abs_time_at_BE', 'rel_time_at_BS', 'rel_time_at_x0', 'rel_time_at_BE', 'min_pressure']]
2019-02-19-01-01-01.754177 26 1


In [2]:
breath_meta

Unnamed: 0,BN,ventBN,BS,IEnd,BE,I:E ratio,iTime,eTime,inst_RR,tvi,...,tvi2,tve2,x0_index,abs_time_at_BS,abs_time_at_x0,abs_time_at_BE,rel_time_at_BS,rel_time_at_x0,rel_time_at_BE,min_pressure
0,1,2213,0.02,0.94,4.30,0.27219,0.92,3.38,13.953488,384.407500,...,384.407500,372.316111,46,2019-02-19 01-01-01.774177,2019-02-19 01-01-02.694177,2019-02-19 01-01-06.054177,0.02,0.94,4.30,17.25
1,2,2214,4.32,5.24,8.54,0.27711,0.92,3.32,14.150943,357.682222,...,357.682222,330.920000,46,2019-02-19 01-01-02.381807,2019-02-19 01-01-03.301807,2019-02-19 01-01-06.601807,4.32,5.24,8.54,17.13
2,3,2215,8.56,9.48,12.84,0.27219,0.92,3.38,13.953488,359.044722,...,359.044722,339.396944,46,2019-02-19 01-01-06.594359,2019-02-19 01-01-07.514359,2019-02-19 01-01-10.874359,8.56,9.48,12.84,17.09
3,4,2216,12.86,13.78,17.14,0.27219,0.92,3.38,13.953488,345.575556,...,345.575556,338.113056,46,2019-02-19 01-01-11.000935,2019-02-19 01-01-11.920935,2019-02-19 01-01-15.280935,12.86,13.78,17.14,16.94
4,5,2217,17.16,18.08,21.44,0.27219,0.92,3.38,13.953488,347.510278,...,347.510278,345.029167,46,2019-02-19 01-01-15.211358,2019-02-19 01-01-16.131358,2019-02-19 01-01-19.491358,17.16,18.08,21.44,16.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1911,1912,4124,7204.80,7205.72,7209.08,0.27219,0.92,3.38,13.953488,308.856944,...,308.856944,319.090278,46,2019-02-19 03-00-43.042322,2019-02-19 03-00-43.962322,2019-02-19 03-00-47.322322,7204.80,7205.72,7209.08,17.74
1912,1913,4125,7209.10,7210.02,7212.96,0.31081,0.92,2.96,15.463918,317.173056,...,317.173056,306.558889,46,2019-02-19 03-00-47.648552,2019-02-19 03-00-48.568552,2019-02-19 03-00-51.508552,7209.10,7210.02,7212.96,17.69
1913,1914,4126,7212.98,7213.90,7217.24,0.27381,0.92,3.36,14.018692,308.005000,...,308.005000,301.477778,46,2019-02-19 03-00-51.374776,2019-02-19 03-00-52.294776,2019-02-19 03-00-55.634776,7212.98,7213.90,7217.24,17.74
1914,1915,4127,7217.26,7218.18,7221.54,0.27219,0.92,3.38,13.953488,312.044444,...,312.044444,292.799167,46,2019-02-19 03-00-55.661320,2019-02-19 03-00-56.581320,2019-02-19 03-00-59.941320,7217.26,7218.18,7221.54,17.68


In [3]:
breath_meta.to_excel("random.xlsx")

Take a look at the `breath_meta` DataFrame:

In [4]:
breath_meta.head()

Unnamed: 0,BN,ventBN,BS,IEnd,BE,I:E ratio,iTime,eTime,inst_RR,tvi,...,tvi2,tve2,x0_index,abs_time_at_BS,abs_time_at_x0,abs_time_at_BE,rel_time_at_BS,rel_time_at_x0,rel_time_at_BE,min_pressure
0,1,2213,0.02,0.94,4.3,0.27219,0.92,3.38,13.953488,384.4075,...,384.4075,372.316111,46,-,-,-,0.02,0.94,4.3,17.25
1,2,2214,4.32,5.24,8.54,0.27711,0.92,3.32,14.150943,357.682222,...,357.682222,330.92,46,-,-,-,4.32,5.24,8.54,17.13
2,3,2215,8.56,9.48,12.84,0.27219,0.92,3.38,13.953488,359.044722,...,359.044722,339.396944,46,-,-,-,8.56,9.48,12.84,17.09
3,4,2216,12.86,13.78,17.14,0.27219,0.92,3.38,13.953488,345.575556,...,345.575556,338.113056,46,-,-,-,12.86,13.78,17.14,16.94
4,5,2217,17.16,18.08,21.44,0.27219,0.92,3.38,13.953488,347.510278,...,347.510278,345.029167,46,-,-,-,17.16,18.08,21.44,16.88


## Reading files in the new format

In [2]:
from io import open

from ventmap.raw_utils import extract_raw


filepath = "/data/vwd-deidentified-data/011/011-2009-03-03-22-33-02.957.csv"
# Use a context manager so the file handle is closed when the loop exits,
# including the early exit via break.
with open(filepath) as vent_file:
    generator = extract_raw(vent_file, False)
    for idx, breath in enumerate(generator):
        # breath data is output in dictionary format
        flow, pressure = breath['flow'], breath['pressure']
        print(breath)
        # stop after the first six breaths (idx 0 through 5)
        if idx == 5:
            break

deidentified_study_id,breath_datetime,flow,pressure 51 4
This is of new format
Every row follows => pt _id, timestamp, flow,pressure
{'pt_id': 11, 'rel_bn': 1, 'vent_bn': 0, 'flow': [10.43, 10.0, 9.87, 10.08, 9.38, 9.96, 10.02, 9.95, 9.67, 10.38, 9.99, 10.33, 10.0, 9.81, 9.86, 10.14, 9.8, 10.38, 9.79, 9.98, 9.92, 10.05, 9.71, 10.03, 9.74, 9.73, 10.04, 10.17, 9.98, 9.84, 10.27, 10.09, 9.92, 10.2, 10.38, 9.98, 9.79, 9.95, 10.04, 10.25, 10.15, 10.13, 10.04, 9.89, 10.09, 9.85, 9.85, 10.0, 10.13, 9.96, 10.13, 10.11, 10.04, 9.83, 10.01, 10.2, 10.02, 9.98, 10.26, 9.87, 10.04, 9.92, 9.8, 10.12, 10.13, 10.02, 10.04, 10.03, 10.15, 10.14, 9.95, 9.51, 9.94, 9.8, 10.27, 9.78, 10.23, 10.05, 9.71, 9.52, 10.04, 10.18, 10.44, 9.89, 10.04, 9.85, 10.1, 9.98, 10.08, 9.94, 9.59, 9.83, 9.69, 9.93, 9.79, 10.37, 9.99, 9.97, 9.82, 9.97, 9.55, 10.32, 9.95, 9.91, 10.1, 9.95, 9.81, 10.3, 10.28, 9.79, 10.13, 10.01, 10.16, 10.19, 10.12, 9.75, 9.99, 10.05, 9.94, 9.63, 9.99, 9.94, 10.03, 9.95, 10.17, 10.23, 9.98, 10.

In [3]:
print(len(generator))

3014


In [1]:
from ventmap.breath_meta import get_file_breath_meta,get_file_experimental_breath_meta

# By default the breath metadata is returned in list format; the column
# ordering can be found in ventmap.constants.META_HEADER:
#     breath_meta = get_file_breath_meta(filepath)
# Setting the optional argument to_data_frame=True returns a pandas DataFrame.

# Alternative input file:
# filepath = "/data/vwd-deidentified-data/011/011-consolidate-vwd-2009-03-03-22-33-02.957.csv"
filepath = "/data/vwd-deidentified-data/138/138-consolidate-vwd-2016-06-07-22-04-37.623.csv"

# NOTE(review): new_format=True presumably selects the
# "pt_id, timestamp, flow, pressure" row layout reported in this cell's
# output -- confirm against ventmap.
# NOTE(review): the output recorded for this cell ends with
# "TypeError: slice indices must be integers or None or have an __index__ method";
# confirm the cause before relying on breath_meta from this call.
breath_meta = get_file_experimental_breath_meta(filepath, to_data_frame=True,new_format=True)
# breath_meta = get_file_breath_meta(filepath, to_data_frame=True,new_format=True)



file is a str name, opening it using open
deidentified_study_id,breath_datetime,flow,pressure 51 4
This is of new format
Every row follows => pt _id, timestamp, flow,pressure
-
[-2.39, 61.25, 60.21, 61.09, 60.79, 59.42, 62.37, 62.17, 60.41, 61.57, 61.7, 62.17, 61.7, 60.08, 59.89, 60.8, 61.99, 62.17, 60.94, 61.64, 61.94, 62.97, 61.36, 61.95, 62.18, 61.56, 61.42, 61.03, 61.37, -91.98, -99.28, -73.42, -33.28, -29.81, -35.64, -45.05, -43.46, -41.76, -37.27, -37.34, -37.76, -35.9, -34.03, -34.89, -33.84, -34.32, -31.75, -31.21, -30.38, -28.57, -28.57, -28.23, -27.46, -25.61, -26.5, -25.34, -23.57, -22.55, -22.95, -22.75, -21.54, -21.69, -20.62, -19.68, -17.61, -18.29, -18.18, -16.91, -17.33, -17.79, -17.68, -15.49, -16.12, -16.1, -14.69, -14.39, -14.55, -15.4, -13.58, -13.81, -13.33, -13.06, -11.67, -12.06, -11.63, -10.16, -10.52, -10.03, -9.91, -8.44, -9.02, -9.0, -7.36, -6.71, -5.78, -5.26, -3.84, -3.88, -4.04, -4.65, -4.94, -5.46, -5.53, -5.08, -5.11, -5.28, -5.29, -5.23, -5.28, -5.37, -

TypeError: slice indices must be integers or None or have an __index__ method

In [4]:
breath_meta

Unnamed: 0,Patient Id,BN,ventBN,BS,IEnd,BE,I:E ratio,iTime,eTime,inst_RR,...,tvi2,tve2,x0_index,abs_time_at_BS,abs_time_at_x0,abs_time_at_BE,rel_time_at_BS,rel_time_at_x0,rel_time_at_BE,min_pressure
0,11,1,0,0.02,10.40,10.40,519.00000,10.38,0.02,5.769231,...,1727.205556,0.000000,519,-,-,-,0.02,10.40,10.40,0.50
1,11,2,1,10.42,11.24,12.80,0.51899,0.82,1.58,25.000000,...,582.526667,423.569444,41,-,-,-,10.42,11.24,12.80,16.62
2,11,3,2,12.82,13.64,15.20,0.51899,0.82,1.58,25.000000,...,547.486667,432.711111,41,-,-,-,12.82,13.64,15.20,16.33
3,11,4,3,15.22,16.04,17.60,0.51899,0.82,1.58,25.000000,...,534.165556,466.033611,41,-,-,-,15.22,16.04,17.60,16.15
4,11,5,4,17.62,18.44,20.00,0.51899,0.82,1.58,25.000000,...,511.495556,492.872222,41,-,-,-,17.62,18.44,20.00,16.61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36880,11,36881,4993,96058.90,96059.64,96061.96,0.31624,0.74,2.34,19.480519,...,390.305556,412.210833,37,-,-,-,96058.90,96059.64,96061.96,11.38
36881,11,36882,4994,96061.98,96062.78,96068.00,0.15267,0.80,5.24,9.933775,...,418.501944,453.755556,40,-,-,-,96061.98,96062.78,96068.00,11.38
36882,11,36883,4995,96068.02,96068.82,96071.54,0.29197,0.80,2.74,16.949153,...,403.516111,402.839722,40,-,-,-,96068.02,96068.82,96071.54,10.76
36883,11,36884,4996,96071.56,96072.40,96075.16,0.30216,0.84,2.78,16.574586,...,419.417500,417.149444,42,-,-,-,96071.56,96072.40,96075.16,11.45


In [17]:
import pandas
breath_meta.to_excel("/data/patientid_11_vwd_summary.xlsx")

Exception ignored in: <function ZipFile.__del__ at 0x7f7a5c1a6700>
Traceback (most recent call last):
  File "/data1/customconda/envs/ards/lib/python3.8/zipfile.py", line 1821, in __del__
    self.close()
  File "/data1/customconda/envs/ards/lib/python3.8/zipfile.py", line 1838, in close
    self.fp.seek(self.start_dir)
ValueError: seek of closed file


OSError: [Errno 28] No space left on device

In [None]:
breath_meta.head()

For extracting experimental breath metadata from files. This adds further, more experimental pieces of information that have been developed since the 2017 paper.

In [6]:
from ventmap.breath_meta import get_file_experimental_breath_meta

# <filepath to vent data> is a placeholder, not valid Python: replace it with
# the path to a ventilator data file before running this cell.
# Data output is normally in list format. Ordering information can be found in
# ventmap.constants.META_HEADER.
exp_breath_meta = get_file_experimental_breath_meta(<filepath to vent data>)
# If you want a pandas DataFrame then you can set the optional argument to_data_frame=True
# (this second call rebinds exp_breath_meta, replacing the list-format result above)
exp_breath_meta = get_file_experimental_breath_meta(<filepath to vent data>, to_data_frame=True)

Take a look at the Experimental Breath Metadata dataframe

In [7]:
exp_breath_meta.head()

Unnamed: 0,BN,ventBN,BS,IEnd,BE,I:E ratio,iTime,eTime,inst_RR,tvi,...,dyn_compliance,vol_at_.5_sec,vol_at_.76_sec,vol_at_1_sec,pressure_itime_4,pressure_itime_5,pressure_itime_6,pressure_itime_by_pip5,pressure_itime_by_pip6,pressure_itime_from_front
0,1,53946,0.02,2.52,9.4,0.36232,2.5,6.9,6.382979,2129.422222,...,0.218268,-510.342222,-882.069167,-1095.104722,3.36,3.2,3.14,3.34,3.38,2.56
1,2,53947,9.42,11.72,18.36,0.34535,2.3,6.66,6.696429,2261.646667,...,0.212441,-639.298889,-958.682778,-1147.471667,3.08,2.9,2.86,2.88,3.02,2.36
2,3,53948,18.38,20.78,27.98,0.33241,2.4,7.22,6.237006,744.514444,...,0.081617,-186.508889,-257.480278,-305.858056,2.44,2.42,2.42,2.44,2.46,2.46
3,4,53949,28.0,29.34,30.82,0.89333,1.34,1.5,21.126761,402.528889,...,0.044885,-169.228889,-198.612222,-200.6675,1.38,1.36,1.36,1.38,1.44,1.4
4,5,53950,30.84,31.8,32.96,0.81356,0.96,1.18,28.037383,383.351111,...,0.043672,-195.342222,-271.551111,-316.975556,0.98,0.98,0.96,1.0,1.12,1.02


In [8]:
exp_breath_meta.shape

(96, 49)

## Adding new features - example

The information available for each breath read from a raw data file is:

In [9]:
breath.keys()


dict_keys(['rel_bn', 'vent_bn', 'flow', 'pressure', 'abs_bs', 'bs_time', 'frame_dur', 'dt'])

If you want to create a new feature, first define a function that computes it for a single breath, as below.
For example, to calculate the time between the maximum of flow and the minimum of flow for each breath:

In [11]:
import pandas as pd
def time_btw_max_min_flow(breath):
    """Return the time between the flow maximum and the flow minimum of a breath.

    Args:
        breath: mapping with a 'flow' entry (an indexable, sized sequence of
            flow samples) and a 'dt' entry (the factor that converts a
            distance in samples into a duration).

    Returns:
        abs((index of max flow - index of min flow) * dt), rounded to 2
        decimal places. If an extreme value occurs more than once, its first
        occurrence is used.

    Raises:
        ValueError: if 'flow' is empty.
    """
    flow = breath['flow']
    dt = breath['dt']
    # Locate the extremes by position rather than via flow.index(), so that
    # sequences without an .index() method (e.g. NumPy arrays) work as well
    # as lists. max()/min() keep the first candidate on ties, which matches
    # what list.index() would return.
    positions = range(len(flow))
    max_idx = max(positions, key=flow.__getitem__)
    min_idx = min(positions, key=flow.__getitem__)
    btw_time = abs((max_idx - min_idx) * dt)
    return round(btw_time, 2)



Then plug your function in here: implement `extract_new_feature` so that it returns a list `feature_values` holding the new feature's value for each breath.

In [14]:
def extract_new_feature(file):
    """Compute the new feature for every breath read from ``file``.

    Breaths are obtained from ``extract_raw(file, True)`` and each one is
    passed to ``time_btw_max_min_flow``. The results are returned as a list,
    in the order the breaths were produced.
    """
    # TODO: add new feature function
    return [
        time_btw_max_min_flow(one_breath)
        for one_breath in extract_raw(file, True)
    ]

# <filepath to vent data> is a placeholder, not valid Python: replace it with
# the path to a ventilator data file before running this cell.
btw_times = extract_new_feature(open(<filepath to vent data>))

Finally, add this new feature to your original dataframe.

In [16]:
exp_breath_meta['time_btw_max_min'] = btw_times

Take a look at the dataframe `exp_breath_meta` having one more feature `time_btw_max_min` at the end.

In [17]:
exp_breath_meta.head()

Unnamed: 0,BN,ventBN,BS,IEnd,BE,I:E ratio,iTime,eTime,inst_RR,tvi,...,vol_at_.5_sec,vol_at_.76_sec,vol_at_1_sec,pressure_itime_4,pressure_itime_5,pressure_itime_6,pressure_itime_by_pip5,pressure_itime_by_pip6,pressure_itime_from_front,time_btw_max_min
0,1,53946,0.02,2.52,9.4,0.36232,2.5,6.9,6.382979,2129.422222,...,-510.342222,-882.069167,-1095.104722,3.36,3.2,3.14,3.34,3.38,2.56,1.88
1,2,53947,9.42,11.72,18.36,0.34535,2.3,6.66,6.696429,2261.646667,...,-639.298889,-958.682778,-1147.471667,3.08,2.9,2.86,2.88,3.02,2.36,1.22
2,3,53948,18.38,20.78,27.98,0.33241,2.4,7.22,6.237006,744.514444,...,-186.508889,-257.480278,-305.858056,2.44,2.42,2.42,2.44,2.46,2.46,2.02
3,4,53949,28.0,29.34,30.82,0.89333,1.34,1.5,21.126761,402.528889,...,-169.228889,-198.612222,-200.6675,1.38,1.36,1.36,1.38,1.44,1.4,1.2
4,5,53950,30.84,31.8,32.96,0.81356,0.96,1.18,28.037383,383.351111,...,-195.342222,-271.551111,-316.975556,0.98,0.98,0.96,1.0,1.12,1.02,0.82


## Extras
Clear null bytes from a file

In [None]:
from io import open

from ventmap.clear_null_bytes import clear_descriptor_null_bytes

# <filepath to vent data> is a placeholder, not valid Python: replace it with
# the path to a ventilator data file before running this cell.
cleared_descriptor = clear_descriptor_null_bytes(open(<filepath to vent data>))


Cut a file into specific BN interval and store for later use

In [None]:
from io import open

from ventmap.cut_breath_section import cut_breath_section

# <filepath to vent data>, <breath start num> and <breath end num> are
# placeholders, not valid Python: replace them with real values before
# running this cell.
# get file descriptor for the truncated data
new_descriptor = cut_breath_section(open(<filepath to vent data>), <breath start num>, <breath end num>)
# write output to file
with open('new_file', 'w') as f:
    f.write(new_descriptor.read())


Check if there is a plateau pressure in a breath

In [None]:
from io import open

from ventmap.raw_utils import extract_raw
from ventmap.SAM import check_if_plat_occurs

# <filepath to vent data> is a placeholder, not valid Python: replace it with
# the path to a ventilator data file before running this cell.
generator = extract_raw(open(<filepath to vent data>), False)
for breath in generator:
    flow, pressure = breath['flow'], breath['pressure']

    # .02 is the sampling rate for the PB-840 corresponding with 1 obs every .02 seconds
    # Note: did_plat_occur is reassigned on every iteration, so after the loop
    # it only holds the result for the last breath.
    did_plat_occur = check_if_plat_occurs(flow, pressure, .02)
