In [None]:
# LSTM with dropout for sequence classification in the IMDB dataset
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Permute
from keras.layers import Reshape
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence


In [None]:
# Make NumPy's global random stream repeatable before anything else runs.
numpy.random.seed(7)
# Vocabulary cap handed to the IMDB loader as `num_words`.
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=top_words,
)

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [None]:
# Bring every review to a common length of `max_review_length` tokens
# (longer reviews are cut, shorter ones are padded).
max_review_length = 500
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_review_length)
    for reviews in (X_train, X_test)
)


In [None]:
# Sanity check: dimensions on the first line, then NumPy's abbreviated view of the matrix.
print(numpy.shape(X_train), X_train, sep="\n")

(25000, 500)
[[   0    0    0 ...   19  178   32]
 [   0    0    0 ...   16  145   95]
 [   0    0    0 ...    7  129  113]
 ...
 [   0    0    0 ...    4 3586    2]
 [   0    0    0 ...   12    9   23]
 [   0    0    0 ...  204  131    9]]


In [None]:
# create the model
# Word indices -> 32-dimensional embeddings -> one LSTM layer with input and
# recurrent dropout -> a single sigmoid unit for the binary sentiment label.
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Train for three passes over the training reviews in mini-batches of 64.
model.fit(x=X_train, y=y_train, batch_size=64, epochs=3)
# Score the fitted model on the test reviews; scores[1] is the accuracy metric
# requested at compile time.
scores = model.evaluate(x=X_test, y=y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 84.20%


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, concatenate
import glob
import os

Using TensorFlow backend.


In [None]:
# Start from a clean slate: remove previously extracted folders and every
# CSV file in the working directory (this includes any CSV written by an
# earlier run of this notebook).
!rm -rf ventmap waveforms labels *.csv
# Unpack the helper package, the waveform recordings and the label files;
# the three archives are expected to sit in the working directory.
!unzip ventmap.zip 
!unzip waveforms.zip 
!unzip labels.zip 

Archive:  ventmap.zip
   creating: ventmap/
  inflating: ventmap/add_timestamp_to_file.py  
  inflating: ventmap/anonymize_datatimes.py  
  inflating: ventmap/breath_meta.py  
  inflating: ventmap/clear_null_bytes.py  
  inflating: ventmap/constants.py    
  inflating: ventmap/convert_3_col_file_to_2_col.py  
  inflating: ventmap/cut_breath_section.py  
  inflating: ventmap/detection.py    
  inflating: ventmap/preprocess_all_files.py  
  inflating: ventmap/raw_utils.py    
  inflating: ventmap/rounding_rules.py  
  inflating: ventmap/SAM.py          
 extracting: ventmap/__init__.py     
Archive:  waveforms.zip
   creating: waveforms/
   creating: waveforms/1/
   creating: waveforms/10/
  inflating: waveforms/10/1008cut.csv  
   creating: waveforms/11/
  inflating: waveforms/11/1009cut.csv  
   creating: waveforms/12/
  inflating: waveforms/12/1010_1cut.csv  
   creating: waveforms/13/
  inflating: waveforms/13/1010_2cut.csv  
   creating: waveforms/14/
  inflating: waveforms/14/1011c

In [None]:
from io import open
from ventmap.raw_utils import extract_raw

# breaths
# One entry per extracted breath: the flow samples and the pressure samples.
flow_list = []
pressure_list = []

# NOTE(review): os.walk yields entries in filesystem order. The label-loading
# cell walks './labels' the same way, and breaths are later matched to label
# rows purely by position, so both walks must keep visiting patients in the
# same order -- confirm before changing the traversal here.
for (dir_path, dir_names, file_names) in os.walk('./waveforms'):
    for file in file_names:
        csv_path = os.path.join(dir_path, file)
        print(csv_path)
        # extract_raw reads lazily from the handle, so the generator is fully
        # consumed inside the `with` block; the file is then closed instead of
        # being left open for every recording.
        with open(csv_path) as waveform_file:
            for breath in extract_raw(waveform_file, False):
                # breath data is output in dictionary format
                flow_list.append(breath['flow'])
                pressure_list.append(breath['pressure'])

./waveforms/26/1023cut.csv
./waveforms/30/1027cut.csv
./waveforms/37/1034cut.csv
./waveforms/8/1006cut.csv
./waveforms/18/1015cut.csv
./waveforms/31/1028cut.csv
./waveforms/5/1003cut.csv
./waveforms/13/1010_2cut.csv
./waveforms/7/1005cut.csv
./waveforms/11/1009cut.csv
./waveforms/23/1020cut.csv
./waveforms/9/1007cut.csv
./waveforms/1/1000cut.csv
./waveforms/2/1001_1cut.csv
./waveforms/24/1021cut.csv
./waveforms/28/1025cut.csv
./waveforms/29/1026cut.csv
./waveforms/22/1019cut.csv
./waveforms/25/1022cut.csv
./waveforms/19/1016cut.csv
./waveforms/15/1012cut.csv
./waveforms/21/1018cut.csv
./waveforms/10/1008cut.csv
./waveforms/35/1032cut.csv
./waveforms/14/1011cut.csv
./waveforms/27/1024cut.csv
./waveforms/3/1001_2cut.csv
./waveforms/6/1004cut.csv
./waveforms/4/1002cut.csv
./waveforms/20/1017cut.csv
./waveforms/34/1031cut.csv
./waveforms/32/1029cut.csv
./waveforms/16/1013cut.csv
./waveforms/33/1030cut.csv
./waveforms/36/1033cut.csv
./waveforms/12/1010_1cut.csv
./waveforms/17/1014cut.csv


In [None]:
# Breaths differ in length, so `flow_list` is ragged: np.array() on it raises
# in current NumPy unless dtype=object is given, and printing the result dumps
# every sample of the breaths it shows. Print a compact summary instead.
breath_lengths = [len(flow) for flow in flow_list]
print("breaths:", len(breath_lengths))
if breath_lengths:
    print("samples per breath: min=%d max=%d" % (min(breath_lengths), max(breath_lengths)))
    print("first breath (first 10 flow samples):", flow_list[0][:10])

[list([3.06, 58.42, 57.84, 55.95, 54.59, 52.98, 53.43, 51.08, 50.97, 48.26, 47.46, 46.78, 44.55, 42.94, 41.92, 40.13, 38.55, 38.07, 36.18, 35.29, 34.02, 32.84, 31.61, 29.37, 28.54, 27.47, 25.54, 24.8, 22.75, 21.87, 20.51, 19.34, 17.48, 16.17, 15.38, 13.89, 12.39, 11.19, 9.83, 8.2, 7.15, 5.65, -28.05, -58.84, -61.3, -49.0, -42.17, -33.55, -30.83, -29.23, -28.13, -27.16, -26.77, -26.09, -23.64, -24.56, -26.22, -25.11, -24.86, -25.38, -24.79, -22.17, -21.33, -20.77, -18.05, -16.29, -14.47, -14.43, -13.52, -13.26, -13.17, -13.11, -13.54, -15.09, -15.19, -15.42, -15.02, -15.28, -14.59, -15.19, -16.29, -15.05, -15.39, -14.17, -13.79, -12.03, -10.42, -9.82, -7.09, -5.8, -4.81, -4.53, -4.17, -4.34, -5.05, -5.77, -6.89, -8.51, -9.33, -9.43, -9.36, -10.34, -10.4, -10.68, -10.46, -10.48, -9.97, -8.82, -7.91, -6.35, -4.94, -3.84, -1.67, -0.76, -0.27, -0.29, -0.16, -0.9, -2.14, -3.65, -5.04, -6.8, -6.9, -6.83, -7.4, -8.21, -8.55, -8.41, -8.63, -8.17, -7.43, -6.23, -5.02, -3.26, -1.79, -0.58, 0.84, 

In [None]:
# Columns kept from every label file, in the order check_notation reads them.
label_columns = ['dbl', 'mt', 'bs', 'co', 'su']

# Visit every file below ./labels and collect its label columns.
label_list = []
for folder, _subdirs, names in os.walk('./labels'):
    for name in names:
        csv_path = os.path.join(folder, name)
        print(csv_path)
        label_list.append(pd.read_csv(csv_path)[label_columns])

# Stack all files into one table with a fresh 0..n-1 index, keep a copy on
# disk, and expose the values as a plain array for positional lookups.
df_total = pd.concat(label_list, axis=0, ignore_index=True)
df_total.to_csv("label_total.csv")
notation = df_total.to_numpy()

./labels/26/1023_2000to2300_goldstd_dbl_bs_cosumtvd.csv
./labels/30/1027_2070to2371_goldstd_dbl_bs_cosumtvd.csv
./labels/37/1034_18450to18750_goldstd_dbl_bs_cosumtvd.csv
./labels/8/1006goldstd.csv
./labels/18/1015goldstd.csv
./labels/31/1028_4100to4400_goldstd_dbl_bs_cosumtvd.csv
./labels/5/1003goldstd.csv
./labels/13/1010_2goldstd.csv
./labels/7/1005goldstd.csv
./labels/11/1009goldstd.csv
./labels/23/1020_326to376_goldstd_dbl_bs_cosumtvd.csv
./labels/9/1007goldstd.csv
./labels/1/1000goldstd.csv
./labels/2/1001_1goldstd.csv
./labels/24/1021_2550to2850_goldstd_dbl_bs_cosumtvd.csv
./labels/28/1025_1650to1950_goldstd_dbl_bs_cosumtvd.csv
./labels/29/1026_3510to3810_goldstd_dbl_bs_cosumtvd.csv
./labels/22/1019_1000to1300_goldstd_dbl_bs_cosumtvd.csv
./labels/25/1022_1to301_goldstd_dbl_bs_cosumtvd.csv
./labels/19/1016goldstd.csv
./labels/15/1012goldstd.csv
./labels/21/1018_1388to1689_goldstd_dbl_bs_cosumtvd.csv
./labels/10/1008goldstd.csv
./labels/35/1032_300to600_goldstd_dbl_bs_cosumtvd.csv


In [None]:
def check_notation(i):
    """Return the class code for row ``i`` of the module-level ``notation`` table.

    The first five columns of the row are scanned left to right and the first
    one equal to 1 decides the result:
    1 = "dbl", 2 = "mt", 3 = "bs", 4 = "co", 5 = "su".
    When none of them is 1 the row is reported as 0 ("normal").
    """
    for column in range(5):
        if notation[i][column] == 1:
            # Codes are 1-based so that 0 stays reserved for "normal".
            return column + 1
    return 0

In [None]:
# truncate and pad input sequences
max_review_length = 215
flow_arr = sequence.pad_sequences(flow_list, max_review_length)
pressure_arr = sequence.pad_sequences(pressure_list, max_review_length)

In [None]:
# Preview the padded flow matrix (NumPy abbreviates large arrays when printing).
print(flow_arr)

[[ 0  0  0 ...  0  2  2]
 [ 0  0  0 ...  0  1  2]
 [ 0  0  0 ... -2  0  1]
 ...
 [-1 10 31 ... -1 -1 -1]
 [-1 10 32 ...  0 -1  0]
 [-1 10 32 ... -1  0  0]]


In [None]:
# Put flow and pressure side by side: each row holds one breath's padded flow
# samples followed by the same breath's padded pressure samples.
waveforms = np.concatenate((flow_arr, pressure_arr), axis=1)
print(waveforms.shape)

(9719, 430)


In [None]:
# One class code per extracted breath, looked up by position in `notation`.
# NOTE(review): this assumes row k of the label table describes breath k of
# pressure_list -- confirm the two were loaded in the same order.
labels = [check_notation(idx) for idx in range(len(pressure_list))]
label_arr = np.array(labels)

In [None]:
# create the model
# The IMDB architecture does not fit this data: an Embedding layer expects
# non-negative integer word indices, but the waveforms are real-valued (and
# negative during exhalation), and a single sigmoid unit with binary
# cross-entropy cannot represent the six class codes (0-5) that
# check_notation produces.
n_channels = 2                                    # flow and pressure
n_timesteps = waveforms.shape[1] // n_channels    # samples per breath
num_classes = 6                                   # normal, dbl, mt, bs, co, su

model = Sequential()
# Each row of `waveforms` is [flow samples | pressure samples]; split it back
# into two channels and move time to the first axis so the LSTM receives
# (timesteps, channels).
model.add(Reshape((n_channels, n_timesteps), input_shape=(waveforms.shape[1],)))
model.add(Permute((2, 1)))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
# One probability per class; the sparse loss accepts the integer codes directly.
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 430, 32)           160000    
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Train on the stacked waveform matrix for three passes in mini-batches of 64.
model.fit(x=waveforms, y=label_arr, batch_size=64, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x7f3723659ba8>

In [None]:
# Score the waveform model on waveform data. X_test / y_test still hold the
# IMDB reviews from the first part of the notebook (500 columns of word
# indices, sentiment labels), which do not match this model's 430-column input.
# NOTE: these are the same samples the model was fitted on, so the figure is a
# training-set accuracy; hold out part of `waveforms` before fitting to get an
# unbiased estimate.
scores = model.evaluate(waveforms, label_arr, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))