In [1]:
import os
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
import pyACA
import sklearn
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.model_selection import train_test_split as tts
from sklearn import metrics
from sklearn.utils import shuffle
from xgboost import XGBClassifier as XGB
import librosa
import librosa.display
import IPython.display as ipd
from tqdm import tqdm


import tensorflow as tf
import keras
from keras.layers import *
from keras.models import *
from keras import backend as K
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.models import Model
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
import keras.layers as Layers

import warnings
warnings.filterwarnings("ignore")

In [6]:
# Filesystem layout. The directory strings keep their trailing slash because
# later cells build file names from them by plain string concatenation.
raw_dataset, audio_path = './raw dataset/', './audio_samples/'
np_dir, final_dataset = './np_arrays/', './final_dataset/'
csv_path = './mldata.csv'

# Inclusive range of patient IDs; each ID names one `<id>.wav` recording
# inside raw_dataset.
f_start, f_end = 101, 639

# Per-patient annotation table loaded from csv_path.
labels_df = pd.read_csv(filepath_or_buffer=csv_path)

In [4]:
# Framing parameters handed to extract_features() by the extraction loop.
# NOTE(review): extract_features() accepts these values but does not use them,
# so changing them currently has no effect on the saved waveforms.
FRAME_LENGTH = 1024
HOP_LENGTH = 512

def extract_features(file_path: str, frame_length, hop_length):
    """Load the audio file at ``file_path`` and return its raw waveform.

    Despite the name, no framing or feature computation happens here:
    ``frame_length`` and ``hop_length`` are accepted but never used, and the
    sample rate returned by ``librosa.load`` is discarded.

    NOTE(review): ``librosa.load`` is called with its defaults, which per the
    librosa documentation means resampling to 22050 Hz mono -- confirm that
    this is the intended input format.

    Args:
        file_path: Path of the audio file to decode.
        frame_length: Unused.
        hop_length: Unused.

    Returns:
        The decoded signal, i.e. the first element of ``librosa.load``'s
        ``(signal, sample_rate)`` result.
    """
    return librosa.load(file_path)[0]


In [7]:
# Decode every patient's recording once and cache the waveform as
# `<audio_path><id>.npy`, so later cells can reload it without re-decoding audio.

# np.save does not create missing directories, so make sure the cache folder
# exists before the (slow) loop starts instead of failing on the first save.
os.makedirs(audio_path, exist_ok=True)

# Resolve the input directory once; it does not change between iterations.
raw_dir = os.path.abspath(raw_dataset)

for patient in tqdm(range(f_start, f_end + 1), desc='Extraction Progress '):
    # Join directory and file name as separate components (the single-argument
    # os.path.join over a pre-concatenated string was a no-op).
    file_name = os.path.join(raw_dir, str(patient) + '.wav')
    signal = extract_features(file_name, FRAME_LENGTH, HOP_LENGTH)
    np.save(os.path.join(audio_path, str(patient) + '.npy'), signal)

Extraction Progress : 100%|██████████| 539/539 [06:50<00:00,  1.31it/s]


In [12]:
# Number of leading samples kept from each cached waveform; this is also the
# number of feature columns seen by the model.
# NOTE(review): these are raw samples, not frames. If the recordings were
# loaded at librosa's default 22050 Hz, 862 samples is only ~39 ms of audio --
# confirm this is intended rather than 862 frames of a framed feature.
N_SAMPLES = 862

clips = []

for patient in range(f_start, f_end + 1):
    audio = np.load(audio_path + str(patient) + '.npy')

    # Truncate to the fixed length, then zero-pad on the right when the
    # recording is shorter. The length check must look at the clip being built:
    # testing the unchanged source array while appending to a separate list
    # never terminates for a short recording.
    clip = audio[:N_SAMPLES]
    shortfall = N_SAMPLES - clip.shape[0]
    if shortfall > 0:
        clip = np.pad(clip, (0, shortfall), mode='constant')

    clips.append(clip)

# One row per patient, in ascending patient-ID order.
data = np.array(clips)
data.shape

(539, 862)

In [10]:
# Collapse the two annotation counts into a single class code per patient:
#   0 = neither, 1 = crackles only, 2 = wheezes only, 3 = both.
# A count is treated as "present" whenever it is not equal to zero, so every
# numeric (or NaN) pair maps to exactly one of these four codes.
has_crackles = labels_df['crackles'].ne(0)
has_wheezes = labels_df['wheezes'].ne(0)
labels = (has_crackles.astype(int) + 2 * has_wheezes.astype(int)).tolist()

# Keep only the columns used downstream, with the new class code appended.
labels_df = labels_df.assign(**{'class': labels})[
    ['patient', 'crackles', 'wheezes', 'class']
]

# NOTE(review): this writes back to the same CSV the table was read from and,
# with pandas' default index=True, stores the row index as an extra unnamed
# column -- confirm that overwriting the input file is intended.
labels_df.to_csv(csv_path)

labels_df.head(10)

Unnamed: 0,patient,crackles,wheezes,class
0,101,0,11,2
1,102,0,0,0
2,103,0,5,2
3,104,0,2,2
4,105,0,0,0
5,106,0,0,0
6,107,5,0,1
7,108,8,1,3
8,109,10,0,1
9,110,13,0,1


In [14]:
# Wrap the 2-D `data` array in a DataFrame: one row per loaded recording and
# one integer-named column per retained sample position.
df = pd.DataFrame(data)

# Bare expression so the notebook renders the (truncated) frame.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,852,853,854,855,856,857,858,859,860,861
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,-0.081173,-0.119004,-0.104803,-0.113619,-0.107089,-0.112096,-0.107956,-0.111243,-0.108613,-0.110848,...,0.052971,0.053392,0.053994,0.054671,0.055081,0.055736,0.056904,0.057983,0.058783,0.059595
2,-0.050781,-0.073955,-0.064401,-0.068995,-0.064731,-0.066872,-0.063430,-0.065037,-0.062504,-0.062663,...,0.106895,0.108439,0.110037,0.111430,0.113004,0.114891,0.117044,0.119264,0.121215,0.123575
3,0.082093,0.120273,0.105930,0.115082,0.108216,0.113333,0.108985,0.112239,0.109423,0.111319,...,0.090493,0.090410,0.090345,0.090403,0.090416,0.090381,0.090390,0.090366,0.090302,0.090259
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,-0.025989,-0.038212,-0.034844,-0.038497,-0.036611,-0.038637,-0.037569,-0.039016,-0.037861,-0.038337,...,0.057786,0.054887,0.051827,0.048958,0.045507,0.042568,0.039458,0.036286,0.033645,0.030810
535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
536,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
537,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [15]:
# Features are the per-patient sample columns; targets are the class codes.
# NOTE(review): rows of X and entries of Y are paired purely by position, which
# assumes the label CSV lists patients in the same order as the f_start..f_end
# loading loop -- verify before trusting the scores.
X, Y = df, labels

# Hold out 30% of the patients for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = tts(
    X,
    Y,
    random_state=42,
    test_size=0.3,
)

In [20]:
# Gradient-boosted tree classifier trained directly on the sample columns.
model = XGB(n_estimators=400, max_depth=6)
model.fit(X_train, Y_train)

# Predict the class code of every held-out patient.
ypreds = model.predict(X_test)

# Fraction of held-out patients whose class was predicted exactly.
metrics.accuracy_score(y_true=Y_test, y_pred=ypreds)



0.4074074074074074