In [1]:
# Pandas
import pandas as pd

# Scikit learn
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.model_selection import cross_val_score

# Audio
import librosa
import librosa.display
import wave
import IPython.display as ipd

# Utility
import os
from glob import glob
import numpy as np
from tqdm import tqdm
import itertools
import numpy as np
from scipy.io import wavfile
import fnmatch

In [2]:
# Function for getting recording duration
def get_recording_times(audio_files):
    """Return the duration, in seconds, of every recording in ``audio_files``.

    Parameters
    ----------
    audio_files : iterable of path-like
        Audio files to measure. Each one is decoded with ``librosa.load``
        using librosa's default sampling rate.

    Returns
    -------
    list of float
        One duration per input file, in the same order as ``audio_files``.
    """
    data = []
    for file in audio_files:
        y, sr = librosa.load(file)
        # Pass the signal and its sampling rate by keyword: librosa >= 0.10
        # only accepts keyword arguments here, and forwarding the rate that
        # ``load`` actually returned keeps the duration tied to the decoded
        # signal instead of to ``get_duration``'s own default.
        data.append(librosa.get_duration(y=y, sr=sr))
    return data

In [3]:
# Function for getting spectrogram data
def get_amplitude_to_db(audio_files):
    """Compute a dB-scaled magnitude spectrogram for each audio file.

    Every file is decoded with ``librosa.load`` and transformed with
    ``librosa.stft``; the magnitude of that transform is converted with
    ``librosa.amplitude_to_db`` using the spectrogram's own maximum
    (``ref=np.max``) as the reference level.

    Returns a list holding one spectrogram per entry of ``audio_files``,
    in input order.
    """
    spectrograms = []
    for path in audio_files:
        signal, _ = librosa.load(path)
        magnitude = np.abs(librosa.stft(signal))
        spectrograms.append(librosa.amplitude_to_db(magnitude, ref=np.max))
    return spectrograms

In [4]:
# Function for getting mfcc data
def get_mfcc(audio_files):
    """Extract 40 MFCCs from each audio file.

    Each file is decoded with ``librosa.load`` and the resulting signal and
    sampling rate are handed to ``librosa.feature.mfcc`` with ``n_mfcc=40``.

    Returns a list holding one MFCC array per entry of ``audio_files``,
    in input order.
    """
    def _mfcc_for(path):
        # Decode one file and compute its MFCCs at the rate it was loaded with.
        signal, rate = librosa.load(path)
        return librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

    return [_mfcc_for(path) for path in audio_files]

In [5]:
# Get audio data at a fixed length: short clips are padded and long clips may be cut off
def load_file_data(folder, file_names, duration=12, sr=16000):
    """Load audio clips at a fixed length and return their mean MFCC vectors.

    Each file is read at sampling rate ``sr`` and limited to at most
    ``duration`` seconds by ``librosa.load``. Clips that come out shorter are
    zero-padded to exactly ``sr * duration`` samples, and the padded signal is
    the one the features are computed from. The 40 MFCCs are then averaged
    over time.

    Parameters
    ----------
    folder : str
        Directory that contains the audio files.
    file_names : iterable of str
        File names relative to ``folder``.
    duration : int or float, default 12
        Target clip length in seconds.
    sr : int, default 16000
        Sampling rate the audio is loaded at.

    Returns
    -------
    list of numpy.ndarray
        One column vector (40 rows, 1 column) per successfully processed
        file, in input order. A file that fails to load or process is
        reported and skipped, so the list can be shorter than ``file_names``.
    """
    # fix_length needs an integer sample count even if ``duration`` is a float
    input_length = int(sr * duration)
    data = []
    for file_name in file_names:
        try:
            sound_file = os.path.join(folder, file_name)
            print ("load file ",sound_file)
            # use kaiser_fast technique for faster extraction
            X, loaded_sr = librosa.load(sound_file, sr=sr, duration=duration, res_type='kaiser_fast')
            # Pad short clips so every clip has the same number of samples.
            # Comparing sample counts (rather than a rounded duration) also
            # catches clips that are only slightly too short.
            if len(X) < input_length:
                print ("fixing audio length :", file_name)
                # ``size`` is keyword-only in librosa >= 0.10; keep the padded
                # result in X so the MFCCs below actually use it.
                X = librosa.util.fix_length(X, size=input_length)
            # mean of each MFCC coefficient over all frames
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=loaded_sr, n_mfcc=40).T, axis=0)
        except Exception as e:
            # Report the failing file and move on; never append features that
            # were left over from a previous iteration.
            print("Error encountered while parsing file: ", file_name, e)
            continue
        data.append(np.array(mfccs).reshape([-1, 1]))
    return data

In [None]:
# parent folder of sound files
INPUT_DIR="./Documents"
# 16 KHz
SAMPLE_RATE = 16000
# seconds
MAX_SOUND_CLIP_DURATION=100  

# Load training set a
A_folder=INPUT_DIR+'/Thinkful/training/training-a/'

# set-a abnormal
# NOTE(review): every 'a0*.wav' file gets the label -1 here; confirm that this
# matches the label convention of the dataset.
# os.listdir returns entries in arbitrary order, so sort for a reproducible run.
A_files = sorted(fnmatch.filter(os.listdir(A_folder), 'a0*.wav'))
# Pass SAMPLE_RATE explicitly so that changing the constant above takes effect.
A_sounds = load_file_data(folder=A_folder, file_names=A_files,
                          duration=MAX_SOUND_CLIP_DURATION, sr=SAMPLE_RATE)
# One label per loaded sound, so labels and features always have equal length.
A_labels = [-1 for _ in A_sounds]

In [None]:
# Example: load the test set (set a)
INPUT_DIR = './Documents'
TestA_folder=INPUT_DIR+'/set_a/'

# NOTE(review): load_file_mfcc is not defined in any cell above this one; it
# has to be defined before this cell runs -- confirm where it comes from.
# MAX_SOUND_CLIP_DURATION comes from the configuration cell.

# Load test set-a: 'normal*.wav' recordings get label 0, 'murmur*.wav' get label 1.
# os.listdir returns entries in arbitrary order, so sort for a reproducible run.
A_normal_files = sorted(fnmatch.filter(os.listdir(TestA_folder), 'normal*.wav'))
A_normal_sounds = load_file_mfcc(folder=TestA_folder,file_names=A_normal_files, duration=MAX_SOUND_CLIP_DURATION)
A_normal_labels = [0 for _ in A_normal_sounds]

A_murmur_files = sorted(fnmatch.filter(os.listdir(TestA_folder), 'murmur*.wav'))
A_murmur_sounds = load_file_mfcc(folder=TestA_folder,file_names=A_murmur_files, duration=MAX_SOUND_CLIP_DURATION)
# Size the labels from the loaded sounds, exactly as for the normal recordings.
A_murmur_labels = [1 for _ in A_murmur_sounds]

In [None]:
# One row per normal test recording: its label, its MFCC features, and the
# result of flat_() applied to those features.
test_normal = pd.DataFrame().assign(Label=A_normal_labels, mfccs=A_normal_sounds)
test_normal['mfccs_flat'] = flat_(A_normal_sounds)
test_normal.head()

In [None]:
# One row per murmur test recording: its label, its MFCC features, and the
# result of flat_() applied to those features.
test_abnormal = pd.DataFrame().assign(Label=A_murmur_labels, mfccs=A_murmur_sounds)
test_abnormal['mfccs_flat'] = flat_(A_murmur_sounds)
test_abnormal.head()