In [3]:
import glob
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
from tqdm import tqdm_notebook as tqdm
%matplotlib inline

In [5]:
def load_sound_files(file_paths):
    raw_sounds = []
    for fp in file_paths:
        X,sr = librosa.load(fp)
        raw_sounds.append(X)
    return raw_sounds

In [7]:
meta = pd.read_table("data_v_7_stc/meta/meta.txt", header=None)
sound_file_paths = ["data_v_7_stc/audio/background_0001.wav", "data_v_7_stc/audio/background_0002.wav"]

sound_names = [meta[4][0], meta[4][1]]

raw_sounds = load_sound_files(sound_file_paths)

In [12]:
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz



In [13]:
def get_meta_labels(parent_dir, audio_folder, file_ext="*.wav"):
    meta = pd.read_table("data_v_7_stc/meta/meta.txt", header=None)
    train_label_dict = meta.set_index(0).to_dict()[4]    
    labels = []
    for file_name in tqdm(glob.glob(os.path.join(parent_dir, audio_folder, file_ext))):
        labels.append(train_label_dict[file_name.split('\\')[2]])
    return labels

In [14]:
def parse_audio_files(parent_dir, audio_folder, file_ext="*.wav", train=False):
    features, labels = np.empty((0, 193)), np.empty(0) 
    i = 0
    for file_name in tqdm(glob.glob(os.path.join(parent_dir, audio_folder, file_ext))):
        try:
            mfccs, chroma, mel, contrast,tonnetz = extract_feature(file_name)
        except Exception as e:
            print("Error encountered while parsing file: ", file_name)
            continue
        ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
        features = np.vstack([features,ext_features])
        i+=1
    if train:
        meta = pd.read_table("data_v_7_stc/meta/meta.txt", header=None)
        train_label_dict = meta.set_index(0).to_dict()[4]
        labs = []
        for file_name in tqdm(glob.glob(os.path.join(parent_dir, audio_folder, file_ext))):
            labs.append(train_label_dict[file_name.split('\\')[2]])
        labels = np.array(labs)
        return np.array(features), np.array(labels)
    else:
        return np.array(features)

In [105]:
def one_hot_encode(labels):
    one_hot_labels_tr = map(lambda x: 0 if x=='background'
                                 else (1 if x=='bags' 
                                       else (2 if x=='door'
                                            else (3 if x=='keyboard'
                                                 else (4 if x=='knocking_door'
                                                      else (5 if x=='ring'
                                                           else (6 if x=='speech'
                                                                else (7 if x=='tool'
                                                                      else None))))))), tr_labels)
    labels = np.fromiter(one_hot_labels_tr, dtype=np.int)
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode

In [None]:
parent_dir = 'data_v_7_stc'
audio_folder = 'audio'
tr_features, tr_labels = parse_audio_files(parent_dir, audio_folder, train=True)

tr_features.tofile("tr_features.csv", sep=';')
tr_labels.tofile("tr_labels.csv", sep=';')



In [115]:
audio_folder = 'test'
ts_features = parse_audio_files(parent_dir, audio_folder)





Exception in thread Thread-4:
Traceback (most recent call last):
  File "/Applications/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/Applications/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 148, in run
    for instance in self.tqdm_cls._instances:
  File "/Applications/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration






In [114]:
# tr_labels = one_hot_encode(tr_labels)

In [125]:
# tr_features = pd.read_csv('tr_features_df.csv', sep=',')

In [129]:
# tr_features.columns = range(0, 193)

In [130]:
# tr_features.columns

RangeIndex(start=0, stop=193, step=1)

In [59]:
tr_labels = pd.read_csv('train_true_labels.csv', header=None, sep=';')[0]
tr_labels = np.array(tr_labels).reshape((11307,))
one_hot_labels_tr = map(lambda x: 0 if x=="'background'"
                                 else (1 if x=="'bags'" 
                                       else (2 if x=="'door'"
                                            else (3 if x=="'keyboard'"
                                                 else (4 if x=="'knocking_door'"
                                                      else (5 if x=="'ring'"
                                                           else (6 if x=="'speech'"
                                                                else (7 if x=="'tool'"
                                                                      else None))))))), tr_labels)
labels = np.fromiter(one_hot_labels_tr, dtype=np.int)

In [131]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(tr_features, labels, test_size=0.2, random_state=42)

In [132]:
ts_features = pd.DataFrame(ts_features)

In [133]:
ts_features.columns 

RangeIndex(start=0, stop=193, step=1)

In [134]:
tr_features.columns

RangeIndex(start=0, stop=193, step=1)

In [135]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

model = XGBClassifier(objective='multi:softmax')
model.fit(tr_features, labels)
y_pred = model.predict(ts_features)
y_pred_proba = model.predict_proba(ts_features)
# predictions = [int(value) for value in y_pred]

  if diff:


In [93]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 98.89%


In [138]:
unique, freqs = np.unique(y_test, return_counts=True)
print("True:", np.round(freqs/sum(freqs), 3))
unique, freqs = np.unique(y_pred, return_counts=True)
print("Pred:", np.round(freqs/sum(freqs), 3),)

True: [0.103 0.111 0.305 0.117 0.145 0.059 0.025 0.134]
Pred: [0.064 0.082 0.12  0.097 0.097 0.134 0.239 0.167]


In [139]:
from os import listdir
from os.path import isfile, join

def get_file_names(mypath):
    return [f for f in listdir(mypath) if isfile(join(mypath, f))]
a = get_file_names("data_v_7_stc/test")
len(a)

610

In [140]:
def get_probabilities(y_pred_proba):
    return np.max(y_pred_proba, axis=1)


In [141]:
def decode(y_pred):
    one_hot_labels_tr = map(lambda x: 'background' if x== 0
                                     else ('bags' if x== 1
                                           else ('door' if x==2
                                                else ('keyboard' if x== 3
                                                     else ('knocking_door' if x== 4
                                                          else ('ring' if x== 5
                                                               else ('speech' if x==6
                                                                    else ('tool' if x==7
                                                                          else None))))))), y_pred)
    return list(one_hot_labels_tr)

In [145]:
res = pd.DataFrame(None, index=list(range(len(y_pred))),  columns=[0,1,2])
res[0] = get_file_names("data_v_7_stc/test")
res[1] = get_probabilities(y_pred_proba)
res[2] = decode(y_pred)
res.head()

Unnamed: 0,0,1,2
0,unknown_0063.wav,0.578564,tool
1,unknown_0077.wav,0.73096,keyboard
2,unknown_0088.wav,0.889426,tool
3,keyboard_0035.wav,0.962278,keyboard
4,knocking_door_t_0002.wav,0.994977,knocking_door


In [150]:
base_filename = 'RESULTS.txt'
with open(os.path.join("data_v_7_stc", base_filename),'w') as outfile:
    res.to_string(outfile)