In [None]:
import glob 
import os.path
import tensorflow as tf
import numpy as np
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

In [None]:
def load_wav_file(filename):
  """Loads an audio file and returns a float PCM-encoded array of samples.

  A fresh graph and session are built on every call; the file name is passed
  into the graph through a string placeholder.

  Args:
    filename: Path to the .wav file to load.
  Returns:
    Numpy array holding the sample data as floats between -1.0 and 1.0.
    The file is decoded with a single channel and flattened to one dimension.
  """
  with tf.Session(graph=tf.Graph()) as sess:
    wav_filename_placeholder = tf.placeholder(tf.string, [])
    # Read through the placeholder so that the value supplied in feed_dict is
    # what actually gets loaded, rather than baking the path into the graph
    # and feeding a placeholder nothing consumes.
    wav_loader = tf.read_file(wav_filename_placeholder)
    wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
    return sess.run(
        wav_decoder,
        feed_dict={wav_filename_placeholder: filename}).audio.flatten()

def parse_audio_files(parent_dir, sub_dirs, file_ext='*.wav', num_samples=176400):
    """Load every matching clip under the given folders into one dense array.

    Args:
        parent_dir: Directory that contains the sub-directories to scan.
        sub_dirs: Iterable of sub-directory names, scanned in the given order.
        file_ext: Glob pattern selecting the audio files inside each
            sub-directory.
        num_samples: Number of samples stored per clip (row width of the
            returned data array). Each clip is expected to decode to exactly
            this many samples; no padding or truncation is done, so a
            mismatched clip normally fails with NumPy's shape-mismatch
            ValueError.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float array of shape
        ``(n_files, num_samples)``. ``labels`` is an int array of shape
        ``(n_files,)`` holding 1 when the second '-'-separated field of the
        file's base name equals 3 and 0 otherwise (presumably that field is
        the class id -- confirm against the dataset's naming scheme).

    Raises:
        OSError: If one of the sub-directories does not exist.
        ValueError, IndexError: If a file name does not have an integer as
            its second '-'-separated field.
    """
    import errno  # local import: only needed for the missing-directory error

    # Build the file list first so the arrays are sized from exactly the files
    # that will be loaded. Counting every directory entry instead would leave
    # all-zero rows for any file that does not match `file_ext`.
    filenames = []
    for sub_dir in sub_dirs:
        dir_name = os.path.join(parent_dir, sub_dir)
        if not os.path.isdir(dir_name):
            # glob would silently return nothing for a mistyped folder name.
            raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), dir_name)
        # Sorted so the row order does not depend on directory listing order.
        filenames.extend(sorted(glob.glob(os.path.join(dir_name, file_ext))))

    data = np.zeros((len(filenames), num_samples))
    labels = np.zeros(len(filenames), dtype=int)
    for i, fn in enumerate(filenames):
        data[i, :] = load_wav_file(fn)
        # Parse the base name only, so a '-' in a directory name cannot shift
        # the field index.
        class_id = int(os.path.basename(fn).split('-')[1])
        # Write a single element; slicing `labels[i:]` would overwrite the
        # whole tail of the array on every iteration.
        labels[i] = 1 if class_id == 3 else 0
    return data, labels

In [None]:
# Training set: folds 1-4 and 6-9 under datasets/audio.
# NOTE(review): fold5 is not listed here and is not loaded by any other
# visible cell -- confirm it is being held out on purpose.
parent_dir = 'datasets/audio'
sub_dirs = list(map('fold{}'.format, (1, 2, 3, 4, 6, 7, 8, 9)))
data, labels = parse_audio_files(parent_dir, sub_dirs)

In [None]:
# Evaluation set: fold10 only, which is not part of the training folds.
# This rebinds parent_dir / sub_dirs from the training cell.
parent_dir, sub_dirs = 'datasets/audio', ['fold10']
test_data, test_labels = parse_audio_files(parent_dir, sub_dirs)

In [None]:
# One dense numeric feature "x" holding the 176400 raw samples of a clip.
feature_columns = [
    tf.feature_column.numeric_column(key="x", shape=[176400]),
]

# Fully connected classifier over the raw waveform; checkpoints are written
# to (and resumed from) the "dnn_sound_class" directory.
# NOTE(review): n_classes is 10, but parse_audio_files only ever emits the
# labels 0 and 1 -- confirm whether a 2-class head was intended.
classifier = tf.estimator.DNNClassifier(
    hidden_units=[500, 250, 125, 75, 50, 25],
    feature_columns=feature_columns,
    model_dir="dnn_sound_class",
    n_classes=10,
    optimizer=tf.train.AdamOptimizer(learning_rate=1e-5),
    dropout=0.1,
)

In [None]:
# Feed the training arrays in shuffled batches of 50. num_epochs=None makes
# the input repeat indefinitely, so the `steps` argument below is what ends
# training.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": data}, y=labels, batch_size=50, num_epochs=None, shuffle=True)

classifier.train(steps=10000, input_fn=train_input_fn)




In [None]:
# Evaluation input: a single, unshuffled pass over the held-out arrays.
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": test_data}, y=test_labels, shuffle=False, num_epochs=1)

# Pull the "accuracy" entry out of the evaluation metrics and report it as a
# percentage.
accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
print("\nTest Accuracy: {0:f}%\n".format(accuracy_score*100))
