In [2]:
import glob 
import os.path
import tensorflow as tf
import numpy as np
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

In [3]:
def load_wav_file(filename):
  """Loads an audio file and returns a float PCM-encoded array of samples.
  Args:
    filename: Path to the .wav file to load.
  Returns:
    Numpy array holding the sample data as floats between -1.0 and 1.0.
  """
  with tf.Session(graph=tf.Graph()) as sess:
    wav_filename_placeholder = tf.placeholder(tf.string, [])
    wav_loader = tf.read_file(filename)
    wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
    return sess.run(
        wav_decoder,
        feed_dict={wav_filename_placeholder: filename}).audio.flatten()

def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
    i = 0
    total_files = 0 
    for subs in sub_dirs:
        dir_name = parent_dir + "/" + subs
        numfiles = sum(1 for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f)) and f[0] != '.')
        total_files +=  numfiles
    data = np.zeros((total_files,176400))
    labels = np.zeros((total_files), dtype=int)
    for label, sub_dir in enumerate(sub_dirs):
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            data[i, :] =   load_wav_file(fn)
            num = int(fn.split('-')[1]) 
            if num != 3:
                labels [i:] = 0
            else:
                labels [i:] = 1 
            i += 1
    return data, labels

In [4]:
parent_dir = 'datasets/audio'
#sub_dirs = ['fold1', 'fold2', 'fold3', 'fold4', 'fold6', 'fold7', 'fold8', 'fold9']
sub_dirs = ['fold1','fold2']
data, labels =  parse_audio_files(parent_dir, sub_dirs)

In [5]:
parent_dir = 'datasets/audio'
sub_dirs = ['fold10']
test_data, test_labels =  parse_audio_files(parent_dir, sub_dirs)

In [6]:
feature_columns = [tf.feature_column.numeric_column("x", shape=[176400,])]
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[500,250,125,75,50,25],
    optimizer=tf.train.AdamOptimizer(1e-5),
    n_classes=10,
    dropout=0.1,
    model_dir="dnn_sound_class"
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_steps': None, '_save_summary_steps': 100, '_num_worker_replicas': 1, '_model_dir': 'dnn_sound_class', '_global_id_in_cluster': 0, '_task_id': 0, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': None, '_log_step_count_steps': 100, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_service': None, '_evaluation_master': '', '_task_type': 'worker', '_master': '', '_keep_checkpoint_max': 5, '_session_config': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3afc8424e0>, '_train_distribute': None}


In [None]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": data},
    y=labels,
    num_epochs=None,
    batch_size=50,
    shuffle=True
)

classifier.train(input_fn=train_input_fn, steps=10000)




INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into dnn_sound_class/model.ckpt.
INFO:tensorflow:step = 0, loss = 117.44269
INFO:tensorflow:global_step/sec: 9.92429
INFO:tensorflow:step = 100, loss = 114.40611 (10.078 sec)
INFO:tensorflow:global_step/sec: 10.2316
INFO:tensorflow:step = 200, loss = 112.33241 (9.774 sec)
INFO:tensorflow:global_step/sec: 10.2913
INFO:tensorflow:step = 300, loss = 109.13292 (9.716 sec)
INFO:tensorflow:global_step/sec: 10.3749
INFO:tensorflow:step = 400, loss = 99.244095 (9.639 sec)
INFO:tensorflow:global_step/sec: 10.4146
INFO:tensorflow:step = 500, loss = 82.51747 (9.602 sec)
INFO:tensorflow:global_step/sec: 10.3398
INFO:tensorflow:step = 600, loss = 65.17527 (9.671 sec)
INFO:tensorflow:global_step/sec: 10.4082
INFO:tensorflow:st

In [15]:
# Define the test inputs
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": test_data},
    y=test_labels,
    num_epochs=1,
    shuffle=False
)

accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
print("\nTest Accuracy: {0:f}%\n".format(accuracy_score*100))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-05-07:11:53
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from dnn_sound_class/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-05-07:11:55
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.86706054, average_loss = 1.6416829, global_step = 10000, loss = 185.23656

Test Accuracy: 86.706054%



In [None]:
for n in predictions:
    print(n)

In [None]:
test_input_fn

In [None]:
print(test_input_fn)

In [7]:
# Define the test inputs   
test_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": test_data[0]},shuffle=False)

In [None]:
for n in predictions:
    print(n)

In [None]:
test_data[0].shape

In [13]:
# Generate predictions from the model
expected = test_labels
predict_x = {'x':test_data}

predictions = classifier.predict(input_fn=test_input_fn)

In [14]:
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

for pred_dict, expec in zip(predictions, expected):
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print(class_id + " " + probability)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from dnn_sound_class/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U21') dtype('<U21') dtype('<U21')