In [2]:
import scipy.io.wavfile
import os
import glob
import numpy as np
import scipy.signal
import tensorflow as tf

In [5]:
#it will take much time! plz be patient

# insert your directory of dataset
path = "data/AudioEventDataset/data"
files = os.listdir(path)

# choose only necessary 8 classes among 28 classes -> 1208 samples
chosen_classes = ['applause', 'dog', 'glass', 'knock', 'laughter', 'scream', 'tone', 'water']

# set X, Y
X_data = np.empty((1208, 1, 16000, 1))
Y_data  = np.zeros((1208, 8), dtype = 'int') #for softmax

#get train set
i = 0
for filename in glob.glob(os.path.join(path, '*.wav')):
    target = filename.split('_')[0].split('\\')[-1]
    if(target in chosen_classes):
        index = chosen_classes.index(target)
        Y_data[i][index] = 1
        samplerate, data = scipy.io.wavfile.read(filename)
        data = scipy.signal.resample(data, 16000)
        data = data.astype(float)
        data = data / (2.0**(16-1) + 1)
        X_data[i] = data.reshape((1,16000, 1))
        i += 1

#get test set
path = "data/AudioEventDataset/test"
files = os.listdir(path)
for filename in glob.glob(os.path.join(path, '*.wav')):
    target = filename.split('_')[0].split('\\')[-1]
    if(target in chosen_classes):
        index = chosen_classes.index(target)
        Y_data[i][index] = 1
        samplerate, data = scipy.io.wavfile.read(filename)
        data = scipy.signal.resample(data, 16000)
        data = data.astype(float)
        data = data / (2.0**(16-1) + 1)
        X_data[i] = data.reshape((1,16000, 1))
        i += 1

In [6]:
print(X_data.shape) 
print(Y_data.shape)
print(X_data[1])
print(Y_data[1])

(1208, 1, 16000, 1)
(1208, 8)
[[[ 0.00362505]
  [-0.00036751]
  [ 0.00059836]
  ...
  [-0.00028776]
  [-0.00080564]
  [-0.0034002 ]]]
[1 0 0 0 0 0 0 0]


In [7]:
# Network Parameters
num_classes = 8
dropout = 0.5
learning_rate = 0.001

# tf Graph input
X = tf.placeholder(tf.float32, [None, 1, 16000, 1])
Y = tf.placeholder(tf.int64, [None, num_classes])
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
is_training = tf.placeholder(tf.bool) # for BN

In [8]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def conv_net(x, weights, biases, dropout):
    # Tensor input become 4-D: [Batch Size, Height, Width, Channel]
    x = tf.reshape(x, shape=[-1, 1, 16000, 1])

    # Convolution Layers
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    
    # Max Pooling
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 1, 160, 1], strides=[1, 1, 160, 1],padding='VALID')
    pool2 = tf.reshape(pool2, shape=[-1, 100, 40, 1])
    
    pool3 = tf.nn.conv2d(pool2, weights['wc3'], strides=[1,1,1,1], padding='VALID')
    pool3 = tf.nn.bias_add(pool3, biases['bc3'])
    pool3 = tf.nn.max_pool(pool3, ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1], padding='VALID')
    
    pool4 = tf.nn.conv2d(pool3, weights['wc4'], strides=[1,1,1,1], padding='VALID')
    pool4 = tf.nn.bias_add(pool4, biases['bc4'])
    pool4 = tf.nn.max_pool(pool4, ksize=[1, 3, 1, 1], strides=[1, 3, 1, 1], padding='VALID')
    
    # Fully connected layer
    # Reshape pool4 output to fit fully connected layer input
    fc5 = tf.reshape(pool4, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc5 = tf.add(tf.matmul(fc5, weights['wd1']), biases['bd1'])
    fc5 = tf.nn.relu(fc5)
    fc5 = tf.nn.dropout(fc5, dropout)

    fc6 = tf.add(tf.matmul(fc5, weights['wd2']), biases['bd2'])
    fc6 = tf.nn.relu(fc6)
    fc6 = tf.nn.dropout(fc6, dropout)
    
    # Output, class prediction
    out = tf.add(tf.matmul(fc6, weights['out']), biases['out'])
    return out

In [9]:
# Store layers weight & bias
weights = {
    # 1x8 conv, 1 input channel , 40 outputs channel (filters)
    'wc1': tf.Variable(tf.random_normal([1, 8, 1, 40])),
    # 1x8 conv, 40 inputs, 40 outputs
    'wc2': tf.Variable(tf.random_normal([1, 8, 40, 40])),
    # 13*8 conv, 1 inputs, 50 outputs
    'wc3': tf.Variable(tf.random_normal([11, 8, 1, 13])),
    # 5*1 conv, 50 inputs, 50 outputs
    'wc4': tf.Variable(tf.random_normal([4, 1, 13, 13])),
    # fully connected
    'wd1': tf.Variable(tf.random_normal([13*11*9, 1024])),
    # fully connected
    'wd2': tf.Variable(tf.random_normal([1024, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, num_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([40])),
    'bc2': tf.Variable(tf.random_normal([40])),
    'bc3': tf.Variable(tf.random_normal([13])),
    'bc4': tf.Variable(tf.random_normal([13])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'bd2': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([num_classes]))
}

# Construct model
logits = conv_net(X, weights, biases, keep_prob)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float64))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [None]:
from sklearn.model_selection import train_test_split

# Training Hyper-Parameters
learning_rate = 0.001
num_steps = 2000
batch_size = 64
display_step = 100

#split dataset into training set and test set
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.08, random_state=42, shuffle=True)

with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps+1):  
        #batch of size 64
        X_batch, _, Y_batch, _ = train_test_split(X_train, Y_train, test_size=0.0, random_state=42, shuffle=True)
        X_batch = X_batch[:64]
        Y_batch = Y_batch[:64]
        
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: X_batch, Y: Y_batch, keep_prob: 0.2})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_batch,
                                                                 Y: Y_batch,
                                                                 keep_prob: 1.0})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))
            
        # accuracy on test set
            print("Testing Accuracy:", \
            sess.run(accuracy, feed_dict={X: X_test, Y: Y_test, keep_prob: 1.0}))
      
    print("Finished!")

Step 1, Minibatch Loss= 63636960.0000, Training Accuracy= 0.188
Testing Accuracy: 0.27835051546391754
Step 100, Minibatch Loss= 6866279.5000, Training Accuracy= 0.312
Testing Accuracy: 0.5154639175257731
Step 200, Minibatch Loss= 1505654.0000, Training Accuracy= 0.594
Testing Accuracy: 0.5257731958762887
Step 300, Minibatch Loss= 984216.8750, Training Accuracy= 0.656
Testing Accuracy: 0.5257731958762887
Step 400, Minibatch Loss= 403328.7812, Training Accuracy= 0.797
Testing Accuracy: 0.5257731958762887
Step 500, Minibatch Loss= 157411.5625, Training Accuracy= 0.875
Testing Accuracy: 0.5360824742268041
Step 600, Minibatch Loss= 107694.0000, Training Accuracy= 0.875
Testing Accuracy: 0.5463917525773195
Step 700, Minibatch Loss= 105161.3125, Training Accuracy= 0.922
Testing Accuracy: 0.5360824742268041
Step 800, Minibatch Loss= 88349.5625, Training Accuracy= 0.891
Testing Accuracy: 0.5463917525773195
Step 900, Minibatch Loss= 0.0000, Training Accuracy= 1.000
Testing Accuracy: 0.5360824742