In [1]:
# Imports
import tensorflow as tf
import numpy as np
import glob
import csv
import os
import random

In [2]:
# Access data
def get_data(data_dir, labels_path):
    """
    Load every saved array in `data_dir` together with its class label.

    All paths matching '*npy' inside `data_dir` are collected and shuffled
    with the global `random` state.  The label of a file is looked up in the
    CSV at `labels_path`, using the part of the file's base name that
    precedes the first '.' or '_'.

    Args:
        data_dir: directory holding the `.npy` files.
        labels_path: CSV file passed to `get_labels_dict`.

    Returns:
        Tuple `(X, Y)` of float32 arrays.  `X` has shape
        `(num_examples,) + <shape of the first file>` and `Y` has shape
        `(num_examples,)`, with every label equal to -1 replaced by 0.
        When no file matches, `X` has shape `(0, 251, 100)`.

    Raises:
        KeyError: if a file's key is not present in the labels file.
    """
    data_filenames = glob.glob(os.path.join(data_dir, '*npy'))
    random.shuffle(data_filenames)

    num_examples = len(data_filenames)

    labels_dict = {}
    get_labels_dict(labels_path, labels_dict)

    # Take the example shape from the data instead of hard-coding it, so
    # that other window sizes load as well.  (251, 100) is kept only as the
    # shape of the empty result.
    if data_filenames:
        example_shape = tuple(np.load(data_filenames[0]).shape)
    else:
        example_shape = (251, 100)

    # Allocate float32 up front: avoids holding a float64 copy of the whole
    # data set just to down-cast it at the end.
    X = np.zeros((num_examples,) + example_shape, dtype=np.float32)
    Y = np.zeros(num_examples)
    for i, df in enumerate(data_filenames):
        X[i] = np.load(df)

        # basename() instead of split('/') so the key is also found when
        # the OS uses a different path separator.
        label_key = os.path.basename(df).split('.')[0].split('_')[0]
        Y[i] = labels_dict[label_key]

    # Convert -1 labels to 0
    Y[Y == -1] = 0

    return X, Y.astype(np.float32)


def get_labels_dict(reference_path, reference=None):
    """
    Read a two-column CSV into a dictionary mapping column 0 to column 1.

    Args:
        reference_path: path of the CSV file (',' delimiter, '|' quote
            character).
        reference: optional dictionary to fill in place.  A new dictionary
            is created when omitted.

    Returns:
        The filled dictionary (the same object as `reference` when one was
        passed).  Values are kept as the strings read from the file.
    """
    if reference is None:
        reference = {}

    with open(reference_path) as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
            # Blank lines are parsed as empty rows; indexing them would
            # raise IndexError.
            if not row:
                continue
            reference[row[0]] = row[1]

    return reference
        

In [3]:
# Helper functions for defining variables in the network graph
def _variable_on_cpu(name, shape, initializer):
    """
    Return a float32 variable obtained from `tf.get_variable` under the
    '/cpu:0' device scope.

    Args:
        name: variable name, relative to the current variable scope.
        shape: list of ints giving the variable's shape.
        initializer: initializer passed through to `tf.get_variable`.
    """
    cpu_scope = tf.device('/cpu:0')
    with cpu_scope:
        return tf.get_variable(
            name=name,
            shape=shape,
            dtype=tf.float32,
            initializer=initializer)

def _variable_with_weight_decay(name, shape, stddev, wd):
    """
    Create a truncated-normal initialized variable and register its L2
    penalty.

    Args:
        name: variable name.
        shape: list of ints giving the variable's shape.
        stddev: standard deviation of the truncated normal initializer.
        wd: multiplier applied to `tf.nn.l2_loss(var)`.  The product is
            added to the 'losses' collection unless `wd` is None (a value
            of 0.0 still adds a term).

    Returns:
        The created variable.
    """
    normal_init = tf.truncated_normal_initializer(
        stddev=stddev,
        dtype=tf.float32)
    var = _variable_on_cpu(name=name, shape=shape, initializer=normal_init)

    # No decay requested: nothing to register.
    if wd is None:
        return var

    penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', penalty)
    return var


def loss(unscale_logits, labels):
    """
    Build the total loss: mean softmax cross-entropy plus every term that
    has been added to the 'losses' collection (e.g. the weight-decay terms
    registered by `_variable_with_weight_decay`).

    Args:
        unscale_logits: unscaled class scores, one row per example.
        labels: integer class index per example, as required by
            `tf.nn.sparse_softmax_cross_entropy_with_logits`.

    Returns:
        Scalar tensor named 'total_loss'.

    Side effects:
        Adds the mean cross-entropy to the 'losses' collection.
    """
    # Named differently from the mean below so that the two ops do not
    # compete for the same name in the graph.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels,
        logits=unscale_logits,
        name='cross_entropy_per_example')

    cross_entropy_mean = tf.reduce_mean(per_example_loss, name='cross_entropy')

    tf.add_to_collection('losses', cross_entropy_mean)

    return tf.add_n(tf.get_collection('losses'), name='total_loss')

In [4]:
# Function for defining CNN network 
# (window size = 300)
def cnn(features, input_shape=(151, 100)):
    """
    Build the two-class CNN and return its unscaled logits.

    Architecture: conv1 (5x5, 64 filters, tanh) -> max-pool -> LRN ->
    conv2 (5x5, 64 filters, tanh) -> LRN -> max-pool -> fc1 (384, ReLU) ->
    fc2 (192, ReLU) -> linear layer with 2 outputs.

    Args:
        features: dict whose "x" entry holds the input batch; it is
            reshaped to `[-1, height, width, 1]`.
        input_shape: `(height, width)` of one example.  Defaults to
            `(151, 100)`, the size that used to be hard-coded here.

    Returns:
        Tensor of shape `[batch, 2]` holding the unscaled class logits.
    """
    height, width = input_shape

    # Input Layer: append a single channel axis.
    input_layer = tf.reshape(features["x"], [-1, height, width, 1])

    # Convolutional layer #1.
    with tf.variable_scope(name_or_scope='conv1') as scope:
        kernel = _variable_with_weight_decay(
            name='weights',
            shape=[5, 5, 1, 64],
            stddev=5e-2,
            wd=0.0)

        conv = tf.nn.conv2d(
            input=input_layer,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')

        biases = _variable_on_cpu(
            name='biases',
            shape=[64],
            initializer=tf.constant_initializer(0.0))

        pre_activation = tf.nn.bias_add(conv, biases)

        conv1 = tf.nn.tanh(x=pre_activation, name=scope.name)

    # Max pooling layer #1 (3x3 window, stride 2).
    pool1 = tf.nn.max_pool(
        value=conv1,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool1')

    # Local response normalization layer #1.
    norm1 = tf.nn.lrn(
        input=pool1,
        depth_radius=4,
        bias=1.0,
        alpha=0.001 / 9.0,
        beta=0.75,
        name='norm1')

    # Convolutional layer #2.
    with tf.variable_scope(name_or_scope='conv2') as scope:
        kernel = _variable_with_weight_decay(
            name='weights',
            shape=[5, 5, 64, 64],
            stddev=5e-2,
            wd=0.0)

        conv = tf.nn.conv2d(
            input=norm1,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')

        biases = _variable_on_cpu(
            name='biases',
            shape=[64],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.nn.bias_add(conv, biases)

        conv2 = tf.nn.tanh(x=pre_activation, name=scope.name)

    # Local response normalization layer #2 (here applied before pooling).
    norm2 = tf.nn.lrn(
        input=conv2,
        depth_radius=4,
        bias=1.0,
        alpha=0.001 / 9.0,
        beta=0.75,
        name='norm2')

    # Max pooling layer #2 (3x3 window, stride 2).
    pool2 = tf.nn.max_pool(
        value=norm2,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool2')

    # Fully connected layer #1.
    with tf.variable_scope(name_or_scope='fc1') as scope:
        # Number of features per example, read from the static shape of
        # pool2 rather than hard-coded.  For the default 151x100 input this
        # is 38 * 25 * 64 = 60800.
        flat_dim = 1
        for dim in pool2.get_shape().as_list()[1:]:
            flat_dim *= dim

        flat = tf.reshape(pool2, [-1, flat_dim])

        weights = _variable_with_weight_decay(
            name='weights',
            shape=[flat_dim, 384],
            stddev=0.04,
            wd=0.004)

        biases = _variable_on_cpu(
            name='biases',
            shape=[384],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.add(tf.matmul(flat, weights), biases)

        fc1 = tf.nn.relu(features=pre_activation, name=scope.name)

    # Fully connected layer #2.
    with tf.variable_scope(name_or_scope='fc2') as scope:
        weights = _variable_with_weight_decay(
            name='weights',
            shape=[384, 192],
            stddev=0.04,
            wd=0.004)

        biases = _variable_on_cpu(
            name='biases',
            shape=[192],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.add(tf.matmul(fc1, weights), biases)

        fc2 = tf.nn.relu(features=pre_activation, name=scope.name)

    # Unscaled logits layer #1 (no activation; softmax is applied by the
    # caller / the loss).
    with tf.variable_scope(name_or_scope='logits1') as scope:
        weights = _variable_with_weight_decay(
            name='weights',
            shape=[192, 2],
            stddev=1.0 / 192.0,
            wd=0.0)

        biases = _variable_on_cpu(
            name='biases',
            shape=[2],
            initializer=tf.constant_initializer(0.0))

        unscale_logits = tf.add(tf.matmul(fc2, weights), biases)

    return unscale_logits

In [5]:
# Function for defining CNN network
# (window size = 500)
def cnn2(features, input_shape=(251, 100)):
    """
    Build the two-class CNN for the larger input and return its unscaled
    logits.

    Architecture: conv1 (5x5, 64 filters, tanh) -> max-pool -> LRN ->
    conv2 (5x5, 64 filters, tanh) -> LRN -> max-pool -> fc1 (384, ReLU) ->
    fc2 (192, ReLU) -> linear layer with 2 outputs.

    Args:
        features: dict whose "x" entry holds the input batch; it is
            reshaped to `[-1, height, width, 1]`.
        input_shape: `(height, width)` of one example.  Defaults to
            `(251, 100)`, the size that used to be hard-coded here.

    Returns:
        Tensor of shape `[batch, 2]` holding the unscaled class logits.
    """
    height, width = input_shape

    # Input Layer: append a single channel axis.
    input_layer = tf.reshape(features["x"], [-1, height, width, 1])

    # Convolutional layer #1.
    with tf.variable_scope(name_or_scope='conv1') as scope:
        kernel = _variable_with_weight_decay(
            name='weights',
            shape=[5, 5, 1, 64],
            stddev=5e-2,
            wd=0.0)

        conv = tf.nn.conv2d(
            input=input_layer,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')

        biases = _variable_on_cpu(
            name='biases',
            shape=[64],
            initializer=tf.constant_initializer(0.0))

        pre_activation = tf.nn.bias_add(conv, biases)

        conv1 = tf.nn.tanh(x=pre_activation, name=scope.name)

    # Max pooling layer #1 (3x3 window, stride 2).
    pool1 = tf.nn.max_pool(
        value=conv1,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool1')

    # Local response normalization layer #1.
    norm1 = tf.nn.lrn(
        input=pool1,
        depth_radius=4,
        bias=1.0,
        alpha=0.001 / 9.0,
        beta=0.75,
        name='norm1')

    # Convolutional layer #2.
    with tf.variable_scope(name_or_scope='conv2') as scope:
        kernel = _variable_with_weight_decay(
            name='weights',
            shape=[5, 5, 64, 64],
            stddev=5e-2,
            wd=0.0)

        conv = tf.nn.conv2d(
            input=norm1,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')

        biases = _variable_on_cpu(
            name='biases',
            shape=[64],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.nn.bias_add(conv, biases)

        conv2 = tf.nn.tanh(x=pre_activation, name=scope.name)

    # Local response normalization layer #2 (here applied before pooling).
    norm2 = tf.nn.lrn(
        input=conv2,
        depth_radius=4,
        bias=1.0,
        alpha=0.001 / 9.0,
        beta=0.75,
        name='norm2')

    # Max pooling layer #2 (3x3 window, stride 2).
    pool2 = tf.nn.max_pool(
        value=norm2,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool2')

    # Fully connected layer #1.
    with tf.variable_scope(name_or_scope='fc1') as scope:
        # Number of features per example, read from the static shape of
        # pool2 rather than hard-coded.  For the default 251x100 input this
        # is 63 * 25 * 64 = 100800.
        flat_dim = 1
        for dim in pool2.get_shape().as_list()[1:]:
            flat_dim *= dim

        flat = tf.reshape(pool2, [-1, flat_dim])

        weights = _variable_with_weight_decay(
            name='weights',
            shape=[flat_dim, 384],
            stddev=0.04,
            wd=0.004)

        biases = _variable_on_cpu(
            name='biases',
            shape=[384],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.add(tf.matmul(flat, weights), biases)

        fc1 = tf.nn.relu(features=pre_activation, name=scope.name)

    # Fully connected layer #2.
    with tf.variable_scope(name_or_scope='fc2') as scope:
        weights = _variable_with_weight_decay(
            name='weights',
            shape=[384, 192],
            stddev=0.04,
            wd=0.004)

        biases = _variable_on_cpu(
            name='biases',
            shape=[192],
            initializer=tf.constant_initializer(0.1))

        pre_activation = tf.add(tf.matmul(fc1, weights), biases)

        fc2 = tf.nn.relu(features=pre_activation, name=scope.name)

    # Unscaled logits layer #1 (no activation; softmax is applied by the
    # caller / the loss).
    with tf.variable_scope(name_or_scope='logits1') as scope:
        weights = _variable_with_weight_decay(
            name='weights',
            shape=[192, 2],
            stddev=1.0 / 192.0,
            wd=0.0)

        biases = _variable_on_cpu(
            name='biases',
            shape=[2],
            initializer=tf.constant_initializer(0.0))

        unscale_logits = tf.add(tf.matmul(fc2, weights), biases)

    return unscale_logits

In [12]:
# Function for building CNN network
def cnn_model_fn(features, labels, mode):
    """
    Model function for `tf.estimator.Estimator`, built around `cnn2`.

    Args:
        features: dict with the input batch under key "x" (forwarded to
            `cnn2`).
        labels: class label per example; cast to int64 before the loss is
            built.  Not used in PREDICT mode.
        mode: one of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec`:
          * PREDICT: `predictions` is the softmax probability tensor.
          * TRAIN: loss, train op and a hook that writes all summaries to
            'models/cnn2' every 10 steps.
          * any other mode (EVAL): loss and the "accuracy" metric.
    """
    unscale_logits = cnn2(features)

    # Generate predictions for PREDICT and EVAL modes.
    predictions = {
        'classes': tf.argmax(input=unscale_logits, axis=1),
        'probabilities': tf.nn.softmax(unscale_logits, name='softmax_tensor')
    }

    ####################
    # PREDICT
    ####################
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions['probabilities'])

    # Calculate loss for both TRAIN and EVAL modes.
    labels = tf.cast(labels, tf.int64)
    total_loss = loss(unscale_logits, labels)

    # Add summary operation for total loss visualization.
    tf.summary.scalar(
        name='total_loss',
        tensor=total_loss)

    ####################
    # TRAIN
    ####################
    if mode == tf.estimator.ModeKeys.TRAIN:

        # Compute gradients using the Adam optimizer.
        optimizer = tf.train.AdamOptimizer()

        grads_vars = optimizer.compute_gradients(loss=total_loss)

        # Add summary operations for gradient visualizations.
        for grad, var in grads_vars:
            if grad is not None:
                tf.summary.histogram(
                    name=var.op.name + '/gradients',
                    values=grad)

        # Apply the gradients computed above.  Calling minimize() here
        # would build a second, identical gradient subgraph.
        train_op = optimizer.apply_gradients(
            grads_and_vars=grads_vars,
            global_step=tf.train.get_global_step())

        # Add evaluation metrics for TRAIN mode.
        accuracy_train = tf.metrics.accuracy(
            labels=labels,
            predictions=predictions["classes"])

        # Add summary operation for training accuracy visualization.
        # Element [1] is the metric's update op: fetching it both updates
        # and returns the running accuracy.  Element [0] only reads the
        # accumulators, which nothing else updates in TRAIN mode, so the
        # summary would stay at 0.
        tf.summary.scalar(
            name='accuracy_train',
            tensor=accuracy_train[1])

        train_summary_hook = tf.train.SummarySaverHook(
            save_steps=10,
            output_dir='models/cnn2',
            summary_op=tf.summary.merge_all())

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[train_summary_hook])

    ####################
    # EVALUATE
    ####################
    accuracy_valid = tf.metrics.accuracy(
        labels=labels,
        predictions=predictions["classes"])

    # Add summary operation for validation accuracy visualization.
    tf.summary.scalar(
        name='accuracy_validation',
        tensor=accuracy_valid[0])

    eval_metric_ops = {"accuracy": accuracy_valid}

    eval_summary_hook = tf.train.SummarySaverHook(
        save_steps=1,
        output_dir='models/cnn2',
        summary_op=tf.summary.merge_all())

    # NOTE(review): `training_hooks` are only run by Estimator.train(), so
    # this hook presumably never fires during evaluate(); the "accuracy"
    # metric is still reported through `eval_metric_ops`.  If the installed
    # TensorFlow supports `EstimatorSpec(evaluation_hooks=...)`, consider
    # passing the hook there instead - TODO confirm version.
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metric_ops,
        training_hooks=[eval_summary_hook])


In [13]:
# Main function for building, training and evaluating model.
def main(train_data, train_labels, eval_data, eval_labels, test_data,
         num_rounds=100, steps_per_round=25, batch_size=96):
    """
    Build the estimator, alternate training and evaluation, then predict.

    WARNING: any existing 'models/cnn2' directory is deleted first, so
    every call starts training from scratch.

    Args:
        train_data: numpy array of training examples (fed as feature "x").
        train_labels: numpy array of training labels.
        eval_data: numpy array of evaluation examples.
        eval_labels: numpy array of evaluation labels.
        test_data: numpy array of examples to predict on.
        num_rounds: number of train-then-evaluate rounds (default 100).
        steps_per_round: training steps per round (default 25).
        batch_size: training batch size (default 96).

    Returns:
        Numpy array of the predicted class probabilities for `test_data`,
        transposed so that the first axis indexes the class.
    """
    # Kept as a literal because the summary hooks in `cnn_model_fn` write
    # to this same path.
    estimator_dir = 'models/cnn2'

    # Delete directory containing events logs and checkpoints if it exists.
    if tf.gfile.Exists(estimator_dir):
        tf.gfile.DeleteRecursively(estimator_dir)

    # Create directory containing events logs and checkpoints.
    tf.gfile.MakeDirs(estimator_dir)

    # Create the Estimator.
    classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        model_dir=estimator_dir)

    # The input functions do not depend on the round, so build them once
    # instead of on every loop iteration.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    for _ in range(num_rounds):

        # Train the model.
        classifier.train(
            input_fn=train_input_fn,
            steps=steps_per_round)

        # Evaluate the model and print results.
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        print(eval_results)

    # Generate predictions on test set.
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_data},
        num_epochs=1,
        shuffle=False)

    predictions = np.array(list(classifier.predict(input_fn=predict_input_fn))).T

    return predictions
        

In [8]:
# Load training and validation data
# Root directories of the two data splits.
train_dir = 'data/spectrogram/training/'
valid_dir = 'data/spectrogram/validation/'

# Each split keeps its arrays in a 'sub_ws500' sub-directory and its labels
# in 'REFERENCE.csv'.
X_train, Y_train = get_data(
    data_dir=os.path.join(train_dir, 'sub_ws500'),
    labels_path=os.path.join(train_dir, 'REFERENCE.csv'))

X_valid, Y_valid = get_data(
    data_dir=os.path.join(valid_dir, 'sub_ws500'),
    labels_path=os.path.join(valid_dir, 'REFERENCE.csv'))

In [14]:
# Run network.
# The validation set is passed both for evaluation and as the set to
# predict on.
pred = main(
    train_data=X_train,
    train_labels=Y_train,
    eval_data=X_valid,
    eval_labels=Y_valid,
    test_data=X_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_log_step_count_steps': 100, '_save_checkpoints_secs': 600, '_session_config': None, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_model_dir': 'models/cnn2', '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into models/cnn2/model.ckpt.
INFO:tensorflow:loss = 96.751, step = 1
INFO:tensorflow:Saving checkpoints for 25 into models/cnn2/model.ckpt.
INFO:tensorflow:Loss for final step: 36.9232.
INFO:tensorflow:Starting evaluation at 2017-11-28-03:29:15
INFO:tensorflow:Restoring parameters from models/cnn2/model.ckpt-25
INFO:tensorflow:Finished evaluation at 2017-11-28-03:30:21
INFO:tensorflow:Saving dict for global step 25: accuracy = 0.623675, global_step = 25, loss = 35.5027
{'loss': 35.502678, 'global_step': 25, 'accuracy': 0.62367493}
INFO:tensorflow:Create CheckpointSaverHook.

In [8]:
# Run network.
# NOTE(review): this cell repeats the run cell above; the output saved
# below it names the model directory 'models-test-estimator/' while `main`
# uses 'models/cnn2', so it appears to come from an earlier revision -
# confirm and consider removing.
pred = main(
    train_data=X_train,
    train_labels=Y_train,
    eval_data=X_valid,
    eval_labels=Y_valid,
    test_data=X_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_steps': None, '_model_dir': 'models-test-estimator/', '_tf_random_seed': 1, '_keep_checkpoint_max': 5, '_session_config': None, '_log_step_count_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into models-test-estimator/model.ckpt.
INFO:tensorflow:loss = 60.3213, step = 1
INFO:tensorflow:Saving checkpoints for 72 into models-test-estimator/model.ckpt.
INFO:tensorflow:Saving checkpoints for 100 into models-test-estimator/model.ckpt.
INFO:tensorflow:Loss for final step: 0.895572.
INFO:tensorflow:Starting evaluation at 2017-11-19-22:33:41
INFO:tensorflow:Restoring parameters from models-test-estimator/model.ckpt-100
INFO:tensorflow:Finished evaluation at 2017-11-19-22:34:02
INFO:tensorflow:Saving dict for global step 100: accuracy = 0.720848, global_step 