In [1]:
import tensorflow as tf
import numpy as np
import scipy.io as cpio
import os
import time

# Locations of the input data and of the training checkpoints.
# Both default to the original author's local folders, but can be overridden with the
# environment variables CANARY_DATA_DIR and CANARY_TRAINING_RECORDS_DIR so that the
# notebook can run on another machine without editing this cell.
#
# Each data file in the data folder contains the variables:
#    s: The spectrogram [size = 513 x time_steps]
#    f: Frequencies [size = 513]
#    t: Time steps
#    labels: The tagging data [size = time_steps]
data_directory = os.environ.get(
    'CANARY_DATA_DIR',
    '/Users/yardenc/Documents/Experiments/Imaging/Data/CanaryData/lrb853_15/movs/wav/mat')
# This folder must also contain a matlab file 'file_list.mat' with cell array 'keys' that holds the data file names
data_list = cpio.loadmat(os.path.join(data_directory, 'file_list.mat'))
number_of_files = len(data_list['keys'][0])
# The folder for saving training checkpoints
training_records_dir = os.environ.get(
    'CANARY_TRAINING_RECORDS_DIR',
    '/Users/yardenc/Documents/Experiments/Imaging/Data/CanaryData/lrb853_15/training_records')

In [2]:
# Hyper-parameters of the network and of the training procedure
input_vec_size = 513    # length of one input vector (one spectrogram column)
lstm_size = 513         # hidden units per LSTM cell; kept equal to the input size
batch_size = 1          # NOTE(review): not referenced by the other cells shown here
n_lstm_layers = 2       # stacked LSTM layers in each direction
n_syllables = 20        # number of label classes, including zero
learning_rate = 0.001   # step size handed to the optimizer
n_max_iter = 1000       # cap on the number of training windows drawn from one file

In [3]:
# The inference graph
def label_inference_graph(spectrogram, num_hidden, num_layers, seq_length, num_classes=None):
    """Build a stacked bi-directional LSTM followed by a linear projection to logits.

    Args:
        spectrogram: 2-D float tensor [time_steps, features]. It is wrapped into a
            batch of one before being handed to the RNN.
        num_hidden: number of units in every LSTM cell.
        num_layers: number of stacked LSTM layers in each direction.
        seq_length: sequence-length tensor forwarded to
            tf.nn.bidirectional_dynamic_rnn (one entry, since the batch size is 1).
        num_classes: number of output classes. Defaults to the module-level
            `n_syllables`, which is what the original implementation always used.

    Returns:
        Logits tensor of shape [time_steps, num_classes].
    """
    if num_classes is None:
        num_classes = n_syllables

    def _stacked_lstm():
        # One cell object per layer. Putting the *same* object in the list
        # ([cell] * num_layers) is rejected or silently shares weights between
        # layers in TensorFlow releases after 1.0.
        layers = [tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, state_is_tuple=True)
                  for _ in range(num_layers)]
        return tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)

    # First the dynamic bi-directional, multi-layered LSTM
    with tf.name_scope('biRNN'):
        cells_f = _stacked_lstm()
        cells_b = _stacked_lstm()
        # Add a leading batch dimension of size 1: [1, time_steps, features]
        batch = tf.stack([spectrogram], axis=0)
        outputs, _states = tf.nn.bidirectional_dynamic_rnn(
            cells_f, cells_b, batch,
            time_major=False, dtype=tf.float32, sequence_length=seq_length)
    # Second, projection on the number of syllables creates logits
    with tf.name_scope('Projection'):
        W_f = tf.Variable(tf.random_normal([num_hidden, num_classes]))
        W_b = tf.Variable(tf.random_normal([num_hidden, num_classes]))
        bias = tf.Variable(tf.random_normal([num_classes]))
    # outputs = (forward, backward); [0] selects the single sequence in the batch
    logits = tf.matmul(outputs[0][0], W_f) + bias + tf.matmul(outputs[1][0], W_b)
    return logits

In [4]:
# The training graph: summed cross-entropy loss minimised with Adam
def training_graph(logits, labels, rate):
    """Attach the loss and the optimisation step to the graph.

    Args:
        logits: unnormalised class scores, one row per time step.
        labels: integer class index for every row of `logits`.
        rate: learning rate given to the Adam optimizer.

    Returns:
        (train_op, cost): the op that applies one optimisation step, and the
        scalar loss (sum of the per-row sparse softmax cross-entropies).
    """
    per_step_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    total_loss = tf.reduce_sum(per_step_loss, name='cost')
    # Non-trainable counter that minimize() increments on every step
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=rate)
    minimize_op = adam.minimize(total_loss, global_step=step_counter)
    return minimize_op, total_loss

In [5]:
# Assemble the complete graph: placeholders, inference, training, initializer and saver
full_graph = tf.Graph()
with full_graph.as_default():
    # Placeholders for one spectrogram, its labels and its length
    X = tf.placeholder(tf.float32, shape=[None, input_vec_size], name="Xdata")  # spectrogram
    Y = tf.placeholder(tf.int32, shape=[None], name="Ylabels")                  # labels
    lng = tf.placeholder(tf.int32, name="nSteps")                               # sequence length

    # Predictions from the inference model
    logits = label_inference_graph(X, lstm_size, n_lstm_layers, lng)

    # Register the tensors in named collections so they can be fetched again
    # with tf.get_collection after the graph is re-imported from a checkpoint.
    for collection_name, tensor in (("specs", X), ("labels", Y), ("lng", lng), ("logits", logits)):
        tf.add_to_collection(collection_name, tensor)

    # Ops that compute the loss and apply the gradients
    train_op, cost = training_graph(logits, Y, learning_rate)

    # Variable initializer op
    init = tf.global_variables_initializer()

    # Saver used to write training checkpoints (at most 10 are kept)
    saver = tf.train.Saver(max_to_keep=10)

In [6]:
# Train on fixed-length windows and save a checkpoint at the end of each file.
window_len = 370  # number of consecutive time steps fed to the network in one training step

# Make sure the checkpoint folder exists before training starts, so that a long
# run is not lost when saver.save() cannot write into a missing directory.
if not os.path.isdir(training_records_dir):
    os.makedirs(training_records_dir)
checkpoint_file = os.path.join(training_records_dir, 'checkpoint')

with tf.Session(graph=full_graph) as sess:
    # Run the Op to initialize the variables.
    sess.run(init)
    # Start the training loop.
    costs = []
    # `step` is always one ahead of the number of completed training steps; the
    # checkpoint file names carry this value (the evaluation cell loads 'checkpoint-5001').
    step = 1
    # Go over all training files
    for file_num in range(number_of_files):
        # load current training file (the listed name's 3-letter extension is replaced by 'mat')
        fname = data_list['keys'][0][file_num][0][0:-3]+'mat'
        data = cpio.loadmat(data_directory + '/' + fname)
        data1 = np.transpose(data['s'])  # -> [time_steps, frequencies]
        intY = data['labels'][0]

        # Valid window starts are 0 .. time_steps - window_len (inclusive).
        n_starts = data1.shape[0] - window_len + 1
        if n_starts <= 0:
            # Too short to cut a single window from; np.random.permutation would
            # raise on a negative argument.
            print('Skipping %s: fewer than %d time steps' % (fname, window_len))
            continue
        # Visit the windows in random order, at most n_max_iter of them per file.
        iter_order = np.random.permutation(n_starts)[:n_max_iter]
        for iternum in iter_order:
            window = slice(iternum, iternum + window_len)
            d = {X: data1[window], Y: intY[window], lng: [window_len]}
            _cost, _ = sess.run((cost, train_op), feed_dict=d)
            costs.append(_cost)
            print([file_num, step, _cost])
            step = step + 1

        saver.save(sess, checkpoint_file, global_step=step)
        
        

[0, 1, 1252.4393]
[0, 2, 3864.8286]
[0, 3, 2556.6741]
[0, 4, 4481.3047]
[0, 5, 5307.2822]
[0, 6, 3360.9033]
[0, 7, 1952.7001]
[0, 8, 1103.3469]
[0, 9, 336.16611]
[0, 10, 3855.2822]
[0, 11, 2083.7688]
[0, 12, 777.29828]
[0, 13, 2988.207]
[0, 14, 544.42535]
[0, 15, 177.57024]
[0, 16, 822.85529]
[0, 17, 636.69397]
[0, 18, 535.33893]
[0, 19, 1124.2872]
[0, 20, 1658.1976]
[0, 21, 4424.9829]
[0, 22, 396.29291]
[0, 23, 555.39178]
[0, 24, 1129.0948]
[0, 25, 264.16574]
[0, 26, 1819.9922]
[0, 27, 136.89548]
[0, 28, 311.53284]
[0, 29, 2144.551]
[0, 30, 165.97078]
[0, 31, 1764.5049]
[0, 32, 1056.7024]
[0, 33, 65.599251]
[0, 34, 309.1459]
[0, 35, 265.22626]
[0, 36, 14.15451]
[0, 37, 928.50952]
[0, 38, 608.76044]
[0, 39, 123.13672]
[0, 40, 1149.1825]
[0, 41, 3388.3955]
[0, 42, 959.677]
[0, 43, 318.66507]
[0, 44, 427.71429]
[0, 45, 1006.3029]
[0, 46, 950.68774]
[0, 47, 299.83466]
[0, 48, 88.969833]
[0, 49, 147.84154]
[0, 50, 316.69464]
[0, 51, 23.816147]
[0, 52, 91.184906]
[0, 53, 787.25793]
[0, 54, 

KeyboardInterrupt: 

In [7]:
# The training cell opens its session in a `with` block, which closes the session
# when the block exits; this explicit close is presumably only a safeguard after
# the interrupted run. It requires `sess` to still be defined in the kernel.
sess.close()

In [7]:
# Evaluate training set from a saved checkpoint
# Single definition of the checkpoint to load; the graph definition lives next to it in '<path>.meta'.
checkpoint_to_eval = os.path.join(training_records_dir, "checkpoint-5001")

# Everything restored here gets its own eval-specific name. Rebinding X, lng, logits,
# saver or sess would leave the training cells pointing at tensors of a different
# graph, so they would fail if re-run after this cell.
with tf.Session(graph=tf.Graph()) as eval_sess:
    restorer = tf.train.import_meta_graph(checkpoint_to_eval + ".meta")
    restorer.restore(eval_sess, checkpoint_to_eval)

    # Retrieve the Ops we 'remembered'.
    eval_logits = tf.get_collection("logits")[0]
    eval_X = tf.get_collection("specs")[0]
    eval_lng = tf.get_collection("lng")[0]

    # Add an Op that chooses the top prediction (k defaults to 1).
    eval_op = tf.nn.top_k(eval_logits)

    # Run evaluation: one boolean array per file, True where the prediction is wrong.
    errors = []
    for file_num in range(number_of_files):
        # load current training file
        fname = data_list['keys'][0][file_num][0][0:-3]+'mat'
        data = cpio.loadmat(data_directory + '/' + fname)
        data1 = np.transpose(data['s'])
        intY = data['labels'][0]
        # The labels placeholder is not fed: the logits depend only on the
        # spectrogram and the sequence length.
        d = {eval_X: data1, eval_lng: [len(intY)]}
        pred = eval_sess.run(eval_op, feed_dict=d)
        # pred = (values, indices); take the single top-k column explicitly so that
        # a one-step file still yields a 1-D array.
        predicted_labels = pred[1][:, 0]
        errors.append(predicted_labels != intY)

In [8]:
# Results on training set: per-file fraction of time steps whose predicted label is wrong
list(map(np.mean, errors))

[0.34048412813451223,
 0.35495445763179684,
 0.093001841620626149,
 0.47276603993021904,
 0.032034853921066121,
 0.46023308116949502,
 0.4215967834577829,
 0.28068693693693691]