In [7]:
import tensorflow as tf
import numpy as np
import scipy.io as cpio
import os
import time

# ---- Data locations ----
# Each data file in `data_directory` contains the variables:
#    s: The spectrogram [size = 513 x time_steps]
#    f: Frequencies [size = 513]
#    t: Time steps
#    labels: The tagging data [size = time_steps]
# Both folders are derived from one experiment root, so only this single path
# has to be edited when the notebook is run on another machine.
experiment_root = '/Users/yardenc/Documents/Experiments/Imaging/CanaryData/lrb853_15'
data_directory = os.path.join(experiment_root, 'mat')
# The data folder must also contain a matlab file 'file_list.mat' with cell
# array 'keys' that holds the data file names.
data_list = cpio.loadmat(os.path.join(data_directory, 'file_list.mat'))
number_of_files = len(data_list['keys'][0])
# The folder for saving training checkpoints. Create it up front so that the
# first checkpoint save does not fail on a missing directory.
training_records_dir = os.path.join(experiment_root, 'training_records')
if not os.path.isdir(training_records_dir):
    os.makedirs(training_records_dir)

In [8]:
# Hyper-parameters (everything a reader might want to tune)
input_vec_size = 513          # width of one spectrogram frame fed to the network
lstm_size = input_vec_size    # hidden units per LSTM cell; kept equal to the input width
n_lstm_layers = 2             # handed to label_inference_graph as num_layers
n_syllables = 28              # number of label classes, including zero
batch_size = 10               # rows per training batch
n_max_iter = 10001            # upper bound on the number of training iterations
learning_rate = 0.001         # step size given to the optimizer

In [9]:
# The inference graph
def label_inference_graph(spectrogram, num_hidden, num_layers, seq_length, num_classes=None):
    """Build a bi-directional LSTM followed by a linear projection onto label logits.

    Args:
        spectrogram: input tensor handed to the RNN with time_major=False,
            i.e. laid out as [batch, time, features].
        num_hidden: number of units in each of the two LSTM cells.
        num_layers: accepted so existing callers keep working, but currently
            unused -- exactly one LSTM cell is built per direction.
        seq_length: per-example sequence lengths forwarded to
            tf.nn.bidirectional_dynamic_rnn.
        num_classes: number of logit columns. When omitted, the notebook-level
            `n_syllables` is used, which is what this function always did.

    Returns:
        (logits, outputs): `logits` has one row per batch element and
        `num_classes` columns; `outputs` is the (forward, backward) pair
        returned by tf.nn.bidirectional_dynamic_rnn.
    """
    if num_classes is None:
        # Fall back to the notebook-level constant for backward compatibility.
        num_classes = n_syllables

    # First the dynamic bi-directional LSTM (one cell per direction)
    with tf.name_scope('biRNN'):
        with tf.variable_scope('fwd'):
            lstm_f1 = tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, state_is_tuple=True, reuse=None)
        with tf.variable_scope('bck'):
            lstm_b1 = tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, state_is_tuple=True, reuse=None)
        outputs, _states = tf.nn.bidirectional_dynamic_rnn(
            lstm_f1, lstm_b1, spectrogram,
            time_major=False, dtype=tf.float32, sequence_length=seq_length)

    # Second, projection on the number of classes creates logits.
    # The three variables are deliberately left unnamed and created in this
    # order so that their auto-generated graph names stay the same.
    with tf.name_scope('Projection'):
        W_f = tf.Variable(tf.random_normal([num_hidden, num_classes]))
        W_b = tf.Variable(tf.random_normal([num_hidden, num_classes]))
        bias = tf.Variable(tf.random_normal([num_classes]))

    # Only the output at the final time index of each direction is projected.
    # NOTE(review): for the backward direction, index -1 is presumably the step
    # at which that cell has seen only the last frame -- confirm this is intended.
    logits = tf.matmul(outputs[0][:, -1, :], W_f) + bias + tf.matmul(outputs[1][:, -1, :], W_b)
    return logits, outputs

In [10]:
# The training graph: mean sparse softmax cross-entropy, minimised with Adam
def training_graph(logits, labels, rate):
    """Attach the loss and the optimisation step to an inference graph.

    Args:
        logits: unnormalised class scores produced by the inference graph.
        labels: integer class ids, one per row of `logits`.
        rate: learning rate handed to the Adam optimizer.

    Returns:
        (train_op, cost): the op that applies one optimisation step, and the
        scalar mean cross-entropy it minimises.
    """
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    mean_loss = tf.reduce_mean(per_example_loss, name='cost')
    adam = tf.train.AdamOptimizer(learning_rate=rate)
    # Non-trainable counter that minimize() increments on every step.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    update_step = adam.minimize(mean_loss, global_step=step_counter)
    return update_step, mean_loss

In [11]:
# Assemble the complete graph (inference + training) and attach a saver
full_graph = tf.Graph()
with full_graph.as_default():
    # Placeholders for the inputs of one training / inference step.
    X = tf.placeholder(tf.float32, shape=[None, None, input_vec_size], name="Xdata")  # spectrograms
    Y = tf.placeholder(tf.int32, shape=[None], name="Ylabels")                        # labels
    lng = tf.placeholder(tf.int32, name="nSteps")                                     # sequence lengths

    # Register the placeholders in named collections so they can be fetched
    # again by name once the graph has been re-imported from a checkpoint.
    for collection_name, placeholder in (("specs", X), ("labels", Y), ("lng", lng)):
        tf.add_to_collection(collection_name, placeholder)

    # Inference model: predictions (logits) plus the raw RNN outputs.
    logits, outputs = label_inference_graph(X, lstm_size, n_lstm_layers, lng)
    tf.add_to_collection("logits", logits)

    # Loss and gradient-update ops.
    train_op, cost = training_graph(logits, Y, learning_rate)

    # Op that initialises every variable created above.
    init = tf.global_variables_initializer()

    # Saver for writing training checkpoints; only the 10 most recent are kept.
    saver = tf.train.Saver(max_to_keep=10)

In [6]:
#debug
# Load every data file once and stitch the pieces together along the time axis.
# The pieces are collected in lists and concatenated a single time, instead of
# re-copying the growing array after each file.
spectrogram_parts = []
label_parts = []
for file_num in range(number_of_files):
    # Replace the last three characters of the listed name with 'mat'.
    fname = data_list['keys'][0][file_num][0][0:-3]+'mat'
    data = cpio.loadmat(data_directory + '/' + fname)
    spectrogram_parts.append(np.transpose(data['s']))  # time becomes the first axis
    label_parts.append(data['labels'][0])
data1 = np.concatenate(spectrogram_parts, axis = 0)
intY = np.concatenate(label_parts, axis = 0)

# Cut the long recording into batch_size contiguous rows of temp_n frames each.
# Floor division keeps temp_n an integer under Python 3 as well; frames that
# do not fill a complete row are dropped.
temp_n = len(intY) // batch_size
data1 = data1[0:temp_n*batch_size].reshape((batch_size,temp_n,-1))
intY = intY[0:temp_n*batch_size].reshape((batch_size,-1))

In [7]:
#debug
# Run a single forward pass on one window to inspect the logits and RNN outputs.
debug_window = 30  # number of frames fed to the network in this check
with tf.Session(graph=full_graph,config = tf.ConfigProto(
    intra_op_parallelism_threads = batch_size)) as sess:
    sess.run(init)

    iternum = 0
    window_end = iternum + debug_window
    d = {X: data1[:,iternum:window_end,:],
         Y: intY[:,window_end],              # label of the frame right after the window
         lng: [debug_window]*batch_size}
    _logits,_outputs = sess.run((logits,outputs),feed_dict = d)
    

In [16]:
# debug: shapes of the forward RNN output, of its last time step, and of the logits.
# Single-argument print calls behave the same under Python 2 and Python 3.
print(_outputs[0].shape)
print(_outputs[0][:,-1,:].shape)
print(_logits.shape)

(3, 30, 513)
(3, 513)
(30, 28)


In [None]:
# Train on randomly ordered windows of the concatenated data and save a
# checkpoint whenever the step counter reaches a multiple of 1000.
window_len = 100   # frames per training window
end_margin = 370   # trailing frames of each row never used as a window start (value kept as-is)

# Load every data file once and stitch the pieces together along the time axis.
spectrogram_parts = []
label_parts = []
for file_num in range(number_of_files):
    # Replace the last three characters of the listed name with 'mat'.
    fname = data_list['keys'][0][file_num][0][0:-3]+'mat'
    data = cpio.loadmat(data_directory + '/' + fname)
    spectrogram_parts.append(np.transpose(data['s']))  # time becomes the first axis
    label_parts.append(data['labels'][0])
data1 = np.concatenate(spectrogram_parts, axis = 0)
intY = np.concatenate(label_parts, axis = 0)

# Cut the long recording into batch_size contiguous rows of temp_n frames each.
# Floor division keeps temp_n an integer under Python 3 as well.
temp_n = len(intY) // batch_size
data1 = data1[0:temp_n*batch_size].reshape((batch_size,temp_n,-1))
intY = intY[0:temp_n*batch_size].reshape((batch_size,-1))

# Window start positions in random order, capped at n_max_iter of them.
# NOTE(review): no random seed is set, so the order differs between runs.
iter_order = np.random.permutation(data1.shape[1]-end_margin)[0:n_max_iter]
print('%s %d' % (data1.shape, len(iter_order)))

with tf.Session(graph=full_graph,config = tf.ConfigProto(
    intra_op_parallelism_threads = batch_size)) as sess:
    # Run the Op to initialize the variables.
    sess.run(init)
    costs = []
    step = 1
    checkpoint_file = os.path.join(training_records_dir, 'checkpoint')
    for iternum in iter_order:
        window_end = iternum + window_len
        d = {X: data1[:,iternum:window_end,:],
             Y: intY[:,window_end],              # label of the frame right after the window
             lng: [window_len]*batch_size}
        # The train_op result is bound to a throwaway name rather than `_`,
        # which IPython uses for the last cell output.
        _cost,_train_result = sess.run((cost,train_op),feed_dict = d)
        costs.append(_cost)
        print([step,iternum,_cost])
        step = step + 1

        # NOTE(review): this writes checkpoint-1000, -2000, ...; the evaluation
        # cell loads 'checkpoint-5001' -- confirm which naming is intended.
        if (step % 1000 == 0):
            saver.save(sess, checkpoint_file, global_step=step)
            # Mean of the most recent (up to) 500 costs, newest one included.
            print(np.mean(costs[-500:]))
        
        

(10, 6266, 513) 5896
[1, 1099, 4.2483702]
[2, 5690, 4.7006426]
[3, 3308, 2.3439364]
[4, 4160, 5.7856154]
[5, 4521, 2.461616]
[6, 2633, 2.5212841]
[7, 1572, 3.165504]
[8, 1403, 2.184983]
[9, 1866, 2.0820374]
[10, 5641, 2.4901397]
[11, 5235, 2.4759438]
[12, 1685, 4.4641523]
[13, 2872, 2.408258]
[14, 1144, 2.7526615]
[15, 3197, 2.6022811]
[16, 961, 2.0234663]
[17, 922, 1.8030649]
[18, 3177, 2.3340554]
[19, 5545, 0.53948194]
[20, 1755, 3.0438879]
[21, 3674, 1.150262]
[22, 504, 2.0593922]
[23, 3822, 2.3870995]
[24, 4887, 2.1472015]
[25, 5575, 0.98993462]
[26, 1929, 1.6692593]
[27, 3569, 1.0253913]
[28, 3218, 1.2561661]
[29, 1931, 1.693516]
[30, 5852, 1.2745528]
[31, 714, 0.82061148]
[32, 5686, 0.84944791]
[33, 1966, 0.77201557]
[34, 171, 1.4239548]
[35, 402, 2.2866788]
[36, 2059, 1.9561851]
[37, 2781, 0.36511284]
[38, 4593, 3.5399048]
[39, 3254, 1.2125297]
[40, 4189, 1.9695721]
[41, 2911, 0.681234]
[42, 1670, 1.2766631]
[43, 1184, 0.66253489]
[44, 5499, 1.1228473]
[45, 4660, 1.1882523]
[46,

In [20]:
# Scratch check: shape of the slice that keeps indices 1..3 along the second axis of data1.
data1[:,1:4,:].shape

(5, 3, 513)

In [7]:
# Evaluate training set from a saved checkpoint
# NOTE(review): each spectrogram is fed below as a 2-D array and compared
# frame-by-frame with its labels, whereas the graph-construction cell declares
# a 3-D spectrogram placeholder and produces one logits row per sequence.
# This cell presumably targets a checkpoint written by an earlier graph
# version -- confirm before re-running.
checkpoint_prefix = os.path.join(training_records_dir, "checkpoint-5001")
with tf.Session(graph=tf.Graph()) as sess:
    eval_saver = tf.train.import_meta_graph(checkpoint_prefix + ".meta")
    eval_saver.restore(sess, checkpoint_prefix)

    # Retrieve the tensors stored in collections when the graph was built.
    # They are bound to eval_-prefixed names so that the notebook-level
    # saver / logits / X / Y / lng, which belong to the training graph,
    # are not overwritten by objects of the restored graph.
    eval_logits = tf.get_collection("logits")[0]
    eval_X = tf.get_collection("specs")[0]
    eval_Y = tf.get_collection("labels")[0]
    eval_lng = tf.get_collection("lng")[0]

    # Add an Op that chooses the top prediction (top_k with its default k).
    eval_op = tf.nn.top_k(eval_logits)

    # Run evaluation: one boolean error array per data file.
    errors = []
    for file_num in range(number_of_files):
        # load current training file
        fname = data_list['keys'][0][file_num][0][0:-3]+'mat'
        eval_data = cpio.loadmat(data_directory + '/' + fname)
        eval_spec = np.transpose(eval_data['s'])
        eval_labels = eval_data['labels'][0]
        feed = {eval_X: eval_spec, eval_Y: eval_labels, eval_lng: [len(eval_labels)]}
        pred = sess.run(eval_op, feed_dict = feed)
        # pred[1] holds the indices returned by top_k; an entry is an error
        # wherever it differs from the corresponding label.
        errors.append(np.abs(np.squeeze(pred[1])-eval_labels) != 0)

In [8]:
# Training-set results: mean of each file's boolean error array (its error rate)
list(map(np.mean, errors))

[0.34048412813451223,
 0.35495445763179684,
 0.093001841620626149,
 0.47276603993021904,
 0.032034853921066121,
 0.46023308116949502,
 0.4215967834577829,
 0.28068693693693691]