In [1]:
import random
from itertools import combinations, product
import pickle
import librosa
import numpy as np
import tensorflow.compat.v1 as tf

In [2]:
tf.compat.v1.disable_eager_execution()

In [3]:
with open('hw4_trs.pkl', 'rb') as pickle_file:
    train_data = pickle.load(pickle_file)
print(train_data.shape)

(500, 16180)


In [4]:
with open('hw4_tes.pkl', 'rb') as pickle_file:
    test_data = pickle.load(pickle_file)
print(test_data.shape)

(200, 22631)


In [5]:
train_data = np.stack([np.abs(librosa.stft(x, n_fft=1024, hop_length=512).T) for x in train_data])
test_data = np.stack([np.abs(librosa.stft(x, n_fft=1024, hop_length=512).T) for x in test_data])

In [6]:
def create_pos_pairs(spk_indices, L=45):
    pos_pairs = list(combinations(spk_indices, 2))
    return pos_pairs

In [7]:
def create_neg_pairs(spk_indices, other_indices, L=45):
    neg_pairs = list(product(spk_indices, other_indices))
    l_pairs = random.sample(neg_pairs, L)
    return l_pairs

In [8]:
def generate_batches(data):
    total_utterances = data.shape[0]
    all_indices = list(range(total_utterances))
    
    left_input = []
    right_input = []
    output = []
    
    for i in range(0, total_utterances, 10):
        speaker_indices = list(range(i, i+10))
        pos_pairs = create_pos_pairs(speaker_indices)
        other_indices = np.delete(all_indices, speaker_indices)
        neg_pairs = create_neg_pairs(speaker_indices, other_indices)
        
        l_batch = []
        r_batch = []
        o_batch = []

        for x, y in pos_pairs:
            l_batch.append(data[x])
            r_batch.append(data[y])
            o_batch.append(1)

        for x, y in neg_pairs:
            l_batch.append(data[x])
            r_batch.append(data[y])
            o_batch.append(0)
        
        left_input.append(l_batch)
        right_input.append(r_batch)
        output.append(o_batch)
    
    return np.stack(left_input), np.stack(right_input), np.stack(output)

In [9]:
left_train, right_train, y_train = generate_batches(train_data)
left_test, right_test, y_test = generate_batches(test_data)

In [10]:
print(left_train.shape, left_train.dtype)
print(right_train.shape, right_train.dtype)
print(y_train.shape,y_train.dtype)

(50, 90, 32, 513) float32
(50, 90, 32, 513) float32
(50, 90) int64


In [11]:
 y_train = y_train.astype(np.float32)
 y_test = y_test.astype(np.float32)
 print(y_train.dtype)

float32


In [12]:
def create_placeholders():
  left_x = tf.compat.v1.placeholder(tf.float32,shape = [None,None,513])
  right_x = tf.compat.v1.placeholder(tf.float32,shape = [None,None,513])
  y = tf.compat.v1.placeholder(tf.float32,shape = [None])
  rows = tf.compat.v1.placeholder(tf.int32)

  return left_x, right_x, y, rows

In [19]:
def siamese_model(inputs, reuse, rows, num_units = [513]):
  cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units=n, reuse = reuse) for n in num_units]
  stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(cells)
  rnn_op, state = tf.nn.dynamic_rnn(stacked_lstm, inputs, dtype = tf.float32)
  dense_1 = tf.layers.dense(rnn_op, 513, activation=tf.nn.tanh, reuse = reuse)
  output = tf.reshape(dense_1,shape = [-1, rows*513])
  return output

In [20]:
def model(train_data, test_data, learning_rate = 0.0005, num_epochs = 100):
  tf.compat.v1.reset_default_graph()

  x1, x2, y, rows  = create_placeholders()
  left_train, right_train, y_train = train_data
  left_test, right_test, y_test = test_data

  left_op = siamese_model(x1,False,rows)
  right_op = siamese_model(x2,True,rows)
  dot_prod = tf.reduce_sum(tf.multiply(left_op,right_op),axis = 1)
  yPred = tf.nn.sigmoid(dot_prod)

  binary_op = tf.cast(tf.math.greater(yPred,0.5), tf.int16)
  
  cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = dot_prod))
  
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
  init = tf.global_variables_initializer()

  with tf.Session() as sess:
    sess.run(init)

    for epoch in range(num_epochs):
        epoch_loss = 0
        i = 0
        for left, right, y_batch in zip(left_train,right_train,y_train):
          row = left.shape[1]

          _, batch_loss = sess.run([optimizer, cost], feed_dict ={x1: left, x2: right, y:y_batch, rows:row })  
          epoch_loss += batch_loss
          i += 1
        test_accuracy = 0.0
        j = 0
        for left,right,y_batch in zip(left_test,right_test,y_test):
            row = left.shape[1]
            y_pred = sess.run(binary_op, feed_dict ={x1: left, x2: right, y:y_batch, rows: row})
            test_accuracy += sum(y_pred == y_batch)
            j+=1
        print(epoch,"Cost:", epoch_loss/i, " Test Accuracy: ", test_accuracy/j)

    test_accuracy = 0.0
    j = 0
    for left,right,y_batch in zip(left_test,right_test,y_test):
      row = left.shape[1]
      y_pred = sess.run(binary_op, feed_dict ={x1: left, x2: right, y:y_batch, rows: row})
      test_accuracy += sum(y_pred == y_batch)
      j+=1
    
    print("Final Test Accuracy: ", test_accuracy/j)
    return test_accuracy/j

In [None]:
tr_data = [left_train, right_train, y_train.astype(float)]
te_data = [left_test, right_test, y_test]
acc = model(tr_data, te_data)

  
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
  """


0 Cost: 535.3697467041015  Test Accuracy:  59.85
1 Cost: 61.108204956054685  Test Accuracy:  61.85
2 Cost: 49.69364757537842  Test Accuracy:  62.45
3 Cost: 44.704769439697266  Test Accuracy:  63.0
4 Cost: 41.00812160491943  Test Accuracy:  63.25
5 Cost: 37.50990997314453  Test Accuracy:  62.95
6 Cost: 33.98905679702759  Test Accuracy:  62.15
7 Cost: 30.228298244476317  Test Accuracy:  61.3
8 Cost: 26.110195770263672  Test Accuracy:  60.8
9 Cost: 21.685269117355347  Test Accuracy:  60.55
10 Cost: 17.081714115142823  Test Accuracy:  60.8
11 Cost: 12.614518013000488  Test Accuracy:  60.45
12 Cost: 9.378938064575195  Test Accuracy:  59.2
13 Cost: 7.919217329025269  Test Accuracy:  59.65
14 Cost: 10.907085089683532  Test Accuracy:  54.4
15 Cost: 19.522409992218016  Test Accuracy:  57.5
16 Cost: 18.447907762527464  Test Accuracy:  59.35
17 Cost: 9.589840559959411  Test Accuracy:  58.95
18 Cost: 4.611269690990448  Test Accuracy:  58.45
19 Cost: 2.455052822828293  Test Accuracy:  58.35
20 Cost