In [1]:
import sys
import librosa
import string
import numpy as np
import pickle
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter
import tensorflow as tf
import time
import os
import copy
# Expose only CUDA device 0 to GPU libraries running in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# 31 symbols: space, apostrophe, a-z, then '1', '2' and a second space.
# The length equals the 31 channels of the model's output layer; presumably
# this is the output alphabet.
# NOTE(review): the meaning of the three trailing entries is not visible here — confirm.
char_list = list(" '" + string.ascii_lowercase + '12 ')


In [2]:
def absmax(nd_array):
    """Return a symmetric value range ``(-m, m)`` for `nd_array`.

    ``m`` is the largest absolute value found anywhere in the array, so the
    returned pair is centred on zero (e.g. usable as symmetric plot limits).
    """
    peak = np.abs(nd_array).max()
    return (-peak, peak)

def elapsed_time(t_start,unit):
    """Print the wall-clock time that has passed since `t_start`.

    Args:
        t_start: start timestamp as returned by ``time.time()``.
        unit: label appended to the printed value. ``'min'`` converts the
            duration to minutes and ``'h'`` to hours; any other string leaves
            the value in seconds and is printed verbatim.
    """
    seconds_per_unit = {'min': 60, 'h': 3600}
    d = (time.time() - t_start) / seconds_per_unit.get(unit, 1)
    print('%.2f ' % d + unit + ' elapsed')

def build_dict(layer_id, input_tensor, spec, weights, bn_params):
    """Assemble a feed dict mapping variable names to stored parameter values.

    Args:
        layer_id: index of the deepest layer to include. The value 11 also
            adds the ``out`` layer's bias and kernel (taken from
            ``weights[11]`` and ``weights[12]``) and then covers the
            convolution layers 0..10.
        input_tensor: key under which the input is stored.
        spec: array that is transposed and wrapped in a one-element list
            (a batch of one) as the input value.
        weights: indexable collection; ``weights[i]`` is used as the kernel
            of ``conv_i``.
        bn_params: indexable collection holding four entries per layer, in
            the order beta, gamma, moving_mean, moving_variance.

    Returns:
        dict keyed by `input_tensor` and by ``"<scope>/<variable>:0"`` names.
    """
    feed = {input_tensor: [np.transpose(spec)]}

    last_conv = layer_id
    if layer_id == 11:
        feed['out/bias:0'] = weights[11]
        feed['out/kernel:0'] = weights[12]
        # The output layer has no batch norm; the conv stack ends one earlier.
        last_conv = layer_id - 1

    for idx in range(last_conv + 1):
        offset = idx * 4
        bn_scope = 'batch_norm_' + str(idx)
        feed['conv_' + str(idx) + '/kernel:0'] = weights[idx]
        feed[bn_scope + '/beta:0'] = bn_params[offset]
        feed[bn_scope + '/gamma:0'] = bn_params[offset + 1]
        feed[bn_scope + '/moving_mean:0'] = bn_params[offset + 2]
        feed[bn_scope + '/moving_variance:0'] = bn_params[offset + 3]

    return feed

def conv_layer(inputs, n_filter, kernel_size, stride, layer_id, out_layer=False):
    """Add one 1-D convolution layer (valid padding, channels last) to the graph.

    Args:
        inputs: input tensor of the layer.
        n_filter: number of convolution filters.
        kernel_size: kernel length.
        stride: convolution stride.
        layer_id: numeric suffix for the ``conv_<id>`` / ``batch_norm_<id>``
            variable scopes; not used when `out_layer` is true.
        out_layer: when true, build the biased convolution named ``out``
            without batch norm or activation.

    Returns:
        For a regular layer, the tuple ``(batch_norm_output, relu_output)``.
        For the output layer, the raw convolution output tensor.
    """
    common = dict(
        inputs=inputs,
        filters=n_filter,
        kernel_size=kernel_size,
        strides=stride,
        activation=None,
        padding="valid",
        data_format="channels_last",
    )

    if out_layer:
        return tf.layers.conv1d(use_bias=True, name="out", **common)

    tag = str(layer_id)
    # No bias here: the batch-norm layer that follows has its own beta offset.
    conv = tf.layers.conv1d(use_bias=False, name="conv_" + tag, **common)
    # training=False selects inference-mode batch norm.
    conv_bn = tf.layers.batch_normalization(
        conv,
        axis=2,
        training=False,
        name="batch_norm_" + tag)

    return (conv_bn, tf.nn.relu(conv_bn))

def build_w2l_model(weights,bn_params):
    """Build the 12-layer 1-D convolutional graph plus its sensitivity ops.

    The default graph is reset first. Eleven conv + batch-norm + ReLU layers
    are stacked on the input placeholder, followed by a biased ``out``
    convolution and a softmax. Gradients of the softmax output, weighted by
    the ``y`` placeholder, are then taken with respect to the input and to
    each of the eleven hidden layer outputs.

    Args:
        weights: not used while building the graph; kept for interface
            compatibility. Parameter values are supplied at run time through
            the feed dict (see ``build_dict``).
        bn_params: not used while building the graph; see `weights`.

    Returns:
        Tuple ``(x, layer_outs, y, grads)``:
            x: float32 input placeholder of shape ``(None, None, 128)``
                (channels last).
            layer_outs: list of 12 tensors, the ReLU outputs of layers 0..10
                followed by the softmax output.
            y: float32 placeholder of shape ``(1, 31)`` used as ``grad_ys``.
            grads: list of 12 tensors. Entry 0 is the gradient w.r.t. ``x``
                and entry ``i + 1`` the gradient w.r.t. ``layer_outs[i]``,
                each sliced to the first batch element.
    """
    tf.reset_default_graph()

    n_filters = [256,256,256,256,256,256,256,256,256,2048,2048,31]
    kernel_sizes = [48,7,7,7,7,7,7,7,7,32,1,1]
    strides = [2,1,1,1,1,1,1,1,1,1,1,1]
    out_id = len(n_filters) - 1  # index of the output layer (11)

    x = tf.placeholder(shape=(None,None,128),name='x',dtype=tf.float32)

    layer_outs = []
    pre_activations = []
    layer_input = x
    for i in range(out_id):
        p,l = conv_layer(layer_input,n_filters[i],kernel_sizes[i],strides[i],i)
        pre_activations.append(p)
        layer_outs.append(l)
        layer_input = l

    logits = conv_layer(layer_input,n_filters[out_id],kernel_sizes[out_id],
                        strides[out_id],out_id,out_layer=True)
    pre_activations.append(logits)
    layer_outs.append(tf.nn.softmax(logits))

    # Sensitivity: y weights the output classes whose gradient is propagated back.
    y = tf.placeholder(shape=(1,n_filters[out_id]),name='y',dtype=tf.float32)
    grads = [tf.gradients(layer_outs[out_id],x,grad_ys=y)[0][0,:,:]]
    for i in range(out_id):
        grads.append(tf.gradients(layer_outs[out_id],layer_outs[i],grad_ys=y)[0][0,:,:])

    return(x,layer_outs, y, grads)

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # pickle.load can execute arbitrary code; only use it on trusted files.
    with open(path, "rb") as input_file:
        return pickle.load(input_file)

# Stored convolution/output parameters and batch-norm parameters; they are
# fed into the graph's variables at run time via build_dict.
weights = _load_pickle("/data/asr_introspection/w2l_weights.pkl")
bn_params = _load_pickle("/data/asr_introspection/w2l_bn_params.pkl")

# Graph handles: input placeholder, per-layer outputs, relevance placeholder
# and the list of gradient tensors.
(spectrogram_in,
 model_out,
 relevance_in,
 sensitivity_out) = build_w2l_model(weights, bn_params)

In [12]:
# Run the model over every spectrogram that has a group-mapping file, store
# the forward activations, and store one gradient (sensitivity) file per
# 206-frame window of the spectrogram.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Feed dict overriding all conv / batch-norm / output variables with the
    # loaded parameters. The input entry built from None is only a stand-in
    # and is replaced for every file below.
    d = build_dict(11,spectrogram_in,None,weights,bn_params)

    data_dir = "/data/asr_introspection/"
    input_dir = data_dir + "spectrogram_input/"
    input_groupmappings = os.listdir(input_dir)
    input_groupmappings = [s for s in input_groupmappings if "group" in s]

    act_dir = data_dir + "activations/"
    grad_dir = data_dir + "gradients/"

    for f_id, f in enumerate(input_groupmappings):
        # Dropping the last 17 characters of the group-mapping file name gives
        # the base name shared with the spectrogram file.
        base_name = f[:-17]
        spectrogram = np.load(input_dir + base_name + ".npy")

        # Windows are 206 frames long and start every 2 frames.
        # NOTE(review): by the valid-padding arithmetic of the layer stack the
        # model emits (T-206)//2 + 1 output positions for T frames, so the last
        # position never gets a gradient file — confirm whether this is intended.
        n_positions = int((spectrogram.shape[1]-206)/2)
        index = n_positions - 1

        # The gradient file of the last position acts as the "already done" marker.
        if not os.path.isfile(grad_dir+base_name+"_grads_pos"+str(index).zfill(4)+".pkl"):
            groupmapping = np.load(input_dir + f)
            if(groupmapping.shape[0]>0):
                d[spectrogram_in] = [np.transpose(spectrogram)]
                forward_act = sess.run(model_out,feed_dict=d)
                with open(act_dir+base_name+"_act.pkl", 'wb') as pf:
                    pickle.dump(forward_act, pf, pickle.HIGHEST_PROTOCOL)

                # go through all positions
                for index in range(n_positions):
                    # One-hot relevance on the class with the highest softmax
                    # output at this position of the full forward pass.
                    init_relevance = np.zeros_like(forward_act[11][0,0,:])
                    init_relevance[np.argmax(forward_act[11][0,index,:])] = 1
                    d[spectrogram_in] = [np.transpose(spectrogram[:,(2*index):(2*index)+206])]
                    d[relevance_in] = np.reshape(init_relevance,[1,31])
                    grads = sess.run(sensitivity_out,feed_dict=d)
                    with open(grad_dir+base_name+"_grads_pos"+str(index).zfill(4)+".pkl", 'wb') as pf:
                        pickle.dump(grads, pf, pickle.HIGHEST_PROTOCOL)

        # Progress report every 250 files. This has to live inside the loop:
        # placed after it, it ran once at most and raised NameError on f_id
        # when no group-mapping files were found.
        if((f_id+1) % 250==0):
            print("finished " + str(f_id+1))

In [10]:
# Inspect the shape of the group mapping most recently loaded by the
# extraction loop (relies on kernel state left behind by that cell).
groupmapping.shape

(0, 4)

In [97]:
# Time-axis lengths for a single 206-frame input window: the input itself,
# then the output of each of the 12 layers. The values match valid-padding
# arithmetic on the model's kernel sizes and strides (conv_0 k=48/s=2 -> 80,
# eight k=7 layers -> 32, conv_9 k=32 -> 1, then two k=1 layers).
layer_ranges = [206,80,74,68,62,56,50,44,38,32,1,1,1]

In [7]:
# Scan every spectrogram that has a group-mapping file and report the global
# minimum and maximum value across all of them.
data_dir = "/data/asr_introspection/"
input_dir = data_dir + "spectrogram_input/"
input_groupmappings = [s for s in os.listdir(input_dir) if "group" in s]

act_dir = data_dir + "activations/"
grad_dir = data_dir + "gradients/"

minimum, maximum = np.inf, -np.inf

for f_id, f in enumerate(input_groupmappings):
    # The spectrogram shares the group-mapping file name minus its last 17 characters.
    spectrogram = np.load(input_dir + f[:-17] + ".npy")
    minimum = min(minimum, np.min(spectrogram))
    maximum = max(maximum, np.max(spectrogram))
print(minimum, maximum)

-4.977149 4.637802


In [4]:
# Value range of the one spectrogram currently bound to `spectrogram`, i.e.
# whichever file an earlier cell loaded last (depends on kernel state).
print(np.min(spectrogram),np.max(spectrogram))

-4.400844 3.539129
