In [1]:
import sys
import librosa
import string
import numpy as np
import pickle
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter
import tensorflow as tf
import time
import os
import copy
# Expose only CUDA device "0" to this process (set before any TF session is created).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Character table: space, apostrophe, a-z, '1', '2', space (31 entries).
# NOTE(review): presumably the label alphabet of the 31-way output layer; it is
# not referenced anywhere else in the visible code -- confirm before relying on it.
char_list = list(" '" + string.ascii_lowercase + '12 ')

def absmax(nd_array):
    """Return symmetric limits ``(-m, m)`` for ``nd_array``.

    ``m`` is the largest absolute value found anywhere in the array, so the
    returned pair is a zero-centred range that covers every element (handy as
    colour limits for a diverging colormap).
    """
    peak = np.max(np.absolute(nd_array))
    lower, upper = -peak, peak
    return (lower, upper)

def elapsed_time(t_start,unit):
    """Print the wall-clock time that has passed since ``t_start``.

    Args:
        t_start: Start timestamp as returned by ``time.time()``.
        unit: Label printed after the number. ``'min'`` and ``'h'`` convert the
            duration to minutes / hours; any other label leaves it in seconds.

    Prints ``"<duration with two decimals> <unit> elapsed"`` and returns None.
    """
    seconds_per_unit = {'min': 60, 'h': 3600}

    duration = time.time() - t_start
    divisor = seconds_per_unit.get(unit)
    if divisor is not None:
        duration /= divisor

    message = ('%.2f ' % duration) + (unit + ' elapsed')
    print(message)

In [2]:
# Directory holding the exported model parameters and analysis artifacts.
# Defined first so every path below is derived from this single setting.
data_dir = "/data/asr_introspection/"

# CAUTION: pickle.load can execute arbitrary code embedded in the file.
# Only load these artifacts from a trusted location.
with open(data_dir + "w2l_weights.pkl", "rb") as input_file:
    weights = pickle.load(input_file)
with open(data_dir + "w2l_bn_params.pkl", "rb") as input_file:
    bn_params = pickle.load(input_file)

# Signed unit indices, later indexed as [char_id, layer_id + 1, :].
responsive_units_array = np.load(data_dir + "responsive_units_array_signed.npy")

# The vocabulary pickle holds three entries; only the first two are used.
with open(data_dir + "vocabularies.pkl", "rb") as input_file:
    graphemes_list, phonemes_list, _ = pickle.load(input_file)
# Graphemes first, then phonemes.
joint_list = graphemes_list + phonemes_list

In [382]:
# Peek at the entries stored at [1, 5, :]. The plotting loop reads this array as
# [char_id, layer_id + 1, :] and treats the values as signed unit indices.
responsive_units_array[1,5,:]

array([241, 252,  16, -64,  58], dtype=int32)

In [300]:
# Show the dimensions of the responsive-units table.
print(responsive_units_array.shape)
# Scratch derivation (kept for reference, not executed) of the receptive-field
# widths 48, 60, ..., 144, 206 used by the model/plotting code: start at the
# first kernel width (48), add 2*6 for each of the eight kernel-size-7 layers,
# then 2*31 for the kernel-size-32 layer.
# NOTE(review): the factor 2 presumably stems from the stride-2 first layer.
# kernel_sizes = [48,7,7,7,7,7,7,7,7,32,1,1]
#       strides = [2,1,1,1,1,1,1,1,1, 1,1,1]

# r=48
# print(r)
# for i in range(8):
#     r= r+(2*6)
#     print(r)
# r= r+(2*31)
# print(r)

(67, 12, 5)


In [4]:
def build_dict(layer_id, input_tensor, weights, bn_params):
    """Map variable names of the network to the stored parameter values.

    Args:
        layer_id: Index of the deepest layer whose parameters are needed.
            The value 11 additionally adds the output layer's bias/kernel
            (``weights[11]`` / ``weights[12]``) and then covers conv layers 0..10.
        input_tensor: Unused; kept so existing call sites keep working.
        weights: Indexable collection; entry ``i`` is the kernel of ``conv_i``.
        bn_params: Indexable collection with four consecutive entries per layer,
            in the order beta, gamma, moving_mean, moving_variance.

    Returns:
        dict keyed by names such as ``'conv_3/kernel:0'`` or
        ``'batch_norm_3/gamma:0'`` (suitable as a ``feed_dict``).
    """
    feed = {}

    if layer_id == 11:
        feed['out/bias:0'] = weights[11]
        feed['out/kernel:0'] = weights[12]
        last_conv = layer_id - 1
    else:
        last_conv = layer_id

    # Order matters: it matches the layout of the four-entry groups in bn_params.
    bn_fields = ('beta', 'gamma', 'moving_mean', 'moving_variance')

    for i in np.arange(last_conv + 1):
        feed['conv_{}/kernel:0'.format(i)] = weights[i]
        for offset, field in enumerate(bn_fields):
            feed['batch_norm_{}/{}:0'.format(i, field)] = bn_params[(i * 4) + offset]

    return feed

def conv_layer(inputs, n_filter, kernel_size, stride, layer_id, out_layer=False):
    """Add one frozen 1-D convolution stage to the current graph.

    Args:
        inputs: Input tensor in channels-last layout.
        n_filter: Number of convolution filters.
        kernel_size: Width of the convolution kernel.
        stride: Convolution stride.
        layer_id: Used to name the variables (``conv_<id>``, ``batch_norm_<id>``).
            Ignored for the output layer, which is always named ``out``.
        out_layer: When true, build the biased output convolution without
            batch normalisation or ReLU.

    Returns:
        For hidden layers a tuple ``(batch-normalised pre-activation, ReLU output)``;
        for the output layer the raw convolution result.
    """
    if out_layer:
        # Output stage: convolution with bias only.
        return tf.layers.conv1d(
            inputs=inputs,
            filters=n_filter,
            kernel_size=kernel_size,
            strides=stride,
            activation=None,
            use_bias=True,
            padding="valid",
            name="out",
            data_format="channels_last",
            trainable=False)

    # Hidden stage: bias-free convolution -> batch norm (inference mode) -> ReLU.
    pre_bn = tf.layers.conv1d(
        inputs=inputs,
        filters=n_filter,
        kernel_size=kernel_size,
        strides=stride,
        activation=None,
        use_bias=False,
        padding="valid",
        name="conv_" + str(layer_id),
        data_format="channels_last",
        trainable=False)

    normalized = tf.layers.batch_normalization(
        pre_bn,
        axis=2,
        training=False,
        name="batch_norm_" + str(layer_id),
        trainable=False)

    activated = tf.nn.relu(normalized)
    return (normalized, activated)

def build_w2l_model(weights,bn_params,layer_id,filter_ids,
                    n_perturbations=5,noise_stddev=0.05,learning_rate=0.05,
                    l1_scale=15,l2_scale=0.1):
    """Build a graph that optimises an input to drive selected units of one layer.

    The default graph is reset, the 12-stage network is rebuilt with frozen
    layers, and a trainable input ``x`` of shape [1, 206, 128] is created.
    The objective uses the layer output at time step 0, summed over the noisy
    copies of the input: units listed with a positive index are maximised,
    units listed with a negative index are minimised. An L1/L2 penalty on ``x``
    is added, scaled inversely with the layer's receptive-field width.

    Args:
        weights: Not used while building the graph (parameter values are
            supplied by the caller through ``feed_dict``); kept for the interface.
        bn_params: Not used while building the graph; kept for the interface.
        layer_id: Index (0..11) of the layer whose units are targeted.
        filter_ids: Integer array of signed unit indices. The absolute value
            selects the unit, the sign selects maximise (+) or minimise (-).
            NOTE: an index of 0 has sign 0 and therefore contributes nothing.
        n_perturbations: Number of noisy copies of ``x`` pushed through the net.
        noise_stddev: Standard deviation of the Gaussian noise added to the copies.
        learning_rate: Adam learning rate.
        l1_scale: L1 penalty numerator (divided by the receptive-field width).
        l2_scale: L2 penalty numerator (divided by the receptive-field width).

    Returns:
        ``[train_op, x, loss]``.
    """
    tf.reset_default_graph()

    n_filters = [256,256,256,256,256,256,256,256,256,2048,2048,31]
    kernel_sizes = [48,7,7,7,7,7,7,7,7,32,1,1]
    strides = [2,1,1,1,1,1,1,1,1,1,1,1]
    receptive_field_sizes = [48,60,72,84,96,108,120,132,144,206,206,206]

    # Trainable input, initialised close to zero.
    # NOTE(review): 128 is presumably the number of spectrogram channels -- confirm.
    x = tf.get_variable("x", [1,206,128], dtype=tf.float32,
                        initializer=tf.initializers.random_normal(mean=0,stddev=0.001))

    # Replicate the input and add independent noise to every copy so the
    # optimised input is robust to small random perturbations.
    x_perturbated = tf.concat([x] * n_perturbations, 0)
    x_perturbated = x_perturbated + tf.random_normal(tf.shape(x_perturbated),mean=0,stddev=noise_stddev)

    # Hidden stages 0..10: keep both the batch-normalised pre-activation and the ReLU output.
    layer_outs = []
    pre_activations = []
    layer_input = x_perturbated
    for i in range(11):
        p,l = conv_layer(layer_input,n_filters[i],kernel_sizes[i],strides[i],i)
        pre_activations.append(p)
        layer_outs.append(l)
        layer_input = l

    # Output stage 11: biased convolution followed by softmax.
    pre_activations.append(conv_layer(layer_outs[10],n_filters[11],kernel_sizes[11],strides[11],11,out_layer=True))
    layer_outs.append(tf.nn.softmax(pre_activations[11]))

    # Only the first time step is targeted; its receptive field covers the
    # leading receptive_field_sizes[layer_id] input frames.
    first_step = layer_outs[layer_id][:,0,:]
    selected = tf.gather(first_step,np.abs(filter_ids),axis=1)
    # Negate so that minimising the loss maximises positively-signed units.
    loss = tf.reduce_sum(np.sign(filter_ids)*(-1)*tf.reduce_sum(selected,axis=0))

    # Regularisation strength depends on the receptive-field width of the layer.
    loss = loss + tf.contrib.layers.apply_regularization(
        regularizer=tf.contrib.layers.l1_l2_regularizer(l1_scale / receptive_field_sizes[layer_id],
                                                        l2_scale / receptive_field_sizes[layer_id]),
        weights_list=[x]
    )

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())

    return([train_op,x,loss])

# --- Configuration ----------------------------------------------------------
# Receptive-field width (in input frames) per layer; also used for subplot widths.
receptive_field_sizes = [48,60,72,84,96,108,120,132,144,206,206,206]
n_steps = 16          # optimisation steps per run
print_each = 0        # print a progress marker every `print_each` steps (0 = silent); e.g. np.int(n_steps/5)
n_repetitions = 1     # independent runs averaged per (character, layer)
layers = np.arange(11)
characters = np.arange(len(joint_list))

figure_dir = "/project/asr_introspection/figures/"
fig_format = "pdf"

# The fed parameter values do not change between runs, so build the dict once.
d = build_dict(11,None,weights,bn_params)

# --- One figure per character, one panel per layer ---------------------------
for char_id in characters:
    if joint_list[char_id] == " ":
        continue

    print(joint_list[char_id],end='')
    fig = plt.figure(figsize=(100,5))
    # Panel widths are proportional to the receptive-field width of each layer.
    grid = plt.GridSpec(1, len(layers), hspace=0.2, wspace=0.2,
                        width_ratios=np.array(receptive_field_sizes)[:-1]/48)
    main_ax = fig.add_subplot(grid[0])
    main_ax.axis('off')

    for l_id, layer_id in enumerate(layers):
        # build_w2l_model resets the default graph itself before building.
        ops = build_w2l_model(weights, bn_params, layer_id,
                              responsive_units_array[char_id,layer_id+1,:])
        mean_optimal_input = np.zeros([1,206,128])
        rep = 0
        # NOTE: this loop only terminates once the final run reaches an
        # amplitude of at least 0.1.
        while rep < n_repetitions:
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                for i in range(n_steps):
                    if print_each>0 and i%print_each==print_each-1:
                        print(str(i+1),end='.')
                    _, optimal_input,tracking_loss = sess.run(ops,feed_dict=d)
            mean_optimal_input = mean_optimal_input + optimal_input
            rep += 1
            if rep==n_repetitions and np.max(np.abs(optimal_input))<0.1:
                # The final run stayed near zero: start over. The accumulator is
                # cleared as well, otherwise the discarded runs would still be
                # part of the sum that is divided by n_repetitions below.
                rep = 0
                mean_optimal_input = np.zeros([1,206,128])
        mean_optimal_input = mean_optimal_input / n_repetitions

        ax = fig.add_subplot(grid[l_id], sharey=main_ax)
        # Show only the frames inside the receptive field; flip the channel axis
        # so that channel 0 is drawn at the bottom.
        shown = np.transpose(mean_optimal_input[0,:receptive_field_sizes[layer_id],:])[::-1,:]
        image = ax.imshow(shown,cmap='bwr')
        # Zero-centred colour range taken from the averaged input that is displayed.
        image.set_clim(*absmax(mean_optimal_input[0,:,:]))
        fig.colorbar(image, ax=ax)
        ax.set_yticks([])
        ax.set_xticks([])

    # NOTE(review): the label is used verbatim in the file name; labels differing
    # only in case (e.g. 's' / 'S') would collide on case-insensitive file systems.
    fig.savefig(figure_dir + "visualized_features_char_"+joint_list[char_id]+"."+fig_format,
                dpi=300,
                format=fig_format,
                bbox_inches='tight')
    plt.close(fig)
    print(" - plot saved")

' - plot saved
a - plot saved
b - plot saved
c - plot saved
d - plot saved
e - plot saved
f - plot saved
g - plot saved
h - plot saved
i - plot saved
j - plot saved
k - plot saved
l - plot saved
m - plot saved
n - plot saved
o - plot saved
p - plot saved
q - plot saved
s - plot saved
t - plot saved
u - plot saved
v - plot saved
w - plot saved
x - plot saved
y - plot saved
z - plot saved
AA - plot saved
AE - plot saved
AH - plot saved
AO - plot saved
AW - plot saved
AY - plot saved
B - plot saved
CH - plot saved
D - plot saved
DH - plot saved
EH - plot saved
ER - plot saved
EY - plot saved
F - plot saved
G - plot saved
HH - plot saved
IH - plot saved
IY - plot saved
JH - plot saved
K - plot saved
L - plot saved
M - plot saved
N - plot saved
NG - plot saved
OW - plot saved
OY - plot saved
P - plot saved
R - plot saved
S - plot saved
SH - plot saved
T - plot saved
TH - plot saved
UH - plot saved
UW - plot saved
V - plot saved
W - plot saved
Y - plot saved
Z - plot saved
ZH - plot saved


In [394]:
# Receptive-field widths relative to the first layer's 48 frames; the plotting
# loop uses all but the last of these values as subplot width ratios.
np.array([48,60,72,84,96,108,120,132,144,206,206,206])/48

array([1.        , 1.25      , 1.5       , 1.75      , 2.        ,
       2.25      , 2.5       , 2.75      , 3.        , 4.29166667,
       4.29166667, 4.29166667])