In [1]:
import json
import os
import sys

import matplotlib
matplotlib.use('Agg')  # choose the Agg backend before anything imports pyplot

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch

from allennlp.models.archival import load_archive
from allennlp.service.predictors import Predictor
from allennlp.modules.elmo import Elmo, batch_to_ids
from allennlp.modules.matrix_attention.linear_matrix_attention import LinearMatrixAttention
from allennlp.nn import util

from act_aware_dst.act_dst import ADST
from act_aware_dst.act_dst_reader import ADSTReader
from act_aware_dst.act_dst_predictor import ADSTPredictor



In [2]:
# Load the archived model so the parameters of its `_ds_attention` module
# can be copied into a stand-alone attention layer.
archive = load_archive('model_act_512/model.tar.gz')

parameters = {}
attention_weight = {}

# Fully-qualified parameter name -> key used in `attention_weight`.
ds_attention_keys = {
    '_ds_attention._weight_vector': "_weight_vector",
    '_ds_attention._bias': "_bias",
}

for param_name, param in archive.model.named_parameters():
    state_key = ds_attention_keys.get(param_name)
    if state_key is None:
        # Not one of the two attention parameters we are after.
        continue
    print(param.size())
    attention_weight[state_key] = param
    # Also keep the raw tensors under their own names.
    if state_key == "_weight_vector":
        weights = param.data
    else:
        bias = param.data



torch.Size([1836])
torch.Size([1])


In [3]:
# Options and weights of the pretrained ELMo model, referenced by URL.
options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"

# Exactly one output representation per token is requested, with dropout
# set to 0.
elmo = Elmo(
    options_file=options_file,
    weight_file=weight_file,
    num_output_representations=1,
    dropout=0,
)


In [29]:
# Token lists that are embedded and compared in the following cells.
# `acts` holds a single sentence whose tokens are dialogue acts; `ds` holds
# one single-token sentence per domain-slot name.

# Full dialogue-act inventory. It is overwritten by the short list right
# below, so only the short list is visible to later cells.
acts = [[
    "select", "recommend", "reqmore", "book", "offerbook", "offerbooked",
    "nooffer", "greet", "bye", "request", "inform", "welcome", "nobook",
]]
acts = [["welcome", "select", "recommend", "recommand"]]

# NOTE(review): three hotel entries ('hotel  bookpeople', 'hotel  area',
# 'hotel  stars') contain a double space. They are kept verbatim here —
# confirm whether that is intentional.
slot_names = [
    'restaurant area', 'restaurant pricerange', 'restaurant food',
    'restaurant name', 'restaurant bookpeople', 'restaurant booktime',
    'restaurant bookday',
    'hotel pricerange', 'hotel type', 'hotel parking', 'hotel bookstay',
    'hotel bookday', 'hotel  bookpeople', 'hotel  area', 'hotel  stars',
    'hotel internet', 'hotel name',
    'train destination', 'train departure', 'train day', 'train arriveby',
    'train leaveat', 'train bookpeople',
    'attraction area', 'attraction name', 'attraction type',
    'taxi destination', 'taxi departure', 'taxi arriveby', 'taxi leaveat',
]

# Wrap every slot name in its own one-element list.
ds = [[slot_name] for slot_name in slot_names]

In [30]:
# Turn the token lists into character-id tensors, then embed them with ELMo.
# The acts are processed before the slots in both steps.
# NOTE(review): if this ELMo module carries LSTM state from one call to the
# next, the embeddings depend on the call history — confirm when re-running.
acts_char_ids, ds_char_ids = batch_to_ids(acts), batch_to_ids(ds)
acts_embeddings, ds_embeddings = elmo(acts_char_ids), elmo(ds_char_ids)

# Show the shape of the first ELMo representation of each input.
for embedded in (acts_embeddings, ds_embeddings):
    print(embedded["elmo_representations"][0].shape)

torch.Size([1, 4, 512])
torch.Size([30, 1, 512])


In [31]:
# Recreate the slot/act attention layer and fill it with the parameters
# collected from the archived model.
# NOTE(review): the layer is declared with 612-dim inputs, while the ELMo
# vectors fed to it below were printed as 512-dim — confirm that the loaded
# weights actually line up with these inputs.
ds_attention = LinearMatrixAttention(612, 612, 'x,y,x*y')
ds_attention.load_state_dict(attention_weight)

num_acts = len(acts[0])
num_slots = len(ds)

slot_vectors = ds_embeddings["elmo_representations"][0]
# Repeat the single act sentence once per slot so both inputs share the
# same leading dimension.
act_vectors = acts_embeddings["elmo_representations"][0].repeat(num_slots, 1, 1)

act_value_sim = ds_attention(slot_vectors, act_vectors)

# Softmax over the acts for every slot (no mask), then restore the
# (slots, 1, acts) layout.
flat_scores = util.masked_softmax(act_value_sim.view(-1, num_acts), None)
act_att_scores = flat_scores.view(num_slots, 1, num_acts)

act_att_array = act_att_scores.cpu().detach().numpy()
print(act_att_array.shape)

(30, 1, 4)


In [32]:
# Draw the slot-by-act attention scores as a heat map and save it as a PNG.
# Rows are the entries of `ds`, columns are the tokens of `acts[0]`.
matrix = act_att_scores.squeeze(1).cpu().detach().numpy()
ds_list = [x[0] for x in ds]

fig, ax = plt.subplots()
im = ax.matshow(matrix)

# One tick per act (x) and per slot (y), with the x ticks drawn at the
# bottom of the axes.
ax.set_xticks(np.arange(len(acts[0])))
ax.set_yticks(np.arange(len(ds)))
ax.xaxis.tick_bottom()
ax.set_xticklabels(acts[0], rotation=90, fontsize=8)
ax.set_yticklabels(ds_list, fontsize=8)
ax.set_title("Slots and Acts Attention")

# Run the layout only after the title and tick labels exist, so that they
# are taken into account when the margins are computed.
fig.tight_layout()

# Create the target directory first; saving fails if it is missing.
output_path = "output/attention_10.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path)