In [2]:
import time

import itertools

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import torch

# Build a matrix of sentence combinations

In the BDI architecture, in order to select a plan to execute in the environment, the agent needs to check whether its context is entailed by the agent's belief base.
Formally, given a set of beliefs $B$ and a set of contexts $C$, the expression $\bigwedge_{c_i \in C}\bigvee_{b_j \in B} (b_j \models c_i)$ must be true, i.e., every context sentence must be entailed by at least one belief.

Since our work deals with natural language representations, we create a matrix that pairs every sentence in the belief base with every plan context sentence.
Specifically, given a set of natural language sentences representing the belief base $B$ and another set of sentences representing the context $C$, the inference operation computes the cartesian product $C \times B = \{(c, b) \mid c \in C \wedge b \in B\}$ to form all pairs $(c, b)$ that are given as input to the natural language inference model.
In our approach, we follow the expression $\bigwedge_{c_i \in C}\bigvee_{b_j \in B} (b_j \models c_i)$ and generate the cartesian product context-wise, i.e., the pairs are grouped by context sentence.

In [16]:
# Belief base B: natural-language sentences held by the agent.
beliefs = [
    'This room is called the kitchen.',
    'You see a cupboard. The cupboard door is closed.',
    'You see a freezer. The freezer door is closed.',
]

# Plan context C: the sentences that must be checked against the belief base.
context = [
    'you are in the kitchen',
    'you see a closed cupboard',
]

num_ctx_statements = len(context)
num_beliefs = len(beliefs)

# Cartesian product C x B. itertools.product varies its LAST argument fastest,
# so the result is already context-major: the pairs for context[i] occupy
# positions [i * num_beliefs, (i + 1) * num_beliefs).
# No sort is applied afterwards: ordering the pairs alphabetically by context
# text would break this positional correspondence with `context`.
all_sentence_pairs = list(itertools.product(context, beliefs))
print(f"Combinations = {len(all_sentence_pairs)}")
all_sentence_pairs

Combinations = 6


[('you are in the kitchen', 'This room is called the kitchen.'),
 ('you are in the kitchen',
  'You see a cupboard. The cupboard door is closed.'),
 ('you are in the kitchen', 'You see a freezer. The freezer door is closed.'),
 ('you see a closed cupboard', 'This room is called the kitchen.'),
 ('you see a closed cupboard',
  'You see a cupboard. The cupboard door is closed.'),
 ('you see a closed cupboard',
  'You see a freezer. The freezer door is closed.')]

In [15]:
# Load the NLI checkpoint (config, tokenizer, weights) and time how long it takes.
start = time.time()
print("Loading Model")

# NOTE(review): presumably the token limit for encoded sentence pairs —
# confirm it is actually passed to the tokenizer.
max_length = 256

hg_model_hub_name = "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"
# Alternative checkpoint (disabled): "alisawuffles/roberta-large-wanli"

# The config is loaded once; it carries the label <-> id mapping of the checkpoint.
config = AutoConfig.from_pretrained(hg_model_hub_name)
tokenizer = AutoTokenizer.from_pretrained(hg_model_hub_name)

# Read the entailment class id from the config instead of hard-coding it,
# so the lookup follows whatever mapping the selected checkpoint declares.
print(config.label2id)
entailment_idx = config.label2id['entailment']

# Inference is pinned to the CPU; the CUDA auto-detection is kept for reference.
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

nli_model = AutoModelForSequenceClassification.from_pretrained(hg_model_hub_name)
nli_model.to(device)

end = time.time()
print(f"Model loaded {end - start} - model {nli_model.device}")

Loading Model
{'entailment': 0, 'neutral': 1, 'contradiction': 2}


Some weights of the model checkpoint at ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model loaded 3.6675384044647217 - model cpu


# Manipulating the vectors produced by the NLI model
The next step is to apply the natural language inference model to each sentence pair contained in the cartesian product $C \times B$.
Given a sentence pair $p$ and the NLI model represented by the function $nli$, we generate a matrix $I$ whose rows are the inference results $nli(p)$ for each $p \in C \times B$.


In [18]:
# Tokenize every sentence pair into one padded batch (one row per pair).
# max_length bounds the encoded length so that truncation=True has an explicit limit.
# NOTE(review): each pair is encoded as (context, belief). NLI cross-encoders
# conventionally read the first text as the premise and the second as the
# hypothesis, whereas the notebook's formula is b_j |= c_i — confirm the
# intended direction.
tokenized_input_seq_pair = tokenizer.batch_encode_plus(
    all_sentence_pairs,
    max_length=max_length,
    return_token_type_ids=True,
    truncation=True,
    padding=True,
)

input_ids = torch.tensor(tokenized_input_seq_pair['input_ids'], device=device).long()
token_type_ids = torch.tensor(tokenized_input_seq_pair['token_type_ids'], device=device).long()
attention_mask = torch.tensor(tokenized_input_seq_pair['attention_mask'], device=device).long()

# Predict NLI results. This is inference only, so gradient tracking is disabled
# to avoid building an autograd graph for the forward pass.
with torch.no_grad():
    outputs = nli_model(input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        labels=None)

# outputs[0] holds the logits: one row per sentence pair, one column per label.
nli_result = outputs[0]
probs = torch.softmax(nli_result, dim=1)  # normalize logits over the label axis
predicted_classes = probs.argmax(-1)      # most probable label id for each pair
predicted_classes, probs

(tensor([0, 1, 1, 1, 0, 2]),
 tensor([[0.9472, 0.0512, 0.0016],
         [0.0119, 0.9803, 0.0078],
         [0.0051, 0.9707, 0.0242],
         [0.0130, 0.9798, 0.0072],
         [0.7888, 0.2086, 0.0026],
         [0.0429, 0.1872, 0.7699]], grad_fn=<SoftmaxBackward0>))

## Slicing matrix


In [25]:
# TODO: explain which rows represent the contexts
# The predictions are consumed in consecutive groups of `num_beliefs` rows:
# the i-th group holds one context statement paired with every belief.

# True where a sentence pair was classified as entailment.
entailment_mask = predicted_classes == entailment_idx

# One boolean slice per context statement: [c1 x B, c2 x B, ..., cn x B]
slice_idx = []
for i in range(num_ctx_statements):  # [c1, ..., cn]
    # Each group starts num_beliefs rows after the previous one.
    start_idx = i * num_beliefs
    ctx_slice = entailment_mask[start_idx:start_idx + num_beliefs]
    slice_idx.append(ctx_slice)
    print(f"c{i} x B = Slice {ctx_slice}")

# OR: a context statement holds if ANY belief in the belief base entails it.
# The slices are already boolean entailment masks, so they are reduced directly;
# comparing them against the label id again would invert them whenever that id is 0.
context_or = [ctx_slice.any().unsqueeze(0) for ctx_slice in slice_idx]
print("OR operation")
context_or

c0 x B = Slice tensor([ True, False, False])
c1 x B = Slice tensor([False,  True, False])
OR operation


[tensor([True]), tensor([True])]

In [21]:
# Join the per-context OR results (one single-element tensor each) into one vector.
or_tensor = torch.cat(context_or)
# AND: the plan context holds only when every context statement passed the OR stage.
and_result = torch.all(or_tensor)
and_result

tensor(True)