In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Some utilities to install things under colab. Run but ignore otherwise.

import sys
import importlib
import subprocess
from pathlib import Path

# If running from within the truera repo and you want to use the latest local
# code, use this: it puts the directory two levels above the current working
# directory at the front of the module search path.
# NOTE(review): the path is relative to the working directory, not to this
# notebook's location -- confirm the notebook is launched from its own folder.
sys.path.insert(0, str(Path("../..").resolve()))

def install_if_not_installed(packages):
  """Install each given package with pip unless it can already be imported.

  Args:
    packages: Iterable whose items are either a string, used both as the
      module name to import and as the pip requirement, or a
      `(module_name, pip_requirement)` tuple for when the two differ (for
      example when installing from a git URL).

  Raises:
    subprocess.CalledProcessError: If `pip install` exits with a non-zero
      status.
  """

  for package in packages:
    if isinstance(package, tuple):
      package_name, package_package = package
    else:
      package_name = package_package = package

    print(f"{package_name} ... ", end='')

    try:
      importlib.import_module(package_name)

    except ImportError:
      # Only a failed import means "not installed". A bare `except:` would also
      # swallow KeyboardInterrupt/SystemExit and start a pip install that the
      # user was trying to cancel; other errors raised while importing an
      # installed package are now surfaced instead of being hidden.
      print(f"installing from {package_package}")
      subprocess.check_call(
          [sys.executable, "-m", "pip", "install", package_package]
      )
      # Let the import system notice the freshly installed package.
      importlib.invalidate_caches()

    else:
      print("already installed")

def load_or_make(filename: Path, loader, maker=None, saver=None, downloader=None):
  """Return the object stored at `filename`, producing it first if needed.

  Resolution order:
    1. If `filename` exists, return `loader(filename)`.
    2. Otherwise, if `maker` is given, build the object with `maker()`,
       persist it with `saver(filename, thing)` when a `saver` is given, and
       return it.
    3. Otherwise, if `downloader` is given, call `downloader(filename)` and
       return `loader(filename)`.

  Args:
    filename: Path of the file to load from / save to.
    loader: Callable taking `filename` and returning the loaded object.
    maker: Optional zero-argument callable that builds the object.
    saver: Optional callable taking `(filename, thing)` that stores the object
      built by `maker`. When omitted the object is returned without being
      saved.
    downloader: Optional callable taking `filename`; `loader` is called on
      `filename` right after it returns.

  Returns:
    The loaded or newly made object.

  Raises:
    ValueError: If the file does not exist and neither `maker` nor
      `downloader` was provided.
  """

  print(f"loading {filename} ... ", end="")

  if filename.exists():
    print("from file")
    return loader(filename)

  if maker is not None:
    print("using maker")
    thing = maker()
    # `saver` defaults to None; calling it unconditionally raised a TypeError
    # after the (possibly expensive) maker had already run.
    if saver is not None:
      saver(filename, thing)
    return thing

  if downloader is not None:
    print("using downloader")
    downloader(filename)
    return loader(filename)

  raise ValueError("provide a maker/saver or downloader.")

# Make sure everything this notebook imports is available, installing whatever
# is missing. trulens is given as a (module name, pip requirement) pair because
# it is installed from a git URL rather than by its module name.
# NOTE(review): the git reference after "@" looks like a development branch and
# is not pinned to a commit, so what gets installed may change over time.
install_if_not_installed(
    [("trulens", "git+https://github.com/truera/trulens.git@piotrm/vis/output-detect"),
     "transformers",
     "pandas",
     "numpy",
     "domonic",
     "parmap"]
)

# Configure trulens through the TRULENS_BACKEND environment variable.
# NOTE(review): presumably this must be set before trulens is first imported
# for it to take effect -- confirm.
import os
os.environ['TRULENS_BACKEND']='torch'

import torch
from pathlib import Path

trulens ... already installed
transformers ... 

  from .autonotebook import tqdm as notebook_tqdm


already installed
pandas ... already installed
numpy ... already installed
domonic ... already installed
parmap ... already installed


# Minimal Examples

## Typical NLP usage with Hugging Face

In [3]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

# Wrap all of the necessary components.
class TwitterSentiment:
    """Bundle of the model, tokenizer and label names for the sentiment task.

    Everything is a class attribute, so the pretrained model and tokenizer are
    loaded once, when the class body is executed.
    """

    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

    # Use the first CUDA device when one is available; otherwise fall back to
    # the CPU so the notebook still runs on machines without a GPU.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    model = AutoModelForSequenceClassification.from_pretrained(MODEL).to(device)

    tokenizer = AutoTokenizer.from_pretrained(MODEL)

    @staticmethod
    def tokenize(inputs):
        """Tokenize `inputs` with padding and move the result to `device`.

        Returns whatever the tokenizer produces for `return_tensors="pt"`
        ("pt" refers to pytorch tensors), after `.to(device)`.
        """
        return TwitterSentiment \
            .tokenizer(inputs, padding=True, return_tensors="pt") \
            .to(TwitterSentiment.device)

    # Readable class names; the position of a name in this list is used as the
    # class index.
    labels = ['negative', 'neutral', 'positive']

    NEGATIVE = labels.index('negative')
    NEUTRAL = labels.index('neutral')
    POSITIVE = labels.index('positive')

task = TwitterSentiment()

In [4]:
sentences = ["I'm so happy!", "I'm so sad!", "I cannot tell whether I should be happy or sad!", "meh"]

# Text has to be tokenized before it can be given to the model.
inputs = task.tokenize(sentences)

# The tokenizer output holds one vocabulary index per input token (tokens here
# are words and some word parts, such as the "'m" part of "I'm").
print(inputs)

# Decoding whole sentences would give a single string per sentence and hide
# some of the non-word tokens. Flattening the ids first gives one string per
# input id, which makes the tokenization easy to inspect.
print(task.tokenizer.batch_decode(inputs['input_ids'].flatten()))

outputs = task.model(**inputs)

print(outputs)

# The index of the largest logit is the most likely class of a sentence; look
# up its readable label.
predictions = [task.labels[i] for i in outputs.logits.argmax(dim=1)]

for sentence, logits, prediction in zip(sentences, outputs.logits, predictions):
    print(logits.detach().cpu().numpy(), prediction, sentence)

{'input_ids': tensor([[   0,  100,  437,   98, 1372,  328,    2,    1,    1,    1,    1,    1,
            1],
        [   0,  100,  437,   98, 5074,  328,    2,    1,    1,    1,    1,    1,
            1],
        [   0,  100, 1395, 1137,  549,   38,  197,   28, 1372,   50, 5074,  328,
            2],
        [   0, 1794,  298,    2,    1,    1,    1,    1,    1,    1,    1,    1,
            1]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0')}
['<s>', 'I', "'m", ' so', ' happy', '!', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<s>', 'I', "'m", ' so', ' sad', '!', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<s>', 'I', ' cannot', ' tell', ' whether', ' I', ' should', ' be', ' happy', ' or', ' sad', '!', '</s>', '<s>', 'me', 'h', '</s>', '<pad>', '<

## TruLens usage

In [5]:
# setup visualization
from trulens.visualizations import NLP

V = NLP(
    labels=task.labels,
    decode=lambda ids: task.tokenizer.decode(ids),

    # huggingface models can take as input the keyword args produced by their
    # tokenizers.
    tokenize=task.tokenize,

    # for huggingface models, input/token ids are under the 'input_ids' key of
    # the input dictionary
    input_accessor=lambda model_inputs: model_inputs['input_ids'],

    # and logits are under the 'logits' key of the output dictionary
    output_accessor=lambda model_outputs: model_outputs['logits'],

    # do not display these tokens
    hidden_tokens={task.tokenizer.pad_token_id}
)

In [6]:
# tokens visualization for the example sentences
V.tokens(texts=sentences)

In [7]:
# setup model wrapping
from trulens.nn.models import get_model_wrapper

V.wrapper = get_model_wrapper(
    task.model,
    input_shape=(None, task.tokenizer.model_max_length),
    device=task.device
)

# output visualization: same call as before, now with a wrapped model attached
V.tokens(texts=sentences)

INFO: lib level=1
INFO: root level=30
INFO: Detected pytorch backend for <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>.
INFO: Using backend Backend.PYTORCH.
INFO: If this seems incorrect, you can force the correct backend by passing the `backend` parameter directly into your get_model_wrapper call.
DEBUG: Input dtype was not passed in. Defaulting to `torch.float32`.


In [9]:
# find which layer to attribute to (embedding layer)
# The names printed here are the identifiers passed to `Cut(...)` in the
# attribution cells (e.g. 'roberta_embeddings_word_embeddings').

V.wrapper.print_layer_names()

'roberta_embeddings_word_embeddings':	Embedding(50265, 768, padding_idx=1)
'roberta_embeddings_position_embeddings':	Embedding(514, 768, padding_idx=1)
'roberta_embeddings_token_type_embeddings':	Embedding(1, 768)
'roberta_embeddings_LayerNorm':	LayerNorm((768,), eps=1e-05, elementwise_affine=True)
'roberta_embeddings_dropout':	Dropout(p=0.1, inplace=False)
'roberta_encoder_layer_0_attention_self_query':	Linear(in_features=768, out_features=768, bias=True)
'roberta_encoder_layer_0_attention_self_key':	Linear(in_features=768, out_features=768, bias=True)
'roberta_encoder_layer_0_attention_self_value':	Linear(in_features=768, out_features=768, bias=True)
'roberta_encoder_layer_0_attention_self_dropout':	Dropout(p=0.1, inplace=False)
'roberta_encoder_layer_0_attention_output_dense':	Linear(in_features=768, out_features=768, bias=True)
'roberta_encoder_layer_0_attention_output_LayerNorm':	LayerNorm((768,), eps=1e-05, elementwise_affine=True)
'roberta_encoder_layer_0_attention_output_dropou

In [11]:
# attribution visualization

from trulens.nn.quantities import ClassQoI
from trulens.nn.attribution import Cut, IntegratedGradients, OutputCut

infl_positive = IntegratedGradients(
    model=V.wrapper,
    resolution=20,

    # attribute to the word embedding layer found in the layer listing
    doi_cut=Cut('roberta_embeddings_word_embeddings'),

    # quantity of interest: the positive class, read from the 'logits' entry
    # of the model output
    qoi=ClassQoI(task.POSITIVE),
    qoi_cut=OutputCut(accessor=lambda out: out['logits'])
)

V.tokens(texts=sentences, attributor=infl_positive)

In [12]:
# setup baseline
from trulens.utils.nlp import token_baseline

inputs_baseline_ids, inputs_baseline_embeddings = token_baseline(
    # Which tokens to preserve.
    keep_tokens={
        task.tokenizer.cls_token_id,
        task.tokenizer.bos_token_id,
        task.tokenizer.eos_token_id
    },

    # What to replace tokens with.
    replacement_token=task.tokenizer.pad_token_id,

    input_accessor=lambda model_inputs: model_inputs.kwargs['input_ids'],

    # Callable to produce embeddings from token ids.
    ids_to_embeddings=task.model.get_input_embeddings()
)

# attribution with baseline: same setup as the attribution without a baseline,
# plus the baseline embeddings computed above
infl_positive_baseline = IntegratedGradients(
    model=V.wrapper,
    resolution=20,
    baseline=inputs_baseline_embeddings,
    doi_cut=Cut('roberta_embeddings_word_embeddings'),
    qoi=ClassQoI(task.POSITIVE),
    qoi_cut=OutputCut(accessor=lambda out: out['logits'])
)

V.tokens(texts=sentences, attributor=infl_positive_baseline)

In [13]:
def swap(word1, word2):
    """Build a function that exchanges `word1` and `word2` in a sentence.

    Args:
        word1: Text to be replaced by `word2`.
        word2: Text to be replaced by `word1`.

    Returns:
        A function taking a sentence (str) and returning a copy in which every
        occurrence of `word1` is replaced by `word2` and vice versa. Matching
        is by plain substring, not by whole word.
    """
    import re  # local import keeps this helper self-contained

    # Both words are matched in one left-to-right pass. Going through a
    # placeholder string (the previous approach used ":temp:") corrupts
    # sentences that already contain the placeholder text.
    pattern = re.compile(f"{re.escape(word1)}|{re.escape(word2)}")

    def f(sentence):
        return pattern.sub(
            lambda match: word2 if match.group(0) == word1 else word1,
            sentence
        )
    return f

# stability visualization: compare each sentence with its copy in which
# "happy" and "sad" are exchanged
V.tokens_stability(
    texts1=sentences,
    texts2=[swap("happy", "sad")(text) for text in sentences],
    attributor=infl_positive_baseline
)