In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Some utilities to install things under colab. Run but ignore otherwise.

import sys
import subprocess
from pathlib import Path

# if running from within truera repo and want to use latest code, use this:
sys.path.insert(0, str(Path("../..").resolve()))

try: 
  import trulens
except: 
  !{sys.executable} -m pip install git+https://github.com/truera/trulens.git@piotrm/vis/output-detect

from trulens.utils.colab import install_if_not_installed, load_or_make

install_if_not_installed(["transformers", "pandas", "numpy", "domonic", "parmap"])

# Configure trulens
import os
os.environ['TRULENS_BACKEND']='torch'

from IPython.display import display
import matplotlib.pyplot as plt
import torch
import pandas as pd
import numpy as np
from pathlib import Path
import re

from torch.utils.data import DataLoader
from pandas import Series
from typing import Union
import parmap

transformers ... already installed
pandas ... already installed
numpy ... already installed
domonic ... already installed
parmap ... already installed


In [3]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from fastTSNE import TSNE
from fastTSNE.callbacks import ErrorLogger
import matplotlib.pyplot as plt
from IPython import display
from sklearn.decomposition import PCA
import numpy as np

# ! pip install jupyter ipywidgets ipympl mpld3
# ! pip install tsnecuda
# ! pip install tsnecuda==3.0.1+cu113 -f https://tsnecuda.isx.ai/tsnecuda_stable.html
# from sklearn.manifold import TSNE
# from tsnecuda import TSNE

# %matplotlib ipympl
# import mpld3
# mpld3.enable_notebook()

# Wrap all of the necessary components.
class TwitterSentiment:
    MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"

    # device = 'cpu'
    # Can also use cuda if available:
    device = 'cuda:0'

    model = AutoModelForSequenceClassification.from_pretrained(MODEL).to(device)

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    @staticmethod
    def tokenize(inputs):
        return TwitterSentiment \
            .tokenizer(inputs, padding=True, return_tensors="pt") \
            .to(TwitterSentiment.device)
        # pt refers to pytorch tensor

    labels = ['negative', 'neutral', 'positive']

    NEGATIVE = labels.index('negative')
    NEUTRAL = labels.index('neutral')
    POSITIVE = labels.index('positive')

task = TwitterSentiment()

from trulens.nn.models import get_model_wrapper

task.wrapper = get_model_wrapper(task.model, input_shape=(None, task.tokenizer.model_max_length), device=task.device)

INFO: lib level=1
INFO: root level=30
INFO: Detected pytorch backend for <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>.
INFO: Using backend Backend.PYTORCH.
INFO: If this seems incorrect, you can force the correct backend by passing the `backend` parameter directly into your get_model_wrapper call.
DEBUG: Input dtype was not passed in. Defaulting to `torch.float32`.


In [12]:
embedder = task.model.roberta.embeddings.word_embeddings
embeddings = embedder.weight.detach().cpu().numpy()

In [4]:
# Only needed if dataset not already present.

def download_tweets(filename):
  subprocess.call(
      ["wget", "http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip"]
  )
  subprocess.call(
      ["unzip", "trainingandtestdata.zip"]
  )

tweets = load_or_make(
    filename=Path("training.1600000.processed.noemoticon.csv"),
    loader=lambda filename: pd.read_csv(filename, encoding='ISO-8859-1', header=None, names=["polarity", "id", "timestamp", "query", "user", "text"]),
    downloader=download_tweets
)

tweets

loading training.1600000.processed.noemoticon.csv ... from file


Unnamed: 0,polarity,id,timestamp,query,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
...,...,...,...,...,...,...
1599995,4,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...
1599996,4,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...
1599997,4,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...
1599998,4,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...


In [5]:
# First a method to help us evaluate the model on a large collection of instances.
def eval_batched(data: Series, batch_size=128):
    """Evaluate the model `task.model` on given `data` tokenized by
    `task.tokenizer` in a set of batches. Return the logits."""

    # Might need to cleanup GPU ram:
    torch.cuda.empty_cache()

    sentences = DataLoader(data.to_numpy(), batch_size=batch_size)

    all_logits = []

    for batch in sentences:
        tokens = task.tokenizer(batch, padding=True, return_tensors='pt').to(task.device)
        logits = task.model(**tokens)['logits'].detach().to('cpu')
        del tokens
        all_logits += logits

    returning = np.stack(list(map(torch.Tensor.numpy, all_logits)))

    # Might need to cleanup GPU ram:
    torch.cuda.empty_cache()

    return returning

In [7]:
from trulens.visualizations import HTML, NLP
from trulens.nn.quantities import ClassQoI
from trulens.nn.attribution import IntegratedGradients, InputAttribution
from trulens.nn.attribution import Cut, OutputCut
from trulens.nn.distributions import GaussianDoi

V = NLP(
    # output=HTML(),
    wrapper=task.wrapper,
    labels=task.labels,
    decode=lambda x: task.tokenizer.decode(x),
    tokenize=task.tokenize,
    # huggingface models can take as input the keyword args as per produced by their tokenizers.

    input_accessor=lambda x: x['input_ids'],
    # for huggingface models, input/token ids are under input_ids key in the input dictionary

    output_accessor=lambda x: x['logits'],
    # and logits under 'logits' key in the output dictionary

    hidden_tokens=set([task.tokenizer.pad_token_id])
    # do not display these tokens
)


Attributions are, by default, indicated by color and colored bars above (positive) and below (negative) each token. There seems to be barely any attribution shown. Lets take a look at some of the tweets from the real-world dataset.

In [8]:
from trulens.utils.nlp import token_baseline_swap

inputs_swap_baseline_ids, inputs_swap_baseline_embeddings = token_baseline_swap(
    # token1 = 205, # " good"
    token1 = 8396, # "good"
    # token2 = 1099, # " bad"
    token2 = 10999, # "bad"
    # Which tokens to preserve.

    input_accessor=lambda x: x.kwargs['input_ids'],

    ids_to_embeddings=task.model.get_input_embeddings()
    # Callable to produce embeddings from token ids.
)

from trulens.utils.nlp import token_baseline

inputs_baseline_ids, inputs_baseline_embeddings = token_baseline(
    keep_tokens=set([task.tokenizer.cls_token_id, task.tokenizer.bos_token_id, task.tokenizer.eos_token_id]),
    # Which tokens to preserve.

    replacement_token=task.tokenizer.pad_token_id,
    # What to replace tokens with.

    input_accessor=lambda x: x.kwargs['input_ids'],
    # input_accessor = lambda x: x, 

    ids_to_embeddings=task.model.get_input_embeddings()
    # Callable to produce embeddings from token ids.
)

In [9]:
from trulens.utils.typing import ModelInputs

sentences = ["good", "bad"]

# Input sentences need to be tokenized first.

inputs = task.tokenize(sentences)

# The tokenizer gives us vocabulary indexes for each input token (in this case,
# words and some word parts like the "'m" part of "I'm" are tokens).

print(inputs)

# Decode helps inspecting the tokenization produced:

print(task.tokenizer.batch_decode(torch.flatten(inputs['input_ids'])))
# Normally decode would give us a single string for each sentence but we would
# not be able to see some of the non-word tokens there. Flattening first gives
# us a string for each input_id.

print("originals=", task.tokenizer.batch_decode(inputs['input_ids']))

baseline_word_ids = inputs_swap_baseline_ids(model_inputs=ModelInputs(kwargs=inputs))
print("baselines=", task.tokenizer.batch_decode(baseline_word_ids))

baseline_embeddings = inputs_swap_baseline_embeddings(model_inputs=ModelInputs(kwargs=inputs))
# These are not useful to print.
# print("embeddings=", baseline_embeddings)

{'input_ids': tensor([[    0,  8396,     2],
        [    0, 10999,     2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1],
        [1, 1, 1]], device='cuda:0')}
['<s>', 'good', '</s>', '<s>', 'bad', '</s>']
originals= ['<s>good</s>', '<s>bad</s>']
baselines= ['<s>bad</s>', '<s>good</s>']


In [13]:
def closest_token(embeddings, emb):
    diffs = embeddings - emb
    # print(diffs.shape)
    distances = np.linalg.norm(diffs, ord=2, axis=1)
    # print(distances.shape)
    closest = np.argsort(distances)
    # print(closest.shape)
    return closest[0], distances[closest[0]]

In [14]:
idx, dist = closest_token(embeddings, np.zeros(768))

In [15]:
print(task.tokenizer.decode(idx), dist)

<unk> 1.6685961165969307


In [16]:
from trulens.nn.distributions import PointDoi, GaussianDoi, LinearDoi
from trulens.utils.typing import ModelInputs

In [17]:
sample_inputs = task.tokenizer("I'm so happy!", return_tensors='pt').to("cuda")
sample_ids = sample_inputs['input_ids'].cuda()[0]
sample_embs = embedder(sample_ids.cuda())

In [18]:
print(sample_ids.shape)
print(sample_embs.shape)

torch.Size([7])
torch.Size([7, 768])


In [19]:
infl = IntegratedGradients(
    model = task.wrapper,
    resolution=10,
    baseline = inputs_swap_baseline_embeddings,
    doi_cut=Cut('roberta_embeddings_word_embeddings'),
    qoi=ClassQoI(task.POSITIVE),
    qoi_cut=OutputCut(accessor=lambda o: o['logits']),
    return_doi=True,
    return_grads=True
)

from trulens.nn.backend import get_backend
from trulens.utils.typing import nested_cast
B = get_backend()

def tokens_over_doi(attr, texts, doi):
    sample_inputs = task.tokenizer(texts, return_tensors='pt').to("cuda")

    samples_ids = sample_inputs['input_ids'].cuda()
    samples_embs = embedder(sample_ids.cuda()).cpu().detach().numpy()

    pieces = attr._attributions(ModelInputs(kwargs=sample_inputs))

    attributions = np.array(pieces.attributions)
    gradients = np.array(pieces.gradients)
    interventions = np.array(nested_cast(backend=B, astype=np.ndarray, args=pieces.interventions))

    # print(attributions.shape)
    # print(gradients.shape)
    # print(interventions.shape)

    for sid, sentence in enumerate(texts):
        sentence_ids = samples_ids[sid]
        attr = attributions[0,0,sid]
        grad = gradients[0,0,:,sid]
        interv = interventions[0,:,sid]

        # print(attr.shape)
        # print(grad.shape)
        # print(interv.shape)

        base_embs = samples_embs[sid]

        for iid in range(len(interv)):
            grad_aggr = (grad[iid] * base_embs).sum(axis=1)

            print(f"  {grad_aggr.sum():0.6f} ", end='')

            for word_idx in range(len(sentence_ids)):
                word_emb = base_embs[word_idx]

                word_id = sentence_ids[word_idx]
                word_token = task.tokenizer.decode(word_id)

                grad_word = grad_aggr[word_idx]

                interv_emb = B.as_array(interv[iid][word_idx])
                # interv_emb = path[i][0][word_idx]
                close_id, close_dist = closest_token(embeddings, interv_emb)
                # print(close_id)
                close_emb = embeddings[close_id]
                close_token = task.tokenizer.decode(close_id)

                print(f"{close_token}({grad_word:0.6f})", end=' ')
            
            print()

            # print(word_emb[0:2], word_id, word_token, close_emb[0:2], close_id, close_token, close_dist, interv_emb[0:2])

tokens_over_doi(attr=infl, texts=["good", "bad"], doi=GaussianDoi(resolution=11, var=0.25))

  -0.422179 <s>(-0.092397) good(-0.150222) </s>(-0.179559) 
  -0.362087 <s>(-0.088914) good(-0.104691) </s>(-0.168482) 
  -0.092288 <s>(-0.069510) good(0.117756) </s>(-0.140535) 
  1.989581 <s>(0.201561) good(1.928535) </s>(-0.140514) 
  0.209282 <s>(0.068908) good(0.043100) </s>(0.097274) 
  0.116812 <s>(0.052731) bad(-0.035391) </s>(0.099472) 
  0.019320 <s>(0.049992) bad(-0.121936) </s>(0.091263) 
  0.001710 <s>(0.044023) bad(-0.118377) </s>(0.076064) 
  0.060362 <s>(0.033159) bad(-0.033409) </s>(0.060612) 
  0.091551 <s>(0.030195) bad(0.018963) </s>(0.042393) 
  0.180389 <s>(0.065135) bad(0.151349) </s>(-0.036095) 
  0.209812 <s>(0.068865) bad(0.150447) </s>(-0.009500) 
  0.272792 <s>(0.084352) bad(0.140600) </s>(0.047841) 
  0.302432 <s>(0.104441) bad(0.095476) </s>(0.102515) 
  0.343412 <s>(0.137907) bad(0.072415) </s>(0.133090) 
  0.356934 <s>(0.104592) good(0.097356) </s>(0.154986) 
  2.081960 <s>(-0.183929) good(1.866831) </s>(0.399058) 
  0.057272 <s>(-0.060681) good(0.120154

In [36]:
from trulens.visualizations import HTML

infl = IntegratedGradients(
    model = task.wrapper,
    resolution=10,
    baseline = inputs_baseline_embeddings,
    # baseline = inputs_swap_baseline_embeddings,
    doi_cut=Cut('roberta_embeddings_word_embeddings'),
    qoi=ClassQoI(task.POSITIVE),
    qoi_cut=OutputCut(accessor=lambda o: o['logits']),
    return_doi=True,
    return_grads=True
)

V = NLP(
    # output=HTML(),
    wrapper=task.wrapper,
    labels=task.labels,
    decode=lambda x: task.tokenizer.decode(x),
    tokenize=task.tokenize,
    # huggingface models can take as input the keyword args as per produced by their tokenizers.

    input_accessor=lambda x: x['input_ids'],
    # for huggingface models, input/token ids are under input_ids key in the input dictionary

    output_accessor=lambda x: x['logits'],
    # and logits under 'logits' key in the output dictionary

    hidden_tokens=set([task.tokenizer.pad_token_id]),
    # do not display these tokens

    embeddings=embeddings,
    embedder=embedder
)

V.tokens(texts=["good this is", "bad this is"], attributor=infl, show_doi=True)

RuntimeError: The size of tensor a (5) must match the size of tensor b (768) at non-singleton dimension 2

In [None]:
emb_reduced = pca.transform(embeddings)

In [None]:
%matplotlib ipympl
plt.ion()

In [None]:
fig, ax = plt.subplots(1,1, figsize=(8,8))

errors = []

def showme(iteration, error, embedding):
    ax.clear()
    # ax.scatter(embedding[:,0], embedding[:,1], s=0.5, m=',')
    ax.scatter(embedding[:,0], embedding[:,1], s=1.0, marker='.', alpha=0.5)

    errors.append(error)

    # ax[1].clear()
    # ax[1].plot(errors)

    fig.canvas.draw()
    fig.canvas.flush_events()

In [None]:
tsne = TSNE(n_components=2, n_jobs=-1, n_iter=100, callbacks=showme, callbacks_every_iters=10, neighbors='exact', negative_gradient_method='fft')

emb = tsne.prepare_initial(emb_reduced[0:2000])

tsne.callbacks = [showme]

In [None]:
emb = emb.optimize(n_iter=1000)

In [None]:
toks = [task.tokenizer.decode(i, clean_up_tokenization_spaces=True).replace(" ", "_") for i in range(len(emb))]

annots = []

for i, e in enumerate(emb):
    tok = toks[i]
    if len(annots) <= i:
        annots.append(ax.annotate(tok, xy=e, alpha=0.5))#, fontsize=1)
    else:
        annots[i].xy = e
    # print(i, e, tok)

In [None]:
emb.transform()

In [None]:
fig.canvas.draw()
fig.canvas.flush_events()

In [None]:
import numpy as np
def map_to_2d(embs):
    embs_pca = pca.transform(embs)
    embs_2d = emb.transform(embs_pca)
    return embs_2d

# map_to_2d(np.zeros((1, 768)))

In [None]:
emin = embeddings.min()
emax = embeddings.max()

axxs = np.zeros((101,768))

axxs[:,0] = np.linspace(emin, emax, 101)

In [None]:
axis_in_2d = map_to_2d(axxs)

In [None]:
ax.plot(axis_in_2d[:,0], axis_in_2d[:,1])

In [None]:
! export LD_LIBRARY_PATH=/usr/local/cuda-11/lib64
import os, sys

print(os.environ['LD_LIBRARY_PATH'])

import tsnecuda
tsnecuda.test()

In [None]:
fig.show()

In [None]:
import umap
u = umap.UMAP()

In [None]:
u.fit(emb_reduced)

In [None]:
u.embedding_

In [None]:
plt.scatter(u.embedding_[:,0], u.embedding_[:,1], s=1, marker='.')