# Explainable AI applied to assessors

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs, so edits to imported source files are picked up
# without restarting the kernel (presumably for the `lass` package below).
%load_ext autoreload
%autoreload 2

In [3]:
import logging
from typing import *
from pathlib import Path

import pandas as pd

from transformers.models.auto.modeling_auto import AutoModelForSequenceClassification

import lass.datasets
import lass.pipeline
from lass.log_handling import LogLoader, LoaderArgs

In [5]:
# Checkpoint of the fine-tuned assessor to explain. The commented-out paths are
# alternative checkpoints; uncomment one (and comment the active line) to switch.
# model_loc = "../artifacts/assessors/deberta-nt-bs16*2-0sh-instance-split-07120027/checkpoint-8000"
# model_loc = "../artifacts/assessors/deberta-nt-bs16*2-0sh-task-split-07121735/checkpoint-2000"
model_loc = "../artifacts/assessors/deberta-nt-better-split-bs16*2-0sh-instance-split-07141059/checkpoint-9000"

# Name used to fetch the tokenizer, and the length inputs are truncated to.
model_name = "microsoft/deberta-v3-base"
max_sequence_length = 512

# Arguments forwarded to the train/test split.
split = 'instance'
test_fraction = 0.2
seed = 42

# Selection of logged results to load.
loader_args = LoaderArgs(
    logdir="../artifacts/logs",
    tasks="paper-full",
    model_families=["BIG-G T=0"],
    model_sizes=["128b"],
    shots=[0],
    query_types=["multiple_choice"],
)

In [8]:
# Read the logged results selected by `loader_args` into a DataFrame and
# augment it.
loader = LogLoader.from_args(loader_args)
data = lass.pipeline.augment(lass.datasets.to_dataframe(loader))
logging.info("Loaded data.")

data = lass.pipeline.prepend_extra_features(
    data,
    include_model=False,
    include_n_targets=True,
)

# The transformers-interpret library copes poorly with inputs that would need
# truncation, so the inputs are truncated here, before they reach it.
data['input'] = lass.pipeline.truncate(data['input'], model_name, max_sequence_length)

# Split first, then binarize each part on its own.
train_raw, test_raw = lass.datasets.split(
    split,
    data,
    test_fraction=test_fraction,
    seed=seed,
)
train = lass.pipeline.binarize(train_raw)
test = lass.pipeline.binarize(test_raw)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/79 [00:00<?, ?ba/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'correct'] = df['correct'].astype(int)


In [31]:
from transformers_interpret import SequenceClassificationExplainer

# Load the assessor checkpoint as a two-label sequence classifier and wrap it,
# together with the tokenizer for `model_name`, in an attribution explainer.
model = AutoModelForSequenceClassification.from_pretrained(model_loc, num_labels=2)
tokenizer = lass.pipeline.get_tokenizer(model_name)
cls_explainer = SequenceClassificationExplainer(model, tokenizer)  # type: ignore

# Keep only the test instances of one task. reset_index() gives the subset a
# fresh 0..n-1 index; the previous index is retained as a regular column.
df = test.loc[test['task'] == "epistemic_reasoning"].reset_index()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Compute token-level attributions for every instance in `df` and stream them
# to a CSV file, one row per token:
#   word          token as returned by the explainer
#   contribution  attribution score of that token towards LABEL_1
#   LM_score      the instance's `correct` value
#   Assr_pred     class index predicted by the assessor for the instance
path = Path("xai.csv")
columns = ['word', 'contribution', 'LM_score', 'Assr_pred']

# Start from a fresh file that contains only the header; each instance's rows
# are appended below, so partial results survive an interrupted run.
pd.DataFrame(columns=columns).to_csv(path, index=False)

n_instances = len(df)
# Iterate by position rather than by index label so that the progress counter
# does not depend on how `df` happens to be indexed.
for position, (text, lm_correct) in enumerate(zip(df['input'], df['correct'])):
    if position % 50 == 0:
        print(f"{position}/{n_instances}")

    # (token, score) pairs attributing the input towards LABEL_1
    # (presumably the "LM answers correctly" class; confirm against training labels).
    attributions = cls_explainer(text, class_name='LABEL_1')

    frame = pd.DataFrame(attributions, columns=columns[:2])
    frame['LM_score'] = lm_correct
    # Cast to a plain int so the column holds a scalar regardless of the
    # numeric wrapper type the explainer returns.
    frame['Assr_pred'] = int(cls_explainer.predicted_class_index)
    # Select by `columns` so the row layout always matches the header.
    frame[columns].to_csv(path, mode='a', header=False, index=False)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


0/139
50/139
100/139


In [12]:
# NOTE(review): this reads "xai_intent_recognition.csv", whereas the attribution
# loop in this notebook writes "xai.csv". Presumably the input comes from an
# earlier run whose output was renamed by hand; confirm before re-running.
words = pd.read_csv("xai_intent_recognition.csv")

# Per-token statistics: mean attribution and number of occurrences.
# (To restrict to one LM outcome, filter `words` first,
#  e.g. words.query('LM_score == 1').)
per_word = words.groupby('word').agg(  # type: ignore
    mean_contribution=('contribution', 'mean'),
    word_count=('word', 'count'),
)

# Rank by mean attribution, then keep only tokens seen at least 10 times.
ranked = per_word.sort_values('mean_contribution', ascending=False)
mean = ranked[ranked['word_count'] >= 10]
mean.to_csv("../artifacts/tmp/xai.csv")

In [38]:
# Attribute a single example (the row labelled 6 in `df`) towards LABEL_1 and
# render the explainer's visualisation of it.
example_text = df.at[6, 'input']
cls_explainer(example_text, class_name='LABEL_1')
# Bind the return value, presumably so the cell does not also echo its repr.
_v = cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_0 (0.00),LABEL_1,-1.83,"[CLS] ▁N _ TAR GET S : ▁2 ▁Identify ▁the ▁relation ▁between ▁the ▁following ▁premises ▁and ▁hypotheses ▁, ▁choosing ▁from ▁the ▁options ▁' en tail ment ' ▁or ▁' non - en tail ment ' ▁. ▁Premise : ▁Taylor ▁thinks ▁that ▁William ▁believes ▁that ▁while ▁traveling ▁, ▁a ▁woman ▁in ▁a ▁black ▁jacket ▁is ▁resting ▁on ▁the ▁shoulder ▁of ▁a ▁man ▁in ▁a ▁black ▁jacket ▁. ▁Hypothesis : ▁William ▁believes ▁that ▁while ▁traveling ▁, ▁a ▁woman ▁in ▁a ▁black ▁jacket ▁is ▁resting ▁on ▁the ▁shoulder ▁of ▁a ▁man ▁in ▁a ▁black ▁jacket ▁. ▁Relation : [SEP]"
,,,,
