In [None]:
from pathlib import Path

from transformers import AutoTokenizer, pipeline
from transformers.pipelines.pt_utils import KeyDataset
import datasets, pandas as pd

# Hugging Face Hub id of the checkpoint; reused below as the pipeline's model.
target = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
# Tokenizer published with that checkpoint, handed explicitly to the pipeline later.
tokenizer = AutoTokenizer.from_pretrained(target)

In [None]:
# Sentence frame for the NLI hypothesis; "{}" is filled with one stance phrase.
hypothesis_template = "The stance of this tweet {}."

# Dataset label ("<target>, <stance>") -> phrase slotted into the hypothesis.
# Insertion order is kept: it is the order the candidates reach the pipeline.
template = {
    "Clinton, Favor": "favors Clinton",
    "Clinton, Against": "is against Clinton",
    "Clinton, None": "does not express any stance toward Clinton",
    "Trump, Favor": "favors Trump",
    "Trump, Against": "is against Trump",
    "Trump, None": "does not express any stance toward Trump",
}

# Series indexed by dataset label whose values are the hypothesis phrases.
id2label = pd.Series(template, name=0)

# Inverse lookup: hypothesis phrase -> dataset label.
label2id = dict(zip(id2label.tolist(), id2label.index))

# Evaluation set; the inference cell reads its "prompt" column as model input.
test = pd.read_csv("data/semeval_test.csv")
# Wrap the frame as a `datasets.Dataset` so KeyDataset can stream it through the pipeline.
test_ds = datasets.Dataset.from_pandas(test)

In [None]:
# Zero-shot classification pipeline using the checkpoint id and tokenizer defined earlier.
pipe = pipeline(task="zero-shot-classification", model=target, tokenizer=tokenizer)

In [None]:
# Score every tweet's "prompt" against all stance hypotheses. The pipeline
# returns, per input, its "labels" ordered from most to least likely, so the
# first entry is the predicted stance phrase. multi_label=False makes the
# candidates compete with each other instead of being scored independently.
outputs = pipe(
    KeyDataset(test_ds, "prompt"),
    id2label.tolist(),
    hypothesis_template=hypothesis_template,
    multi_label=False,
    batch_size=16,
)
preds = [out["labels"][0] for out in outputs]

# Translate each phrase back to its dataset label with an exact dictionary
# lookup (no regex/substring matching). Every prediction is one of the
# candidate phrases, so each value has a mapping.
pred_labels = pd.Series(preds, name="preds").map(label2id)
test_preds = pd.concat([test, pred_labels], axis=1)

# Create the output directory first so a missing folder cannot throw away
# the results of the (slow) inference run above.
output_path = Path("predicted_labels/semeval_deberta_mnli.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
test_preds.to_csv(output_path, index=False)