In [2]:
import json, ujson, sys, faiss, numpy as np
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:

with open('../data/train-claims.json') as f:
    train = json.load(f)

with open('../data/dev-claims.json') as f:
    dev = json.load(f)

with open('../data/evidence.json') as f:
    evidence = ujson.load(f)  


In [5]:
import torch
print(torch.cuda.is_available())


True


In [None]:
embedder = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L6-v2',
    device="cuda")
ids   = list(evidence.keys())
vecs  = embedder.encode(list(evidence.values()),
                        batch_size=64,
                        convert_to_numpy=True,
                        normalize_embeddings=True)
index = faiss.IndexFlatIP(vecs.shape[1]);  index.add(vecs)



In [11]:
def topk_ids(text: str, k: int = 3):
    # single 1-D numpy vector
    v = embedder.encode(text,
                        convert_to_numpy=True,
                        normalize_embeddings=True)
    # search expects shape (1, dim)
    _, I = index.search(v[None], k)
    return [ids[i] for i in I[0]]


In [10]:
LLM = "microsoft/phi-2"
tok = AutoTokenizer.from_pretrained(LLM)
lm  = AutoModelForCausalLM.from_pretrained(
          LLM, load_in_4bit=True, device_map="auto")
gen = pipeline("text-generation", model=lm, tokenizer=tok,
               max_new_tokens=60, temperature=0.0)

Q_PROMPT = ("Convert the following claim into ONE neutral yes/no question.\n"
            "Claim: {claim}\nQuestion:")
A_PROMPT = ("Answer ONLY with the minimal sentence from the doc that answers "
            "the question.\nQuestion: {q}\nDocument: {d}\nAnswer:")

def question(claim):           # claim -> neutral Q
    return gen(Q_PROMPT.format(claim=claim))[0]["generated_text"].split("\n")[-1].strip()

def extract_sentence(q, doc):  # (Q, passage) -> answer sentence
    out = gen(A_PROMPT.format(q=q, d=doc))[0]["generated_text"]
    s   = out.split("Answer:")[-1].strip()
    return s if 4 < len(s.split()) < 60 else None

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.97s/it]
Device set to use cuda:0


In [14]:
out = {}
for cid, itm in tqdm(dev.items(), desc="RAG"):
    q      = question(itm["claim_text"])
    pairs  = []
    for eid in topk_ids(itm["claim_text"], k=10):
        sent = extract_sentence(q, evidence[eid]) or evidence[eid]
        pairs.append({"id": eid, "text": sent})
    out[cid] = pairs



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
RAG:   1%|          | 1/154 [00:16<42:24, 16.63s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad

In [16]:
OUT_PATH = "../data/dev-evidence.json"
json.dump(out, open(OUT_PATH,"w"), indent=2)
print(f"✓ wrote {OUT_PATH}")

✓ wrote ../data/dev-evidence.json
