In [208]:
import os
# When the kernel was started in the parent "code" directory, step down into
# the project folder so that relative paths and local imports used by the
# following cells resolve against the repository.
launched_from_parent = os.getcwd() == '/home/user/code'
if launched_from_parent:
    os.chdir('/home/user/code/nlp2024_ClefTask4SOTA')

In [209]:
from TexSoup import TexSoup

def _first_child(node):
    """Return the first entry of ``node.contents``, or None when there is none."""
    children = list(node.contents)
    return children[0] if children else None


def section_split(tex):
    """Split a LaTeX document into named chunks.

    The result starts with up to three special entries, each present only
    when the document provides it:

    * ``"title"``    - first child of the title node
    * ``"abstract"`` - first child of the abstract node
    * ``"tables"``   - string form of every ``table`` node, joined by newlines

    After those, the raw text is cut at every ``section`` heading found by
    TexSoup. Text before the first heading is stored under the name
    ``"pre"``; each following chunk is named after the first child of its
    heading (or ``""`` when the heading is empty).

    A heading whose string form cannot be located verbatim in the remaining
    text is skipped, so its text simply stays in the surrounding chunk.

    returns: List of tuples (section_name, section_text)
    """
    soup = TexSoup(tex)
    sections = []

    title = soup.title
    if title:
        title_text = _first_child(title)
        if title_text is not None:
            sections.append(("title", title_text))

    abstract = soup.abstract
    if abstract:
        abstract_text = _first_child(abstract)
        if abstract_text is not None:
            sections.append(("abstract", abstract_text))

    # find_all may hand back a lazy iterator, which is truthy even when it
    # yields nothing; collect the matches first so emptiness can be tested
    # and the document is only searched once.
    tables = [str(node) for node in soup.find_all("table")]
    if tables:
        sections.append(("tables", "\n".join(tables)))

    # extract latex sections and corresponding text
    remaining = tex
    current_name = "pre"
    for heading in soup.find_all("section"):
        before, found, after = remaining.partition(str(heading))
        if not found:
            # The parser's rendering of the heading differs from the raw
            # text; keep going instead of aborting the whole split.
            continue
        sections.append((current_name, before))
        heading_name = _first_child(heading)
        current_name = heading_name if heading_name is not None else ""
        remaining = after
    sections.append((current_name, remaining))
    return sections

In [210]:
# TODO: Define Prompt function
import ollama

def contains_qradruple(tex):
    """Build the yes/no prompt asking whether ``tex`` reports a leaderboard result.

    ``tex`` is interpolated into a fixed instruction text that asks the model
    to answer only "yes" or "no".

    NOTE: the wording is reproduced exactly as it was first written,
    including the doubled "if" and the four-space runs that the original
    line continuations embedded in the string, so that answers stay
    comparable with earlier runs.
    """
    pad = "    "
    pieces = (
        "Decide if if the following text mentions a result on a benchmark leaderboard, including the task, dataset, metric and the score. ",
        f"{pad}Ignore the content of the text, only output yes, if it contains a leaderboard mention or no, if no such information is included. \n ",
        f"{pad}Text: \n {tex} \n ",
        f"{pad}Answer with yes or no only without providing any other text. ",
    )
    return "".join(pieces)

def pass_to_ollama(prompt, model):
    """Send ``prompt`` to the Ollama model ``model`` and return the generated text.

    This is a best-effort call: if generation raises an ordinary exception,
    or the reply has no ``"response"`` entry, a warning is logged and an
    empty string is returned so that a long evaluation loop keeps going.
    KeyboardInterrupt and SystemExit are deliberately not caught, so the
    run can still be interrupted.
    """
    import logging  # local import keeps this definition self-contained

    try:
        res = ollama.generate(model=model, prompt=prompt)
        return res["response"]
    except Exception as exc:
        # An empty answer is later treated as "not yes"; log the failure so
        # an unreachable server is not mistaken for negative predictions.
        logging.getLogger(__name__).warning(
            "ollama.generate failed for model %r: %r", model, exc
        )
        return ""

def _is_yes(answer):
    """Return True when a model reply is "yes", ignoring case and surrounding whitespace/punctuation."""
    return str(answer).lower().strip(" \t\r\n.,;:!?\"'`*") == "yes"


def section_wise_detection(tex, llama_fn):
    """Decide whether any section of ``tex`` is judged to contain a leaderboard result.

    The document is split with ``section_split``; if splitting raises, the
    whole text is used as a single section named "full". Each section is
    wrapped in the prompt from ``contains_qradruple`` and passed to
    ``llama_fn`` (a callable taking the prompt and returning the reply).

    Returns True as soon as one reply is a "yes" (sections after that are not
    queried), otherwise False.
    """
    try:
        sections = section_split(tex)
    except Exception:
        sections = [("full", tex)]  # in the case the structure cannot be parsed
    print(f"{len(sections)} sections")

    for _section_name, section_text in sections:
        reply = llama_fn(contains_qradruple(section_text))
        # Replies such as "Yes." or "yes\n" count as positive, too.
        if _is_yes(reply):
            return True
    return False



In [215]:
# run on train data to evaluate method
from tqdm import tqdm
from dataset import BinaryTDMSDataset, PATH, write_annotation_file
from datetime import datetime


import os

import pandas as pd

# Number of training documents to score. Kept at 1 for quick smoke runs;
# set to None to evaluate the whole training set.
N_TRAIN_SAMPLES = 1

train_dataset = BinaryTDMSDataset(PATH.TRAIN)
model = "llama3:8b"

# One timestamp shared by the annotation run id and the results file name.
# (The results file previously referred to an `execution_start` that was
# never defined in this notebook; it reuses the run id's format here.)
execution_start = datetime.now().strftime('%m%d%Y-%H%M%S')
run_id = f"baseline-{model.replace(':', '')}-{execution_start}"


def llama3_fn(prompt):
    """Send ``prompt`` to the configured model and return its reply."""
    return pass_to_ollama(prompt, model)


num_docs = len(train_dataset) if N_TRAIN_SAMPLES is None else N_TRAIN_SAMPLES

results = []
for i in tqdm(range(num_docs)):
    f, tex, ground_truth = train_dataset[i]
    pred = section_wise_detection(tex, llama3_fn)
    write_annotation_file(run_id, f, pred)
    results.append((f, ground_truth, pred))

# Passing the column names up front also works when no rows were collected.
df = pd.DataFrame(results, columns=["file", "ground_truth", "pred"])

# Make sure the output folder exists before writing into it.
os.makedirs("results", exist_ok=True)
df.to_feather(f"results/zs_train_{execution_start}.feather")


ImportError: cannot import name 'write_annotation_file' from 'dataset' (/home/user/code/nlp2024_ClefTask4SOTA/dataset.py)

# Evaluation

In [None]:
# pct of success

# Flag every row whose prediction agrees with the label, then show the
# fraction of flagged rows.
df["success"] = df["ground_truth"].eq(df["pred"])
n_correct = len(df.loc[df["success"]])
n_correct / len(df)

# .76

In [None]:
# confusion matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Cross-tabulate the labels against the predictions and draw the matrix.
cm = confusion_matrix(y_true=df["ground_truth"], y_pred=df["pred"])
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()

In [None]:
# random trial
import random

# Random baseline: predict True with the same probability as the share of
# True labels, and measure how often that agrees with the label.
RANDOM_BASELINE_SEED = 0  # fixed so the baseline is reproducible across runs
baseline_rng = random.Random(RANDOM_BASELINE_SEED)

num_true = len(df[df["ground_truth"]])
num_tot = len(df)
pct_true = num_true / num_tot

# Draw one independent number per row. Assigning a single
# random.uniform(0, 1) value to the column would give every row the same
# number, i.e. the same "random" prediction for all documents.
df["random"] = [baseline_rng.uniform(0, 1) for _ in range(num_tot)]
df["random_success"] = df["ground_truth"] == (df["random"] <= pct_true)
len(df[df["random_success"]]) / len(df)

# duh..
# .634 (random with distribution), .758 (baseline llama)
#   NOTE(review): .634 was presumably recorded while all rows still shared a
#   single random draw; re-run this cell to refresh the number.
# codalab: .53 (llama 2 baseline), .83 (top score)

In [None]:
# TODO: Test set evaluation

from src.dataset import BinaryTDMSDataset, PATH

# Build the dataset object for the test split (BinaryTDMSDataset over PATH.TEST).
test_dataset = BinaryTDMSDataset(PATH.TEST)

def get_index(folder):
    """Return the position of ``folder`` within ``test_dataset.all_paths``.

    Every entry of ``all_paths`` is unpacked as a 3-tuple and its first field
    is compared with ``folder`` (presumably the paper's folder name - verify
    against the dataset class). Raises ValueError when no entry matches.
    """
    first_fields = []
    for first, _second, _third in test_dataset.all_paths:
        first_fields.append(first)
    return first_fields.index(folder)

# Fetch one specific test item by its folder name for manual inspection.
sample_position = get_index("0706.0014")
i, tex, jsn = test_dataset[sample_position]
