In [4]:
import os
# If the kernel was started in the parent directory, switch into the project
# checkout. Presumably the relative paths and the `src.*` imports in later cells
# depend on this working directory — TODO confirm.
# NOTE(review): both paths are absolute and machine-specific.
if os.getcwd() == '/home/user/code':
    os.chdir('/home/user/code/nlp2024_ClefTask4SOTA')

In [5]:
from TexSoup import TexSoup
import re

def find_sections(tex):
    r"""Return the titles of all ``\section{...}`` commands in ``tex``, in document order.

    The title is the text between the opening brace of ``\section{`` and its
    *matching* closing brace on the same line. Nested groups such as
    ``\section{The \textbf{X} method}`` are kept intact, and anything following
    the heading on the same line (for example ``\label{...}``) is not included.
    Starred sections (``\section*{...}``) and ``\subsection`` are not matched.

    Args:
        tex: LaTeX source as a string.

    Returns:
        List of title strings. For every returned title ``t`` the literal
        ``\section{t}`` occurs in ``tex``.
    """
    # Raw string: the pattern is a literal backslash, "section" and "{".
    opener = re.compile(r"\\section\{")
    titles = []
    cursor = 0
    while (found := opener.search(tex, cursor)) is not None:
        start = found.end()
        end = _matching_brace(tex, start)
        if end is None:
            # No balanced closing brace on this line: skip this heading.
            cursor = start
            continue
        titles.append(tex[start:end])
        cursor = end + 1
    return titles


def _matching_brace(tex, start):
    """Return the index of the brace closing a group opened just before ``start``, or None."""
    depth = 1
    pos = start
    while pos < len(tex):
        char = tex[pos]
        if char == "\n":
            # Titles are only recognised within a single line.
            return None
        if char == "\\":
            # Skip the escaped character so that \{ and \} are not counted.
            pos += 2
            continue
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return None


def extract_content(node, name):
    """Pair ``name`` with the first entry of ``node.contents``.

    Args:
        node: Object exposing an indexable ``contents`` attribute, or a falsy
            value when the element was not found.
        name: Label to attach to the extracted content.

    Returns:
        ``(name, node.contents[0])`` when ``node`` is truthy, otherwise ``None``.
    """
    if not node:
        return None
    first_child = node.contents[0]
    return name, first_child


def section_split(tex):
    r"""Split a LaTeX document into named chunks.

    Two passes are made over ``tex``:

    1. A best-effort TexSoup pass that adds ``("title", ...)``,
       ``("abstract", ...)`` and ``("tables", ...)`` entries when those elements
       are found. Any parsing error ends this pass; entries collected before the
       error are kept.
    2. A split on the ``\section{...}`` headings reported by ``find_sections``.
       Text before the first heading is stored under the name ``"pre"``.

    Args:
        tex: LaTeX source as a string.

    Returns:
        List of ``(section_name, section_text)`` tuples.
    """
    sections = []

    # parsing with TexSoup
    # TODO: get rid of this and use regex only (brittle)
    try:
        soup = TexSoup(tex, tolerance=1)
        if title := extract_content(soup.title, "title"):
            sections.append(title)
        if abstract := extract_content(soup.abstract, "abstract"):
            sections.append(abstract)
        # Materialise once: the result may be a lazy iterator, which is always
        # truthy and can only be consumed a single time.
        tables = list(soup.find_all("table"))
        if tables:
            sections.append(("tables", "\n".join(str(node) for node in tables)))
    except Exception:
        # Could not parse the document; fall back to the heading split alone.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        pass

    # extract latex sections and corresponding text
    remaining = tex
    current_name = "pre"
    for heading in find_sections(tex):
        body, marker, rest = remaining.partition(f"\\section{{{heading}}}")
        if not marker:
            # Heading not present in the remaining text: skip it instead of
            # failing on the unpack.
            continue
        sections.append((current_name, body))
        current_name, remaining = heading, rest
    sections.append((current_name, remaining))
    return sections
    


  return [match.group(1) for match in re.finditer("\\\section\{(.*)\}", tex)]


In [6]:
import ollama


def pass_to_ollama(prompt, model):
    """Send ``prompt`` to ``ollama.generate`` and return the generated text.

    The request is made with ``temperature`` set to 0.

    Args:
        prompt: Prompt passed through to ``ollama.generate``.
        model: Model name passed through to ``ollama.generate``.

    Returns:
        The ``"response"`` entry of the result. If the call or the lookup raises
        an ``Exception``, the exception is printed and the string
        ``"ollama error: <exception>"`` is returned instead.
    """
    generation_options = {"temperature": 0}
    try:
        reply = ollama.generate(model=model, prompt=prompt, options=generation_options)
        answer = reply["response"]
    except Exception as error:
        print(error)
        answer = f"ollama error: {error}"
    return answer


In [7]:
import json

from src.dataset import UNANSWERABLE


# prompt template
def extract_tdms(tex, few_shot=True):
    match model:
            case "llama3:8b", "llama3:70b":
                if few_shot:
                    return f"""If the text reports benchmark leaderboard results, extract the reported Tasks, Datasets, Metrics and corresponding Scores.
                    
                    Text: {tex}
                    
                    Return the tasks, datasets, metrics and scores as reported in the text in a JSON array. Do not include precision information in the reported score.
                    Here the formating structure of the JSON. Please use exactly this formating in your answer.
                    [
                        {{"Task": "example Task 1", "Dataset": "example Dataset 1", "Metric": example metric 1", "Score": "score"}}, 
                        {{"Task": "example Task 1", "Dataset": "example Dataset 2", "Metric": example metric 2", "Score": "score"}}
                    ]
                    
                    Lets make an example for you: Template-Based Automatic Search of Compact Semantic Segmentation Architectures... One discovered architecture achieves 63.2% mean IoU on CamVid and 67.8% on CityScapes having only 270K parameters... evaluation.
                    
                    The expected answer of you is:
                    [
                        {{"Task": "Compact Sementic Segmentation", "Dataset": "CamVid", "Metric": Mean IoU", "Score": "63.2"}}, 
                        {{"Task": "Compact Sementic Segmentation", "Dataset": "CityScapes", "Metric": Mean IoU", "Score": "67.8"}}
                    ]
                    
                    """
                else:
                    return f"""If the text reports benchmark leaderboard results, extract the reported Tasks, Datasets, Metrics and corresponding Scores.
                    
                    Text: {tex}
                    
                    Return the tasks, datasets, metrics and scores as reported in the text in a JSON array. Do not include precision information in the reported score.
                    Here the formating structure of the JSON. Please use exactly this formating in your answer.
                    [
                        {{"Task": "example Task 1", "Dataset": "example Dataset 1", "Metric": example metric 1", "Score": "score"}}, 
                        {{"Task": "example Task 1", "Dataset": "example Dataset 2", "Metric": example metric 2", "Score": "score"}}
                    ]
                    
                    """
            case default:
                "Hi there. Please just say that something went wrong with the prompt since I could not make it up by the time."

def zero_shot_template(tex):
    """Return the prompt for ``tex`` with ``few_shot`` set to False."""
    return extract_tdms(tex, False)


def few_shot_template(tex):
    """Return the prompt for ``tex`` with ``few_shot`` set to True."""
    return extract_tdms(tex, True)

def convert_tdms_to_tuple(model_output_parsed):
    """Convert parsed model output into ``(Task, Dataset, Metric, Score)`` tuples.

    Args:
        model_output_parsed: Iterable of items, each expected to be a mapping
            with the keys ``"Task"``, ``"Dataset"``, ``"Metric"`` and ``"Score"``.

    Returns:
        List of 4-tuples, one per well-formed item, in input order. Items that
        lack one of the keys or that cannot be indexed by string are skipped.
    """
    tuples = []
    for item in model_output_parsed:
        try:
            entry = (item["Task"], item["Dataset"], item["Metric"], item["Score"])
        except (KeyError, TypeError, IndexError):
            # Malformed item (missing key, or not a mapping): ignore it. Other
            # exceptions, including KeyboardInterrupt, are not suppressed.
            continue
        tuples.append(entry)
    return tuples

def format_tdms(tuples):
    """De-duplicate ``(Task, Dataset, Metric, Score)`` tuples and format them as a string.

    Duplicates are dropped while keeping the first occurrence of each tuple in
    input order, so the same input always produces the same output string.
    Tuples containing unhashable values (for example a list as Score) are
    de-duplicated by equality instead of raising.

    Args:
        tuples: Iterable of 4-tuples ``(task, dataset, metric, score)``.

    Returns:
        ``str()`` of a list of ``{"LEADERBOARD": {...}}`` dicts, one per unique tuple.
    """
    unique = []
    seen = set()
    for entry in tuples:
        try:
            duplicate = entry in seen
            if not duplicate:
                seen.add(entry)
        except TypeError:
            # Unhashable field value: fall back to a linear equality check.
            duplicate = entry in unique
        if not duplicate:
            unique.append(entry)

    dicts = [
        {"LEADERBOARD": {"Task": t, "Dataset": d, "Metric": m, "Score": s}}
        for t, d, m, s in unique
    ]
    return str(dicts)

# Ollama model name: section_wise_extraction() passes it to pass_to_ollama(),
# and extract_tdms() matches on it to choose the prompt.
model = "llama3:70b"

def section_wise_extraction(tex, prompt_template):
    """Annotate one document by querying the model section by section.

    The document is split with ``section_split``; for each section a prompt is
    built with ``prompt_template``, sent to the model named by the module-level
    ``model`` variable, and the reply is parsed as JSON. Sections whose reply
    cannot be parsed or converted contribute nothing.

    Args:
        tex: The document; it is converted with ``str()`` before splitting.
        prompt_template: Callable taking the section text and returning the
            prompt to send (e.g. ``zero_shot_template`` or ``few_shot_template``).

    Returns:
        ``UNANSWERABLE`` when no tuple was extracted from any section, otherwise
        the string produced by ``format_tdms`` for all extracted tuples.
    """
    found_tdms = []
    for _section_name, section_text in section_split(str(tex)):
        # Use the template supplied by the caller so that zero-shot and
        # few-shot runs really send different prompts.
        prompt = prompt_template(section_text)
        response = pass_to_ollama(prompt, model)

        try:
            parsed = json.loads(response)
            found_tdms.extend(convert_tdms_to_tuple(parsed))
        except Exception:
            # No usable tuples in this section (reply is not JSON, or has an
            # unexpected shape). KeyboardInterrupt is deliberately not caught.
            continue

    if not found_tdms:
        # found_tdms are empty -> unanswerable
        return UNANSWERABLE
    # dedupe and format
    return format_tdms(found_tdms)

In [8]:
from src.dataset import PATH
from src.experiment_runner import run

# Run on Test
# Each call gives `run` an extraction callable (document -> annotation), the
# test-set path and a label for the run. What `run` does with them is defined in
# src.experiment_runner and is not visible here.
# NOTE(review): both results are bound to `df_test`, so after the second call
# only the few-shot result is still reachable under that name.
df_test = run(lambda tex: section_wise_extraction(tex, zero_shot_template), PATH.TEST, "llama3_70b_zeroshot")
df_test = run(lambda tex: section_wise_extraction(tex, few_shot_template), PATH.TEST, "llama3_70b_fewshot")


  0%|          | 1/789 [00:02<38:05,  2.90s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  0%|          | 2/789 [00:04<31:22,  2.39s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  0%|          | 3/789 [00:06<23:52,  1.82s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 4/789 [00:07<20:26,  1.56s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 5/789 [00:08<17:17,  1.32s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 6/789 [00:08<14:09,  1.09s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 7/789 [00:09<11:00,  1.18it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 8/789 [00:10<14:54,  1.15s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|          | 9/789 [00:11<13:49,  1.06s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|▏         | 10/789 [00:13<18:14,  1.40s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  1%|▏         | 11/789 [00:14<16:05,  1.24s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 12/789 [00:15<15:38,  1.21s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 13/789 [00:16<14:50,  1.15s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 14/789 [00:17<12:57,  1.00s/it]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 15/789 [00:18<11:45,  1.10it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 17/789 [00:18<07:16,  1.77it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 18/789 [00:19<07:05,  1.81it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  2%|▏         | 19/789 [00:20<09:07,  1.41it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first


  3%|▎         | 20/789 [00:21<09:16,  1.38it/s]

model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
model 'llama3:70b' not found, try pulling it first
