In [2]:
import pandas as pd
from ollama import Client
from tqdm import tqdm

In [3]:
# Load the evaluation report CSV; the preview below shows one row of metrics
# (Precision, Recall, F1, Accuracy) per Model/Dataset/Language/Task entry.
df = pd.read_csv('experiments/span-classification/task3report.csv')
df.head()

Unnamed: 0,Model,Dataset,Language,Task,Precision,Recall,F1,Accuracy
0,gemma2,fewnerd,en,ner-coarse,48.53,55.72,51.15,83.73
1,llama3.1,fewnerd,en,ner-coarse,40.6,7.72,12.87,79.46
2,mistral,fewnerd,en,ner-coarse,49.95,18.97,26.32,81.12
3,gemma2,germanler,de,ner-coarse,21.85,32.42,24.82,86.32
4,llama3.1,germanler,de,ner-coarse,11.84,1.67,2.78,81.21


In [4]:
# Discard the Dataset and Language columns; only Model, Task and the metrics
# are kept for the aggregation that follows.
df = df.drop(['Dataset', 'Language'], axis=1)
df.head()

Unnamed: 0,Model,Task,Precision,Recall,F1,Accuracy
0,gemma2,ner-coarse,48.53,55.72,51.15,83.73
1,llama3.1,ner-coarse,40.6,7.72,12.87,79.46
2,mistral,ner-coarse,49.95,18.97,26.32,81.12
3,gemma2,ner-coarse,21.85,32.42,24.82,86.32
4,llama3.1,ner-coarse,11.84,1.67,2.78,81.21


In [5]:
# Average every remaining metric column per (Task, Model) pair, turn the group
# keys back into ordinary columns and round the means to two decimals.
df2 = (
    df.groupby(['Task', 'Model'])
    .mean()
    .reset_index()
    .round(2)
)
df2

Unnamed: 0,Task,Model,Precision,Recall,F1,Accuracy
0,ner-coarse,gemma2,35.19,44.07,37.98,85.02
1,ner-coarse,llama3.1,26.22,4.7,7.82,80.33
2,ner-coarse,mistral,39.28,15.38,21.22,81.76
3,ner-fine,gemma2,40.25,31.64,31.84,83.86
4,ner-fine,llama3.1,26.66,4.16,6.78,80.37
5,ner-fine,mistral,39.72,10.66,15.39,81.04
6,quotations,gemma2,36.4,25.61,29.81,90.29
7,quotations,llama3.1,33.16,10.88,15.18,88.17
8,quotations,mistral,23.03,10.69,13.71,86.24


In [6]:
# Persist the aggregated table; index=False keeps the row index out of the file.
df2.to_csv('experiments/span-classification/task3report_aggregated.csv', index=False)

In [None]:
from datasets import load_dataset
# Load the "supervised" configuration of the DFKI-SLT/few-nerd dataset.
# Later cells read its "test" split with the columns tokens / ner_tags / fine_ner_tags.
ds = load_dataset("DFKI-SLT/few-nerd", "supervised")

In [None]:
# Materialise the test split as a DataFrame.
# NOTE: this rebinds `df`, which held the evaluation report table in the cells above.
df = ds["test"].to_pandas()

In [None]:
# Rebuild a readable sentence per row by joining its tokens with single spaces.
df["text"] = df["tokens"].apply(" ".join)

In [None]:
df.head()


In [None]:
from typing import List, Tuple


def extract_annotations(tokens: List[str], tags: List[int]) -> Tuple[List[str], List[int]]:
    """Group consecutive tokens that share a non-zero tag into entity spans.

    Tag ``0`` marks tokens outside any entity. A span ends when a ``0`` tag is
    reached, when the tag value changes, or when the input ends. Closing the
    span on a tag change keeps two directly adjacent entities of different
    types apart instead of merging them under the label of the last token.

    Args:
        tokens: The tokens of one sentence.
        tags: One integer tag per token, aligned with ``tokens``. If the two
            sequences differ in length, the surplus of the longer one is
            ignored (``zip`` semantics).

    Returns:
        A pair ``(annotations, labels)``: ``annotations[i]`` is the span text
        (its tokens joined by single spaces, outer whitespace stripped) and
        ``labels[i]`` is the tag id shared by the tokens of that span.

    Note:
        The tags carry no begin/inside markers, so two adjacent entities with
        the *same* tag cannot be told apart and are returned as one span.
    """
    annotations: List[str] = []
    labels: List[int] = []
    span_tokens: List[str] = []
    span_label = None

    for token, tag in zip(tokens, tags):
        # Close the open span as soon as the tag differs from the span's tag.
        # This covers both "back to O" and "directly into another entity type".
        if span_tokens and tag != span_label:
            annotations.append(" ".join(span_tokens).strip())
            labels.append(span_label)
            span_tokens = []
        if tag != 0:
            span_tokens.append(token)
            span_label = tag

    # Flush a span that runs up to the end of the sentence.
    if span_tokens:
        annotations.append(" ".join(span_tokens).strip())
        labels.append(span_label)

    return annotations, labels


In [None]:
# Coarse entity classes used to decode `ner_tags`; a label's position in the
# tuple is its integer tag id, with "O" (outside any entity) at id 0.
coarse_id2label = dict(enumerate((
    "O",
    "art",
    "building",
    "event",
    "location",
    "organization",
    "other",
    "person",
    "product",
)))

# Reverse lookup: label string -> integer tag id.
coarse_label2id = {label: tag_id for tag_id, label in coarse_id2label.items()}

In [None]:
# Fine-grained entity classes used to decode `fine_ner_tags`. Labels are listed
# in tag-id order (position in the list == integer tag id, "O" at id 0) and are
# named "<coarse class>-<subtype>".
fine_id2label = dict(enumerate([
    "O",
    # art (1-6)
    "art-broadcastprogram",
    "art-film",
    "art-music",
    "art-other",
    "art-painting",
    "art-writtenart",
    # building (7-14)
    "building-airport",
    "building-hospital",
    "building-hotel",
    "building-library",
    "building-other",
    "building-restaurant",
    "building-sportsfacility",
    "building-theater",
    # event (15-20)
    "event-attack/battle/war/militaryconflict",
    "event-disaster",
    "event-election",
    "event-other",
    "event-protest",
    "event-sportsevent",
    # location (21-27)
    "location-GPE",
    "location-bodiesofwater",
    "location-island",
    "location-mountain",
    "location-other",
    "location-park",
    "location-road/railway/highway/transit",
    # organization (28-37)
    "organization-company",
    "organization-education",
    "organization-government/governmentagency",
    "organization-media/newspaper",
    "organization-other",
    "organization-politicalparty",
    "organization-religion",
    "organization-showorganization",
    "organization-sportsleague",
    "organization-sportsteam",
    # other (38-49)
    "other-astronomything",
    "other-award",
    "other-biologything",
    "other-chemicalthing",
    "other-currency",
    "other-disease",
    "other-educationaldegree",
    "other-god",
    "other-language",
    "other-law",
    "other-livingthing",
    "other-medical",
    # person (50-57)
    "person-actor",
    "person-artist/author",
    "person-athlete",
    "person-director",
    "person-other",
    "person-politician",
    "person-scholar",
    "person-soldier",
    # product (58-66)
    "product-airplane",
    "product-car",
    "product-food",
    "product-game",
    "product-other",
    "product-ship",
    "product-software",
    "product-train",
    "product-weapon",
]))

In [None]:
# Ollama client bound to a server reachable on localhost port 19290.
# The address is hard-coded, so the chat calls below require that server to be running.
client = Client(host='http://localhost:19290')

In [None]:
client.list()

In [None]:
# System message sent with every chat request; it pins the assistant to the
# extraction task and to the answer format requested in the user prompt.
system_prompt = (
    "You are a system to support the analysis of large amounts of text. "
    "You will assist the user by extracting the required information from the provided documents. "
    "You will always answer in the required format and use no other formatting than expected by the user!"
)

In [None]:
# Building-only prompt variant. The `{}` placeholder is where the sentence goes.
# NOTE: the next cell rebinds `user_prompt`, so on a top-to-bottom run this
# version is never the one sent to the model.
user_prompt = """
I prepared a list of categories of information that I would like to extract from the text. The categories about buildings are:

1. Airport
2. Hospital
3. Hotel
4. Library
5. Other
6. Restaurant
7. Sports facility
8. Theater

Please extract the information about the buildings (if any) from the following text:
{}

Respond in the following format.:
<category>: <extracted text>
...

If there is no information about buildings in the provided text, respond with "No information found".

Remember, you MUST extract the information verbatim from the text, do not generate it!
"""

In [None]:
# Coarse-category prompt: the eight category names correspond (case-insensitively)
# to the non-"O" labels of `coarse_id2label`. The `{}` placeholder is filled with
# the sentence via `user_prompt.format(sentence)` in the evaluation loop, and the
# requested "<category>: <extracted text>" line format is what
# `convert_response_to_sentence_label_ids` parses.
user_prompt = """
Here is a list of entity categories that I would like to extract from the text. The categories are:

1. Art
2. Building
3. Event
4. Location
5. Organization
6. Other
7. Person
8. Product

Please extract fitting entities from the following text:
{}

Respond in the following format:
<category>: <extracted text>

e.g.
Art: Mona Lisa
Building: Eiffel Tower

Remember, you MUST extract the information verbatim from the text, do not generate it!
"""

In [None]:
# Evaluate the model on the first 100 test sentences and collect per-token
# gold/predicted label sequences for scoring.
# NOTE: `convert_response_to_sentence_label_ids` is defined in a LATER cell of this
# notebook; that cell must have been executed before this one, otherwise the call
# below raises NameError on a fresh kernel.
golds = []
preds = []

for idx in range(100):
    sample = ds["test"][idx]
    # The prompt sentence is the token list joined by single spaces.
    sentence = " ".join(sample["tokens"]).strip()
    # Gold spans at both granularities; used only for the printed overview below.
    ners, ners_labels = extract_annotations(sample["tokens"], sample["ner_tags"])
    fine_ners, fine_ners_labels = extract_annotations(sample["tokens"], sample["fine_ner_tags"])
    
    print(sentence)
    print("NERs:")
    for ner, label in zip(ners, ners_labels):
        print(f"{ner} ({coarse_id2label[label]})")

    print()
    print("Fine NERs:")
    for ner, label in zip(fine_ners, fine_ners_labels):
        print(f"{ner} ({fine_id2label[label]})")

    print()

    # One chat request per sentence: fixed system prompt plus the user prompt
    # with the sentence substituted for its `{}` placeholder.
    response = client.chat(model='gemma2', messages=[
      {
          'role': 'system',
          'content': system_prompt.strip(),
      },
      {
        'role': 'user',
        'content': user_prompt.format(sentence).strip(),
      },
    ])
    message = response["message"]["content"]

    print("Response:")
    print(message)
    print()

    gold_labels = sample["ner_tags"]
    # Map the free-text answer back onto one coarse tag id per sentence token.
    predicted_labels = convert_response_to_sentence_label_ids(sentence=sentence, response=message)

    # The parser re-splits `sentence` on whitespace, so a token that is empty or
    # contains whitespace would change the token count and abort the run here.
    assert len(gold_labels) == len(predicted_labels), f"Length mismatch between gold and predicted labels: {len(gold_labels)} != {len(predicted_labels)}"

    # Store label strings rather than ids.
    # NOTE(review): the labels carry no B-/I- prefix; seqeval documents IOB-style
    # tags as input, so verify that the entity types in its report are as intended.
    golds.append([coarse_id2label[l] for l in gold_labels])
    preds.append([coarse_id2label[l] for l in predicted_labels])

    print(f"Gold labels: {gold_labels}")
    print(f"Pred labels: {predicted_labels}")
    print()
    print("----------")
    print()

In [None]:
# Scratch input for trying out the response parser defined in this cell.
# Note that the sample response assigned right after names spans ("Major Larry G.
# Messinger", "B-52") that do not occur in this sentence, so nothing can be matched.
sentence = "The final season of minor league play Elkin Memorial Park saw season attendance of 16,322 , an average of 299 per contest ."
response = """
Person: Major Larry G. Messinger 
Art:  
Building:  
Event:  
Location:  
Organization:  
Other:  
Product: B-52 
"""
response = response.strip()

def convert_response_to_sentence_label_ids(sentence: str, response: str, label2id=None) -> List[int]:
    """Project a "<category>: <extracted text>" model answer onto token-level tag ids.

    The sentence is split on whitespace and every token starts with tag ``0``.
    Each response line is parsed as ``label: span``. When the lower-cased label
    is a key of ``label2id`` and the whitespace-split span occurs verbatim as a
    run of consecutive sentence tokens, every occurrence of that run is tagged
    with the label's id. Lines are applied in order, so a later line overwrites
    an earlier one where their matches overlap.

    Args:
        sentence: Whitespace-tokenised sentence (tokens joined by spaces).
        response: Raw model answer, one ``label: span`` pair per line. Lines
            without a colon, with an empty span, or with an unknown label are
            ignored. A label may be wrapped in angle brackets (``<Person>``).
        label2id: Optional mapping from lower-case label name to tag id.
            Defaults to the notebook-level ``coarse_label2id``.

    Returns:
        A list with one integer tag id per sentence token (``0`` = no entity).
    """
    if label2id is None:
        # Fall back to the mapping defined at notebook level.
        label2id = coarse_label2id

    sentence_tokens = sentence.split()
    sentence_tags = [0] * len(sentence_tokens)

    for line in response.strip().split("\n"):
        # Split on the FIRST colon only: the extracted text may itself contain
        # colons (times, ratios, titles) and must not cause the line to be dropped.
        label, separator, span_text = line.partition(":")
        if not separator:
            continue

        span_tokens = span_text.split()
        if not span_tokens:
            # e.g. "Art:" with nothing extracted
            continue

        # Tolerate answers that echo the "<category>" placeholder syntax.
        label = label.strip()
        if label.startswith("<"):
            label = label[1:]
        if label.endswith(">"):
            label = label[:-1]

        label_id = label2id.get(label.strip().lower())
        if label_id is None:
            continue

        # Tag every verbatim occurrence of the span in the sentence.
        span_len = len(span_tokens)
        for start in range(len(sentence_tokens) - span_len + 1):
            if sentence_tokens[start:start + span_len] == span_tokens:
                sentence_tags[start:start + span_len] = [label_id] * span_len

    return sentence_tags

In [None]:
convert_response_to_sentence_label_ids(sentence, response)

In [None]:
from seqeval.metrics import accuracy_score
from seqeval.metrics import classification_report
from seqeval.metrics import f1_score

In [None]:
accuracy_score(golds, preds)

In [None]:
f1_score(golds, preds)

In [None]:
print(classification_report(golds, preds))

In [None]:
import pandas as pd

In [None]:
# Load stored span-classification results (fine-grained Few-NERD run, judging by the
# file name — TODO confirm). NOTE: rebinds `df` once more; the next cells read its
# `ner_tags` and `predicted_tags` columns.
df = pd.read_parquet("experiments/span-classification/span_classification_fewnerd_fine.parquet")

In [None]:
df.head()

In [None]:
# Unwrap the per-row tag containers into plain nested Python lists.
# NOTE: rebinds `golds` / `preds`, replacing whatever the evaluation loop collected.
golds = [row.tolist() for row in df["ner_tags"]]
preds = [row.tolist() for row in df["predicted_tags"]]

In [None]:
golds

In [191]:
from datasets import load_dataset
# Load the German-LER dataset. NOTE: rebinds `ds`, which held Few-NERD above.
# The recorded output warns that passing `trust_remote_code=True` will become
# mandatory for this dataset from the next major `datasets` release.
ds = load_dataset("elenanereiss/german-ler")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/9.12k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/12.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.90M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/617k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/627k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [192]:
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'ner_coarse_tags'],
        num_rows: 53384
    })
    test: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'ner_coarse_tags'],
        num_rows: 6673
    })
    validation: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'ner_coarse_tags'],
        num_rows: 6666
    })
})

In [194]:
# German-LER test split as a DataFrame (columns id / tokens / ner_tags / ner_coarse_tags,
# per the dataset summary printed above). NOTE: rebinds `df` again.
df = ds["test"].to_pandas()

In [195]:
df.head()

Unnamed: 0,id,tokens,ner_tags,ner_coarse_tags
0,0,"[Wegen, der, Teilnahme, des, Antragstellers, a...","[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 3...","[14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1..."
1,1,"[●, Mitwirkung, im, Sinne, der, Kostenverfügung]","[38, 38, 38, 38, 38, 38]","[14, 14, 14, 14, 14, 14]"
2,2,"[Von, der, Ablehnung, eines, Straferlasses, fü...","[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 3...","[14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1..."
3,3,"[Zwar, führt, bei, der, in, §, 33, Abs., 2, TV...","[38, 38, 38, 38, 38, 18, 37, 37, 37, 37, 37, 3...","[14, 14, 14, 14, 14, 5, 12, 12, 12, 12, 12, 12..."
4,4,"[Der, Wortlaut, der, Zulagenregelung, verlange...","[38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 3...","[14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1..."


In [201]:
ds.data

{'train': MemoryMappedTable
 id: string
 tokens: list<item: string>
   child 0, item: string
 ner_tags: list<item: int64>
   child 0, item: int64
 ner_coarse_tags: list<item: int64>
   child 0, item: int64
 ----
 id: [["0","1","2","3","4",...,"995","996","997","998","999"],["1000","1001","1002","1003","1004",...,"1995","1996","1997","1998","1999"],...,["52000","52001","52002","52003","52004",...,"52995","52996","52997","52998","52999"],["53000","53001","53002","53003","53004",...,"53379","53380","53381","53382","53383"]]
 tokens: [[["dd",")","Art.","33","Abs.",...,"als","Lebenszeitrichterverhältnisse","zu","begründen","."],["In","diesem","machte","er","im",...,"2006","verstorbenen","Erblasser","geltend","."],...,["Deshalb","durfte","der","Gesetzgeber","im",...,"nicht","hinreichend","sicher","prägen","."],["3.","Die","Jugendkammer","hat","die",...,"185","StGB",")","gewertet","."]],[["Die","Klägerin","begehrt","mit","der",...,"der","Rechtsauffassung","des","Gerichts","."],["3.","Der","8.

In [202]:
import pandas as pd

In [203]:
# Load stored results for the coarse German-LER run (per the file name — TODO confirm);
# the preview shows tokens, gold `ner_tags`, `predicted_tags` and the raw model `message`.
df = pd.read_parquet("experiments/span-classification/span_classification_germanler_coarse.parquet")
df.head()

Unnamed: 0,tokens,ner_tags,predicted_tags,message
0,"[Bei, seiner, beruflichen, Tätigkeit, sei, er,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",Person: er\n\n\n
1,"[Deshalb, durfte, der, Gesetzgeber, im, Rahmen...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",Person: \nOrt: \nOrganisation: \nNorm: Gesetzg...
2,"[Oder, wenn, die, Muslime, bekämpft, werden, .]","[O, O, O, O, O, O, O]","[O, O, O, O, O, O, O]",Organisation: Muslime \n\n\n
3,"[Zudem, sei, der, Begriff, „, Software, “, nic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",Organisation: \nPerson: \nOrt: \nNorm: \nG...
4,"[1., Der, Senat, sieht, davon, ab, ,, die, Sac...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",Person: \nOrt: Deutsche Patent- und Markenamt\...


In [208]:
df["predicted_tags"].tolist()

[array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
       dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], dtype=object),
 array(['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O'], dtype=object),
 array(

In [215]:
# Load stored results for the German direct-quotation run (per the file name — TODO
# confirm); same column layout as the previous results frame, with labels such as
# "Sprecher" visible in `predicted_tags`.
df = pd.read_parquet("experiments/span-classification/span_classification_german_direct_quotation.parquet")
df.head()

Unnamed: 0,tokens,ner_tags,predicted_tags,message
0,"[Die, Wiener, Linien, GmbH, &, Co, KG, ,, in, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","Sprecher: \nDirekte Rede: ""Hieß es zunächst n..."
1,"[Mindestens, 81, Menschen, wurden, in, Japan, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",<Sprecher>: Die japanischen Behörden\n<Direkte...
2,"[Günther, Krause, (, CDU, ), war, nach, der, d...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[Sprecher, Sprecher, O, O, O, O, O, O, O, O, O...",Sprecher: Günther Krause\nDirekte Rede: „ Was ...
3,"[Der, am, Dienstag, bei, einem, Vorbereitungss...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","Sprecher: Jürgen Klinsmann\nDirekte Rede: "" D..."
4,"[Dem, chinesischen, Bürgerrechtler, Hu, Jia, w...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, Sprecher, Sprecher, O, O, O, O, O, O...","Sprecher: Hu Jia \nDirekte Rede: ""Nach einer ..."


In [216]:
df["message"].to_list()

['Sprecher: \nDirekte Rede:  "Hieß es zunächst noch , dass die Fahrgäste an den Unfällen selber Schuld seien , wird nun eine halbe Million Euro in die Sicherheit dieser Türen investiert ."\nDirekte Rede: "Eine weitere Diskriminierung wurde durch den Verkehrsverbund Ost-Region abgestellt . \n\n\n',
 '<Sprecher>: Die japanischen Behörden\n<Direkte Rede>:  haben 3,6 Millionen Einwohner von 18 Präfekturen aufgefordert , ihre Häuser zu verlassen – die Hälfte davon in der Stadt Hiroshima . \n<Sprecher>: Die Japan Meteorological Agency\n<Direkte Rede>: teilte mit , dass in den Präfekturen Kyoto und Gifu die „ noch nie da gewesenen Niederschläge “ anhalten und warnte für die Hauptinseln Kyūshū und Shikoku vor neuen Extremregenfällen . \n\n\n',
 'Sprecher: Günther Krause\nDirekte Rede: „ Was in 10 Millionen Jahren in der Erde geschah , machen wir in 10 Sekunden . “  \n\n\n',
 'Sprecher: Jürgen Klinsmann\nDirekte Rede:  " Durch die Operation hat er eine Chance , dass er bei der WM spielen kann .