In [7]:
import csv
import ast
import json
from sklearn.metrics import precision_score, recall_score, f1_score

In [8]:
def read_manual_labels(file_path):
    manual_labels = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Safely evaluate the line as a tuple
            try:
                entity = ast.literal_eval(line.strip())
                manual_labels.append(entity)  # Append the (text, label) tuple
            except (ValueError, SyntaxError) as e:
                print(f"Error parsing entity: {line}, Error: {e}")

    return manual_labels

# Example usage (for Obama labels):
manual_labels = read_manual_labels("../flairData/manual_labeling_obama.csv")
print(manual_labels)

[('United Nations', 'ORG'), ('Contact Group on Piracy off the Coast of Somalia', 'ORG'), ('United States', 'ORG'), ('New York Declaration', 'ORG'), ('International Ship and Port Facility Security Code', 'ORG'), ('Panama', 'ORG'), ('Bahamas', 'ORG'), ('Liberia', 'ORG'), ('Marshall Islands', 'ORG'), ('NATO', 'ORG'), ('European Union', 'ORG'), ('Somalia', 'LOC'), ('Coast of Somalia', 'LOC'), ('United States', 'LOC'), ('New York', 'LOC'), ('Horn of Africa', 'LOC'), ('piracy', 'MISC'), ('Syrian Embassy', 'ORG'), ('United States', 'ORG'), ('State Department', 'ORG'), ('Honorary Consuls', 'ORG'), ('Vienna Convention on Diplomatic Relations', 'ORG'), ('United States', 'LOC'), ('Michigan', 'LOC'), ('Texas', 'LOC'), ('Washington', 'LOC'), ('Syria', 'LOC'), ('Syrian ambassador', 'PER'), ('Clinton', 'PER'), ('Lavrov', 'PER'), ('Obama', 'PER'), ('Medvedev', 'PER'), ('United States', 'ORG'), ('G-20', 'ORG'), ('NATO', 'ORG'), ('NATO Russia Council', 'ORG'), ('Russia', 'ORG'), ('START', 'ORG'), ('Russ

In [9]:
def read_ner_results(file_path, max_lines=None):
    ner_results = []
    with open(file_path, "r", encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader)  # Skip the header line 'publish_date,entities'

        for i, row in enumerate(reader):
            if max_lines is not None and i >= max_lines:
                break  # Stop if we reach the specified line limit
            if len(row) < 2:
                continue  # Skip if the row is malformed or incomplete

            entities_str = row[1]  # We're only interested in the second column, which contains the entities
            try:
                # Safely evaluate the string containing the list of entities
                entities = ast.literal_eval(entities_str)
                ner_results.extend([(entity['text'], entity['label']) for entity in entities])  # Add (text, label) tuples
            except (ValueError, SyntaxError) as e:
                print(f"Error parsing entity: {entities_str}, Error: {e}")

    return ner_results

In [10]:
# Evaluation function remains the same
def evaluate_ner(manual_labels, ner_results):
    manual_set = set(manual_labels)
    ner_set = set(ner_results)

    true_positives = manual_set & ner_set
    false_positives = ner_set - manual_set
    false_negatives = manual_set - ner_set

    precision = len(true_positives) / (len(true_positives) + len(false_positives)) if len(ner_set) > 0 else 0
    recall = len(true_positives) / (len(true_positives) + len(false_negatives)) if len(manual_set) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0

    return precision, recall, f1

In [11]:
# Paths to the files (adjusted to the naming scheme in the screenshot)
file_pairs = [
    ("../flairData/manual_labeling_biden.csv", "../flairData/ner_results_biden.csv"),
    ("../flairData/manual_labeling_obama.csv", "../flairData/ner_results_obama.csv"),
    ("../flairData/manual_labeling_trump.csv", "../flairData/ner_results_trump.csv")
]

# Evaluate and print the results for each file pair
for manual_file, ner_file in file_pairs:
    president = manual_file.split('_')[-1].split('.')[0].capitalize()  # Extracting 'Biden', 'Obama', 'Trump'
    
    # Reading manual labels and NER results using the existing functions
    manual_labels = read_manual_labels(manual_file)
    ner_results = read_ner_results(ner_file, 10)

    # Evaluate the performance
    precision, recall, f1 = evaluate_ner(manual_labels, ner_results)

    # Print the evaluation results in a nice format
    print(f"Evaluating {president} NER Results:")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("-" * 40)

Evaluating Biden NER Results:
Precision: 0.5618
Recall:    0.6993
F1 Score:  0.6231
----------------------------------------
Evaluating Obama NER Results:
Precision: 0.3412
Recall:    0.5088
F1 Score:  0.4085
----------------------------------------
Evaluating Trump NER Results:
Precision: 0.5733
Recall:    0.6615
F1 Score:  0.6143
----------------------------------------
