# Assignment 4
Name: Vivek Mule
Roll: 381072
PRN: 22420145

Build a Named Entity Recognition (NER) system for extracting
entities from real-world text such as news articles or social media
data. And measure its accuracy, precision, recall, and F1 score.

Named Entity Recognition (NER) identifies and classifies
entities in text into predefined categories such as:
PERSON,
ORG (Organization),
GPE (Location),
DATE,
EVENT,

In [1]:
# Install dependencies (transformers + metrics + CPU torch)
%pip -q install transformers seqeval
%pip -q install torch --index-url https://download.pytorch.org/whl/cpu


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.




In [2]:
import math
from typing import List, Dict, Tuple
from transformers import pipeline

# Load pretrained NER pipeline (aggregated spans for cleaner entities)
ner_tagger = pipeline(
    task="token-classification",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
    device="cpu",
)

print("Loaded model:", ner_tagger.model.name_or_path)


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForTokenClassification LOAD REPORT from: dslim/bert-base-NER
Key                      | Status     |  | 
-------------------------+------------+--+-
bert.pooler.dense.bias   | UNEXPECTED |  | 
bert.pooler.dense.weight | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Loaded model: dslim/bert-base-NER


In [3]:
from dataclasses import dataclass

@dataclass(frozen=True)
class Span:
    start: int
    end: int
    label: str


def mentions_to_spans(text: str, mentions: List[Tuple[str, str]]) -> List[Span]:
    """Convert ordered mention strings to character spans using sequential search."""
    spans: List[Span] = []
    cursor = 0
    for mention, label in mentions:
        start = text.index(mention, cursor)
        end = start + len(mention)
        spans.append(Span(start, end, label))
        cursor = end
    return spans


raw_samples = [
    (
        "Apple Inc. announced a new iPhone in Cupertino on September 12, 2023 during its annual event.",
        [("Apple Inc.", "ORG"), ("Cupertino", "GPE"), ("September 12, 2023", "DATE")],
    ),
    (
        "Elon Musk met with NASA officials in Washington to discuss the Artemis program.",
        [("Elon Musk", "PERSON"), ("NASA", "ORG"), ("Washington", "GPE"), ("Artemis", "EVENT")],
    ),
    (
        "The UN held an emergency session in New York after the earthquake in Turkey.",
        [("UN", "ORG"), ("New York", "GPE"), ("Turkey", "GPE")],
    ),
    (
        "Manchester United defeated Chelsea 2-0 at Wembley on May 15, 2022.",
        [("Manchester United", "ORG"), ("Chelsea", "ORG"), ("Wembley", "GPE"), ("May 15, 2022", "DATE")],
    ),
    (
        "Barack Obama spoke at the Climate Summit 2021 in Paris.",
        [("Barack Obama", "PERSON"), ("Climate Summit 2021", "EVENT"), ("Paris", "GPE")],
    ),
    (
        "The World Health Organization declared COVID-19 a pandemic on March 11, 2020.",
        [("World Health Organization", "ORG"), ("COVID-19", "EVENT"), ("March 11, 2020", "DATE")],
    ),
    (
        "Amazon opened a new data center in Mumbai to serve customers across India.",
        [("Amazon", "ORG"), ("Mumbai", "GPE"), ("India", "GPE")],
    ),
    (
        "Lionel Messi signed a contract with Inter Miami CF in July 2023.",
        [("Lionel Messi", "PERSON"), ("Inter Miami CF", "ORG"), ("July 2023", "DATE")],
    ),
]

dataset = [
    {
        "text": text,
        "gold": mentions_to_spans(text, mentions),
    }
    for text, mentions in raw_samples
]

len(dataset)


8

In [4]:
import pandas as pd

sample_text = dataset[0]["text"]
preds = ner_tagger(sample_text)
pd.DataFrame(preds)[["word", "entity_group", "start", "end", "score"]]


Unnamed: 0,word,entity_group,start,end,score
0,Apple Inc,ORG,0,9,0.999356
1,iPhone,MISC,27,33,0.988667
2,Cupertino,LOC,37,46,0.997524


In [5]:
def evaluate(samples: List[Dict]) -> Dict[str, float]:
    tp = fp = fn = 0
    for sample in samples:
        text = sample["text"]
        gold_spans = { (s.start, s.end, s.label) for s in sample["gold"] }
        pred_raw = ner_tagger(text)
        pred_spans = { (p["start"], p["end"], p["entity_group"]) for p in pred_raw }

        tp += len(gold_spans & pred_spans)
        fp += len(pred_spans - gold_spans)
        fn += len(gold_spans - pred_spans)

    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    accuracy = tp / (tp + fp + fn) if tp + fp + fn else 0.0

    return {
        "tp": tp,
        "fp": fp,
        "fn": fn,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "entity_accuracy": accuracy,
    }


metrics = evaluate(dataset)
metrics


{'tp': 7,
 'fp': 17,
 'fn': 19,
 'precision': 0.2916666666666667,
 'recall': 0.2692307692307692,
 'f1': 0.27999999999999997,
 'entity_accuracy': 0.16279069767441862}