# 🏷️ Fine-tuning BERT for Named Entity Recognition (NER)
Using `bert-base-cased` on the CoNLL-2003 dataset.

## 📦 Install dependencies

In [None]:
# Install the third-party libraries imported by the cells below
# (transformers, datasets, seqeval).
!pip install transformers datasets seqeval

## 📚 Load CoNLL-2003 Dataset

In [None]:
from datasets import load_dataset
# Load CoNLL-2003 and, in the same chained call, rename the tag column
# from "ner_tags" to "labels".
dataset = load_dataset("conll2003").rename_column("ner_tags", "labels")

# Tag names taken from the train split's "labels" feature; the metric code
# further down indexes this list with integer label ids.
label_list = dataset["train"].features["labels"].feature.names

# Bare expression so the notebook displays the tag names as the cell output.
label_list

## 🔢 Tokenization

In [None]:
from transformers import AutoTokenizer
# Load the tokenizer for the same "bert-base-cased" checkpoint that the
# model cell below loads.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def _align_labels_with_word_ids(word_ids, word_labels, label_all_tokens=True):
    """Expand one sentence's word-level labels to one label per sub-token.

    Args:
        word_ids: For each sub-token position, the index of the source word,
            or None for special tokens.
        word_labels: Integer label for each word of the sentence.
        label_all_tokens: If True, every sub-token of a word receives that
            word's label. If False, only the first sub-token does and the
            remaining ones get -100.

    Returns:
        A list with one integer label per sub-token position.
    """
    aligned = []
    previous_word_idx = None
    for word_idx in word_ids:
        if word_idx is None:
            # Special tokens have no source word, so they get the ignore value.
            aligned.append(-100)
        elif label_all_tokens or word_idx != previous_word_idx:
            aligned.append(word_labels[word_idx])
        else:
            # Continuation sub-token of a word that was already labelled.
            aligned.append(-100)
        previous_word_idx = word_idx
    return aligned


def tokenize_and_align_labels(example, *, label_all_tokens=True):
    """Tokenize pre-split words and align the word-level labels to sub-tokens.

    Works both for a single example and for a batch, which is what
    ``dataset.map(..., batched=True)`` passes in. In a batch, ``"tokens"``
    and ``"labels"`` each hold one inner list per sentence, so every
    sentence must be aligned with its own word ids.

    Args:
        example: Mapping with ``"tokens"`` (a list of words, or a list of
            such lists for a batch) and ``"labels"`` (integer labels with
            the same nesting as ``"tokens"``).
        label_all_tokens: Forwarded to the alignment helper. The default
            (True) labels every sub-token of a word with the word's label.

    Returns:
        The tokenizer output with an added ``"labels"`` entry. Positions
        that should not be scored hold -100, the value the metric code in
        this notebook filters out.
    """
    tokens = example["tokens"]
    # A single example is a flat list of strings; a batch is a list of lists.
    is_batched = bool(tokens) and isinstance(tokens[0], (list, tuple))

    tokenized_inputs = tokenizer(tokens, truncation=True, is_split_into_words=True)

    if is_batched:
        # word_ids() without arguments only describes the first sentence, so
        # each sentence's word ids are requested by batch index.
        tokenized_inputs["labels"] = [
            _align_labels_with_word_ids(
                tokenized_inputs.word_ids(batch_index=i),
                sentence_labels,
                label_all_tokens,
            )
            for i, sentence_labels in enumerate(example["labels"])
        ]
    else:
        tokenized_inputs["labels"] = _align_labels_with_word_ids(
            tokenized_inputs.word_ids(),
            example["labels"],
            label_all_tokens,
        )
    return tokenized_inputs

# Run the tokenize-and-align function over the dataset in batches.
tokenized_datasets = dataset.map(function=tokenize_and_align_labels, batched=True)

# Switch the output format to "torch" for the training cells that follow.
tokenized_datasets.set_format(type="torch")

## 🧠 Load BERT Model for Token Classification

In [None]:
from transformers import AutoModelForTokenClassification

# Two-way mapping between integer class ids and the tag names in label_list.
# Passing it to the model stores the real tag names in the model config
# instead of auto-generated placeholder names.
id2label = dict(enumerate(label_list))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForTokenClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(label_list),
    id2label=id2label,
    label2id=label2id,
)

## 🏋️ Training

In [None]:
from transformers import TrainingArguments, Trainer
import numpy as np
from seqeval.metrics import classification_report, f1_score

# Training configuration, grouped by purpose.
args = TrainingArguments(
    # Where the Trainer writes its outputs.
    output_dir="./outputs",
    # Schedule: two epochs, evaluate after each one, never save checkpoints.
    # NOTE(review): newer transformers releases appear to rename
    # `evaluation_strategy` to `eval_strategy` - confirm against the
    # installed version.
    num_train_epochs=2,
    evaluation_strategy="epoch",
    save_strategy="no",
    # Batch sizes per device for training and evaluation.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Log every 10 steps.
    logging_steps=10,
)

def compute_metrics(p):
    """Compute the seqeval F1 score for one evaluation pass.

    Args:
        p: A (predictions, labels) pair. The predictions are reduced with
            an argmax over axis 2; the labels are integer ids where -100
            marks positions to skip.

    Returns:
        A dict with the single key ``"f1"``.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_labels = []
    true_predictions = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Keep only the positions whose gold label is a real tag (not -100).
        kept = [
            (pred_id, gold_id)
            for pred_id, gold_id in zip(predicted_row, gold_row)
            if gold_id != -100
        ]
        true_labels.append([label_list[gold_id] for _, gold_id in kept])
        true_predictions.append([label_list[pred_id] for pred_id, _ in kept])

    return {"f1": f1_score(true_labels, true_predictions)}

from transformers import DataCollatorForTokenClassification

# Sentences in a batch have different lengths, so both the inputs and the
# per-token labels must be padded. The Trainer's default collator does not
# pad the "labels" column, which makes batching fail for token
# classification; this collator pads labels as well (with -100 by default,
# the value compute_metrics already filters out).
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Fine-tune the model; metrics are computed on the validation split
# according to the evaluation schedule set in `args`.
trainer.train()

## 📊 Evaluation on Validation Set

In [None]:
# Run inference on the validation split; the third element of the result
# is not used here.
predictions, labels, _ = trainer.predict(tokenized_datasets["validation"])

# Pick the highest-scoring class id at every position.
preds = np.argmax(predictions, axis=2)

# Gold tag names, skipping positions labelled -100.
true_labels = [[label_list[l] for l in label if l != -100] for label in labels]

# Predicted tag names at exactly the positions kept above.
true_preds = [
    [label_list[p_] for p_, l in zip(pred, label) if l != -100]
    for pred, label in zip(preds, labels)
]

print(classification_report(true_labels, true_preds))