# Task 3: NER Model Fine-tuning
Fine-tunes a multilingual NER model (`distilbert-base-multilingual-cased`) for Ethiopian e-commerce entity extraction, using the CoNLL-format dataset labeled in Task 2.

In [None]:
# Setup and imports
import sys
sys.path.append('../src')
%reload_ext autoreload
%autoreload 2

import os
import random

# Set before importing torch / the trainer so the value is already in place
# if anything initializes CUDA at import time. This only limits which device
# is visible to the process; it does not check that a GPU exists.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # Use GPU if available

import torch
from ner.model_trainer import NERModelTrainer

In [None]:
# Initialize NER trainer
# NERModelTrainer is imported from the project's `ner.model_trainer` module and is
# constructed here with its default arguments. The `trainer` object is reused by
# every later cell (data loading, model setup, dataset preparation, training).
trainer = NERModelTrainer()
print("NER trainer initialized")
# Show the label names the trainer exposes through `label2id`
# (presumably a label-name -> integer-id mapping; confirm in NERModelTrainer).
print(f"Entity labels: {list(trainer.label2id.keys())}")

In [None]:
# Load CoNLL dataset from Task 2
# The path is relative, so it depends on the directory the notebook is run from.
conll_path = "../data/labeled/ethiopian_ner_dataset.txt"
# load_conll_data returns two values, unpacked as `sentences` and `labels`.
# Assumed to be parallel sequences (one label sequence per sentence) - TODO confirm
# against NERModelTrainer.load_conll_data.
sentences, labels = trainer.load_conll_data(conll_path)
print(f"Loaded {len(sentences)} sentences for training")

In [None]:
# Split data for training and evaluation (80/20)
# The data is shuffled with a fixed seed before splitting. A purely positional
# split would make the evaluation set whatever happens to sit at the end of the
# file, which is not representative if the file is ordered in any way.
SPLIT_SEED = 42       # fixed so Restart & Run All reproduces the same split
TRAIN_FRACTION = 0.8  # remaining 20% is held out for evaluation

# Sentences and labels are indexed in parallel below, so they must line up.
assert len(sentences) == len(labels), (
    f"sentences ({len(sentences)}) and labels ({len(labels)}) differ in length"
)

# Shuffle indices (not the lists themselves) so both lists stay aligned and the
# originals are left untouched. A private Random instance keeps the global RNG
# state unaffected.
shuffled_indices = list(range(len(sentences)))
random.Random(SPLIT_SEED).shuffle(shuffled_indices)

split_idx = int(TRAIN_FRACTION * len(shuffled_indices))
train_indices = shuffled_indices[:split_idx]
eval_indices = shuffled_indices[split_idx:]

train_sentences = [sentences[i] for i in train_indices]
train_labels = [labels[i] for i in train_indices]
eval_sentences = [sentences[i] for i in eval_indices]
eval_labels = [labels[i] for i in eval_indices]

# Fail here with a clear message rather than deep inside training/evaluation.
if not train_sentences or not eval_sentences:
    raise ValueError(
        f"Dataset too small to split: got {len(sentences)} sentences, "
        f"which leaves {len(train_sentences)} for training and "
        f"{len(eval_sentences)} for evaluation"
    )

print(f"Training set: {len(train_sentences)} sentences")
print(f"Evaluation set: {len(eval_sentences)} sentences")

In [None]:
# Initialize model (DistilBERT for faster training)
# The name is passed unchanged to trainer.initialize_model
# (presumably a Hugging Face Hub model id - confirm in NERModelTrainer).
model_name = "distilbert-base-multilingual-cased"
trainer.initialize_model(model_name)
print(f"Initialized model: {model_name}")
# Total parameter count: sums numel() over everything yielded by
# trainer.model.parameters(), formatted with thousands separators.
print(f"Model parameters: {sum(p.numel() for p in trainer.model.parameters()):,}")

In [None]:
# Build the training and evaluation datasets.
# Each (sentences, labels) pair goes through trainer.prepare_dataset in turn:
# training split first, evaluation split second.
train_dataset, eval_dataset = (
    trainer.prepare_dataset(split_sentences, split_labels)
    for split_sentences, split_labels in (
        (train_sentences, train_labels),
        (eval_sentences, eval_labels),
    )
)

# Report how many examples each prepared dataset holds
print(f"Training dataset size: {len(train_dataset)}")
print(f"Evaluation dataset size: {len(eval_dataset)}")

In [None]:
# Train the model
# train_model receives both prepared datasets and the checkpoint directory.
# Its return value is kept as `model_path`, which the next cell passes to the
# inference pipeline as the model and tokenizer location.
output_dir = "../models/checkpoints/distilbert-ethiopian-ner"
model_path = trainer.train_model(train_dataset, eval_dataset, output_dir)
print("Model training completed!")  # plain string: there is nothing to interpolate
print(f"Model saved to: {model_path}")

In [None]:
# Smoke-test the fine-tuned checkpoint on a single sample sentence
from transformers import pipeline

# Model weights and tokenizer are both loaded from the path returned by training
ner_pipeline = pipeline("ner", model=model_path, tokenizer=model_path, aggregation_strategy="simple")

# Run inference on the sample text
test_text = "ሰላም! የሕፃናት ጠርሙስ ዋጋ 150 ብር ነው። ቦሌ አካባቢ ነው።"
predictions = ner_pipeline(test_text)

# Echo the input, then print one line per prediction:
# the matched text, its entity group, and the score to three decimals
print(f"Test text: {test_text}")
print("Predictions:")
for pred in predictions:
    print(f"  {pred['word']} -> {pred['entity_group']} (confidence: {pred['score']:.3f})")