In [1]:
!pip install -U transformers datasets evaluate optuna seqeval --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m11.9 MB/s[0m eta [3

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
###############################################################################
# Indic‑NER fine‑tuning on Naamapadam (+ optional custom JSON)
###############################################################################
import argparse, random, json, os, itertools, logging, collections
import numpy as np, torch
from datasets import load_dataset, concatenate_datasets, DatasetDict, Sequence, Value
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)

# ──────────────────────────────────────────────────────────────
# 0.  LOGGING
# ──────────────────────────────────────────────────────────────
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)
log = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# 1.  ARGUMENTS & SEEDING
# ──────────────────────────────────────────────────────────────
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="ai4bharat/indic-bert")
parser.add_argument("--languages", nargs="+", default=["as"])
parser.add_argument("--custom_data_path", type=str, default="/content/drive/MyDrive/Yash_final_btp/naamapadam_proj/0.6/0.4/naamapadam_assamese.json",
                    help="Path to your extra JSON file (optional)")
parser.add_argument("--output_dir", type=str,
                    default="./indicner-finetuned-naamapadam")
parser.add_argument("--num_train_epochs", type=int, default=20)
parser.add_argument("--per_device_train_batch_size", type=int, default=32)
parser.add_argument("--per_device_eval_batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--weight_decay",  type=float, default=0.01)
parser.add_argument("--warmup_steps",  type=int, default=500)
parser.add_argument("--seed", type=int, default=42)
args, _ = parser.parse_known_args()

np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    log.info("CUDA device: %s", torch.cuda.get_device_name(0))
else:
    log.info("CUDA not available – falling back to CPU")

lang = args.languages[0]          # single‑language run

# ──────────────────────────────────────────────────────────────
# 2.  CANONICAL LABEL SET  (Naamapadam has exactly 7)
# ──────────────────────────────────────────────────────────────
label_list = ["B-LOC","B-ORG","B-PER","I-LOC","I-ORG","I-PER","O"]
id2label   = {i:l for i,l in enumerate(label_list)}  # type: ignore
label2id   = {l:i for i,l in id2label.items()}
num_labels = len(label_list)
log.info("Canonical labels: %s", id2label)

# Helper – map *any* tag outside this list to “O”
def normalise_tag(tag: str) -> str:
    tag = str(tag)
    if tag in ("B-PERSON", "I-PERSON"):         # your PERSON alias
        tag = tag.replace("PERSON", "PER")
    return tag if tag in label2id else "O"

# ──────────────────────────────────────────────────────────────
# 3.  LOAD NAAMAPADAM (train / test)
# ──────────────────────────────────────────────────────────────
train_ref = load_dataset("ai4bharat/naamapadam", lang, split="train")
test_ref  = load_dataset("ai4bharat/naamapadam", lang, split="test")

# NEW  ➜  make their `ner_tags` column a plain int sequence
int_seq = Sequence(Value("int64"))
train_ref = train_ref.cast_column("ner_tags", int_seq)
test_ref  = test_ref.cast_column("ner_tags", int_seq)

# ──────────────────────────────────────────────────────────────
# 4.  OPTIONAL: LOAD + CLEAN YOUR CUSTOM JSON
# ──────────────────────────────────────────────────────────────
if args.custom_data_path:
    log.info("Loading custom JSON: %s", args.custom_data_path)
    custom = load_dataset("json", data_files=args.custom_data_path,
                          split="train")

    # (a) ensure tokens / ner_tags length match
    broken = [i for i, ex in enumerate(custom)
              if len(ex["tokens"]) != len(ex["ner_tags"])]
    if broken:
        log.warning("⚠️  %d rows have mismatching lengths – they’ll be dropped",
                    len(broken))
        custom = custom.select([i for i in range(len(custom)) if i not in broken])

    # (b) normalise & map to ints
    def _clean(batch):
        out = []
        for tags in batch["ner_tags"]:
            out.append([label2id[normalise_tag(t)] for t in tags])
        batch["ner_tags"] = out
        return batch

    custom = custom.map(_clean, batched=True)
    log.info("Custom set after cleaning: %d sentences", len(custom))
    train_all = concatenate_datasets([train_ref, custom])
else:
    train_all = train_ref

# quick label‑distribution print‑out
def label_hist(ds, name):
    flat = list(itertools.chain.from_iterable(ds["ner_tags"]))
    c = collections.Counter(flat)
    log.info("%s label distribution: %s",
             name, {id2label[k]: v for k,v in c.items()})
label_hist(train_all, "TRAIN")
label_hist(test_ref,  "TEST ")

# 5. TRAIN / DEV SPLIT ---------------------------------------------------------
split = train_all.train_test_split(train_size=0.75, seed=args.seed)
train_ds, dev_ds = split["train"], split["test"]
log.info("Train %d  |  Dev %d  |  Test %d", len(train_ds), len(dev_ds), len(test_ref))

# 6. TOKENISATION + LABEL ALIGNMENT -------------------------------------------
tok = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
def align(batch):
    enc = tok(batch["tokens"], is_split_into_words=True,
              truncation=True, max_length=512)
    new_labels = []
    for i, seq in enumerate(batch["ner_tags"]):
        word_ids = enc.word_ids(batch_index=i)
        prev = None
        aligned = []
        for w in word_ids:
            if w is None:
                aligned.append(-100)
            elif w != prev:
                aligned.append(seq[w])
            else:
                aligned.append(-100)
            prev = w
        new_labels.append(aligned)
    enc["labels"] = new_labels
    return enc

train_ds = train_ds.map(align, batched=True, remove_columns=["ner_tags"])
dev_ds   = dev_ds  .map(align, batched=True, remove_columns=["ner_tags"])
test_ref = test_ref.map(align, batched=True, remove_columns=["ner_tags"])

# 7. MODEL, METRICS, TRAINER ---------------------------------------------------
metric = evaluate.load("seqeval")
def compute_metrics(p):
    preds, labs = p
    preds = np.argmax(preds, axis=2)
    true_preds, true_labs = [], []
    for pr, lb in zip(preds, labs):
        pr_l, lb_l = [], []
        for p_i, l_i in zip(pr, lb):
            if l_i != -100:
                pr_l.append(id2label[p_i])
                lb_l.append(id2label[l_i])
        true_preds.append(pr_l)
        true_labs.append(lb_l)
    res = metric.compute(predictions=true_preds, references=true_labs,
                         zero_division=0)
    return {k.replace("overall_", ""): v for k,v in res.items()}

model = AutoModelForTokenClassification.from_pretrained(
    args.model_name, num_labels=num_labels,
    id2label=id2label, label2id=label2id)

train_args = TrainingArguments(
    output_dir     = args.output_dir,
    eval_strategy="epoch",           # ← one eval *after* each epoch
    save_strategy  ="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    num_train_epochs=args.num_train_epochs,
    per_device_train_batch_size=args.per_device_train_batch_size,
    per_device_eval_batch_size=args.per_device_eval_batch_size,
    learning_rate = args.learning_rate,
    weight_decay  = args.weight_decay,
    warmup_steps  = args.warmup_steps,
    seed = args.seed,
    fp16 = torch.cuda.is_available(),
    report_to="none")

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_ds,
    eval_dataset =dev_ds,
    tokenizer=tok,
    data_collator=DataCollatorForTokenClassification(tok),
    compute_metrics=compute_metrics)

# 8. TRAIN & FINAL EVALUATION --------------------------------------------------
log.info("⏳  Starting fine‑tuning …")
trainer.train()
log.info("✅  Finished training.  Best dev‑set F1: %.4f",
         trainer.state.best_metric or -1)

log.info("🏁  Test‑set metrics:")
print(trainer.evaluate(eval_dataset=test_ref))


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1437 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/5.65M [00:00<?, ?B/s]

Map:   0%|          | 0/8777 [00:00<?, ? examples/s]

Map:   0%|          | 0/2926 [00:00<?, ? examples/s]

Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/135M [00:00<?, ?B/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,No log,0.761701,"{'precision': 0.7509477104518925, 'recall': 0.9824064234266147, 'f1': 0.8512234632108449, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.750948,0.915392,0.825056,0.755246
2,0.984600,0.69277,"{'precision': 0.796678765297273, 'recall': 0.9762270240484906, 'f1': 0.8773611602405378, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.796679,0.909634,0.849418,0.80141
3,0.984600,0.628235,"{'precision': 0.8984942570721214, 'recall': 0.8713346715472114, 'f1': 0.884706070415218, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.898494,0.811897,0.853004,0.811146
4,0.581000,0.57984,"{'precision': 0.8786852293031566, 'recall': 0.9290746644625497, 'f1': 0.9031776702186682, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.878685,0.865698,0.872144,0.836954
5,0.581000,0.58173,"{'precision': 0.8437489144405461, 'recall': 0.9559963789506829, 'f1': 0.8963722921356608, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.84372,0.890784,0.866613,0.831701
6,0.461800,0.563691,"{'precision': 0.8604154809334092, 'recall': 0.9520210965481953, 'f1': 0.9039032866832339, 'number': 25407}","{'precision': 0.125, 'recall': 0.00129366106080207, 'f1': 0.0025608194622279133, 'number': 773}","{'precision': 0.5882352941176471, 'recall': 0.00919963201471941, 'f1': 0.018115942028985504, 'number': 1087}",0.860042,0.887483,0.873547,0.842864
7,0.461800,0.57106,"{'precision': 0.885239755558055, 'recall': 0.9293501790845042, 'f1': 0.9067588325652842, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.7122302158273381, 'recall': 0.09107635694572216, 'f1': 0.16150081566068517, 'number': 1087}",0.883914,0.869586,0.876692,0.845776
8,0.377800,0.578216,"{'precision': 0.8779059057583908, 'recall': 0.9378911323650962, 'f1': 0.9069077069457661, 'number': 25407}","{'precision': 0.2, 'recall': 0.009055627425614488, 'f1': 0.017326732673267325, 'number': 773}","{'precision': 0.601593625498008, 'recall': 0.1389144434222631, 'f1': 0.2257100149476831, 'number': 1087}",0.874512,0.879708,0.877103,0.846433
9,0.377800,0.625349,"{'precision': 0.8630107832009081, 'recall': 0.9576101074507025, 'f1': 0.9078527584469859, 'number': 25407}","{'precision': 0.2727272727272727, 'recall': 0.007761966364812419, 'f1': 0.01509433962264151, 'number': 773}","{'precision': 0.5927272727272728, 'recall': 0.1499540018399264, 'f1': 0.23935389133627025, 'number': 1087}",0.859946,0.898485,0.878793,0.847403
10,0.315500,0.575456,"{'precision': 0.8923216034963454, 'recall': 0.9321840437674657, 'f1': 0.9118173593331921, 'number': 25407}","{'precision': 0.16666666666666666, 'recall': 0.031047865459249677, 'f1': 0.05234460196292257, 'number': 773}","{'precision': 0.40589569160997735, 'recall': 0.32934682612695493, 'f1': 0.3636363636363637, 'number': 1087}",0.872969,0.882605,0.877761,0.846433


model.safetensors:   0%|          | 0.00/135M [00:00<?, ?B/s]

{'eval_loss': 0.47220128774642944, 'eval_LOC': {'precision': 0.9475890985324947, 'recall': 0.9094567404426559, 'f1': 0.9281314168377824, 'number': 497}, 'eval_ORG': {'precision': 0.4, 'recall': 0.2222222222222222, 'f1': 0.2857142857142857, 'number': 9}, 'eval_PER': {'precision': 1.0, 'recall': 0.09090909090909091, 'f1': 0.16666666666666669, 'number': 11}, 'eval_precision': 0.9420289855072463, 'eval_recall': 0.8800773694390716, 'eval_f1': 0.9099999999999999, 'eval_accuracy': 0.8861940298507462, 'eval_runtime': 0.0976, 'eval_samples_per_second': 522.305, 'eval_steps_per_second': 20.483, 'epoch': 20.0}


In [None]:
###############################################################################
# Indic‑NER fine‑tuning on Naamapadam (+ optional custom JSON)
###############################################################################
import argparse, random, json, os, itertools, logging, collections
import numpy as np, torch
from datasets import load_dataset, concatenate_datasets, DatasetDict, Sequence, Value
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)

# ──────────────────────────────────────────────────────────────
# 0.  LOGGING
# ──────────────────────────────────────────────────────────────
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)
log = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# 1.  ARGUMENTS & SEEDING
# ──────────────────────────────────────────────────────────────
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="ai4bharat/indic-bert")
parser.add_argument("--languages", nargs="+", default=["as"])
parser.add_argument("--custom_data_path", type=str, default=None,
                    help="Path to your extra JSON file (optional)")
parser.add_argument("--output_dir", type=str,
                    default="./indicner-finetuned-naamapadam")
parser.add_argument("--num_train_epochs", type=int, default=5)
parser.add_argument("--per_device_train_batch_size", type=int, default=32)
parser.add_argument("--per_device_eval_batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=3e-5)
parser.add_argument("--weight_decay",  type=float, default=0.01)
parser.add_argument("--warmup_steps",  type=int, default=500)
parser.add_argument("--seed", type=int, default=42)
args, _ = parser.parse_known_args()

np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    log.info("CUDA device: %s", torch.cuda.get_device_name(0))
else:
    log.info("CUDA not available – falling back to CPU")

lang = args.languages[0]          # single‑language run

# ──────────────────────────────────────────────────────────────
# 2.  CANONICAL LABEL SET  (Naamapadam has exactly 7)
# ──────────────────────────────────────────────────────────────
label_list = ["B-LOC","B-ORG","B-PER","I-LOC","I-ORG","I-PER","O"]
id2label   = {i:l for i,l in enumerate(label_list)}  # type: ignore
label2id   = {l:i for i,l in id2label.items()}
num_labels = len(label_list)
log.info("Canonical labels: %s", id2label)

# Helper – map *any* tag outside this list to “O”
def normalise_tag(tag: str) -> str:
    tag = str(tag)
    if tag in ("B-PERSON", "I-PERSON"):         # your PERSON alias
        tag = tag.replace("PERSON", "PER")
    return tag if tag in label2id else "O"

# ──────────────────────────────────────────────────────────────
# 3.  LOAD NAAMAPADAM (train / test)
# ──────────────────────────────────────────────────────────────
train_ref = load_dataset("ai4bharat/naamapadam", lang, split="train")
test_ref  = load_dataset("ai4bharat/naamapadam", lang, split="test")

# NEW  ➜  make their `ner_tags` column a plain int sequence
int_seq = Sequence(Value("int64"))
train_ref = train_ref.cast_column("ner_tags", int_seq)
test_ref  = test_ref.cast_column("ner_tags", int_seq)

# ──────────────────────────────────────────────────────────────
# 4.  OPTIONAL: LOAD + CLEAN YOUR CUSTOM JSON
# ──────────────────────────────────────────────────────────────
if args.custom_data_path:
    log.info("Loading custom JSON: %s", args.custom_data_path)
    custom = load_dataset("json", data_files=args.custom_data_path,
                          split="train")

    # (a) ensure tokens / ner_tags length match
    broken = [i for i, ex in enumerate(custom)
              if len(ex["tokens"]) != len(ex["ner_tags"])]
    if broken:
        log.warning("⚠️  %d rows have mismatching lengths – they’ll be dropped",
                    len(broken))
        custom = custom.select([i for i in range(len(custom)) if i not in broken])

    # (b) normalise & map to ints
    def _clean(batch):
        out = []
        for tags in batch["ner_tags"]:
            out.append([label2id[normalise_tag(t)] for t in tags])
        batch["ner_tags"] = out
        return batch

    custom = custom.map(_clean, batched=True)
    log.info("Custom set after cleaning: %d sentences", len(custom))
    train_all = concatenate_datasets([train_ref, custom])
else:
    train_all = train_ref

# quick label‑distribution print‑out
def label_hist(ds, name):
    flat = list(itertools.chain.from_iterable(ds["ner_tags"]))
    c = collections.Counter(flat)
    log.info("%s label distribution: %s",
             name, {id2label[k]: v for k,v in c.items()})
label_hist(train_all, "TRAIN")
label_hist(test_ref,  "TEST ")

# 5. TRAIN / DEV SPLIT ---------------------------------------------------------
split = train_all.train_test_split(train_size=0.75, seed=args.seed)
train_ds, dev_ds = split["train"], split["test"]
log.info("Train %d  |  Dev %d  |  Test %d", len(train_ds), len(dev_ds), len(test_ref))

# 6. TOKENISATION + LABEL ALIGNMENT -------------------------------------------
tok = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
def align(batch):
    enc = tok(batch["tokens"], is_split_into_words=True,
              truncation=True, max_length=512)
    new_labels = []
    for i, seq in enumerate(batch["ner_tags"]):
        word_ids = enc.word_ids(batch_index=i)
        prev = None
        aligned = []
        for w in word_ids:
            if w is None:
                aligned.append(-100)
            elif w != prev:
                aligned.append(seq[w])
            else:
                aligned.append(-100)
            prev = w
        new_labels.append(aligned)
    enc["labels"] = new_labels
    return enc

train_ds = train_ds.map(align, batched=True, remove_columns=["ner_tags"])
dev_ds   = dev_ds  .map(align, batched=True, remove_columns=["ner_tags"])
test_ref = test_ref.map(align, batched=True, remove_columns=["ner_tags"])

# 7. MODEL, METRICS, TRAINER ---------------------------------------------------
metric = evaluate.load("seqeval")
def compute_metrics(p):
    preds, labs = p
    preds = np.argmax(preds, axis=2)
    true_preds, true_labs = [], []
    for pr, lb in zip(preds, labs):
        pr_l, lb_l = [], []
        for p_i, l_i in zip(pr, lb):
            if l_i != -100:
                pr_l.append(id2label[p_i])
                lb_l.append(id2label[l_i])
        true_preds.append(pr_l)
        true_labs.append(lb_l)
    res = metric.compute(predictions=true_preds, references=true_labs,
                         zero_division=0)
    return {k.replace("overall_", ""): v for k,v in res.items()}

model = AutoModelForTokenClassification.from_pretrained(
    args.model_name, num_labels=num_labels,
    id2label=id2label, label2id=label2id)

train_args = TrainingArguments(
    output_dir     = args.output_dir,
    eval_strategy="epoch",           # ← one eval *after* each epoch
    save_strategy  ="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    num_train_epochs=args.num_train_epochs,
    per_device_train_batch_size=args.per_device_train_batch_size,
    per_device_eval_batch_size=args.per_device_eval_batch_size,
    learning_rate = args.learning_rate,
    weight_decay  = args.weight_decay,
    warmup_steps  = args.warmup_steps,
    seed = args.seed,
    fp16 = torch.cuda.is_available(),
    report_to="none")

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_ds,
    eval_dataset =dev_ds,
    tokenizer=tok,
    data_collator=DataCollatorForTokenClassification(tok),
    compute_metrics=compute_metrics)

# 8. TRAIN & FINAL EVALUATION --------------------------------------------------
log.info("⏳  Starting fine‑tuning …")
trainer.train()
log.info("✅  Finished training.  Best dev‑set F1: %.4f",
         trainer.state.best_metric or -1)

log.info("🏁  Test‑set metrics:")
print(trainer.evaluate(eval_dataset=test_ref))


Map:   0%|          | 0/2567 [00:00<?, ? examples/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,No log,0.390798,"{'precision': 0.9141859544292862, 'recall': 0.9885563037639641, 'f1': 0.9499177139437462, 'number': 25691}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 523}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 704}",0.914186,0.943495,0.928609,0.917318
2,No log,0.320645,"{'precision': 0.9175696312995418, 'recall': 0.9822505935930871, 'f1': 0.9488090538228714, 'number': 25691}","{'precision': 0.23076923076923078, 'recall': 0.0057361376673040155, 'f1': 0.011194029850746268, 'number': 523}","{'precision': 0.9090909090909091, 'recall': 0.014204545454545454, 'f1': 0.027972027972027972, 'number': 704}",0.917242,0.93796,0.927485,0.919045
3,0.544400,0.267844,"{'precision': 0.9242165034761403, 'recall': 0.9883227589428204, 'f1': 0.9551952449025657, 'number': 25691}","{'precision': 0.48, 'recall': 0.045889101338432124, 'f1': 0.08376963350785341, 'number': 523}","{'precision': 0.8434782608695652, 'recall': 0.1377840909090909, 'f1': 0.23687423687423687, 'number': 704}",0.923077,0.947767,0.935259,0.926245
4,0.544400,0.249893,"{'precision': 0.951655881233346, 'recall': 0.9731034214316298, 'f1': 0.9622601566559535, 'number': 25691}","{'precision': 0.4293193717277487, 'recall': 0.3135755258126195, 'f1': 0.36243093922651937, 'number': 523}","{'precision': 0.5741029641185648, 'recall': 0.5227272727272727, 'f1': 0.5472118959107807, 'number': 704}",0.935478,0.94851,0.941949,0.933228
5,0.236200,0.245809,"{'precision': 0.9505841430677908, 'recall': 0.9786306488653614, 'f1': 0.9644035289604909, 'number': 25691}","{'precision': 0.5506756756756757, 'recall': 0.31166347992351817, 'f1': 0.398046398046398, 'number': 523}","{'precision': 0.5968503937007874, 'recall': 0.5383522727272727, 'f1': 0.5660941000746826, 'number': 704}",0.938057,0.954157,0.946039,0.937907


{'eval_loss': 0.24383287131786346, 'eval_LOC': {'precision': 0.9393346379647749, 'recall': 0.96579476861167, 'f1': 0.9523809523809522, 'number': 497}, 'eval_ORG': {'precision': 0.16666666666666666, 'recall': 0.1111111111111111, 'f1': 0.13333333333333333, 'number': 9}, 'eval_PER': {'precision': 0.3333333333333333, 'recall': 0.2727272727272727, 'f1': 0.3, 'number': 11}, 'eval_precision': 0.9201520912547528, 'eval_recall': 0.9361702127659575, 'eval_f1': 0.9280920421860019, 'eval_accuracy': 0.9291044776119403, 'eval_runtime': 0.0887, 'eval_samples_per_second': 574.886, 'eval_steps_per_second': 22.545, 'epoch': 5.0}


In [3]:
###############################################################################
# Indic-NER fine-tuning on Naamapadam (+ optional custom JSON)
# with hyperparameter tuning
###############################################################################
import argparse, random, json, os, itertools, logging, collections
import numpy as np, torch
from datasets import load_dataset, concatenate_datasets, DatasetDict, Sequence, Value
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)

# ──────────────────────────────────────────────────────────────
# 0.  LOGGING
# ──────────────────────────────────────────────────────────────
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)
log = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# 1.  ARGUMENTS & SEEDING
# ──────────────────────────────────────────────────────────────
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="ai4bharat/indic-bert")
parser.add_argument("--languages", nargs="+", default=["as"])
parser.add_argument("--custom_data_path", type=str, default="/content/drive/MyDrive/Yash_final_btp/naamapadam_proj/0.6/0.4/naamapadam_assamese.json",
                    help="Path to your extra JSON file (optional)")
parser.add_argument("--output_dir", type=str,
                    default="./indicner-finetuned-naamapadam")
parser.add_argument("--num_train_epochs", type=int, default=10)
parser.add_argument("--per_device_train_batch_size", type=int, default=32)
parser.add_argument("--per_device_eval_batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--weight_decay",  type=float, default=0.01)
parser.add_argument("--warmup_steps",  type=int, default=500)
parser.add_argument("--seed", type=int, default=42)
args, _ = parser.parse_known_args()

np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    log.info("CUDA device: %s", torch.cuda.get_device_name(0))
else:
    log.info("CUDA not available – falling back to CPU")

lang = args.languages[0]

# ──────────────────────────────────────────────────────────────
# 2.  CANONICAL LABEL SET  (Naamapadam has exactly 7)
# ──────────────────────────────────────────────────────────────
label_list = ["B-LOC","B-ORG","B-PER","I-LOC","I-ORG","I-PER","O"]
id2label   = {i:l for i,l in enumerate(label_list)}  # type: ignore
label2id   = {l:i for i,l in id2label.items()}
num_labels = len(label_list)
log.info("Canonical labels: %s", id2label)

# Helper – map *any* tag outside this list to “O”
def normalise_tag(tag: str) -> str:
    tag = str(tag)
    if tag in ("B-PERSON", "I-PERSON"):         # your PERSON alias
        tag = tag.replace("PERSON", "PER")
    return tag if tag in label2id else "O"

# ──────────────────────────────────────────────────────────────
# 3.  LOAD NAAMAPADAM (train / test)
# ──────────────────────────────────────────────────────────────
train_ref = load_dataset("ai4bharat/naamapadam", lang, split="train")
test_ref  = load_dataset("ai4bharat/naamapadam", lang, split="test")

# NEW  ➜  make their `ner_tags` column a plain int sequence
int_seq = Sequence(Value("int64"))
train_ref = train_ref.cast_column("ner_tags", int_seq)
test_ref  = test_ref.cast_column("ner_tags", int_seq)

# ──────────────────────────────────────────────────────────────
# 4.  OPTIONAL: LOAD + CLEAN YOUR CUSTOM JSON
# ──────────────────────────────────────────────────────────────
if args.custom_data_path:
    log.info("Loading custom JSON: %s", args.custom_data_path)
    custom = load_dataset("json", data_files=args.custom_data_path,
                          split="train")

    # (a) ensure tokens / ner_tags length match
    broken = [i for i, ex in enumerate(custom)
              if len(ex["tokens"]) != len(ex["ner_tags"])]
    if broken:
        log.warning("⚠️  %d rows have mismatching lengths – they’ll be dropped",
                    len(broken))
        custom = custom.select([i for i in range(len(custom)) if i not in broken])

    # (b) normalise & map to ints
    def _clean(batch):
        out = []
        for tags in batch["ner_tags"]:
            out.append([label2id[normalise_tag(t)] for t in tags])
        batch["ner_tags"] = out
        return batch

    custom = custom.map(_clean, batched=True)
    log.info("Custom set after cleaning: %d sentences", len(custom))
    train_all = concatenate_datasets([train_ref, custom])
else:
    train_all = train_ref

# quick label-distribution print-out
def label_hist(ds, name):
    flat = list(itertools.chain.from_iterable(ds["ner_tags"]))
    c = collections.Counter(flat)
    log.info("%s label distribution: %s",
             name, {id2label[k]: v for k,v in c.items()})
label_hist(train_all, "TRAIN")
label_hist(test_ref,  "TEST ")

# ──────────────────────────────────────────────────────────────
# 5. TRAIN / DEV SPLIT
# ──────────────────────────────────────────────────────────────
split = train_all.train_test_split(train_size=0.75, seed=args.seed)
train_ds, dev_ds = split["train"], split["test"]
log.info("Train %d  |  Dev %d  |  Test %d",
         len(train_ds), len(dev_ds), len(test_ref))

# ──────────────────────────────────────────────────────────────
# 6. TOKENISATION + LABEL ALIGNMENT
# ──────────────────────────────────────────────────────────────
tok = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
def align(batch):
    enc = tok(batch["tokens"], is_split_into_words=True,
              truncation=True, max_length=512)
    new_labels = []
    for i, seq in enumerate(batch["ner_tags"]):
        word_ids = enc.word_ids(batch_index=i)
        prev = None
        aligned = []
        for w in word_ids:
            if w is None:
                aligned.append(-100)
            elif w != prev:
                aligned.append(seq[w])
            else:
                aligned.append(-100)
            prev = w
        new_labels.append(aligned)
    enc["labels"] = new_labels
    return enc

train_ds = train_ds.map(align, batched=True, remove_columns=["ner_tags"])
dev_ds   = dev_ds  .map(align, batched=True, remove_columns=["ner_tags"])
test_ref = test_ref.map(align, batched=True, remove_columns=["ner_tags"])

# ──────────────────────────────────────────────────────────────
# 7. MODEL, METRICS, TRAINER
# ──────────────────────────────────────────────────────────────
metric = evaluate.load("seqeval")
def compute_metrics(p):
    preds, labs = p
    preds = np.argmax(preds, axis=2)
    true_preds, true_labs = [], []
    for pr, lb in zip(preds, labs):
        pr_l, lb_l = [], []
        for p_i, l_i in zip(pr, lb):
            if l_i != -100:
                pr_l.append(id2label[p_i])
                lb_l.append(id2label[l_i])
        true_preds.append(pr_l)
        true_labs.append(lb_l)
    res = metric.compute(predictions=true_preds, references=true_labs,
                         zero_division=0)
    return {k.replace("overall_", ""): v for k,v in res.items()}

def model_init():
    return AutoModelForTokenClassification.from_pretrained(
        args.model_name,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id
    )

train_args = TrainingArguments(
    output_dir     = args.output_dir,
    eval_strategy  ="epoch",
    save_strategy  ="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    num_train_epochs=args.num_train_epochs,
    per_device_train_batch_size=args.per_device_train_batch_size,
    per_device_eval_batch_size=args.per_device_eval_batch_size,
    learning_rate = args.learning_rate,
    weight_decay  = args.weight_decay,
    warmup_steps  = args.warmup_steps,
    seed = args.seed,
    fp16 = torch.cuda.is_available(),
    report_to="none"
)

trainer = Trainer(
    model_init=model_init,
    args=train_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    tokenizer=tok,
    data_collator=DataCollatorForTokenClassification(tok),
    compute_metrics=compute_metrics)

# ──────────────────────────────────────────────────────────────
# 7b.  OPTIONAL: HYPERPARAMETER TUNING (Optuna)
# ──────────────────────────────────────────────────────────────
best_run = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    n_trials=10,
    hp_space=lambda _: {
        "learning_rate": np.random.uniform(1e-6, 1e-4),
        "per_device_train_batch_size": np.random.choice([16, 32]),
        "weight_decay": np.random.uniform(0.0, 0.3),
    },
    compute_objective=lambda metrics: metrics["eval_f1"],
)
log.info("Best hyperparameters found: %s", best_run)

# ──────────────────────────────────────────────────────────────
# 8. TRAIN & FINAL EVALUATION
# ──────────────────────────────────────────────────────────────
log.info("⏳  Starting fine-tuning …")
trainer.train()
log.info("✅  Finished training.  Best dev-set F1: %.4f",
         trainer.state.best_metric or -1)

log.info("🏁  Test-set metrics:")
print(trainer.evaluate(eval_dataset=test_ref))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/8.65k [00:00<?, ?B/s]

naamapadam.py:   0%|          | 0.00/2.86k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/654k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/7.29k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10266 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/51 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/52 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/10266 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/51 [00:00<?, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1437 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/5.65M [00:00<?, ?B/s]

Map:   0%|          | 0/8777 [00:00<?, ? examples/s]

Map:   0%|          | 0/2926 [00:00<?, ? examples/s]

Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

  trainer = Trainer(


pytorch_model.bin:   0%|          | 0.00/135M [00:00<?, ?B/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-05-11 21:08:54,634] A new study created in memory with name: no-name-04a12f66-7021-4986-9be9-0a580320cafb


model.safetensors:   0%|          | 0.00/135M [00:00<?, ?B/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5293,0.620513,"{'precision': 0.8243934862080425, 'recall': 0.976345101743614, 'f1': 0.8939582319765033, 'number': 25407}","{'precision': 0.2602739726027397, 'recall': 0.02457956015523933, 'f1': 0.04491725768321513, 'number': 773}","{'precision': 0.5106382978723404, 'recall': 0.22079116835326587, 'f1': 0.30828516377649323, 'number': 1087}",0.818235,0.919243,0.865803,0.824935
3,0.4118,0.545496,"{'precision': 0.9221589032044929, 'recall': 0.8789310032668163, 'f1': 0.9000261975293715, 'number': 25407}","{'precision': 0.4909090909090909, 'recall': 0.10478654592496765, 'f1': 0.17270788912579957, 'number': 773}","{'precision': 0.643312101910828, 'recall': 0.27874885004599814, 'f1': 0.3889602053915276, 'number': 1087}",0.914011,0.833058,0.871659,0.836326
4,0.3369,0.507872,"{'precision': 0.9019488660107131, 'recall': 0.9344668792065179, 'f1': 0.917919969070172, 'number': 25407}","{'precision': 0.5714285714285714, 'recall': 0.12419146183699871, 'f1': 0.20403825717321997, 'number': 773}","{'precision': 0.7307692307692307, 'recall': 0.2621895124195032, 'f1': 0.38591740013540965, 'number': 1087}",0.8974,0.884696,0.891002,0.862449
5,0.2553,0.592992,"{'precision': 0.8817279155827502, 'recall': 0.9471799110481364, 'f1': 0.9132827324478179, 'number': 25407}","{'precision': 0.49557522123893805, 'recall': 0.1448900388098318, 'f1': 0.22422422422422417, 'number': 773}","{'precision': 0.7120181405895691, 'recall': 0.2888684452621895, 'f1': 0.4109947643979058, 'number': 1087}",0.87593,0.898192,0.886921,0.856254
6,0.2319,0.535721,"{'precision': 0.907690530288443, 'recall': 0.9276970913527768, 'f1': 0.9175847705064819, 'number': 25407}","{'precision': 0.44274809160305345, 'recall': 0.22509702457956016, 'f1': 0.2984562607204117, 'number': 773}","{'precision': 0.5520094562647754, 'recall': 0.4296228150873965, 'f1': 0.4831867563372995, 'number': 1087}",0.889914,0.887923,0.888917,0.860536
7,0.1825,0.60504,"{'precision': 0.9130552756027599, 'recall': 0.9271067028771598, 'f1': 0.9200273410799726, 'number': 25407}","{'precision': 0.38362068965517243, 'recall': 0.23027166882276842, 'f1': 0.2877930476960388, 'number': 773}","{'precision': 0.545, 'recall': 0.40110395584176634, 'f1': 0.46210916799152096, 'number': 1087}",0.893097,0.886383,0.889727,0.860993
8,0.1572,0.621727,"{'precision': 0.9041461647022924, 'recall': 0.9329712284016216, 'f1': 0.9183325584999225, 'number': 25407}","{'precision': 0.4168564920273349, 'recall': 0.23673997412677877, 'f1': 0.30198019801980197, 'number': 773}","{'precision': 0.5559400230680508, 'recall': 0.44342226310947563, 'f1': 0.49334698055271237, 'number': 1087}",0.885405,0.893718,0.889542,0.860194
9,0.1367,0.645352,"{'precision': 0.905265973708346, 'recall': 0.9323808399260046, 'f1': 0.9186233640329616, 'number': 25407}","{'precision': 0.3831967213114754, 'recall': 0.24191461836998707, 'f1': 0.2965900079302141, 'number': 773}","{'precision': 0.5577395577395577, 'recall': 0.41766329346826125, 'f1': 0.47764334560757493, 'number': 1087}",0.885693,0.892287,0.888978,0.86065
10,0.111,0.658066,"{'precision': 0.9052310042000764, 'recall': 0.9331286653284527, 'f1': 0.9189681570633952, 'number': 25407}","{'precision': 0.3802521008403361, 'recall': 0.23415265200517466, 'f1': 0.2898318654923939, 'number': 773}","{'precision': 0.5405092592592593, 'recall': 0.4296228150873965, 'f1': 0.4787288569964121, 'number': 1087}",0.884708,0.893241,0.888954,0.860393


[I 2025-05-11 21:15:49,657] Trial 0 finished with value: 0.8889537748416884 and parameters: {}. Best is trial 0 with value: 0.8889537748416884.
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5308,0.602813,"{'precision': 0.8299979921022689, 'recall': 0.9761876648167828, 'f1': 0.8971766535838954, 'number': 25407}","{'precision': 0.17647058823529413, 'recall': 0.015523932729624839, 'f1': 0.028537455410225922, 'number': 773}","{'precision': 0.5186104218362283, 'recall': 0.1922723091076357, 'f1': 0.28053691275167786, 'number': 1087}",0.8244,0.917703,0.868553,0.830331
3,0.421,0.500999,"{'precision': 0.9201368174044465, 'recall': 0.910575825559885, 'f1': 0.9153313550939663, 'number': 25407}","{'precision': 0.38309859154929576, 'recall': 0.1759379042690815, 'f1': 0.24113475177304963, 'number': 773}","{'precision': 0.6196660482374768, 'recall': 0.30726770929162833, 'f1': 0.4108241082410824, 'number': 1087}",0.906594,0.865698,0.885675,0.856825
4,0.3399,0.517468,"{'precision': 0.9159710190581194, 'recall': 0.9155744479867753, 'f1': 0.9157726905891385, 'number': 25407}","{'precision': 0.6722689075630253, 'recall': 0.1034928848641656, 'f1': 0.17937219730941703, 'number': 773}","{'precision': 0.7186700767263428, 'recall': 0.25850965961361544, 'f1': 0.38024357239512857, 'number': 1087}",0.911874,0.866359,0.888534,0.857538
5,0.2623,0.554863,"{'precision': 0.8941010296249489, 'recall': 0.9467469594993506, 'f1': 0.9196711909768687, 'number': 25407}","{'precision': 0.43729903536977494, 'recall': 0.1759379042690815, 'f1': 0.2509225092250923, 'number': 773}","{'precision': 0.6290050590219224, 'recall': 0.34314627414903404, 'f1': 0.444047619047619, 'number': 1087}",0.883339,0.900833,0.892,0.862877
6,0.2269,0.538479,"{'precision': 0.9098646628071925, 'recall': 0.9340732868894399, 'f1': 0.9218100602058652, 'number': 25407}","{'precision': 0.4787234042553192, 'recall': 0.23285899094437257, 'f1': 0.3133159268929504, 'number': 773}","{'precision': 0.6391304347826087, 'recall': 0.40570377184912604, 'f1': 0.4963421496904897, 'number': 1087}",0.897013,0.893131,0.895068,0.867245
7,0.1899,0.571037,"{'precision': 0.9049422457867828, 'recall': 0.9404888416578109, 'f1': 0.9223731953987493, 'number': 25407}","{'precision': 0.43703703703703706, 'recall': 0.22897800776196636, 'f1': 0.300509337860781, 'number': 773}","{'precision': 0.5347222222222222, 'recall': 0.4250229990800368, 'f1': 0.47360328036904153, 'number': 1087}",0.886536,0.899769,0.893104,0.863734
8,0.1663,0.597636,"{'precision': 0.9011488211443159, 'recall': 0.944739638682253, 'f1': 0.9224295294275888, 'number': 25407}","{'precision': 0.45036319612590797, 'recall': 0.240620957309185, 'f1': 0.3136593591905565, 'number': 773}","{'precision': 0.5531674208144797, 'recall': 0.44986200551977923, 'f1': 0.4961948249619483, 'number': 1087}",0.883471,0.90505,0.89413,0.86636
9,0.1398,0.618357,"{'precision': 0.9162882757014786, 'recall': 0.9292714606210887, 'f1': 0.9227342009614258, 'number': 25407}","{'precision': 0.39920948616600793, 'recall': 0.2613195342820181, 'f1': 0.3158717748240813, 'number': 773}","{'precision': 0.5638051044083526, 'recall': 0.44710211591536336, 'f1': 0.4987172909184197, 'number': 1087}",0.895449,0.891114,0.893276,0.864704
10,0.1173,0.620726,"{'precision': 0.9135525756463269, 'recall': 0.9304522375723225, 'f1': 0.9219249668512597, 'number': 25407}","{'precision': 0.3992248062015504, 'recall': 0.2664941785252264, 'f1': 0.31962761830876646, 'number': 773}","{'precision': 0.54627539503386, 'recall': 0.4452621895124195, 'f1': 0.4906234161175875, 'number': 1087}",0.891895,0.892287,0.892091,0.864048


[I 2025-05-11 21:22:48,832] Trial 1 finished with value: 0.892091079089209 and parameters: {}. Best is trial 1 with value: 0.892091079089209.
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5308,0.6123,"{'precision': 0.8232627399073461, 'recall': 0.9792183256582832, 'f1': 0.8944936811260719, 'number': 25407}","{'precision': 0.19444444444444445, 'recall': 0.018111254851228976, 'f1': 0.033136094674556214, 'number': 773}","{'precision': 0.4864864864864865, 'recall': 0.19871205151793928, 'f1': 0.28216851730894843, 'number': 1087}",0.816925,0.920857,0.865783,0.825192
3,0.4197,0.498722,"{'precision': 0.9169577962494478, 'recall': 0.898768056047546, 'f1': 0.9077718147485588, 'number': 25407}","{'precision': 0.5207373271889401, 'recall': 0.1461836998706339, 'f1': 0.22828282828282828, 'number': 773}","{'precision': 0.6449814126394052, 'recall': 0.31922723091076355, 'f1': 0.4270769230769231, 'number': 1087}",0.907904,0.854329,0.880302,0.848203
4,0.3331,0.542443,"{'precision': 0.9129146688496472, 'recall': 0.9217538473648994, 'f1': 0.9173129651390521, 'number': 25407}","{'precision': 0.5806451612903226, 'recall': 0.11642949547218628, 'f1': 0.1939655172413793, 'number': 773}","{'precision': 0.6599496221662469, 'recall': 0.24103035878564857, 'f1': 0.353099730458221, 'number': 1087}",0.907117,0.871786,0.889101,0.858224
5,0.2497,0.521162,"{'precision': 0.9085556999770624, 'recall': 0.935411500767505, 'f1': 0.9217880345195385, 'number': 25407}","{'precision': 0.5247148288973384, 'recall': 0.17852522639068563, 'f1': 0.26640926640926643, 'number': 773}","{'precision': 0.6339144215530903, 'recall': 0.36798528058877644, 'f1': 0.46565774155995343, 'number': 1087}",0.898418,0.891334,0.894862,0.865104
6,0.2251,0.53577,"{'precision': 0.9020892616444511, 'recall': 0.9482819695359547, 'f1': 0.9246090377050753, 'number': 25407}","{'precision': 0.48286604361370716, 'recall': 0.20051746442432083, 'f1': 0.283363802559415, 'number': 773}","{'precision': 0.6218487394957983, 'recall': 0.40846366145354185, 'f1': 0.49305941143808985, 'number': 1087}",0.890026,0.905564,0.897728,0.870471
7,0.1771,0.589372,"{'precision': 0.9082924603627238, 'recall': 0.9402526862675641, 'f1': 0.9239962868414945, 'number': 25407}","{'precision': 0.4379746835443038, 'recall': 0.2238033635187581, 'f1': 0.2962328767123288, 'number': 773}","{'precision': 0.5615577889447236, 'recall': 0.41122355105795766, 'f1': 0.47477429633563467, 'number': 1087}",0.891496,0.898852,0.895159,0.868159
8,0.1604,0.599863,"{'precision': 0.9032233826733789, 'recall': 0.946274648718857, 'f1': 0.9242479577126381, 'number': 25407}","{'precision': 0.4288990825688073, 'recall': 0.24191461836998707, 'f1': 0.3093465674110835, 'number': 773}","{'precision': 0.5721212121212121, 'recall': 0.43422263109475623, 'f1': 0.493723849372385, 'number': 1087}",0.886007,0.905894,0.89584,0.869472
9,0.1366,0.622366,"{'precision': 0.9064930162325405, 'recall': 0.9451332309993309, 'f1': 0.9254099466250457, 'number': 25407}","{'precision': 0.4481132075471698, 'recall': 0.24579560155239327, 'f1': 0.3174603174603175, 'number': 773}","{'precision': 0.5725094577553594, 'recall': 0.41766329346826125, 'f1': 0.4829787234042553, 'number': 1087}",0.88992,0.90428,0.897042,0.870614
10,0.1121,0.634707,"{'precision': 0.9084552536576097, 'recall': 0.9409217932065966, 'f1': 0.92440354201307, 'number': 25407}","{'precision': 0.38747553816046965, 'recall': 0.25614489003880986, 'f1': 0.30841121495327106, 'number': 773}","{'precision': 0.5502283105022832, 'recall': 0.44342226310947563, 'f1': 0.4910850738665309, 'number': 1087}",0.887517,0.901676,0.894541,0.868444


[I 2025-05-11 21:29:55,821] Trial 2 finished with value: 0.8945405592242901 and parameters: {}. Best is trial 2 with value: 0.8945405592242901.
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8555,0.568183,"{'precision': 0.8299979579334287, 'recall': 0.959853583658047, 'f1': 0.8902151891803098, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.829998,0.894378,0.860986,0.822822
2,0.5292,0.598683,"{'precision': 0.8249176180807509, 'recall': 0.9754398394143347, 'f1': 0.8938863841298468, 'number': 25407}","{'precision': 0.1282051282051282, 'recall': 0.00646830530401035, 'f1': 0.012315270935960592, 'number': 773}","{'precision': 0.3988919667590028, 'recall': 0.26494940202391903, 'f1': 0.3184079601990049, 'number': 1087}",0.81405,0.919646,0.863632,0.822166
3,0.412,0.490568,"{'precision': 0.9194041867954912, 'recall': 0.8988861337426693, 'f1': 0.9090293947897387, 'number': 25407}","{'precision': 0.4797687861271676, 'recall': 0.1073738680465718, 'f1': 0.17547568710359407, 'number': 773}","{'precision': 0.7777777777777778, 'recall': 0.22539098436062557, 'f1': 0.3495007132667618, 'number': 1087}",0.91464,0.849598,0.88092,0.84806
4,0.334,0.530636,"{'precision': 0.8909468839315542, 'recall': 0.944739638682253, 'f1': 0.9170550928402231, 'number': 25407}","{'precision': 0.6513761467889908, 'recall': 0.09184993531694696, 'f1': 0.16099773242630386, 'number': 773}","{'precision': 0.7466666666666667, 'recall': 0.2575896964121435, 'f1': 0.3830369357045144, 'number': 1087}",0.888022,0.893168,0.890587,0.86145
5,0.2539,0.545108,"{'precision': 0.9132370167903163, 'recall': 0.9205337111819577, 'f1': 0.9168708469725778, 'number': 25407}","{'precision': 0.4851063829787234, 'recall': 0.14747736093143596, 'f1': 0.22619047619047616, 'number': 773}","{'precision': 0.6326530612244898, 'recall': 0.3137074517019319, 'f1': 0.4194341943419434, 'number': 1087}",0.903692,0.874427,0.888818,0.859309
6,0.2241,0.552942,"{'precision': 0.9090559655596556, 'recall': 0.9308458298894006, 'f1': 0.919821869591428, 'number': 25407}","{'precision': 0.40315315315315314, 'recall': 0.2315653298835705, 'f1': 0.29416598192276094, 'number': 773}","{'precision': 0.5714285714285714, 'recall': 0.39742410303587855, 'f1': 0.4688008681497558, 'number': 1087}",0.891424,0.889757,0.89059,0.864133
7,0.1769,0.573633,"{'precision': 0.908678607118149, 'recall': 0.9305309560357382, 'f1': 0.9194749635391346, 'number': 25407}","{'precision': 0.42755344418052255, 'recall': 0.23285899094437257, 'f1': 0.3015075376884422, 'number': 773}","{'precision': 0.576043068640646, 'recall': 0.3937442502299908, 'f1': 0.4677595628415301, 'number': 1087}",0.892135,0.889353,0.890742,0.86242
8,0.1603,0.593259,"{'precision': 0.9026148999962048, 'recall': 0.9360806077065376, 'f1': 0.9190432027204575, 'number': 25407}","{'precision': 0.423963133640553, 'recall': 0.23803363518758086, 'f1': 0.30488815244407624, 'number': 773}","{'precision': 0.5699614890885751, 'recall': 0.40846366145354185, 'f1': 0.4758842443729903, 'number': 1087}",0.885676,0.895258,0.890441,0.864362
9,0.1353,0.63557,"{'precision': 0.9014780182490009, 'recall': 0.94103987090172, 'f1': 0.9208342159486992, 'number': 25407}","{'precision': 0.4109014675052411, 'recall': 0.2535575679172057, 'f1': 0.3136, 'number': 773}","{'precision': 0.55627425614489, 'recall': 0.39558417663293466, 'f1': 0.46236559139784944, 'number': 1087}",0.883444,0.899806,0.89155,0.864248
10,0.1097,0.643016,"{'precision': 0.9041843541540328, 'recall': 0.9389538316212067, 'f1': 0.9212411422834083, 'number': 25407}","{'precision': 0.40160642570281124, 'recall': 0.258732212160414, 'f1': 0.31471282454760036, 'number': 773}","{'precision': 0.5397796817625459, 'recall': 0.40570377184912604, 'f1': 0.4632352941176471, 'number': 1087}",0.8844,0.898412,0.891351,0.864676


[I 2025-05-11 21:36:57,506] Trial 3 finished with value: 0.8913510169923227 and parameters: {}. Best is trial 2 with value: 0.8945405592242901.
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8576,0.564415,"{'precision': 0.8638297872340426, 'recall': 0.9268311882552053, 'f1': 0.8942221884671615, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.86383,0.863608,0.863719,0.827533
2,0.5346,0.694237,"{'precision': 0.7966549856642243, 'recall': 0.9842563073168812, 'f1': 0.880574678240048, 'number': 25407}","{'precision': 0.38461538461538464, 'recall': 0.0129366106080207, 'f1': 0.02503128911138924, 'number': 773}","{'precision': 0.5076586433260394, 'recall': 0.21343146274149033, 'f1': 0.3005181347150259, 'number': 1087}",0.792175,0.925991,0.853872,0.803123
3,0.423,0.486911,"{'precision': 0.9133526677525724, 'recall': 0.9048687369622544, 'f1': 0.9090909090909091, 'number': 25407}","{'precision': 0.41762452107279696, 'recall': 0.1410090556274256, 'f1': 0.21083172147001933, 'number': 773}","{'precision': 0.5482695810564663, 'recall': 0.2769089236430543, 'f1': 0.3679706601466993, 'number': 1087}",0.900658,0.85818,0.878906,0.850201
4,0.3358,0.541509,"{'precision': 0.9122759051673632, 'recall': 0.919313574999016, 'f1': 0.9157812193687512, 'number': 25407}","{'precision': 0.5443786982248521, 'recall': 0.11901681759379043, 'f1': 0.19532908704883228, 'number': 773}","{'precision': 0.7092731829573935, 'recall': 0.26034958601655933, 'f1': 0.3808882907133243, 'number': 1087}",0.906805,0.870356,0.888207,0.856625
5,0.2628,0.523026,"{'precision': 0.8988335576491701, 'recall': 0.946274648718857, 'f1': 0.9219442047742308, 'number': 25407}","{'precision': 0.4375, 'recall': 0.1539456662354463, 'f1': 0.22775119617224876, 'number': 773}","{'precision': 0.6991150442477876, 'recall': 0.2907083716651334, 'f1': 0.41065627030539315, 'number': 1087}",0.89098,0.897679,0.894317,0.865104
6,0.2332,0.546101,"{'precision': 0.8994764397905759, 'recall': 0.9466682410359349, 'f1': 0.9224691736820909, 'number': 25407}","{'precision': 0.44416243654822335, 'recall': 0.22639068564036222, 'f1': 0.29991431019708653, 'number': 773}","{'precision': 0.6135734072022161, 'recall': 0.40754369825206993, 'f1': 0.4897733554449973, 'number': 1087}",0.885626,0.904757,0.895089,0.867217
7,0.1809,0.559697,"{'precision': 0.9215824796891557, 'recall': 0.9241941197307828, 'f1': 0.9228864520693315, 'number': 25407}","{'precision': 0.36899563318777295, 'recall': 0.2186287192755498, 'f1': 0.27457351746547526, 'number': 773}","{'precision': 0.559322033898305, 'recall': 0.4250229990800368, 'f1': 0.4830109775222164, 'number': 1087}",0.900945,0.884292,0.892541,0.864619
8,0.1568,0.600604,"{'precision': 0.9144820663294854, 'recall': 0.9322627622308812, 'f1': 0.9232868168706634, 'number': 25407}","{'precision': 0.43641618497109824, 'recall': 0.19534282018111254, 'f1': 0.26988382484361034, 'number': 773}","{'precision': 0.564935064935065, 'recall': 0.40018399264029436, 'f1': 0.4684975767366721, 'number': 1087}",0.898397,0.89016,0.89426,0.866332
9,0.1365,0.625661,"{'precision': 0.9048070721250522, 'recall': 0.9386389577675444, 'f1': 0.9214125647167916, 'number': 25407}","{'precision': 0.39555555555555555, 'recall': 0.23027166882276842, 'f1': 0.2910874897792314, 'number': 773}","{'precision': 0.5421115065243179, 'recall': 0.4204231830726771, 'f1': 0.4735751295336788, 'number': 1087}",0.885461,0.897899,0.891636,0.864362
10,0.1101,0.638523,"{'precision': 0.9094994814274191, 'recall': 0.931908529145511, 'f1': 0.9205676516329705, 'number': 25407}","{'precision': 0.37475728155339805, 'recall': 0.24967658473479948, 'f1': 0.2996894409937888, 'number': 773}","{'precision': 0.5362485615650172, 'recall': 0.42870285188592455, 'f1': 0.476482617586912, 'number': 1087}",0.887624,0.892507,0.890059,0.862592


[I 2025-05-11 21:43:58,298] Trial 4 finished with value: 0.8900592495062541 and parameters: {}. Best is trial 2 with value: 0.8945405592242901.
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.53,0.622141,"{'precision': 0.8184868421052631, 'recall': 0.9793364033534065, 'f1': 0.8917160929632484, 'number': 25407}","{'precision': 0.23333333333333334, 'recall': 0.018111254851228976, 'f1': 0.03361344537815125, 'number': 773}","{'precision': 0.49336283185840707, 'recall': 0.20515179392824287, 'f1': 0.2897985705003249, 'number': 1087}",0.812597,0.921223,0.863507,0.82051


[I 2025-05-11 21:45:20,713] Trial 5 pruned. 
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5308,0.602813,"{'precision': 0.8299979921022689, 'recall': 0.9761876648167828, 'f1': 0.8971766535838954, 'number': 25407}","{'precision': 0.17647058823529413, 'recall': 0.015523932729624839, 'f1': 0.028537455410225922, 'number': 773}","{'precision': 0.5186104218362283, 'recall': 0.1922723091076357, 'f1': 0.28053691275167786, 'number': 1087}",0.8244,0.917703,0.868553,0.830331
3,0.421,0.500999,"{'precision': 0.9201368174044465, 'recall': 0.910575825559885, 'f1': 0.9153313550939663, 'number': 25407}","{'precision': 0.38309859154929576, 'recall': 0.1759379042690815, 'f1': 0.24113475177304963, 'number': 773}","{'precision': 0.6196660482374768, 'recall': 0.30726770929162833, 'f1': 0.4108241082410824, 'number': 1087}",0.906594,0.865698,0.885675,0.856825
4,0.3399,0.519016,"{'precision': 0.9176470588235294, 'recall': 0.914865981816035, 'f1': 0.9162544099966494, 'number': 25407}","{'precision': 0.6557377049180327, 'recall': 0.1034928848641656, 'f1': 0.1787709497206704, 'number': 773}","{'precision': 0.7110552763819096, 'recall': 0.26034958601655933, 'f1': 0.3811447811447812, 'number': 1087}",0.91323,0.865772,0.888868,0.857824


[I 2025-05-11 21:48:08,954] Trial 6 pruned. 
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8556,0.568311,"{'precision': 0.8720532460771919, 'recall': 0.9230920612429645, 'f1': 0.8968470966138314, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.872021,0.860124,0.866032,0.828903
2,0.534,0.737639,"{'precision': 0.7953886818891118, 'recall': 0.9830361711339395, 'f1': 0.8793127728488945, 'number': 25407}","{'precision': 0.21428571428571427, 'recall': 0.0038809831824062097, 'f1': 0.007623888182973317, 'number': 773}","{'precision': 0.5141065830721003, 'recall': 0.15087396504139836, 'f1': 0.2332859174964438, 'number': 1087}",0.792305,0.922104,0.852291,0.80221
3,0.405,0.537472,"{'precision': 0.9245723172628305, 'recall': 0.8891644035108435, 'f1': 0.9065227423205795, 'number': 25407}","{'precision': 0.4236453201970443, 'recall': 0.111254851228978, 'f1': 0.1762295081967213, 'number': 773}","{'precision': 0.614190687361419, 'recall': 0.2548298068077277, 'f1': 0.3602080624187256, 'number': 1087}",0.914939,0.841823,0.87686,0.84315


[I 2025-05-11 21:50:18,747] Trial 7 pruned. 
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5308,0.602813,"{'precision': 0.8299979921022689, 'recall': 0.9761876648167828, 'f1': 0.8971766535838954, 'number': 25407}","{'precision': 0.17647058823529413, 'recall': 0.015523932729624839, 'f1': 0.028537455410225922, 'number': 773}","{'precision': 0.5186104218362283, 'recall': 0.1922723091076357, 'f1': 0.28053691275167786, 'number': 1087}",0.8244,0.917703,0.868553,0.830331
3,0.421,0.500999,"{'precision': 0.9201368174044465, 'recall': 0.910575825559885, 'f1': 0.9153313550939663, 'number': 25407}","{'precision': 0.38309859154929576, 'recall': 0.1759379042690815, 'f1': 0.24113475177304963, 'number': 773}","{'precision': 0.6196660482374768, 'recall': 0.30726770929162833, 'f1': 0.4108241082410824, 'number': 1087}",0.906594,0.865698,0.885675,0.856825
4,0.3399,0.509154,"{'precision': 0.9148492244705141, 'recall': 0.914669185657496, 'f1': 0.9147591962053968, 'number': 25407}","{'precision': 0.6929824561403509, 'recall': 0.10219922380336352, 'f1': 0.17812852311161217, 'number': 773}","{'precision': 0.743455497382199, 'recall': 0.2612695492180313, 'f1': 0.38665759019741325, 'number': 1087}",0.911345,0.865588,0.887877,0.855997


[I 2025-05-11 21:53:06,824] Trial 8 pruned. 
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8584,0.607546,"{'precision': 0.8004271015336828, 'recall': 0.9736686739874838, 'f1': 0.8785893133024346, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.4, 'recall': 0.0018399264029438822, 'f1': 0.003663003663003663, 'number': 1087}",0.800362,0.907324,0.850493,0.802981
2,0.5289,0.571531,"{'precision': 0.8449750901906888, 'recall': 0.9679615853898532, 'f1': 0.9022967420017611, 'number': 25407}","{'precision': 0.14736842105263157, 'recall': 0.018111254851228976, 'f1': 0.03225806451612903, 'number': 773}","{'precision': 0.3997308209959623, 'recall': 0.2732290708371665, 'f1': 0.32459016393442625, 'number': 1087}",0.831714,0.913338,0.870617,0.833586
3,0.4092,0.532064,"{'precision': 0.9220736876426475, 'recall': 0.8904632581572007, 'f1': 0.9059928318282842, 'number': 25407}","{'precision': 0.5229885057471264, 'recall': 0.11772315653298836, 'f1': 0.19218585005279828, 'number': 773}","{'precision': 0.636568848758465, 'recall': 0.2594296228150874, 'f1': 0.3686274509803922, 'number': 1087}",0.914285,0.8434,0.877413,0.842978


[I 2025-05-11 21:55:13,653] Trial 9 pruned. 
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,0.8441,0.652718,"{'precision': 0.7668900227454355, 'recall': 0.9820128311095367, 'f1': 0.8612208971194836, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.76689,0.915025,0.834434,0.774831
2,0.5419,0.616695,"{'precision': 0.8228768806669102, 'recall': 0.97516432479238, 'f1': 0.892571510915772, 'number': 25407}","{'precision': 0.11764705882352941, 'recall': 0.00517464424320828, 'f1': 0.009913258983890954, 'number': 773}","{'precision': 0.47161572052401746, 'recall': 0.19871205151793928, 'f1': 0.2796116504854369, 'number': 1087}",0.816836,0.916713,0.863897,0.822194
3,0.4228,0.538142,"{'precision': 0.9288302169658101, 'recall': 0.8778683040107057, 'f1': 0.9026305139619587, 'number': 25407}","{'precision': 0.4018264840182648, 'recall': 0.11384217335058215, 'f1': 0.1774193548387097, 'number': 773}","{'precision': 0.603448275862069, 'recall': 0.3219871205151794, 'f1': 0.4199160167966407, 'number': 1087}",0.916573,0.834048,0.873365,0.838496
4,0.3349,0.56064,"{'precision': 0.9061983629865887, 'recall': 0.9281694021332704, 'f1': 0.9170523041026638, 'number': 25407}","{'precision': 0.5158730158730159, 'recall': 0.08408796895213454, 'f1': 0.14460511679644047, 'number': 773}","{'precision': 0.7345679012345679, 'recall': 0.21895124195032198, 'f1': 0.3373493975903614, 'number': 1087}",0.90224,0.875967,0.88891,0.858424
5,0.2658,0.566423,"{'precision': 0.8924022346368715, 'recall': 0.9430865509505254, 'f1': 0.9170446064642058, 'number': 25407}","{'precision': 0.4358974358974359, 'recall': 0.1539456662354463, 'f1': 0.22753346080305925, 'number': 773}","{'precision': 0.6811320754716981, 'recall': 0.33210671573137074, 'f1': 0.44650587507730366, 'number': 1087}",0.883846,0.896358,0.890058,0.858509
6,0.2345,0.571079,"{'precision': 0.8915867158671587, 'recall': 0.9509977565237927, 'f1': 0.9203344315995963, 'number': 25407}","{'precision': 0.44126074498567336, 'recall': 0.19922380336351875, 'f1': 0.2745098039215686, 'number': 773}","{'precision': 0.5455621301775148, 'recall': 0.42410303587856485, 'f1': 0.4772256728778468, 'number': 1087}",0.875698,0.908681,0.891885,0.863334
7,0.1841,0.581591,"{'precision': 0.9, 'recall': 0.9454481048529932, 'f1': 0.922164424055128, 'number': 25407}","{'precision': 0.4564102564102564, 'recall': 0.23027166882276842, 'f1': 0.30610490111779876, 'number': 773}","{'precision': 0.5581683168316832, 'recall': 0.41490340386384544, 'f1': 0.47598944591029024, 'number': 1087}",0.883893,0.904023,0.893845,0.866417
8,0.1674,0.626232,"{'precision': 0.890546528803545, 'recall': 0.949187231865234, 'f1': 0.9189323070474594, 'number': 25407}","{'precision': 0.4376470588235294, 'recall': 0.240620957309185, 'f1': 0.31051752921535897, 'number': 773}","{'precision': 0.5606595995288575, 'recall': 0.43790248390064396, 'f1': 0.4917355371900826, 'number': 1087}",0.87388,0.908717,0.890958,0.860593
9,0.1429,0.631531,"{'precision': 0.8950543274634695, 'recall': 0.9402526862675641, 'f1': 0.9170969537593335, 'number': 25407}","{'precision': 0.39723320158102765, 'recall': 0.26002587322121606, 'f1': 0.3143080531665363, 'number': 773}","{'precision': 0.536036036036036, 'recall': 0.43790248390064396, 'f1': 0.48202531645569624, 'number': 1087}",0.874733,0.900943,0.887644,0.858224
10,0.1196,0.638251,"{'precision': 0.902084912812737, 'recall': 0.9366316369504467, 'f1': 0.9190337343348717, 'number': 25407}","{'precision': 0.390715667311412, 'recall': 0.2613195342820181, 'f1': 0.3131782945736434, 'number': 773}","{'precision': 0.5383734249713631, 'recall': 0.43238270469181234, 'f1': 0.47959183673469385, 'number': 1087}",0.881131,0.897385,0.889184,0.860536


{'eval_loss': 0.23289106786251068, 'eval_LOC': {'precision': 0.952286282306163, 'recall': 0.9637826961770624, 'f1': 0.9580000000000001, 'number': 497}, 'eval_ORG': {'precision': 0.3333333333333333, 'recall': 0.2222222222222222, 'f1': 0.26666666666666666, 'number': 9}, 'eval_PER': {'precision': 0.5555555555555556, 'recall': 0.45454545454545453, 'f1': 0.5, 'number': 11}, 'eval_precision': 0.9382239382239382, 'eval_recall': 0.9400386847195358, 'eval_f1': 0.9391304347826087, 'eval_accuracy': 0.9402985074626866, 'eval_runtime': 0.1032, 'eval_samples_per_second': 494.166, 'eval_steps_per_second': 19.379, 'epoch': 10.0}
