In [1]:
!pip install -U transformers datasets evaluate seqeval --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.8/194.8 kB[0m [31m9.9 MB/s[0m eta [3

In [6]:
###############################################################################
# Indic‑NER fine‑tuning on Naamapadam (+ optional custom JSON) – v2.1
# – minimal edits to your original “working” code
###############################################################################
import argparse, random, json, os, itertools, logging, collections
import numpy as np, torch
from datasets import load_dataset, concatenate_datasets, DatasetDict, Sequence, Value
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)

# ──────────────────────────────────────────────────────────────
# 0.  LOGGING
# ──────────────────────────────────────────────────────────────
# Root logger configuration: INFO and above, each record rendered as
# "HH:MM:SS LEVEL    message" (level name left-aligned in an 8-char field).
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)
# Logger used by every log call in the rest of this cell.
log = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# 1.  ARGUMENTS & SEEDING
# ──────────────────────────────────────────────────────────────
# Run configuration is declared as CLI options; the defaults below are the
# values actually used when no matching flags are present in argv.
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="ai4bharat/indic-bert")
# One or more language codes may be given, but only the first is used (see `lang`).
parser.add_argument("--languages", nargs="+", default=["as"])
# NOTE(review): the default is an absolute path (looks like a Colab Drive mount);
# it only resolves on a machine where that exact location exists.
parser.add_argument("--custom_data_path", type=str, default="/content/drive/MyDrive/naamapadam_proj/0.6/0.4/naamapadam_assamese.json",
                    help="Path to your extra JSON file (optional)")
parser.add_argument("--output_dir", type=str,
                    default="./indicner-finetuned-naamapadam")
parser.add_argument("--num_train_epochs", type=int, default=10)
parser.add_argument("--per_device_train_batch_size", type=int, default=32)
parser.add_argument("--per_device_eval_batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--weight_decay",  type=float, default=0.01)
parser.add_argument("--warmup_steps",  type=int, default=500)
parser.add_argument("--seed", type=int, default=42)
# parse_known_args() returns (namespace, leftovers) and does not fail on
# unrecognised argv entries; the leftovers are discarded here.
# Presumably this is so the kernel's own argv does not abort parsing.
args, _ = parser.parse_known_args()

# Seed every RNG this script touches: NumPy, Python's `random`, and torch
# (plus all CUDA devices when a GPU is present).
np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    log.info("CUDA device: %s", torch.cuda.get_device_name(0))
else:
    log.info("CUDA not available – falling back to CPU")

lang = args.languages[0]          # single-language run: only the first --languages entry is used

# ──────────────────────────────────────────────────────────────
# 2.  CANONICAL LABEL SET  (Naamapadam has exactly 7)
# ──────────────────────────────────────────────────────────────
# Tag inventory in id order: a tag's position in this list is its integer id.
label_list = [
    "B-LOC", "B-ORG", "B-PER",
    "I-LOC", "I-ORG", "I-PER",
    "O",
]
num_labels = len(label_list)
# Forward (id -> tag) and reverse (tag -> id) lookup tables.
id2label = dict(enumerate(label_list))
label2id = {tag: idx for idx, tag in enumerate(label_list)}
log.info("Canonical labels: %s", id2label)

# Helper – map *any* tag outside this list to “O”
def normalise_tag(tag: str) -> str:
    """Coerce a raw tag to the canonical label set, defaulting to ``"O"``.

    The input is stringified first.  ``B-PERSON`` / ``I-PERSON`` are rewritten
    to ``B-PER`` / ``I-PER``.  Any result that is not a key of ``label2id``
    is replaced by ``"O"``.
    """
    text = str(tag)
    if text == "B-PERSON" or text == "I-PERSON":
        # Keep the two-character "B-" / "I-" prefix, swap the entity name.
        text = text[:2] + "PER"
    if text in label2id:
        return text
    return "O"

# ──────────────────────────────────────────────────────────────
# 3.  LOAD NAAMAPADAM (train / test)  –  they’re already ints
# ──────────────────────────────────────────────────────────────
# Fetch the train and test splits of the Naamapadam config selected by `lang`.
train_ref = load_dataset("ai4bharat/naamapadam", lang, split="train")
test_ref  = load_dataset("ai4bharat/naamapadam", lang, split="test")

# Re-type `ner_tags` on both splits as a plain sequence of int64.
# Presumably this makes the column's feature type match the integer tags
# produced for the custom data before the two sets are concatenated — TODO confirm.
int_seq = Sequence(Value("int64"))
train_ref = train_ref.cast_column("ner_tags", int_seq)
test_ref  = test_ref.cast_column("ner_tags", int_seq)

# ──────────────────────────────────────────────────────────────
# 4.  OPTIONAL: LOAD + CLEAN YOUR CUSTOM JSON
# ──────────────────────────────────────────────────────────────
# Build `train_all`: the reference training split, optionally extended with
# the user's own JSON sentences (same `tokens` / `ner_tags` fields).
if args.custom_data_path:
    log.info("Loading custom JSON: %s", args.custom_data_path)
    custom = load_dataset("json", data_files=args.custom_data_path,
                          split="train")

    # (a) drop rows whose tokens / ner_tags lengths disagree.  The offending
    #     row indices are collected in a set so that the `not in` test below
    #     is O(1) per row rather than a linear scan of a list.
    broken = {i for i, ex in enumerate(custom)
              if len(ex["tokens"]) != len(ex["ner_tags"])}
    if broken:
        log.warning("⚠️  %d rows have mismatching lengths – they’ll be dropped",
                    len(broken))
        custom = custom.select([i for i in range(len(custom)) if i not in broken])

    # (b) normalise & map to ints
    def _clean(batch):
        """Replace every sentence's tags with canonical integer label ids."""
        batch["ner_tags"] = [
            [label2id[normalise_tag(t)] for t in tags]
            for tags in batch["ner_tags"]
        ]
        return batch

    custom = custom.map(_clean, batched=True)
    log.info("Custom set after cleaning: %d sentences", len(custom))
    # Custom sentences are appended after the reference training split.
    train_all = concatenate_datasets([train_ref, custom])
else:
    train_all = train_ref

# quick label‑distribution print‑out
def label_hist(ds, name):
    """Log how many tokens carry each label in ``ds["ner_tags"]``.

    ``ds["ner_tags"]`` is iterated as a collection of per-sentence id
    sequences.  Ids are translated to tag names through ``id2label`` and the
    resulting ``{tag: count}`` mapping is logged at INFO level, prefixed
    with ``name``.
    """
    counts = collections.Counter()
    for sentence_tags in ds["ner_tags"]:
        counts.update(sentence_tags)
    by_name = {id2label[tag_id]: n for tag_id, n in counts.items()}
    log.info("%s label distribution: %s", name, by_name)
# Log the label counts of the full training pool and of the test split.
label_hist(train_all, "TRAIN")
label_hist(test_ref,  "TEST ")

# 5. TRAIN / DEV SPLIT ---------------------------------------------------------
# Seeded 75 % / 25 % split of the training pool; the 25 % part is the dev set.
split = train_all.train_test_split(train_size=0.75, seed=args.seed)
train_ds, dev_ds = split["train"], split["test"]
log.info("Train %d  |  Dev %d  |  Test %d", len(train_ds), len(dev_ds), len(test_ref))

# 6. TOKENISATION + LABEL ALIGNMENT -------------------------------------------
# A fast tokenizer is requested; align() below calls `word_ids()` on its output.
tok = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
def align(batch):
    """Tokenise a batch of pre-split sentences and attach sub-token labels.

    ``batch["tokens"]`` is passed to ``tok`` as already-split words
    (truncated to 512 positions).  For each sentence a label list is built
    that parallels the encoded positions: the first sub-token of every word
    receives that word's entry from ``batch["ner_tags"]``, while positions
    with no word and the remaining sub-tokens of a word receive ``-100``.

    Returns the tokenizer output with an added ``"labels"`` entry.
    """
    enc = tok(batch["tokens"], is_split_into_words=True,
              truncation=True, max_length=512)

    def _labels_for(word_ids, tags):
        # Walk the positions once, remembering the previous word index so
        # only the position that opens a new word is labelled.
        out, last = [], None
        for wid in word_ids:
            opens_word = wid is not None and wid != last
            out.append(tags[wid] if opens_word else -100)
            last = wid
        return out

    enc["labels"] = [
        _labels_for(enc.word_ids(batch_index=row), tags)
        for row, tags in enumerate(batch["ner_tags"])
    ]
    return enc

# Tokenise + align all three splits in batches.  The raw `ner_tags` column
# is dropped afterwards; align() has written the model targets to `labels`.
train_ds = train_ds.map(align, batched=True, remove_columns=["ner_tags"])
dev_ds   = dev_ds  .map(align, batched=True, remove_columns=["ner_tags"])
test_ref = test_ref.map(align, batched=True, remove_columns=["ner_tags"])

# 7. MODEL, METRICS, TRAINER ---------------------------------------------------
# seqeval scorer used by compute_metrics() below.
metric = evaluate.load("seqeval")
def compute_metrics(p):
    """Score predictions with ``metric`` on the labelled positions only.

    ``p`` unpacks into ``(scores, label_ids)``.  The arg-max over axis 2 of
    ``scores`` gives the predicted id per position.  Positions whose gold
    label is ``-100`` are skipped; the remaining ids are mapped to tag names
    via ``id2label`` and handed to ``metric.compute`` with ``zero_division=0``.

    Returns the metric's result dict with the ``"overall_"`` substring
    removed from its keys.
    """
    logits, gold_ids = p
    pred_ids = np.argmax(logits, axis=2)

    pred_tags, gold_tags = [], []
    for pred_row, gold_row in zip(pred_ids, gold_ids):
        # Keep only (prediction, gold) pairs at labelled positions.
        kept = [(pr, gd) for pr, gd in zip(pred_row, gold_row) if gd != -100]
        pred_tags.append([id2label[pr] for pr, _ in kept])
        gold_tags.append([id2label[gd] for _, gd in kept])

    scores = metric.compute(predictions=pred_tags, references=gold_tags,
                            zero_division=0)
    return {key.replace("overall_", ""): val for key, val in scores.items()}

# Token-classification model loaded from the chosen checkpoint, sized to the
# canonical label set and carrying both id<->tag mappings in its config.
model = AutoModelForTokenClassification.from_pretrained(
    args.model_name, num_labels=num_labels,
    id2label=id2label, label2id=label2id)

# Training schedule: evaluate and checkpoint once per epoch, then reload the
# checkpoint with the best "f1" (the key compute_metrics() returns after
# stripping "overall_").  Hyper-parameters come straight from `args`.
train_args = TrainingArguments(
    output_dir     = args.output_dir,
    eval_strategy="epoch",           # ← one eval *after* each epoch
    save_strategy  ="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    num_train_epochs=args.num_train_epochs,
    per_device_train_batch_size=args.per_device_train_batch_size,
    per_device_eval_batch_size=args.per_device_eval_batch_size,
    learning_rate = args.learning_rate,
    weight_decay  = args.weight_decay,
    warmup_steps  = args.warmup_steps,
    seed = args.seed,
    # Mixed precision is switched on exactly when a CUDA device is available.
    fp16 = torch.cuda.is_available(),
    # No external experiment-tracking integration.
    report_to="none")

# Wire model, schedule, data and metrics together.  The tokenizer is passed
# as `processing_class`: the older `tokenizer=` keyword is deprecated in
# current transformers releases (it emits a FutureWarning) and this notebook
# installs transformers unpinned with `-U`.
# NOTE(review): `processing_class` needs a transformers version that already
# provides it; confirm if the install is ever pinned to an older release.
trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_ds,
    eval_dataset =dev_ds,
    processing_class=tok,
    # Pads inputs and labels per batch using the same tokenizer.
    data_collator=DataCollatorForTokenClassification(tok),
    compute_metrics=compute_metrics)

# 8. TRAIN & FINAL EVALUATION --------------------------------------------------
# Fine-tune, report the best dev-set score, then score the held-out test split.
log.info("⏳  Starting fine‑tuning …")
trainer.train()
# Fall back to -1 only when no best metric was recorded.  An explicit None
# check keeps a genuine score of 0.0 from being reported as -1.
log.info("✅  Finished training.  Best dev‑set F1: %.4f",
         -1 if trainer.state.best_metric is None else trainer.state.best_metric)

log.info("🏁  Test‑set metrics:")
print(trainer.evaluate(eval_dataset=test_ref))


Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,No log,0.75782,"{'precision': 0.756083517682354, 'recall': 0.9820128311095367, 'f1': 0.854364277642708, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.756084,0.915025,0.827996,0.761127
2,0.979900,0.735514,"{'precision': 0.7727244715597461, 'recall': 0.9870508127681348, 'f1': 0.8668360380912878, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.772724,0.91972,0.839839,0.78371
3,0.979900,0.662636,"{'precision': 0.9054802045041619, 'recall': 0.8434683354980911, 'f1': 0.873374903207401, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.90548,0.785932,0.841481,0.793987
4,0.578200,0.576259,"{'precision': 0.8744743637034305, 'recall': 0.933089306096745, 'f1': 0.9028314640973399, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.874474,0.869439,0.87195,0.836383
5,0.578200,0.569003,"{'precision': 0.8732072410869004, 'recall': 0.9321840437674657, 'f1': 0.901732343422806, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.873207,0.868596,0.870895,0.835327
6,0.473600,0.578521,"{'precision': 0.8633090948167783, 'recall': 0.948596843389617, 'f1': 0.9039456904958367, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1087}",0.863216,0.883889,0.87343,0.841636
7,0.473600,0.593578,"{'precision': 0.8692061413083024, 'recall': 0.9403314047309796, 'f1': 0.9033709564592669, 'number': 25407}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 773}","{'precision': 0.5806451612903226, 'recall': 0.01655933762649494, 'f1': 0.032200357781753126, 'number': 1087}",0.868629,0.876847,0.872719,0.839381
8,0.384800,0.600378,"{'precision': 0.8788919584857762, 'recall': 0.9265950328649585, 'f1': 0.9021133102140134, 'number': 25407}","{'precision': 0.06666666666666667, 'recall': 0.00129366106080207, 'f1': 0.0025380710659898475, 'number': 773}","{'precision': 0.6101694915254238, 'recall': 0.06623735050597976, 'f1': 0.11950207468879667, 'number': 1087}",0.877261,0.866065,0.871627,0.837354
9,0.384800,0.586606,"{'precision': 0.875160403299725, 'recall': 0.9395048608651159, 'f1': 0.9061918681902738, 'number': 25407}","{'precision': 0.05263157894736842, 'recall': 0.00129366106080207, 'f1': 0.0025252525252525255, 'number': 773}","{'precision': 0.5511111111111111, 'recall': 0.1140754369825207, 'f1': 0.1890243902439024, 'number': 1087}",0.871943,0.880001,0.875954,0.844491
10,0.338600,0.589701,"{'precision': 0.8769842731391109, 'recall': 0.9371826661943559, 'f1': 0.9060847064195745, 'number': 25407}","{'precision': 0.08, 'recall': 0.00258732212160414, 'f1': 0.005012531328320802, 'number': 773}","{'precision': 0.5714285714285714, 'recall': 0.12879484820607176, 'f1': 0.2102102102102102, 'number': 1087}",0.873528,0.878461,0.875987,0.843749


{'eval_loss': 0.3034144341945648, 'eval_LOC': {'precision': 0.9165085388994307, 'recall': 0.971830985915493, 'f1': 0.9433593750000001, 'number': 497}, 'eval_ORG': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 9}, 'eval_PER': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 11}, 'eval_precision': 0.9165085388994307, 'eval_recall': 0.9342359767891683, 'eval_f1': 0.925287356321839, 'eval_accuracy': 0.9123134328358209, 'eval_runtime': 0.1058, 'eval_samples_per_second': 482.035, 'eval_steps_per_second': 18.903, 'epoch': 10.0}


In [7]:
###############################################################################
# Indic‑NER fine‑tuning on Naamapadam (+ optional custom JSON) – v2.1
# – minimal edits to your original “working” code
###############################################################################
import argparse, random, json, os, itertools, logging, collections
import numpy as np, torch
from datasets import load_dataset, concatenate_datasets, DatasetDict, Sequence, Value
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)

# ──────────────────────────────────────────────────────────────
# 0.  LOGGING
# ──────────────────────────────────────────────────────────────
# Root logger configuration: INFO and above, each record rendered as
# "HH:MM:SS LEVEL    message" (level name left-aligned in an 8-char field).
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)
# Logger used by every log call in the rest of this cell.
log = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# 1.  ARGUMENTS & SEEDING
# ──────────────────────────────────────────────────────────────
# Run configuration is declared as CLI options; the defaults below are the
# values actually used when no matching flags are present in argv.
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="ai4bharat/indic-bert")
# One or more language codes may be given, but only the first is used (see `lang`).
parser.add_argument("--languages", nargs="+", default=["as"])
# Default None: the custom-data branch further down is skipped in this cell.
parser.add_argument("--custom_data_path", type=str, default=None,
                    help="Path to your extra JSON file (optional)")
parser.add_argument("--output_dir", type=str,
                    default="./indicner-finetuned-naamapadam")
parser.add_argument("--num_train_epochs", type=int, default=5)
parser.add_argument("--per_device_train_batch_size", type=int, default=32)
parser.add_argument("--per_device_eval_batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=3e-5)
parser.add_argument("--weight_decay",  type=float, default=0.01)
parser.add_argument("--warmup_steps",  type=int, default=500)
parser.add_argument("--seed", type=int, default=42)
# parse_known_args() returns (namespace, leftovers) and does not fail on
# unrecognised argv entries; the leftovers are discarded here.
# Presumably this is so the kernel's own argv does not abort parsing.
args, _ = parser.parse_known_args()

# Seed every RNG this script touches: NumPy, Python's `random`, and torch
# (plus all CUDA devices when a GPU is present).
np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    log.info("CUDA device: %s", torch.cuda.get_device_name(0))
else:
    log.info("CUDA not available – falling back to CPU")

lang = args.languages[0]          # single-language run: only the first --languages entry is used

# ──────────────────────────────────────────────────────────────
# 2.  CANONICAL LABEL SET  (Naamapadam has exactly 7)
# ──────────────────────────────────────────────────────────────
# Tag inventory in id order: a tag's position in this list is its integer id.
label_list = [
    "B-LOC", "B-ORG", "B-PER",
    "I-LOC", "I-ORG", "I-PER",
    "O",
]
num_labels = len(label_list)
# Forward (id -> tag) and reverse (tag -> id) lookup tables.
id2label = dict(enumerate(label_list))
label2id = {tag: idx for idx, tag in enumerate(label_list)}
log.info("Canonical labels: %s", id2label)

# Helper – map *any* tag outside this list to “O”
def normalise_tag(tag: str) -> str:
    """Coerce a raw tag to the canonical label set, defaulting to ``"O"``.

    The input is stringified first.  ``B-PERSON`` / ``I-PERSON`` are rewritten
    to ``B-PER`` / ``I-PER``.  Any result that is not a key of ``label2id``
    is replaced by ``"O"``.
    """
    text = str(tag)
    if text == "B-PERSON" or text == "I-PERSON":
        # Keep the two-character "B-" / "I-" prefix, swap the entity name.
        text = text[:2] + "PER"
    if text in label2id:
        return text
    return "O"

# ──────────────────────────────────────────────────────────────
# 3.  LOAD NAAMAPADAM (train / test)  –  they’re already ints
# ──────────────────────────────────────────────────────────────
# Fetch the train and test splits of the Naamapadam config selected by `lang`.
train_ref = load_dataset("ai4bharat/naamapadam", lang, split="train")
test_ref  = load_dataset("ai4bharat/naamapadam", lang, split="test")

# Re-type `ner_tags` on both splits as a plain sequence of int64.
# Presumably this makes the column's feature type match the integer tags
# produced for the custom data before the two sets are concatenated — TODO confirm.
int_seq = Sequence(Value("int64"))
train_ref = train_ref.cast_column("ner_tags", int_seq)
test_ref  = test_ref.cast_column("ner_tags", int_seq)

# ──────────────────────────────────────────────────────────────
# 4.  OPTIONAL: LOAD + CLEAN YOUR CUSTOM JSON
# ──────────────────────────────────────────────────────────────
# Build `train_all`: the reference training split, optionally extended with
# the user's own JSON sentences (same `tokens` / `ner_tags` fields).
if args.custom_data_path:
    log.info("Loading custom JSON: %s", args.custom_data_path)
    custom = load_dataset("json", data_files=args.custom_data_path,
                          split="train")

    # (a) drop rows whose tokens / ner_tags lengths disagree.  The offending
    #     row indices are collected in a set so that the `not in` test below
    #     is O(1) per row rather than a linear scan of a list.
    broken = {i for i, ex in enumerate(custom)
              if len(ex["tokens"]) != len(ex["ner_tags"])}
    if broken:
        log.warning("⚠️  %d rows have mismatching lengths – they’ll be dropped",
                    len(broken))
        custom = custom.select([i for i in range(len(custom)) if i not in broken])

    # (b) normalise & map to ints
    def _clean(batch):
        """Replace every sentence's tags with canonical integer label ids."""
        batch["ner_tags"] = [
            [label2id[normalise_tag(t)] for t in tags]
            for tags in batch["ner_tags"]
        ]
        return batch

    custom = custom.map(_clean, batched=True)
    log.info("Custom set after cleaning: %d sentences", len(custom))
    # Custom sentences are appended after the reference training split.
    train_all = concatenate_datasets([train_ref, custom])
else:
    train_all = train_ref

# quick label‑distribution print‑out
def label_hist(ds, name):
    """Log how many tokens carry each label in ``ds["ner_tags"]``.

    ``ds["ner_tags"]`` is iterated as a collection of per-sentence id
    sequences.  Ids are translated to tag names through ``id2label`` and the
    resulting ``{tag: count}`` mapping is logged at INFO level, prefixed
    with ``name``.
    """
    counts = collections.Counter()
    for sentence_tags in ds["ner_tags"]:
        counts.update(sentence_tags)
    by_name = {id2label[tag_id]: n for tag_id, n in counts.items()}
    log.info("%s label distribution: %s", name, by_name)
# Log the label counts of the full training pool and of the test split.
label_hist(train_all, "TRAIN")
label_hist(test_ref,  "TEST ")

# 5. TRAIN / DEV SPLIT ---------------------------------------------------------
# Seeded 75 % / 25 % split of the training pool; the 25 % part is the dev set.
split = train_all.train_test_split(train_size=0.75, seed=args.seed)
train_ds, dev_ds = split["train"], split["test"]
log.info("Train %d  |  Dev %d  |  Test %d", len(train_ds), len(dev_ds), len(test_ref))

# 6. TOKENISATION + LABEL ALIGNMENT -------------------------------------------
# A fast tokenizer is requested; align() below calls `word_ids()` on its output.
tok = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
def align(batch):
    """Tokenise a batch of pre-split sentences and attach sub-token labels.

    ``batch["tokens"]`` is passed to ``tok`` as already-split words
    (truncated to 512 positions).  For each sentence a label list is built
    that parallels the encoded positions: the first sub-token of every word
    receives that word's entry from ``batch["ner_tags"]``, while positions
    with no word and the remaining sub-tokens of a word receive ``-100``.

    Returns the tokenizer output with an added ``"labels"`` entry.
    """
    enc = tok(batch["tokens"], is_split_into_words=True,
              truncation=True, max_length=512)

    def _labels_for(word_ids, tags):
        # Walk the positions once, remembering the previous word index so
        # only the position that opens a new word is labelled.
        out, last = [], None
        for wid in word_ids:
            opens_word = wid is not None and wid != last
            out.append(tags[wid] if opens_word else -100)
            last = wid
        return out

    enc["labels"] = [
        _labels_for(enc.word_ids(batch_index=row), tags)
        for row, tags in enumerate(batch["ner_tags"])
    ]
    return enc

# Tokenise + align all three splits in batches.  The raw `ner_tags` column
# is dropped afterwards; align() has written the model targets to `labels`.
train_ds = train_ds.map(align, batched=True, remove_columns=["ner_tags"])
dev_ds   = dev_ds  .map(align, batched=True, remove_columns=["ner_tags"])
test_ref = test_ref.map(align, batched=True, remove_columns=["ner_tags"])

# 7. MODEL, METRICS, TRAINER ---------------------------------------------------
# seqeval scorer used by compute_metrics() below.
metric = evaluate.load("seqeval")
def compute_metrics(p):
    """Score predictions with ``metric`` on the labelled positions only.

    ``p`` unpacks into ``(scores, label_ids)``.  The arg-max over axis 2 of
    ``scores`` gives the predicted id per position.  Positions whose gold
    label is ``-100`` are skipped; the remaining ids are mapped to tag names
    via ``id2label`` and handed to ``metric.compute`` with ``zero_division=0``.

    Returns the metric's result dict with the ``"overall_"`` substring
    removed from its keys.
    """
    logits, gold_ids = p
    pred_ids = np.argmax(logits, axis=2)

    pred_tags, gold_tags = [], []
    for pred_row, gold_row in zip(pred_ids, gold_ids):
        # Keep only (prediction, gold) pairs at labelled positions.
        kept = [(pr, gd) for pr, gd in zip(pred_row, gold_row) if gd != -100]
        pred_tags.append([id2label[pr] for pr, _ in kept])
        gold_tags.append([id2label[gd] for _, gd in kept])

    scores = metric.compute(predictions=pred_tags, references=gold_tags,
                            zero_division=0)
    return {key.replace("overall_", ""): val for key, val in scores.items()}

# Token-classification model loaded from the chosen checkpoint, sized to the
# canonical label set and carrying both id<->tag mappings in its config.
model = AutoModelForTokenClassification.from_pretrained(
    args.model_name, num_labels=num_labels,
    id2label=id2label, label2id=label2id)

# Training schedule: evaluate and checkpoint once per epoch, then reload the
# checkpoint with the best "f1" (the key compute_metrics() returns after
# stripping "overall_").  Hyper-parameters come straight from `args`.
train_args = TrainingArguments(
    output_dir     = args.output_dir,
    eval_strategy="epoch",           # ← one eval *after* each epoch
    save_strategy  ="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    num_train_epochs=args.num_train_epochs,
    per_device_train_batch_size=args.per_device_train_batch_size,
    per_device_eval_batch_size=args.per_device_eval_batch_size,
    learning_rate = args.learning_rate,
    weight_decay  = args.weight_decay,
    warmup_steps  = args.warmup_steps,
    seed = args.seed,
    # Mixed precision is switched on exactly when a CUDA device is available.
    fp16 = torch.cuda.is_available(),
    # No external experiment-tracking integration.
    report_to="none")

# Wire model, schedule, data and metrics together.  The tokenizer is passed
# as `processing_class`: the older `tokenizer=` keyword is deprecated in
# current transformers releases (it emits a FutureWarning) and this notebook
# installs transformers unpinned with `-U`.
# NOTE(review): `processing_class` needs a transformers version that already
# provides it; confirm if the install is ever pinned to an older release.
trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_ds,
    eval_dataset =dev_ds,
    processing_class=tok,
    # Pads inputs and labels per batch using the same tokenizer.
    data_collator=DataCollatorForTokenClassification(tok),
    compute_metrics=compute_metrics)

# 8. TRAIN & FINAL EVALUATION --------------------------------------------------
# Fine-tune, report the best dev-set score, then score the held-out test split.
log.info("⏳  Starting fine‑tuning …")
trainer.train()
# Fall back to -1 only when no best metric was recorded.  An explicit None
# check keeps a genuine score of 0.0 from being reported as -1.
log.info("✅  Finished training.  Best dev‑set F1: %.4f",
         -1 if trainer.state.best_metric is None else trainer.state.best_metric)

log.info("🏁  Test‑set metrics:")
print(trainer.evaluate(eval_dataset=test_ref))


Map:   0%|          | 0/2567 [00:00<?, ? examples/s]

Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Loc,Org,Per,Precision,Recall,F1,Accuracy
1,No log,0.390798,"{'precision': 0.9141859544292862, 'recall': 0.9885563037639641, 'f1': 0.9499177139437462, 'number': 25691}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 523}","{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 704}",0.914186,0.943495,0.928609,0.917318
2,No log,0.320645,"{'precision': 0.9175696312995418, 'recall': 0.9822505935930871, 'f1': 0.9488090538228714, 'number': 25691}","{'precision': 0.23076923076923078, 'recall': 0.0057361376673040155, 'f1': 0.011194029850746268, 'number': 523}","{'precision': 0.9090909090909091, 'recall': 0.014204545454545454, 'f1': 0.027972027972027972, 'number': 704}",0.917242,0.93796,0.927485,0.919045
3,0.544400,0.267844,"{'precision': 0.9242165034761403, 'recall': 0.9883227589428204, 'f1': 0.9551952449025657, 'number': 25691}","{'precision': 0.48, 'recall': 0.045889101338432124, 'f1': 0.08376963350785341, 'number': 523}","{'precision': 0.8434782608695652, 'recall': 0.1377840909090909, 'f1': 0.23687423687423687, 'number': 704}",0.923077,0.947767,0.935259,0.926245
4,0.544400,0.249893,"{'precision': 0.951655881233346, 'recall': 0.9731034214316298, 'f1': 0.9622601566559535, 'number': 25691}","{'precision': 0.4293193717277487, 'recall': 0.3135755258126195, 'f1': 0.36243093922651937, 'number': 523}","{'precision': 0.5741029641185648, 'recall': 0.5227272727272727, 'f1': 0.5472118959107807, 'number': 704}",0.935478,0.94851,0.941949,0.933228
5,0.236200,0.245809,"{'precision': 0.9505841430677908, 'recall': 0.9786306488653614, 'f1': 0.9644035289604909, 'number': 25691}","{'precision': 0.5506756756756757, 'recall': 0.31166347992351817, 'f1': 0.398046398046398, 'number': 523}","{'precision': 0.5968503937007874, 'recall': 0.5383522727272727, 'f1': 0.5660941000746826, 'number': 704}",0.938057,0.954157,0.946039,0.937907


{'eval_loss': 0.24383287131786346, 'eval_LOC': {'precision': 0.9393346379647749, 'recall': 0.96579476861167, 'f1': 0.9523809523809522, 'number': 497}, 'eval_ORG': {'precision': 0.16666666666666666, 'recall': 0.1111111111111111, 'f1': 0.13333333333333333, 'number': 9}, 'eval_PER': {'precision': 0.3333333333333333, 'recall': 0.2727272727272727, 'f1': 0.3, 'number': 11}, 'eval_precision': 0.9201520912547528, 'eval_recall': 0.9361702127659575, 'eval_f1': 0.9280920421860019, 'eval_accuracy': 0.9291044776119403, 'eval_runtime': 0.0887, 'eval_samples_per_second': 574.886, 'eval_steps_per_second': 22.545, 'epoch': 5.0}
