https://huggingface.co/d4data/biomedical-ner-all
https://huggingface.co/datasets/singh-aditya/MACCROBAT_biomedical_ner

https://www.freecodecamp.org/news/getting-started-with-ner-models-using-huggingface/
https://medium.com/@minhle_0210/pos-tagging-medical-ner-ffcdaef7a7b3
https://github.com/dreji18/Bio-Epidemiology-NER

https://huggingface.co/distilbert/distilbert-base-uncased
https://huggingface.co/google-bert/bert-base-uncased
https://huggingface.co/emilyalsentzer/Bio_ClinicalBERT
https://huggingface.co/Charangan/MedBERT

https://wandb.ai/jack-morris/david-vs-goliath/reports/Does-Model-Size-Matter-A-Comparison-of-BERT-and-DistilBERT--VmlldzoxMDUxNzU#:~:text=The%20BERT%20authors%20recommend%20fine,5e%2D5%2C%203e%2D5
https://datascience.stackexchange.com/questions/64583/what-are-the-good-parameter-ranges-for-bert-hyperparameters-while-finetuning-it

In [3]:
tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

pipe = pipeline("ner", model=model, tokenizer=tokenizer, device=0, aggregation_strategy="max")  # device=0 (gpu)

text = """The patient reported no recurrence of palpitations at follow-up 6 months after the ablation."""

out = pipe(text)
print(out)

spans = []

for row in out:
    spans.append((row["start"], row["end"], row["entity_group"]))

# show_span_ascii_markup(text, spans)
show_span_box_markup(text, spans)

[{'entity_group': 'Sign_symptom', 'score': 0.9999311, 'word': 'palpitations', 'start': 38, 'end': 50}, {'entity_group': 'Clinical_event', 'score': 0.99975544, 'word': 'follow', 'start': 54, 'end': 60}, {'entity_group': 'Date', 'score': 0.999867, 'word': '6 months after', 'start': 64, 'end': 78}]


In [13]:
import warnings
import gc

import torch
from torch.utils.data import random_split

from datasets import load_dataset
from transformers import pipeline, AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers.utils.logging import set_verbosity_error

from ipymarkup import show_span_ascii_markup, show_span_box_markup

from mrb_ner_config import *
from mrb_ner_dataset import MRBNERDataset
from mrb_ner_evaluator import MRBNEREvaluator

warnings.filterwarnings("ignore")

In [12]:
# get dataset from huggingface
data = load_dataset(DATA_PATH)

# extract NER labels
label_names = data["train"].features["ner_labels"].feature.names
id2label = {}
label2id = {}

for idx, label in enumerate(label_names):
    id2label[idx] = label
    label2id[label] = idx

# split dataset into train, val and test splits
data_splits = {}
data_splits["train"], data_splits["val"], data_splits["test"]  = random_split(data["train"], [0.7, 0.15, 0.15])

In [8]:
evaluator = MRBNEREvaluator(metric="seqeval", id2label=id2label)

for m in MODELS:
    save = {
        "save_trainer": None,
        "test_metrics": None,
        "param": None
    }
    
    for lr in LRATES: 
        print(f"\n---------------------- Training Model (Model:{m['name']}, Lr:{lr}) ----------------------")

        tokenizer = AutoTokenizer.from_pretrained(m["path"])
        data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
        model = AutoModelForTokenClassification.from_pretrained(
            pretrained_model_name_or_path=m["path"],
            label2id=label2id,
            id2label=id2label,
            ignore_mismatched_sizes=True,
            num_labels=len(label2id)
        )

        datasets = {}
        datasets["train"] = MRBNERDataset(data=data_splits["train"], tokenizer=tokenizer, id2label=id2label, label2id=label2id, max_len=512)
        datasets["val"] = MRBNERDataset(data=data_splits["val"], tokenizer=tokenizer, id2label=id2label, label2id=label2id, max_len=512)
        datasets["test"] = MRBNERDataset(data=data_splits["test"], tokenizer=tokenizer, id2label=id2label, label2id=label2id, max_len=512)
        
        training_args = TrainingArguments(
            output_dir=f"../output/{m['name']}/{lr}",
            overwrite_output_dir=True,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            num_train_epochs=EPOCHS,
            learning_rate=lr,
            weight_decay=WDECAY,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=datasets["train"],
            eval_dataset=datasets["val"],
            data_collator=data_collator,
            tokenizer=tokenizer,
            compute_metrics=evaluator.compute_metrics
        )

        trainer.train()
        predictions, _, metrics = trainer.predict(datasets["test"])

        if save["test_metrics"] == None or save["test_metrics"]["test_overall_accuracy"] < metrics["test_overall_accuracy"]:
            best_accuracy = save['test_metrics']['test_overall_accuracy'] if save['test_metrics'] else None
            print(f"Updating best accuracy: {best_accuracy} -> {metrics['test_overall_accuracy']}")

            save["save_trainer"] = trainer
            save["test_metrics"] = metrics
            save["param"] = lr

    print(f"\nSaving best {m['name']} model...")
    for key, value in save["test_metrics"].items():
        print(f"{key}: {value}")

    save["save_trainer"].save_model(f"../output/{m['name']}/save_model")

    del save, tokenizer, data_collator, model, datasets, training_args, trainer, predictions, _, metrics
    gc.collect()
    torch.cuda.empty_cache()


---------------------- Training Model (Model:distilbert-base-uncased, Lr:5e-05) ----------------------


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:05<00:00,  7.30it/s]
100%|██████████| 35/35 [00:05<00:00,  7.30it/s]

{'eval_loss': 2.166745662689209, 'eval_overall_precision': 0.031503579952267304, 'eval_overall_recall': 0.0234208658623137, 'eval_overall_f1': 0.026867494402605333, 'eval_overall_accuracy': 0.1462021062065875, 'eval_runtime': 0.5138, 'eval_samples_per_second': 58.383, 'eval_steps_per_second': 15.569, 'epoch': 1.0}


100%|██████████| 35/35 [00:06<00:00,  5.42it/s]


{'train_runtime': 6.4521, 'train_samples_per_second': 21.698, 'train_steps_per_second': 5.425, 'train_loss': 2.5291458129882813, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 19.00it/s]


Updating best accuracy: None -> 0.1610647693817468

---------------------- Training Model (Model:distilbert-base-uncased, Lr:4e-05) ----------------------


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:04<00:00,  7.29it/s]
100%|██████████| 35/35 [00:05<00:00,  7.29it/s]

{'eval_loss': 2.2539150714874268, 'eval_overall_precision': 0.02373887240356083, 'eval_overall_recall': 0.014194464158977998, 'eval_overall_f1': 0.01776593382189651, 'eval_overall_accuracy': 0.10015684517140937, 'eval_runtime': 0.5125, 'eval_samples_per_second': 58.542, 'eval_steps_per_second': 15.611, 'epoch': 1.0}


100%|██████████| 35/35 [00:06<00:00,  5.64it/s]


{'train_runtime': 6.2015, 'train_samples_per_second': 22.575, 'train_steps_per_second': 5.644, 'train_loss': 2.6125560215541292, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 18.43it/s]



---------------------- Training Model (Model:distilbert-base-uncased, Lr:3e-05) ----------------------


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:05<00:00,  7.23it/s]
100%|██████████| 35/35 [00:05<00:00,  7.23it/s]

{'eval_loss': 2.4129912853240967, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.01646874299798342, 'eval_runtime': 0.509, 'eval_samples_per_second': 58.939, 'eval_steps_per_second': 15.717, 'epoch': 1.0}


100%|██████████| 35/35 [00:06<00:00,  5.43it/s]


{'train_runtime': 6.4458, 'train_samples_per_second': 21.72, 'train_steps_per_second': 5.43, 'train_loss': 2.7533719744001117, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 18.69it/s]



---------------------- Training Model (Model:distilbert-base-uncased, Lr:2e-05) ----------------------


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:04<00:00,  7.22it/s]
100%|██████████| 35/35 [00:05<00:00,  7.22it/s]

{'eval_loss': 2.574371099472046, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.5068, 'eval_samples_per_second': 59.193, 'eval_steps_per_second': 15.785, 'epoch': 1.0}


100%|██████████| 35/35 [00:06<00:00,  5.47it/s]


{'train_runtime': 6.394, 'train_samples_per_second': 21.895, 'train_steps_per_second': 5.474, 'train_loss': 2.914011710030692, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 18.69it/s]



Saving best distilbert-base-uncased model...
test_loss: 1.9678986072540283
test_overall_precision: 0.050677666470241606
test_overall_recall: 0.03194650817236255
test_overall_f1: 0.039188881294144454
test_overall_accuracy: 0.1610647693817468
test_runtime: 0.464
test_samples_per_second: 64.655
test_steps_per_second: 17.241

---------------------- Training Model (Model:biobert, Lr:5e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.60it/s]
100%|██████████| 35/35 [00:10<00:00,  3.60it/s]

{'eval_loss': 2.3059959411621094, 'eval_overall_precision': 0.08184143222506395, 'eval_overall_recall': 0.011895910780669145, 'eval_overall_f1': 0.020772476468679, 'eval_overall_accuracy': 0.11390887290167866, 'eval_runtime': 0.867, 'eval_samples_per_second': 34.601, 'eval_steps_per_second': 9.227, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.94it/s]


{'train_runtime': 11.9027, 'train_samples_per_second': 11.762, 'train_steps_per_second': 2.941, 'train_loss': 2.7616962977818083, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.40it/s]


Updating best accuracy: None -> 0.09998827804477788

---------------------- Training Model (Model:biobert, Lr:4e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.58it/s]
100%|██████████| 35/35 [00:10<00:00,  3.58it/s]

{'eval_loss': 2.415416955947876, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.017876607804665358, 'eval_runtime': 0.869, 'eval_samples_per_second': 34.521, 'eval_steps_per_second': 9.206, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.95it/s]


{'train_runtime': 11.8709, 'train_samples_per_second': 11.793, 'train_steps_per_second': 2.948, 'train_loss': 2.84061519077846, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.48it/s]



---------------------- Training Model (Model:biobert, Lr:3e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.61it/s]
100%|██████████| 35/35 [00:10<00:00,  3.61it/s]

{'eval_loss': 2.5255775451660156, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.00021800741225201656, 'eval_runtime': 0.8947, 'eval_samples_per_second': 33.532, 'eval_steps_per_second': 8.942, 'epoch': 1.0}


100%|██████████| 35/35 [00:12<00:00,  2.80it/s]


{'train_runtime': 12.4834, 'train_samples_per_second': 11.215, 'train_steps_per_second': 2.804, 'train_loss': 2.946702575683594, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 10.71it/s]



---------------------- Training Model (Model:biobert, Lr:2e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.89it/s]
100%|██████████| 35/35 [00:10<00:00,  3.89it/s]

{'eval_loss': 2.7253005504608154, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.833, 'eval_samples_per_second': 36.014, 'eval_steps_per_second': 9.604, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  3.00it/s]


{'train_runtime': 11.6692, 'train_samples_per_second': 11.997, 'train_steps_per_second': 2.999, 'train_loss': 3.1398714338030134, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.47it/s]



Saving best biobert model...
test_loss: 2.1792454719543457
test_overall_precision: 0.08142493638676845
test_overall_recall: 0.012307692307692308
test_overall_f1: 0.021383227530905442
test_overall_accuracy: 0.09998827804477788
test_runtime: 0.788
test_samples_per_second: 38.07
test_steps_per_second: 10.152

---------------------- Training Model (Model:bio-clinical-bert, Lr:5e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.56it/s]
100%|██████████| 35/35 [00:10<00:00,  3.56it/s]

{'eval_loss': 2.4344639778137207, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.882, 'eval_samples_per_second': 34.014, 'eval_steps_per_second': 9.07, 'epoch': 1.0}


100%|██████████| 35/35 [00:12<00:00,  2.92it/s]


{'train_runtime': 12.002, 'train_samples_per_second': 11.665, 'train_steps_per_second': 2.916, 'train_loss': 2.7571629115513394, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.48it/s]


Updating best accuracy: None -> 0.0

---------------------- Training Model (Model:bio-clinical-bert, Lr:4e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.56it/s]
100%|██████████| 35/35 [00:10<00:00,  3.56it/s]

{'eval_loss': 2.529207944869995, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.873, 'eval_samples_per_second': 34.364, 'eval_steps_per_second': 9.164, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.93it/s]


{'train_runtime': 11.947, 'train_samples_per_second': 11.718, 'train_steps_per_second': 2.93, 'train_loss': 2.8084527151925225, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.38it/s]



---------------------- Training Model (Model:bio-clinical-bert, Lr:3e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.58it/s]
100%|██████████| 35/35 [00:10<00:00,  3.58it/s]

{'eval_loss': 2.6124205589294434, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.8546, 'eval_samples_per_second': 35.106, 'eval_steps_per_second': 9.362, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.96it/s]


{'train_runtime': 11.8206, 'train_samples_per_second': 11.844, 'train_steps_per_second': 2.961, 'train_loss': 2.8759002685546875, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.70it/s]



---------------------- Training Model (Model:bio-clinical-bert, Lr:2e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.56it/s]
100%|██████████| 35/35 [00:10<00:00,  3.56it/s]

{'eval_loss': 2.703368663787842, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.0, 'eval_runtime': 0.8814, 'eval_samples_per_second': 34.039, 'eval_steps_per_second': 9.077, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.94it/s]


{'train_runtime': 11.8987, 'train_samples_per_second': 11.766, 'train_steps_per_second': 2.941, 'train_loss': 3.0064926147460938, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 10.48it/s]



Saving best bio-clinical-bert model...
test_loss: 2.2728796005249023
test_overall_precision: 0.0
test_overall_recall: 0.0
test_overall_f1: 0.0
test_overall_accuracy: 0.0
test_runtime: 0.782
test_samples_per_second: 38.363
test_steps_per_second: 10.23

---------------------- Training Model (Model:medbert, Lr:5e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at Charangan/MedBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.59it/s]
100%|██████████| 35/35 [00:10<00:00,  3.59it/s]

{'eval_loss': 2.2092010974884033, 'eval_overall_precision': 0.025787965616045846, 'eval_overall_recall': 0.016728624535315983, 'eval_overall_f1': 0.020293122886133032, 'eval_overall_accuracy': 0.13385655112273817, 'eval_runtime': 0.836, 'eval_samples_per_second': 35.885, 'eval_steps_per_second': 9.569, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  3.07it/s]


{'train_runtime': 11.411, 'train_samples_per_second': 12.269, 'train_steps_per_second': 3.067, 'train_loss': 2.652541242327009, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.40it/s]


Updating best accuracy: None -> 0.12050169968350721

---------------------- Training Model (Model:medbert, Lr:4e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at Charangan/MedBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.64it/s]
100%|██████████| 35/35 [00:10<00:00,  3.64it/s]

{'eval_loss': 2.2485191822052, 'eval_overall_precision': 0.03828972559029994, 'eval_overall_recall': 0.022304832713754646, 'eval_overall_f1': 0.028188865398167725, 'eval_overall_accuracy': 0.14333987355570088, 'eval_runtime': 0.8565, 'eval_samples_per_second': 35.026, 'eval_steps_per_second': 9.34, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.97it/s]


{'train_runtime': 11.7864, 'train_samples_per_second': 11.878, 'train_steps_per_second': 2.97, 'train_loss': 2.717863246372768, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.22it/s]


Updating best accuracy: 0.12050169968350721 -> 0.1315203375923104

---------------------- Training Model (Model:medbert, Lr:3e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at Charangan/MedBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.59it/s]
100%|██████████| 35/35 [00:10<00:00,  3.59it/s]

{'eval_loss': 2.443417549133301, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.038805319380858946, 'eval_runtime': 0.8545, 'eval_samples_per_second': 35.108, 'eval_steps_per_second': 9.362, 'epoch': 1.0}


100%|██████████| 35/35 [00:11<00:00,  2.93it/s]


{'train_runtime': 11.9504, 'train_samples_per_second': 11.715, 'train_steps_per_second': 2.929, 'train_loss': 2.9207567487444197, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.35it/s]



---------------------- Training Model (Model:medbert, Lr:2e-05) ----------------------


Some weights of BertForTokenClassification were not initialized from the model checkpoint at Charangan/MedBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 35/35 [00:09<00:00,  3.58it/s]
100%|██████████| 35/35 [00:10<00:00,  3.58it/s]

{'eval_loss': 2.6164839267730713, 'eval_overall_precision': 0.0, 'eval_overall_recall': 0.0, 'eval_overall_f1': 0.0, 'eval_overall_accuracy': 0.00021800741225201656, 'eval_runtime': 0.858, 'eval_samples_per_second': 34.965, 'eval_steps_per_second': 9.324, 'epoch': 1.0}


100%|██████████| 35/35 [00:12<00:00,  2.90it/s]


{'train_runtime': 12.0831, 'train_samples_per_second': 11.586, 'train_steps_per_second': 2.897, 'train_loss': 3.1084603445870536, 'epoch': 1.0}


100%|██████████| 8/8 [00:00<00:00, 11.46it/s]



Saving best medbert model...
test_loss: 2.0795881748199463
test_overall_precision: 0.058941728064300064
test_overall_recall: 0.033846153846153845
test_overall_f1: 0.043000244319569995
test_overall_accuracy: 0.1315203375923104
test_runtime: 0.801
test_samples_per_second: 37.453
test_steps_per_second: 9.988


In [31]:
pipe = pipeline("ner", model=model, tokenizer=tokenizer, device=0, aggregation_strategy="max")  # device=0 (gpu)
text = """The patient reported no recurrence of palpitations at follow-up 6 months after the ablation."""

out = pipe(text)
print(out)

spans = []

for row in out:
    spans.append((row["start"], row["end"], row["entity_group"]))

show_span_box_markup(text, spans)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity_group': 'SIGN_SYMPTOM', 'score': 0.49153978, 'word': 'recurrence', 'start': 24, 'end': 34}, {'entity_group': 'SIGN_SYMPTOM', 'score': 0.702181, 'word': 'palpitations', 'start': 38, 'end': 50}, {'entity_group': 'DATE', 'score': 0.46011654, 'word': '6 months after', 'start': 64, 'end': 78}]
