In [1]:
!pip install transformers



In [2]:
from google.colab import drive

# Mount Google Drive so the model folders under /content/drive/My Drive can be read
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [3]:
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

In [4]:
# function for easy to use different model
def test_ner_model(model_path, text, label_list):
    # model and tokenizer
    model = AutoModelForTokenClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # tokenizer input names to exclude `token_type_ids` (especially for distilBert)
    tokenizer.model_input_names = ["input_ids", "attention_mask"]

    # pipeline
    ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

    # NER pipeline
    result = ner_pipeline(text)

    # map labels to original names
    for entity in result:
        entity['entity'] = label_list[int(entity['entity'].split('_')[1])]

    return result

In [5]:
# Each model is stored in its own sub-folder of one shared Drive directory
_MODELS_ROOT = "/content/drive/My Drive/NER_Models_upload"
model_paths = {
    display_name: f"{_MODELS_ROOT}/{folder}"
    for display_name, folder in (
        ("BERT", "bert_model"),
        ("DistilBERT", "distilbert_model"),
        ("ALBERT", "albert_model"),
        ("TinyBERT", "tinybert_model"),
    )
}

In [6]:
# Tag names in id order: 'O' first, then a B-/I- pair for each entity type
label_list = ['O'] + [
    f"{prefix}-{entity_type}"
    for entity_type in ('PER', 'ORG', 'LOC', 'MISC')
    for prefix in ('B', 'I')
]

# Sample sentence that is run through every model
text = "Apple is looking at buying U.K. startup for $1 billion"


In [7]:
# Run the sample sentence through each configured model in turn
for model_name in model_paths:
    model_path = model_paths[model_name]
    print(f"Results for {model_name}:")
    try:
        # An error for one model is reported below and the loop moves on to the next
        result = test_ner_model(model_path, text, label_list)
        print(result)
    except Exception as err:
        print(f"Error testing {model_name}: {err}")
    print()

Results for BERT:


Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.9906002, 'index': 1, 'word': 'apple', 'start': 0, 'end': 5}, {'entity': 'O', 'score': 0.99984443, 'index': 2, 'word': 'is', 'start': 6, 'end': 8}, {'entity': 'O', 'score': 0.99983954, 'index': 3, 'word': 'looking', 'start': 9, 'end': 16}, {'entity': 'O', 'score': 0.9998605, 'index': 4, 'word': 'at', 'start': 17, 'end': 19}, {'entity': 'O', 'score': 0.9997925, 'index': 5, 'word': 'buying', 'start': 20, 'end': 26}, {'entity': 'B-LOC', 'score': 0.99812967, 'index': 6, 'word': 'u', 'start': 27, 'end': 28}, {'entity': 'B-LOC', 'score': 0.99800247, 'index': 7, 'word': '.', 'start': 28, 'end': 29}, {'entity': 'B-LOC', 'score': 0.9983407, 'index': 8, 'word': 'k', 'start': 29, 'end': 30}, {'entity': 'B-LOC', 'score': 0.99783164, 'index': 9, 'word': '.', 'start': 30, 'end': 31}, {'entity': 'O', 'score': 0.99778974, 'index': 10, 'word': 'startup', 'start': 32, 'end': 39}, {'entity': 'O', 'score': 0.9998299, 'index': 11, 'word': 'for', 'start': 40, 'end': 43}, {'ent

Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.9906893, 'index': 1, 'word': 'apple', 'start': 0, 'end': 5}, {'entity': 'O', 'score': 0.99941266, 'index': 2, 'word': 'is', 'start': 6, 'end': 8}, {'entity': 'O', 'score': 0.99960333, 'index': 3, 'word': 'looking', 'start': 9, 'end': 16}, {'entity': 'O', 'score': 0.9996271, 'index': 4, 'word': 'at', 'start': 17, 'end': 19}, {'entity': 'O', 'score': 0.99956816, 'index': 5, 'word': 'buying', 'start': 20, 'end': 26}, {'entity': 'B-LOC', 'score': 0.99762434, 'index': 6, 'word': 'u', 'start': 27, 'end': 28}, {'entity': 'B-LOC', 'score': 0.99733645, 'index': 7, 'word': '.', 'start': 28, 'end': 29}, {'entity': 'B-LOC', 'score': 0.9975604, 'index': 8, 'word': 'k', 'start': 29, 'end': 30}, {'entity': 'B-LOC', 'score': 0.9963606, 'index': 9, 'word': '.', 'start': 30, 'end': 31}, {'entity': 'O', 'score': 0.99854493, 'index': 10, 'word': 'startup', 'start': 32, 'end': 39}, {'entity': 'O', 'score': 0.9995789, 'index': 11, 'word': 'for', 'start': 40, 'end': 43}, {'ent

Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.9981371, 'index': 1, 'word': '▁apple', 'start': 0, 'end': 5}, {'entity': 'O', 'score': 0.9998919, 'index': 2, 'word': '▁is', 'start': 6, 'end': 8}, {'entity': 'O', 'score': 0.9999038, 'index': 3, 'word': '▁looking', 'start': 9, 'end': 16}, {'entity': 'O', 'score': 0.9999498, 'index': 4, 'word': '▁at', 'start': 17, 'end': 19}, {'entity': 'O', 'score': 0.9997861, 'index': 5, 'word': '▁buying', 'start': 20, 'end': 26}, {'entity': 'B-LOC', 'score': 0.9553788, 'index': 6, 'word': '▁u', 'start': 27, 'end': 28}, {'entity': 'B-LOC', 'score': 0.9622015, 'index': 7, 'word': '.', 'start': 28, 'end': 29}, {'entity': 'B-LOC', 'score': 0.9614002, 'index': 8, 'word': 'k', 'start': 29, 'end': 30}, {'entity': 'B-LOC', 'score': 0.98456776, 'index': 9, 'word': '.', 'start': 30, 'end': 31}, {'entity': 'O', 'score': 0.9886868, 'index': 10, 'word': '▁startup', 'start': 32, 'end': 39}, {'entity': 'O', 'score': 0.99986815, 'index': 11, 'word': '▁for', 'start': 40, 'end': 43}, {

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity': 'B-ORG', 'score': 0.8463931, 'index': 1, 'word': 'apple', 'start': 0, 'end': 5}, {'entity': 'O', 'score': 0.9868329, 'index': 2, 'word': 'is', 'start': 6, 'end': 8}, {'entity': 'O', 'score': 0.9868693, 'index': 3, 'word': 'looking', 'start': 9, 'end': 16}, {'entity': 'O', 'score': 0.9870237, 'index': 4, 'word': 'at', 'start': 17, 'end': 19}, {'entity': 'O', 'score': 0.98492706, 'index': 5, 'word': 'buying', 'start': 20, 'end': 26}, {'entity': 'B-ORG', 'score': 0.47988045, 'index': 6, 'word': 'u', 'start': 27, 'end': 28}, {'entity': 'B-ORG', 'score': 0.42543676, 'index': 7, 'word': '.', 'start': 28, 'end': 29}, {'entity': 'B-ORG', 'score': 0.52627337, 'index': 8, 'word': 'k', 'start': 29, 'end': 30}, {'entity': 'B-ORG', 'score': 0.56629837, 'index': 9, 'word': '.', 'start': 30, 'end': 31}, {'entity': 'O', 'score': 0.6498156, 'index': 10, 'word': 'startup', 'start': 32, 'end': 39}, {'entity': 'O', 'score': 0.98658895, 'index': 11, 'word': 'for', 'start': 40, 'end': 43}, {'ent