In [1]:
!pip install transformers



In [2]:
# Colab-only: expose Google Drive under /content/drive so that files stored
# there can be read with ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
import time
import torch

In [5]:
# function for easy to use different model
def test_ner_model(model_path, text, label_list):
    # model and tokenizer
    model = AutoModelForTokenClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # tokenizer input names to exclude `token_type_ids` (especially for distilBert)
    tokenizer.model_input_names = ["input_ids", "attention_mask"]

    # pipeline
    ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer,device=0 if torch.cuda.is_available() else -1)

    # NER pipeline
    result = ner_pipeline(text)

    # Measure time taken for inference
    torch.cuda.synchronize()
    start_time = time.time()
    result = ner_pipeline(text)
    torch.cuda.synchronize()
    inference_time = time.time() - start_time

    # map labels to original names
    for entity in result:
        entity['entity'] = label_list[int(entity['entity'].split('_')[1])]

    return result,inference_time

In [6]:
# paths to models
# All checkpoints live in one Drive folder; change MODELS_DIR to relocate them.
MODELS_DIR = "/content/drive/My Drive/NER_Models_upload"

# Display name -> checkpoint directory (insertion order is the evaluation order).
model_paths = {
    display_name: f"{MODELS_DIR}/{folder}"
    for display_name, folder in (
        ("BERT", "bert_model"),
        ("DistilBERT", "distilbert_model"),
        ("ALBERT", "albert_model"),
        ("TinyBERT", "tinybert_model"),
    )
}

In [7]:
# label list: a tag's position in the list is the class index it is looked up by
label_list = [
    'O',
    'B-PER', 'I-PER',
    'B-ORG', 'I-ORG',
    'B-LOC', 'I-LOC',
    'B-MISC', 'I-MISC',
]

# input text: one sentence per line, joined with single spaces
text = " ".join([
    "Google is collaborating with Stanford University on artificial intelligence research.",
    "Meanwhile, Amazon is expanding its operations in India, focusing on e-commerce and cloud computing.",
    "Apple recently launched the iPhone 15 in California, drawing attention globally.",
    "Microsoft has partnered with NASA to develop space exploration technologies.",
    "In Europe, BMW and Daimler are working together on electric vehicle innovation.",
    "The World Health Organization, based in Geneva, is tackling global health issues.",
    "A start-up in Silicon Valley secured $10 million in funding from Sequoia Capital.",
    "Meanwhile, Barcelona FC signed a five-year deal with Adidas.",
    "The Eiffel Tower remains a top tourist attraction in Paris.",
])


In [8]:
# test each model
# Every checkpoint in `model_paths` is run on the same `text`; for each one the
# predictions are printed, followed by the measured inference time.
for model_name, model_path in model_paths.items():
    print(f"Results for {model_name}:")
    try:
        result, inference_time = test_ner_model(model_path, text, label_list)
        print(result)
        print(f"Time taken: {inference_time:.5f} seconds")
    except Exception as e:
        # Report the failure and move on, so one broken checkpoint does not
        # prevent the remaining models from being evaluated.
        print(f"Error testing {model_name}: {e}")
    print()  # blank line between models

Results for BERT:


Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.996799, 'index': 1, 'word': 'google', 'start': 0, 'end': 6}, {'entity': 'O', 'score': 0.9998468, 'index': 2, 'word': 'is', 'start': 7, 'end': 9}, {'entity': 'O', 'score': 0.9998385, 'index': 3, 'word': 'collaborating', 'start': 10, 'end': 23}, {'entity': 'O', 'score': 0.99979347, 'index': 4, 'word': 'with', 'start': 24, 'end': 28}, {'entity': 'B-ORG', 'score': 0.9896694, 'index': 5, 'word': 'stanford', 'start': 29, 'end': 37}, {'entity': 'I-ORG', 'score': 0.98771477, 'index': 6, 'word': 'university', 'start': 38, 'end': 48}, {'entity': 'O', 'score': 0.99982387, 'index': 7, 'word': 'on', 'start': 49, 'end': 51}, {'entity': 'O', 'score': 0.9962011, 'index': 8, 'word': 'artificial', 'start': 52, 'end': 62}, {'entity': 'O', 'score': 0.99399626, 'index': 9, 'word': 'intelligence', 'start': 63, 'end': 75}, {'entity': 'O', 'score': 0.9997645, 'index': 10, 'word': 'research', 'start': 76, 'end': 84}, {'entity': 'O', 'score': 0.99984765, 'index': 11, 'word': '.',

Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.995837, 'index': 1, 'word': 'google', 'start': 0, 'end': 6}, {'entity': 'O', 'score': 0.99962175, 'index': 2, 'word': 'is', 'start': 7, 'end': 9}, {'entity': 'O', 'score': 0.9995365, 'index': 3, 'word': 'collaborating', 'start': 10, 'end': 23}, {'entity': 'O', 'score': 0.9995741, 'index': 4, 'word': 'with', 'start': 24, 'end': 28}, {'entity': 'B-ORG', 'score': 0.98998207, 'index': 5, 'word': 'stanford', 'start': 29, 'end': 37}, {'entity': 'I-ORG', 'score': 0.95199263, 'index': 6, 'word': 'university', 'start': 38, 'end': 48}, {'entity': 'O', 'score': 0.9993948, 'index': 7, 'word': 'on', 'start': 49, 'end': 51}, {'entity': 'O', 'score': 0.902113, 'index': 8, 'word': 'artificial', 'start': 52, 'end': 62}, {'entity': 'O', 'score': 0.8788899, 'index': 9, 'word': 'intelligence', 'start': 63, 'end': 75}, {'entity': 'O', 'score': 0.988516, 'index': 10, 'word': 'research', 'start': 76, 'end': 84}, {'entity': 'O', 'score': 0.9996681, 'index': 11, 'word': '.', 'st

Device set to use cuda:0


[{'entity': 'B-ORG', 'score': 0.99651563, 'index': 1, 'word': '▁google', 'start': 0, 'end': 6}, {'entity': 'O', 'score': 0.999944, 'index': 2, 'word': '▁is', 'start': 7, 'end': 9}, {'entity': 'O', 'score': 0.9999199, 'index': 3, 'word': '▁collaborating', 'start': 10, 'end': 23}, {'entity': 'O', 'score': 0.99989736, 'index': 4, 'word': '▁with', 'start': 24, 'end': 28}, {'entity': 'B-ORG', 'score': 0.9689584, 'index': 5, 'word': '▁stanford', 'start': 29, 'end': 37}, {'entity': 'I-ORG', 'score': 0.9623908, 'index': 6, 'word': '▁university', 'start': 38, 'end': 48}, {'entity': 'O', 'score': 0.9999471, 'index': 7, 'word': '▁on', 'start': 49, 'end': 51}, {'entity': 'O', 'score': 0.9968978, 'index': 8, 'word': '▁artificial', 'start': 52, 'end': 62}, {'entity': 'O', 'score': 0.99776363, 'index': 9, 'word': '▁intelligence', 'start': 63, 'end': 75}, {'entity': 'O', 'score': 0.99982256, 'index': 10, 'word': '▁research', 'start': 76, 'end': 84}, {'entity': 'O', 'score': 0.99989116, 'index': 11, 'w

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity': 'B-ORG', 'score': 0.865369, 'index': 1, 'word': 'google', 'start': 0, 'end': 6}, {'entity': 'O', 'score': 0.98556167, 'index': 2, 'word': 'is', 'start': 7, 'end': 9}, {'entity': 'O', 'score': 0.985399, 'index': 3, 'word': 'collaborating', 'start': 10, 'end': 23}, {'entity': 'O', 'score': 0.9847839, 'index': 4, 'word': 'with', 'start': 24, 'end': 28}, {'entity': 'B-ORG', 'score': 0.834983, 'index': 5, 'word': 'stanford', 'start': 29, 'end': 37}, {'entity': 'I-ORG', 'score': 0.5404507, 'index': 6, 'word': 'university', 'start': 38, 'end': 48}, {'entity': 'O', 'score': 0.96309197, 'index': 7, 'word': 'on', 'start': 49, 'end': 51}, {'entity': 'O', 'score': 0.5173524, 'index': 8, 'word': 'artificial', 'start': 52, 'end': 62}, {'entity': 'O', 'score': 0.77259773, 'index': 9, 'word': 'intelligence', 'start': 63, 'end': 75}, {'entity': 'O', 'score': 0.83922577, 'index': 10, 'word': 'research', 'start': 76, 'end': 84}, {'entity': 'O', 'score': 0.9865014, 'index': 11, 'word': '.', 's