In [228]:
# Loading Fine-Tuning Model
from transformers import XLMRobertaForTokenClassification, XLMRobertaTokenizer
xlm_roberta_model = XLMRobertaForTokenClassification.from_pretrained('../model')
xlm_roberta_tokenize = XLMRobertaTokenizer.from_pretrained('../model')

In [229]:
# PythaiNLP - Tokenizer Download From Library
from pythainlp.tokenize import word_tokenize as pythainlp_tokenizer

In [230]:
# Setup default string data
default_text = """ห้ามเกินขนาดที่แนะนำ"""

In [231]:
# Clean & Display string data - remove new lines syntax in string
cleaned_text = default_text.replace('\n', " ")
print(cleaned_text)

ห้ามเกินขนาดที่แนะนำ


In [232]:
# XLM Roberta Base - Tokenize
tokens = xlm_roberta_tokenize.tokenize(default_text)

In [233]:
# Engine: longest
# tokens = pythainlp_tokenizer(cleaned_text, engine='longest')
# print(tokens)

In [234]:
# Format Tokenized
tokenized_inputs = xlm_roberta_tokenize(tokens,  
                                        is_split_into_words=True,
                                        return_tensors="pt"
                                        )

In [235]:
# Setup Labels for Download XLM Roberta Base
labels_to_id = {
    "O": 0,
    "B-DRUG_NAME": 1, "I-DRUG_NAME": 2,
    "B-DOSAGE": 3, "I-DOSAGE": 4,
    "B-FORM": 5, "I-FORM": 6,
    "B-DRUG_REG_NO": 7, "I-DRUG_REG_NO": 8,
    "B-MFG_DATE": 9, "I-MFG_DATE": 10,
    "B-EXP_DATE": 11, "I-EXP_DATE": 12,
    "B-WARNINGS": 13, "I-WARNINGS": 14,
    "B-INDICATIONS": 15, "I-INDICATIONS": 16,
    "B-USAGE_INSTRUCTIONS": 17, "I-USAGE_INSTRUCTIONS": 18
}
id_to_labels = {v: k for k, v in labels_to_id.items()}

In [236]:
# Predict with Fine-Tuning Model
import torch
from torch.nn import functional as F
with torch.no_grad():
    outputs = xlm_roberta_model(**tokenized_inputs)

logits = outputs.logits
probs = F.softmax(logits, dim=-1)
predictions = torch.argmax(probs, dim=-1)
print(predictions)
for i, token in enumerate(tokens):
    pred_label = id_to_labels[predictions[0][i].item()] 
    confidence = probs[0][i][predictions[0][i]].item() * 100
    print(f"{token}: {pred_label} ({confidence:.2f}%)")

tensor([[ 0, 18, 18, 18, 18, 18, 18, 18,  0]])
▁: O (65.59%)
ห้าม: I-USAGE_INSTRUCTIONS (98.05%)
เกิน: I-USAGE_INSTRUCTIONS (98.14%)
ขนาด: I-USAGE_INSTRUCTIONS (99.07%)
ที่: I-USAGE_INSTRUCTIONS (98.82%)
แนะนํา: I-USAGE_INSTRUCTIONS (99.14%)
