# Inference
started Nov 3rd

In [2]:
from datasets import load_dataset
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer,
)
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType
import evaluate
import torch
import numpy as np

# BIO tag inventory for the token classifier. The position of each tag in the
# list is its integer class id, so the order here must not change.
id2label = dict(
    enumerate(
        [
            "O",
            "B-ap_name1",
            "I-ap_name1",
            "B-vz1",
            "I-vz1",
            "B-coordx1",
            "I-coordx1",
            "B-coordy1",
            "I-coordy1",
            "B-type1",
            "I-type1",
        ]
    )
)
# Reverse lookup (tag name -> class id), derived from id2label so the two
# mappings always agree.
label2id = {tag: class_id for class_id, tag in id2label.items()}

In [15]:
# Rebuild the inference model: read the adapter config from the training
# checkpoint, instantiate the base model it names with a token-classification
# head, then attach the adapter weights on top.
#
# NOTE(review): the load warning recorded under this cell reports
# classifier.weight / classifier.bias as newly initialized in the base model,
# and the saved predictions further down are all "O". Confirm that the adapter
# checkpoint actually restores the trained classifier head.
# NOTE(review): this cell's execution count (15) is later than the inference
# cells (9, 10), so the saved outputs may come from an earlier load. Re-run the
# notebook top to bottom before trusting them.
peft_model_id = "roberta-large-lora-token-classification/checkpoint-108948/"
config = PeftConfig.from_pretrained(peft_model_id)
inference_model = AutoModelForTokenClassification.from_pretrained(
    config.base_model_name_or_path,
    # Size the head from the label map instead of a hard-coded 11 so the two
    # cannot drift apart when tags are added or removed.
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(inference_model, peft_model_id)
# Make evaluation mode explicit so dropout layers are inactive at inference.
# The trailing semicolon keeps the notebook from echoing the module repr.
model.eval();

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

In [9]:
# Sample sentence used to smoke-test the tagger.
text = "In this work we studied in detail galaxies M45 and NGC 2132 which are at z=0.1 and they appeared to be red."
# Tokenize the sentence and request PyTorch tensors ("pt") so the encoding can
# be unpacked straight into the model call in the next cell.
inputs = tokenizer(text, return_tensors="pt")

In [10]:
# Forward pass without gradient tracking; only the logits are needed.
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Highest-scoring class id at each position (argmax over dimension 2).
predictions = logits.argmax(dim=2)
tokens = inputs.tokens()

# Print every token of the first (only) sequence next to its predicted tag.
tag_names = model.config.id2label
for tok, class_id in zip(tokens, predictions[0].tolist()):
    print((tok, tag_names[class_id]))

('<s>', 'O')
('In', 'O')
('Ġthis', 'O')
('Ġwork', 'O')
('Ġwe', 'O')
('Ġstudied', 'O')
('Ġin', 'O')
('Ġdetail', 'O')
('Ġgalaxies', 'O')
('ĠM', 'O')
('45', 'O')
('Ġand', 'O')
('ĠN', 'O')
('GC', 'O')
('Ġ2', 'O')
('132', 'O')
('Ġwhich', 'O')
('Ġare', 'O')
('Ġat', 'O')
('Ġz', 'O')
('=', 'O')
('0', 'O')
('.', 'O')
('1', 'O')
('Ġand', 'O')
('Ġthey', 'O')
('Ġappeared', 'O')
('Ġto', 'O')
('Ġbe', 'O')
('Ġred', 'O')
('.', 'O')
('</s>', 'O')
