## Inference notebook

You can use this notebook to load a trained model and classify new texts.

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Checkpoint to load. This is a relative path, so it presumably resolves
# against the notebook's working directory — TODO confirm before sharing.
checkpoint_path = "checkpoints/bert-31-10ep/checkpoint-2700"

# Load the tokenizer and model
# Both are read from the same checkpoint so the vocabulary used for
# tokenization matches the one stored alongside the model weights.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sample input text
text = "Congratulations! You are a total stupid person now! I just hope that you die soon!"

# Tokenize the input
# return_tensors="pt" asks for PyTorch tensors. This `inputs` value is only
# inspected by the decode cell below; classify_text tokenizes `text` again itself.
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)


In [3]:
# Sanity check: turn the first sequence's token ids back into text to see
# exactly what the tokenizer produced (casing, special tokens).
tokenizer.decode(inputs["input_ids"][0])

'[CLS] congratulations! you are a total stupid person now! i just hope that you die soon! [SEP]'

In [5]:
import torch
# Function to classify text
def classify_text(text, toxic_threshold=0.5, label_threshold=0.1):
    """Classify ``text`` with the loaded model and return its labels as a string.

    The text is tokenized with the notebook-level ``tokenizer``, run through
    the notebook-level ``model`` without gradient tracking, and the logits
    are turned into probabilities with a softmax over the last dimension.
    The probability tensor is printed as a side effect.

    NOTE(review): softmax makes the classes compete (each row sums to 1).
    That is right for a single-label checkpoint; if this checkpoint was
    trained as multi-label, a sigmoid would be the matching activation —
    confirm against the training setup.

    Args:
        text: Input to classify. Only the first row of the model output is
            inspected, so pass a single string; if a list is passed, every
            item after the first is ignored.
        toxic_threshold: If every class probability is below this value the
            text is reported as ``"NON TOXIC"``. Defaults to 0.5.
        label_threshold: When the text is not reported as ``"NON TOXIC"``,
            every class whose probability is at least this value is included
            in the result. Defaults to 0.1.

    Returns:
        ``"NON TOXIC"``, or the selected ``model.config.id2label`` names
        joined by ``", "``.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    print(probs)

    # Convert the first row to plain floats once instead of iterating over
    # 0-dim tensors in both the gate and the label selection.
    row = probs[0].tolist()

    # Gate: nothing reaches the "toxic" bar, so report the text as clean.
    if all(p < toxic_threshold for p in row):
        return "NON TOXIC"

    # Past the gate, the lower bar lets secondary labels through as well.
    toxic_labels = [
        model.config.id2label[i] for i, p in enumerate(row) if p >= label_threshold
    ]
    return ", ".join(toxic_labels)

In [6]:
# Classify the sample text. classify_text prints the probability tensor first;
# the cell's displayed value is the returned label string.
classify_text(text)

tensor([[7.9998e-01, 3.3263e-03, 1.5818e-02, 7.8910e-02, 1.0150e-01, 4.7327e-04]])


'toxic, insult'