In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Restore the classifier and its tokenizer from the same local checkpoint directory.
model_path = "./finetuned_model"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)


import error: No module named 'triton'


In [2]:
from datasets import load_dataset

# CSV file holding the evaluation records; loaded through the generic "csv" builder.
dataset_name = "data/balanced_record_without_llm.csv"
dataset = load_dataset(path="csv", data_files=dataset_name)

def preprocess_function(examples):
    """Tokenize a batch of rows and attach their labels.

    Args:
        examples: Batch mapping with a ``default/domain`` column (text to
            tokenize) and a ``default/class`` column (copied to ``label``).

    Returns:
        The tokenizer output, padded/truncated to 30 tokens, with an added
        ``label`` entry.
    """
    tokenized = tokenizer(
        examples["default/domain"],
        max_length=30,
        padding="max_length",
        truncation=True,
    )
    tokenized["label"] = examples["default/class"]
    return tokenized

# Tokenize every row in batches, then carve a reproducible 80/20 split out of the "train" split.
dataset = dataset.map(function=preprocess_function, batched=True)

splited_dataset = dataset["train"].train_test_split(seed=42, test_size=0.2)
train_dataset = splited_dataset["train"]
test_dataset = splited_dataset["test"]


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/300000 [00:00<?, ? examples/s]

In [3]:
# sample case
model.eval()
def predict_domain(domain):
    """Classify one domain string (or a list of them) with the loaded model.

    Args:
        domain: A single domain string, or a list of domain strings.

    Returns:
        A ``(predicted_class, probabilities)`` tuple. ``probabilities`` is a
        nested list with one row of softmax scores per input. For a single
        string ``predicted_class`` is an ``int``; for a list it is a list of
        ints, one per input.
    """
    inputs = tokenizer(domain, return_tensors="pt", truncation=True, padding="max_length", max_length=30)
    # The model may no longer be on the CPU (e.g. after a Trainer has placed it
    # on an accelerator); inputs must be on the same device as the weights.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # Reduce over the class axis only; a bare argmax would flatten the tensor
    # and return a wrong index as soon as more than one row is present.
    predicted = torch.argmax(probabilities, dim=-1)
    if isinstance(domain, str):
        predicted_class = predicted.item()
    else:
        predicted_class = predicted.tolist()

    return predicted_class, probabilities.tolist()

# Smoke-test the predictor on one hand-picked domain and show the raw scores.
domain = "samsungmobile.co.il"
predicted_class, probabilities = predict_domain(domain)

for report_line in (
    f"Domain: {domain}",
    f"Predicted Class: {predicted_class}",
    f"Probabilities: {probabilities}",
):
    print(report_line)

Domain: samsungmobile.co.il
Predicted Class: 0
Probabilities: [[0.9998581409454346, 0.00010287507757311687, 3.8956888602115214e-05]]


In [None]:
def _predict_batch(batch):
    """Return the predicted class for every row of a batch.

    Tokenizes the ``default/domain`` column with the same settings used
    elsewhere in this notebook and runs a single forward pass for the whole
    batch instead of one pass per row.
    """
    inputs = tokenizer(
        batch["default/domain"],
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=30,
    ).to(model.device)  # keep inputs on the same device as the model weights
    with torch.no_grad():
        logits = model(**inputs).logits
    # softmax is monotonic, so argmax over logits picks the same class as
    # argmax over the probabilities.
    return {"predict": logits.argmax(dim=-1).tolist()}


# Batched inference: adds a "predict" column with the predicted class per row.
model.eval()
test_dataset = test_dataset.map(_predict_batch, batched=True, batch_size=256)

Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

In [4]:
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation run.

    Args:
        eval_pred: A ``(logits, labels)`` pair; ``logits`` holds per-class
            scores and ``labels`` the reference class ids.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred  # logits: score
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    # zero_division=0 yields the same value sklearn falls back to by default
    # (0.0) when a class has no true or predicted samples, but without the
    # ill-defined-metric warning (this happens when a set has a single label).
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="weighted", zero_division=0
    )
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }


# Evaluation-only configuration: output directory plus the per-device eval batch size.
training_args = TrainingArguments(per_device_eval_batch_size=16, output_dir="./results")

# The Trainer is used here purely as an evaluation harness over the held-out split.
trainer = Trainer(
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=test_dataset,
    args=training_args,
    model=model,
)

  trainer = Trainer(


In [5]:
# Score the model on the Trainer's evaluation set and show the metric dict.
results = trainer.evaluate()
print(f"Evaluation results: {results}")

Evaluation results: {'eval_loss': 0.06996390223503113, 'eval_model_preparation_time': 0.002, 'eval_accuracy': 0.9867666666666667, 'eval_precision': 0.9868002985250772, 'eval_recall': 0.9867666666666667, 'eval_f1': 0.9867788260539613, 'eval_runtime': 55.237, 'eval_samples_per_second': 1086.228, 'eval_steps_per_second': 67.889}


In [7]:
# Second evaluation set: the domains stored in the LLM CSV file.
dataset_name = "data/llm_domain.csv"
llm_dataset = load_dataset(path="csv", data_files=dataset_name)
def preprocess_function(examples):
    """Tokenize a batch of rows from the LLM set and label every row as class 2.

    Args:
        examples: Batch mapping with a ``domain`` column holding the text.

    Returns:
        The tokenizer output, padded/truncated to 30 tokens, with a ``label``
        list containing ``2`` once per input row.
    """
    domains = examples["domain"]
    tokenized = tokenizer(
        domains,
        max_length=30,
        padding="max_length",
        truncation=True,
    )
    tokenized["label"] = [2 for _ in range(len(domains))]
    return tokenized
# Apply the tokenization/labeling step to every split, one batch at a time.
llm_dataset = llm_dataset.map(function=preprocess_function, batched=True)

In [15]:
# `llm_dataset` is a DatasetDict keyed by split name. Passing the whole dict
# makes the Trainer evaluate each split separately and prefix every metric
# with the split name ("eval_train_*"). Selecting the "train" split directly
# keeps the metric keys ("eval_*") consistent with the earlier evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=llm_dataset["train"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
results = trainer.evaluate()
print("Evaluation results:", results)

  trainer = Trainer(


Evaluation results: {'eval_train_loss': 9.5806303024292, 'eval_train_model_preparation_time': 0.001, 'eval_train_accuracy': 0.02493721894527828, 'eval_train_precision': 1.0, 'eval_train_recall': 0.02493721894527828, 'eval_train_f1': 0.04866096866096866, 'eval_train_runtime': 15.4468, 'eval_train_samples_per_second': 1108.516, 'eval_train_steps_per_second': 69.335}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
