In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load the fine-tuned tokenizer and sequence classifier from the local checkpoint directory.
model_path = "./finetuned_model"
tokenizer, model = (
    BertTokenizer.from_pretrained(model_path),
    BertForSequenceClassification.from_pretrained(model_path),
)


import error: No module named 'triton'


In [2]:
from datasets import load_dataset

# Read the records from the local CSV file via the generic "csv" loader.
dataset_name = "data/balanced_record_without_llm.csv"
dataset = load_dataset(path="csv", data_files=dataset_name)

# Batch preprocessing hook passed to `dataset.map(..., batched=True)`.
def preprocess_function(examples):
    """Tokenize a batch of records and attach their labels.

    Args:
        examples: Batch mapping that provides the ``"default/domain"`` column
            (text handed to the tokenizer) and the ``"default/class"`` column
            (copied through as the label).

    Returns:
        The tokenizer output for the batch, truncated and padded to a fixed
        length of 30, with an extra ``"label"`` entry holding the values of
        ``examples["default/class"]``.
    """
    tokenized = tokenizer(
        examples["default/domain"],
        truncation=True,
        padding="max_length",
        max_length=30,
    )
    tokenized["label"] = examples["default/class"]
    return tokenized

# Tokenize every record in batches, then hold out 20% of the rows as a test split
# (fixed seed so the split is reproducible).
dataset = dataset.map(preprocess_function, batched=True)

splited_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset = splited_dataset["train"]
test_dataset = splited_dataset["test"]


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/300000 [00:00<?, ? examples/s]

In [3]:
# sample case
# Switch the model to evaluation mode before running inference below.
model.eval()
def predict_domain(domain):
    """Classify one domain name, or a batch of them, with the fine-tuned model.

    Args:
        domain: A single domain string, or a list/tuple of domain strings.

    Returns:
        A ``(predicted_class, probabilities)`` tuple. ``probabilities`` is a
        nested list with one row of softmax scores per input. For a single
        string, ``predicted_class`` is an ``int``; for a list/tuple it is a
        list of ``int`` with one entry per input.
    """
    is_single = isinstance(domain, str)
    inputs = tokenizer(domain, return_tensors="pt", truncation=True, padding="max_length", max_length=30)
    # Keep the inputs on the same device as the model so the call still works
    # if the model has been moved (e.g. model.to("cuda")).
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # Take the argmax per row. Without `dim`, torch.argmax indexes the flattened
    # tensor, which is only a valid class id when there is exactly one row.
    predicted = torch.argmax(probabilities, dim=-1)
    predicted_class = predicted.item() if is_single else predicted.tolist()

    return predicted_class, probabilities.tolist()

# Run the classifier on one example domain and show what it returns.
domain = "samsungmobile.co.il"
predicted_class, probabilities = predict_domain(domain)

print(
    f"Domain: {domain}",
    f"Predicted Class: {predicted_class}",
    f"Probabilities: {probabilities}",
    sep="\n",
)

Domain: samsungmobile.co.il
Predicted Class: 0
Probabilities: [[0.9998581409454346, 0.00010287507757311687, 3.8956888602115214e-05]]


In [10]:
def _predict_batch(batch):
    """Predict a class id for every domain in one ``Dataset.map`` batch.

    The model runs once per batch instead of once per row, which avoids tens of
    thousands of single-example forward passes over the test split. The
    tokenizer settings match the ones used by ``predict_domain``.

    Args:
        batch: Batch mapping with a ``"default/domain"`` list of strings.

    Returns:
        ``{"predict": [...]}`` with one predicted class id per input row.
    """
    inputs = tokenizer(
        batch["default/domain"],
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=30,
    )
    with torch.no_grad():
        logits = model(**inputs).logits
    # Softmax is monotonic, so the argmax over the logits is the same class
    # as the argmax over the probabilities.
    return {"predict": logits.argmax(dim=-1).tolist()}


# Add a "predict" column holding the model's class id for each test row.
test_dataset = test_dataset.map(_predict_batch, batched=True, batch_size=256)

Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

In [36]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

# Put the labels and the model predictions side by side.
df = pd.DataFrame({"label": test_dataset["label"], "predict": test_dataset["predict"]})
y_true, y_pred = df["label"], df["predict"]

# Per-class report and confusion matrix.
report = classification_report(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)
print(cm)
print(report)

# Aggregate scores: accuracy plus macro- and weighted-averaged precision/recall/F1.
averaged_scorers = (
    ("Precision (Macro)", precision_score, "macro"),
    ("Recall (Macro)", recall_score, "macro"),
    ("F1-score (Macro)", f1_score, "macro"),
    ("Precision (Weighted)", precision_score, "weighted"),
    ("Recall (Weighted)", recall_score, "weighted"),
    ("F1-score (Weighted)", f1_score, "weighted"),
)
overall_metrics = {"Accuracy": accuracy_score(y_true, y_pred)}
for metric_name, scorer, averaging in averaged_scorers:
    overall_metrics[metric_name] = scorer(y_true, y_pred, average=averaging)

overall_metrics_df = pd.DataFrame(overall_metrics, index=["Score"]).T
print(overall_metrics_df)

[[19881   264    49]
 [  317 19374     0]
 [   17   147 19951]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     20194
           1       0.98      0.98      0.98     19691
           2       1.00      0.99      0.99     20115

    accuracy                           0.99     60000
   macro avg       0.99      0.99      0.99     60000
weighted avg       0.99      0.99      0.99     60000

                         Score
Accuracy              0.986767
Precision (Macro)     0.986751
Recall (Macro)        0.986750
F1-score (Macro)      0.986746
Precision (Weighted)  0.986800
Recall (Weighted)     0.986767
F1-score (Weighted)   0.986779
