In [1]:
import sys
import torch

# Record the interpreter and PyTorch versions this notebook was executed with,
# so the results below can be tied to a concrete environment.
for caption, version in (("Python:", sys.version), ("Torch:", torch.__version__)):
    print(caption, version)


Python: 3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]
Torch: 2.9.0+cpu


In [2]:
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import mlflow
from sklearn.metrics import accuracy_score


In [3]:
# Project root (C:\hf-beadando): taken as the parent of the current working
# directory, so this cell expects the kernel to run from a direct subfolder
# of the project.
base_dir = os.path.dirname(os.getcwd())
data_dir = os.path.join(base_dir, "data")

train_path = os.path.join(data_dir, "banking77_train_sample.csv")
test_path = os.path.join(data_dir, "banking77_test_sample.csv")


def load_split(csv_path):
    """Read one Banking77 sample CSV into a DataFrame.

    If the first column is an unnamed one (pandas calls it "Unnamed: 0" when
    a CSV was written together with its index), it is restored as the index
    instead of being kept as a data column.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The loaded frame, guaranteed to contain "text" and "label" columns.

    Raises
    ------
    ValueError
        If the "text" or "label" column is missing.
    """
    frame = pd.read_csv(csv_path)

    # Only promote the first column when it is really a header-less one, so a
    # CSV saved without an index still loads correctly.
    if len(frame.columns) and str(frame.columns[0]).startswith("Unnamed:"):
        frame = frame.set_index(frame.columns[0]).rename_axis(None)

    # Fail here, with a clear message, rather than later in the evaluation loop.
    missing = {"text", "label"} - set(frame.columns)
    if missing:
        raise ValueError(
            f"{csv_path} is missing required column(s): {sorted(missing)}"
        )
    return frame


train_df = load_split(train_path)
test_df = load_split(test_path)

print("Train sorok:", len(train_df))
print("Test sorok:", len(test_df))
train_df.head()


Train sorok: 200
Test sorok: 80


Unnamed: 0,text,label
0,I am still waiting on my card?,card_arrival
1,What can I do if my card still hasn't arrived ...,card_arrival
2,I have been waiting over a week. Is the card s...,card_arrival
3,Can I track my card while it is in the process...,card_arrival
4,"How do I know if I will get my card, or if it ...",card_arrival


In [4]:
# Hugging Face checkpoint used for zero-shot classification.
model_name = "facebook/bart-large-mnli"

# Load the tokenizer and the sequence-classification model for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Wrap both in a zero-shot classification pipeline; device=-1 selects the CPU.
classifier = pipeline(
    "zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
    device=-1,
)

# Candidate intents: the distinct labels of the training sample, sorted.
INTENT_LABELS = train_df["label"].unique().tolist()
INTENT_LABELS.sort()
INTENT_LABELS


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Error while downloading from https://huggingface.co/facebook/bart-large-mnli/resolve/main/model.safetensors: HTTPSConnectionPool(host='cas-bridge.xethub.hf.co', port=443): Read timed out.
Trying to resume download...


model.safetensors:  39%|###8      | 629M/1.63G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


['activate_my_card',
 'card_arrival',
 'card_not_working',
 'exchange_rate',
 'lost_or_stolen_card',
 'request_refund',
 'terminate_account',
 'transfer_not_received_by_recipient']

In [5]:
# Smoke test: classify one hand-written utterance against all candidate
# intents (single-label mode) and display the ranked labels with their scores.
sample_text = "I lost my card yesterday and need help urgently."
sample_result = classifier(
    sample_text,
    candidate_labels=INTENT_LABELS,
    multi_label=False,
)
sample_result


{'sequence': 'I lost my card yesterday and need help urgently.',
 'labels': ['lost_or_stolen_card',
  'card_not_working',
  'transfer_not_received_by_recipient',
  'activate_my_card',
  'request_refund',
  'card_arrival',
  'terminate_account',
  'exchange_rate'],
 'scores': [0.5825157165527344,
  0.2366284728050232,
  0.050211481750011444,
  0.03608247637748718,
  0.03443913906812668,
  0.022961972281336784,
  0.020946424454450607,
  0.016214342787861824]}

In [6]:
# Evaluate the zero-shot classifier on the test sample and track the run in
# MLflow: parameters first, then the accuracy metric and the two CSV inputs.
mlflow.set_experiment("banking77_zero_shot_intent")

y_true = []
y_pred = []

with mlflow.start_run():
    run_params = {
        "model_name": model_name,
        "num_labels": len(INTENT_LABELS),
        "labels": ",".join(INTENT_LABELS),
    }
    for param_name, param_value in run_params.items():
        mlflow.log_param(param_name, param_value)

    # Classify every test utterance; the first returned label is used as
    # the prediction.
    for utterance, gold_label in zip(test_df["text"], test_df["label"]):
        ranking = classifier(
            utterance,
            candidate_labels=INTENT_LABELS,
            multi_label=False,
        )
        y_true.append(gold_label)
        y_pred.append(ranking["labels"][0])

    acc = accuracy_score(y_true, y_pred)
    mlflow.log_metric("accuracy", acc)

    # Attach the exact data files used, so the run can be reproduced.
    for artifact_path in (train_path, test_path):
        mlflow.log_artifact(artifact_path)

print(f"Zero-shot accuracy on Banking77 sample: {acc:.3f}")


  return FileStore(store_uri, store_uri)
2025/11/11 10:33:06 INFO mlflow.tracking.fluent: Experiment with name 'banking77_zero_shot_intent' does not exist. Creating a new experiment.


Zero-shot accuracy on Banking77 sample: 0.675
