In [None]:
!pip install -q transformers datasets scikit-learn gradio

In [2]:
import pandas as pd
from transformers import pipeline
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
import gradio as gr
from tqdm import tqdm # For progress bars

# Configuration
MODEL_NAME = "facebook/bart-large-mnli"
DATASET_NAME = "ag_news" # A standard news classification dataset
SAMPLE_SIZE = 100 # Number of samples to evaluate

print("Libraries imported and configuration set.")

2025-11-25 05:30:38.007218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764048638.203847      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764048638.255691      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Libraries imported and configuration set.


In [3]:
# Load the AG News dataset (Split: test)
dataset = load_dataset(DATASET_NAME, split="test")

# Shuffle and select a subset to save time
subset = dataset.shuffle(seed=42).select(range(SAMPLE_SIZE))

# AG News labels are integers (0-3). We need to map them to text.
# 0: World, 1: Sports, 2: Business, 3: Sci/Tech
id2label = {
    0: "World Politics",
    1: "Sports",
    2: "Business",
    3: "Science and Technology"
}

# Prepare candidate labels for the Zero-Shot model
candidate_labels = list(id2label.values())

print(f"Loaded {len(subset)} samples.")
print(f"Candidate Labels: {candidate_labels}")
print(f"Example Text: {subset[0]['text']}")
print(f"Example Actual Label: {id2label[subset[0]['label']]}")

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Loaded 100 samples.
Candidate Labels: ['World Politics', 'Sports', 'Business', 'Science and Technology']
Example Text: Indian board plans own telecast of Australia series The Indian cricket board said on Wednesday it was making arrangements on its own to broadcast next month #39;s test series against Australia, which is under threat because of a raging TV rights dispute.
Example Actual Label: Sports


In [4]:
# Initialize the Zero-Shot Classification Pipeline
# We use device=0 if GPU is available, otherwise CPU (-1)
import torch
device = 0 if torch.cuda.is_available() else -1

classifier = pipeline("zero-shot-classification",
                      model=MODEL_NAME,
                      device=device)

print(f"Model {MODEL_NAME} loaded on {'GPU' if device==0 else 'CPU'}.")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Model facebook/bart-large-mnli loaded on GPU.


In [5]:
actual_labels = []
predicted_labels = []

print("Running inference on samples...")

# Iterate through the subset
for example in tqdm(subset):
    text = example['text']
    true_label_id = example['label']
    true_label_text = id2label[true_label_id]

    # Run Zero-Shot Prediction
    result = classifier(text, candidate_labels)

    # The model returns labels sorted by score. The first one is the top prediction.
    predicted_label = result['labels'][0]

    # Store results
    actual_labels.append(true_label_text)
    predicted_labels.append(predicted_label)

# Calculate Metrics
accuracy = accuracy_score(actual_labels, predicted_labels)

print("\n" + "="*30)
print(f"Evaluation Results (Samples: {SAMPLE_SIZE})")
print("="*30)
print(f"Accuracy: {accuracy:.2%}")
print("\nClassification Report:\n")
print(classification_report(actual_labels, predicted_labels, zero_division=0))

Running inference on samples...


 10%|█         | 10/100 [00:01<00:10,  8.21it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 100/100 [00:11<00:00,  8.82it/s]


Evaluation Results (Samples: 100)
Accuracy: 76.00%

Classification Report:

                        precision    recall  f1-score   support

              Business       0.65      0.97      0.78        32
Science and Technology       0.80      0.21      0.33        19
                Sports       0.90      1.00      0.95        27
        World Politics       0.82      0.64      0.72        22

              accuracy                           0.76       100
             macro avg       0.79      0.70      0.69       100
          weighted avg       0.78      0.76      0.73       100






In [6]:
def classify_news(text):
    # Run the pipeline
    output = classifier(text, candidate_labels)
    # Format output for Gradio (Label -> Confidence Score)
    return {label: score for label, score in zip(output['labels'], output['scores'])}

# Create Gradio Interface
iface = gr.Interface(
    fn=classify_news,
    inputs=gr.Textbox(lines=2, placeholder="Enter a news headline here..."),
    outputs=gr.Label(num_top_classes=4),
    title="Zero-Shot News Classifier",
    description="Enter a news snippet, and the AI will categorize it into World Politics, Sports, Business, or Tech without prior training.",
    examples=[
        ["The stock market hit an all-time high today due to tech sector growth."],
        ["The local team won the championship in the final minutes of the game."]
    ]
)

# Launch the app
iface.launch(share=True) # share=True creates a public link

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://ae73844dfc26c0db80.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


