# Multi-Label Review Classification with Hugging Face Transformers

This notebook implements a multi-label classification system to predict:

- `is_spam`: Whether a review is spam (1) or not (0)
- `is_advertisement`: Whether a review is an advertisement (1) or not (0)
- `is_rant_without_visit`: Whether a review is a rant without actual visit (1) or not (0)

The model uses both review text and metadata features for prediction.


## 1. Import Libraries and Setup


In [11]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.preprocessing import StandardScaler
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
from datasets import Dataset
import warnings

warnings.filterwarnings("ignore")

# Set seeds for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Libraries imported successfully!
PyTorch version: 2.8.0
CUDA available: False


## 2. Create Sample Dataset (1000 rows)

Generate a realistic dataset with diverse review types and multi-label scenarios.


In [12]:
def generate_sample_data(n_samples=1000):
    """Generate realistic sample review data with multi-label targets"""

    # Sample business names and categories
    businesses = [
        "Joe's Diner",
        "Tech Solutions Inc",
        "Downtown Cafe",
        "City Hospital",
        "SuperMart",
        "Luxury Hotel",
        "Fast Food Corner",
        "Auto Repair Shop",
        "Beauty Salon",
        "Fitness Center",
        "Book Store",
        "Pizza Palace",
        "Medical Clinic",
        "Shopping Mall",
        "Gas Station",
        "Bank Branch",
    ]

    categories = [
        "Restaurant",
        "Technology",
        "Healthcare",
        "Retail",
        "Hotel",
        "Automotive",
        "Beauty",
        "Fitness",
        "Food",
        "Finance",
    ]

    # Sample review templates for different types
    legitimate_reviews = [
        "Great service and friendly staff. Highly recommend!",
        "Good experience overall. Will come back again.",
        "Nice atmosphere and reasonable prices.",
        "Staff was helpful and the place was clean.",
        "Excellent quality and fast service.",
        "Had a pleasant experience here.",
        "The food was delicious and service was quick.",
        "Clean facilities and professional staff.",
    ]

    spam_reviews = [
        "Best place ever!!! 5 stars always!!!",
        "Amazing amazing amazing! You must visit!",
        "Perfect perfect perfect! No complaints!",
        "Incredible service! Outstanding! Fantastic!",
        "Wow! Such an amazing experience! Unbelievable!",
    ]

    advertisement_reviews = [
        "Check out their new website at www.example.com for great deals!",
        "Visit our store and get 50% off all items this weekend!",
        "Call 555-1234 for special promotions and discounts!",
        "New menu available! Try our premium dishes today!",
        "Follow us on social media for exclusive offers!",
    ]

    rant_reviews = [
        "Terrible place! Never going back! Worst service ever!",
        "This business should be shut down! Horrible experience!",
        "Waste of time and money! Staff is rude and unprofessional!",
        "I demand a refund! This place is a disaster!",
        "Absolutely disgusting! Management doesn't care!",
    ]

    data = []

    for i in range(n_samples):
        # Generate basic review data
        user_id = f"{random.randint(10**17, 10**18)}"
        user_name = f"User_{i+1}"
        time = random.randint(1500000000000, 1700000000000)  # Random timestamp
        rating = random.randint(1, 5)
        pics = random.choice([True, False])
        resp = None if random.random() > 0.2 else "Thank you for your feedback!"
        gmap_id = (
            f"0x{random.randint(10**15, 10**16):x}:0x{random.randint(10**15, 10**16):x}"
        )
        biz_name = random.choice(businesses)
        description = f"Description for {biz_name}"
        category = random.choice(categories)
        avg_rating = round(random.uniform(2.0, 4.8), 1)
        num_of_reviews = random.randint(1, 500)
        hours = "9AM-5PM" if random.random() > 0.3 else None
        address = f"{random.randint(100, 9999)} Main St, City, State"
        price_level = random.randint(0, 4)
        state = "Sample State"

        # Determine review type and labels (multi-label possible)
        review_type = random.choices(
            [
                "legitimate",
                "spam",
                "advertisement",
                "rant",
                "spam_ad",
                "spam_rant",
                "ad_rant",
            ],
            weights=[0.6, 0.1, 0.1, 0.1, 0.05, 0.025, 0.025],
        )[0]

        # Initialize labels
        is_spam = 0
        is_advertisement = 0
        is_rant_without_visit = 0

        # Generate text and labels based on type
        if review_type == "legitimate":
            text = random.choice(legitimate_reviews)
        elif review_type == "spam":
            text = random.choice(spam_reviews)
            is_spam = 1
        elif review_type == "advertisement":
            text = random.choice(advertisement_reviews)
            is_advertisement = 1
        elif review_type == "rant":
            text = random.choice(rant_reviews)
            is_rant_without_visit = 1
        elif review_type == "spam_ad":  # Multi-label: both spam and ad
            text = (
                f"{random.choice(spam_reviews)} {random.choice(advertisement_reviews)}"
            )
            is_spam = 1
            is_advertisement = 1
        elif review_type == "spam_rant":  # Multi-label: both spam and rant
            text = f"{random.choice(spam_reviews)} {random.choice(rant_reviews)}"
            is_spam = 1
            is_rant_without_visit = 1
        elif review_type == "ad_rant":  # Multi-label: both ad and rant
            text = (
                f"{random.choice(advertisement_reviews)} {random.choice(rant_reviews)}"
            )
            is_advertisement = 1
            is_rant_without_visit = 1

        data.append(
            {
                "user_id": user_id,
                "user_name": user_name,
                "time": time,
                "rating": rating,
                "text": text,
                "pics": pics,
                "resp": resp,
                "gmap_id": gmap_id,
                "biz_name": biz_name,
                "description": description,
                "category": category,
                "avg_rating": avg_rating,
                "num_of_reviews": num_of_reviews,
                "hours": hours,
                "address": address,
                "price_level": price_level,
                "state": state,
                "is_spam": is_spam,
                "is_advertisement": is_advertisement,
                "is_rant_without_visit": is_rant_without_visit,
            }
        )

    return pd.DataFrame(data)


# Generate the dataset
df = generate_sample_data(1000)

print(f"Dataset created with {len(df)} rows")
print(f"Dataset shape: {df.shape}")
print("\nFirst 3 rows:")
print(df.head(3))

Dataset created with 1000 rows
Dataset shape: (1000, 20)

First 3 rows:
              user_id user_name           time  rating  \
0  228355989445507485    User_1  1565605751383       2   
1  925514409025013289    User_2  1691769793417       5   
2  513875040606644119    User_3  1598129241107       5   

                                             text   pics  resp  \
0  Wow! Such an amazing experience! Unbelievable!   True  None   
1   The food was delicious and service was quick.  False  None   
2  Good experience overall. Will come back again.  False  None   

                             gmap_id        biz_name  \
0  0x1b410b5202ad6d:0x150123892c6150   Downtown Cafe   
1   0xc746f3ba09dac:0x1f5ecd73fd558b     Joe's Diner   
2   0x12411a5f9633d0:0x78c122e0cbe85  Medical Clinic   

                      description    category  avg_rating  num_of_reviews  \
0   Description for Downtown Cafe     Finance         3.2              16   
1     Description for Joe's Diner  Healthcare      

## 3. Explore Dataset and Label Distribution


In [13]:
# Check label distribution
print("=== LABEL DISTRIBUTION ===")
target_cols = ["is_spam", "is_advertisement", "is_rant_without_visit"]

for col in target_cols:
    positive_count = df[col].sum()
    negative_count = len(df) - positive_count
    print(f"{col}:")
    print(f"  Positive (1): {positive_count} ({positive_count/len(df)*100:.1f}%)")
    print(f"  Negative (0): {negative_count} ({negative_count/len(df)*100:.1f}%)")

# Check multi-label combinations
print("\n=== MULTI-LABEL COMBINATIONS ===")
df["label_combination"] = df[target_cols].apply(
    lambda x: "".join(x.astype(str)), axis=1
)
combination_counts = df["label_combination"].value_counts()

label_map = {
    "000": "Legitimate",
    "100": "Spam only",
    "010": "Advertisement only",
    "001": "Rant only",
    "110": "Spam + Advertisement",
    "101": "Spam + Rant",
    "011": "Advertisement + Rant",
    "111": "All three labels",
}

for combo, count in combination_counts.items():
    label_desc = label_map.get(combo, combo)
    print(f"{label_desc}: {count} ({count/len(df)*100:.1f}%)")

# Show sample reviews for each type
print("\n=== SAMPLE REVIEWS BY TYPE ===")
for combo, description in label_map.items():
    if combo in combination_counts.index:
        sample_text = df[df["label_combination"] == combo]["text"].iloc[0]
        print(f"\n{description}:")
        print(
            f"'{sample_text[:100]}...' (Rating: {df[df['label_combination'] == combo]['rating'].iloc[0]})"
        )

=== LABEL DISTRIBUTION ===
is_spam:
  Positive (1): 161 (16.1%)
  Negative (0): 839 (83.9%)
is_advertisement:
  Positive (1): 185 (18.5%)
  Negative (0): 815 (81.5%)
is_rant_without_visit:
  Positive (1): 146 (14.6%)
  Negative (0): 854 (85.4%)

=== MULTI-LABEL COMBINATIONS ===
Legitimate: 609 (60.9%)
Advertisement only: 109 (10.9%)
Rant only: 99 (9.9%)
Spam only: 82 (8.2%)
Spam + Advertisement: 54 (5.4%)
Spam + Rant: 25 (2.5%)
Advertisement + Rant: 22 (2.2%)

=== SAMPLE REVIEWS BY TYPE ===

Legitimate:
'The food was delicious and service was quick....' (Rating: 5)

Spam only:
'Wow! Such an amazing experience! Unbelievable!...' (Rating: 2)

Advertisement only:
'Visit our store and get 50% off all items this weekend!...' (Rating: 3)

Rant only:
'Waste of time and money! Staff is rude and unprofessional!...' (Rating: 5)

Spam + Advertisement:
'Amazing amazing amazing! You must visit! Call 555-1234 for special promotions and discounts!...' (Rating: 1)

Spam + Rant:
'Wow! Such an amazing e

## 4. Prepare Features for Multi-Label Classification

Combine text and metadata features for better prediction accuracy.


In [14]:
def prepare_features(df):
    """Prepare combined text and metadata features"""

    # Create combined text feature including metadata
    df["combined_text"] = df.apply(
        lambda row: f"Review: {row['text']} "
        f"Business: {row['biz_name']} "
        f"Category: {row['category']} "
        f"Rating: {row['rating']}/5 "
        f"Avg Rating: {row['avg_rating']}/5 "
        f"Reviews: {row['num_of_reviews']} "
        f"Price: {row['price_level']} "
        f"Has Photos: {row['pics']}",
        axis=1,
    )

    # Prepare labels as multi-label format
    labels = df[target_cols].values.tolist()

    return df["combined_text"].tolist(), labels


# Prepare features
texts, labels = prepare_features(df)

print(f"Prepared {len(texts)} text samples")
print(f"Sample combined text (first 200 chars):")
print(f"'{texts[0][:200]}...'")
print(f"\nCorresponding labels: {labels[0]}")

Prepared 1000 text samples
Sample combined text (first 200 chars):
'Review: Wow! Such an amazing experience! Unbelievable! Business: Downtown Cafe Category: Finance Rating: 2/5 Avg Rating: 3.2/5 Reviews: 16 Price: 4 Has Photos: True...'

Corresponding labels: [1, 0, 0]


## 5. Load Pre-trained Transformer Model

Using DistilBERT as a lightweight but effective model for multi-label classification.


In [15]:
# Model configuration
MODEL_NAME = "distilbert-base-uncased"
NUM_LABELS = 3  # is_spam, is_advertisement, is_rant_without_visit

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
    problem_type="multi_label_classification",  # Important for multi-label
    id2label={0: "is_spam", 1: "is_advertisement", 2: "is_rant_without_visit"},
    label2id={"is_spam": 0, "is_advertisement": 1, "is_rant_without_visit": 2},
)

print(f"Loaded model: {MODEL_NAME}")
print(f"Model parameters: {model.num_parameters():,}")
print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Model moved to: {device}")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loaded model: distilbert-base-uncased
Model parameters: 66,955,779
Device: CPU
Model moved to: cpu


## 6. Train-Test Split and Tokenization


In [16]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=df["label_combination"]
)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")


# Tokenize the data
def tokenize_function(examples):
    return tokenizer(examples, truncation=True, padding=True, max_length=512)


# Create training dataset
train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=512)
test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=512)


# Create Dataset objects
class ReviewDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)


train_dataset = ReviewDataset(train_encodings, y_train)
test_dataset = ReviewDataset(test_encodings, y_test)

print("Dataset objects created successfully!")

Training samples: 800
Test samples: 200
Dataset objects created successfully!


## 7. Training Configuration and Custom Metrics


In [17]:
# Define custom metrics for multi-label classification
def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    # Apply sigmoid and threshold at 0.5 for multi-label classification
    sigmoid = torch.nn.Sigmoid()
    predictions = sigmoid(torch.Tensor(predictions))
    predictions = (predictions > 0.5).int().numpy()

    # Calculate metrics for each label
    results = {}
    label_names = ["is_spam", "is_advertisement", "is_rant_without_visit"]

    for i, label_name in enumerate(label_names):
        true_labels = labels[:, i]
        pred_labels = predictions[:, i]

        f1 = f1_score(true_labels, pred_labels, average="binary")
        precision = precision_score(
            true_labels, pred_labels, average="binary", zero_division=0
        )
        recall = recall_score(
            true_labels, pred_labels, average="binary", zero_division=0
        )

        results[f"{label_name}_f1"] = f1
        results[f"{label_name}_precision"] = precision
        results[f"{label_name}_recall"] = recall

    # Overall metrics
    results["macro_f1"] = f1_score(labels, predictions, average="macro")
    results["micro_f1"] = f1_score(labels, predictions, average="micro")

    return results


# Training arguments with proper logging disabled
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    report_to=[],  # Empty list to disable all logging
    disable_tqdm=False,  # Keep progress bars
)

print("Training configuration set up successfully!")
print(f"Training epochs: {training_args.num_train_epochs}")
print(f"Batch size: {training_args.per_device_train_batch_size}")
print("MLflow and other logging integrations disabled.")

Training configuration set up successfully!
Training epochs: 3
Batch size: 8
MLflow and other logging integrations disabled.


## 8. Train the Multi-Label Model


In [18]:
# Clean up any existing MLflow runs to prevent conflicts
import shutil
import os

# Remove mlruns directory if it exists and is corrupted
if os.path.exists("./mlruns"):
    try:
        shutil.rmtree("./mlruns")
        print("Cleaned up existing MLflow directory")
    except Exception as e:
        print(f"Could not clean MLflow directory: {e}")

# Initialize trainer with error handling
try:
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    )
    print("Trainer initialized successfully")

    print("Starting training...")
    print("This may take several minutes depending on your hardware.")

    # Train the model
    train_result = trainer.train()

    print("Training completed!")
    print(f"Training loss: {train_result.training_loss:.4f}")

except Exception as e:
    print(f"Training failed with error: {e}")
    print("Attempting to train without MLflow integration...")

    # Set environment variable to disable MLflow
    os.environ["DISABLE_MLFLOW_INTEGRATION"] = "TRUE"

    # Try again with additional safeguards
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    )

    print("Retrying training...")
    train_result = trainer.train()
    print("Training completed on retry!")
    print(f"Training loss: {train_result.training_loss:.4f}")

Cleaned up existing MLflow directory
Trainer initialized successfully
Starting training...
This may take several minutes depending on your hardware.
Trainer initialized successfully
Starting training...
This may take several minutes depending on your hardware.


Epoch,Training Loss,Validation Loss,Is Spam F1,Is Spam Precision,Is Spam Recall,Is Advertisement F1,Is Advertisement Precision,Is Advertisement Recall,Is Rant Without Visit F1,Is Rant Without Visit Precision,Is Rant Without Visit Recall,Macro F1,Micro F1
1,0.2119,0.072969,0.933333,1.0,0.875,1.0,1.0,1.0,0.982456,1.0,0.965517,0.97193,0.973822
2,0.0208,0.013234,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.0118,0.009555,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Training completed!
Training loss: 0.1418


## 8b. Alternative Training (if above fails)

If the Trainer approach fails due to MLflow issues, use this simpler training loop:


In [19]:
# Alternative training approach without Trainer class
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm


def alternative_training():
    print("Using alternative training approach...")

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    # Setup optimizer
    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

    # Training loop
    model.train()
    total_loss = 0
    num_epochs = 3

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        epoch_loss = 0

        for batch_idx, batch in enumerate(tqdm(train_loader, desc="Training")):
            # Move batch to device
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(
                input_ids=input_ids, attention_mask=attention_mask, labels=labels
            )
            loss = outputs.loss

            # Backward pass
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # Print progress every 50 batches
            if (batch_idx + 1) % 50 == 0:
                avg_loss = epoch_loss / (batch_idx + 1)
                print(f"  Batch {batch_idx + 1}, Average Loss: {avg_loss:.4f}")

        avg_epoch_loss = epoch_loss / len(train_loader)
        total_loss += avg_epoch_loss
        print(f"Epoch {epoch + 1} completed. Average Loss: {avg_epoch_loss:.4f}")

        # Simple evaluation
        if (epoch + 1) % 1 == 0:  # Evaluate every epoch
            model.eval()
            eval_loss = 0
            with torch.no_grad():
                for batch in test_loader:
                    input_ids = batch["input_ids"].to(device)
                    attention_mask = batch["attention_mask"].to(device)
                    labels = batch["labels"].to(device)

                    outputs = model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        labels=labels,
                    )
                    eval_loss += outputs.loss.item()

            avg_eval_loss = eval_loss / len(test_loader)
            print(f"Evaluation Loss: {avg_eval_loss:.4f}")
            model.train()

    avg_training_loss = total_loss / num_epochs
    print(f"\nAlternative training completed!")
    print(f"Average training loss: {avg_training_loss:.4f}")

    return avg_training_loss


# Uncomment the line below if you need to use alternative training
# alternative_training()

## 9. Evaluate Model Performance


In [20]:
# Evaluate on test set
eval_results = trainer.evaluate()

print("=== MODEL EVALUATION RESULTS ===")
print(f"Test Loss: {eval_results['eval_loss']:.4f}")
print("\n=== PER-LABEL METRICS ===")

label_names = ["is_spam", "is_advertisement", "is_rant_without_visit"]
for label in label_names:
    f1 = eval_results.get(f"eval_{label}_f1", 0)
    precision = eval_results.get(f"eval_{label}_precision", 0)
    recall = eval_results.get(f"eval_{label}_recall", 0)

    print(f"\n{label.upper()}:")
    print(f"  F1 Score:   {f1:.4f}")
    print(f"  Precision:  {precision:.4f}")
    print(f"  Recall:     {recall:.4f}")

print("\n=== OVERALL METRICS ===")
print(f"Macro F1:  {eval_results.get('eval_macro_f1', 0):.4f}")
print(f"Micro F1:  {eval_results.get('eval_micro_f1', 0):.4f}")

=== MODEL EVALUATION RESULTS ===
Test Loss: 0.0132

=== PER-LABEL METRICS ===

IS_SPAM:
  F1 Score:   1.0000
  Precision:  1.0000
  Recall:     1.0000

IS_ADVERTISEMENT:
  F1 Score:   1.0000
  Precision:  1.0000
  Recall:     1.0000

IS_RANT_WITHOUT_VISIT:
  F1 Score:   1.0000
  Precision:  1.0000
  Recall:     1.0000

=== OVERALL METRICS ===
Macro F1:  1.0000
Micro F1:  1.0000


## 10. Detailed Prediction Analysis


In [21]:
# Make predictions on test set
predictions = trainer.predict(test_dataset)

# Process predictions
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(torch.Tensor(predictions.predictions)).numpy()
pred_labels = (probs > 0.5).astype(int)

# Convert to DataFrame for analysis
test_df = pd.DataFrame(
    {
        "text": [X_test[i][:100] + "..." for i in range(len(X_test))],  # Truncated text
        "true_spam": [y_test[i][0] for i in range(len(y_test))],
        "true_ad": [y_test[i][1] for i in range(len(y_test))],
        "true_rant": [y_test[i][2] for i in range(len(y_test))],
        "pred_spam": pred_labels[:, 0],
        "pred_ad": pred_labels[:, 1],
        "pred_rant": pred_labels[:, 2],
        "prob_spam": probs[:, 0],
        "prob_ad": probs[:, 1],
        "prob_rant": probs[:, 2],
    }
)

# Calculate accuracy for each label
print("=== LABEL-WISE ACCURACY ===")
spam_acc = (test_df["true_spam"] == test_df["pred_spam"]).mean()
ad_acc = (test_df["true_ad"] == test_df["pred_ad"]).mean()
rant_acc = (test_df["true_rant"] == test_df["pred_rant"]).mean()

print(f"Spam Detection Accuracy:        {spam_acc:.4f}")
print(f"Advertisement Detection Accuracy: {ad_acc:.4f}")
print(f"Rant Detection Accuracy:        {rant_acc:.4f}")

# Show some example predictions
print("\n=== EXAMPLE PREDICTIONS ===")
print("(Showing first 5 test samples)")
print("\nFormat: [True Labels] -> [Predicted Labels] (Probabilities)")

for i in range(min(5, len(test_df))):
    row = test_df.iloc[i]
    true_labels = [row["true_spam"], row["true_ad"], row["true_rant"]]
    pred_labels = [row["pred_spam"], row["pred_ad"], row["pred_rant"]]
    probs = [row["prob_spam"], row["prob_ad"], row["prob_rant"]]

    print(f"\nSample {i+1}:")
    print(f"Text: {row['text']}")
    print(f"True:  {true_labels} -> Pred: {pred_labels}")
    print(
        f"Probabilities: [Spam: {probs[0]:.3f}, Ad: {probs[1]:.3f}, Rant: {probs[2]:.3f}]"
    )

=== LABEL-WISE ACCURACY ===
Spam Detection Accuracy:        1.0000
Advertisement Detection Accuracy: 1.0000
Rant Detection Accuracy:        1.0000

=== EXAMPLE PREDICTIONS ===
(Showing first 5 test samples)

Format: [True Labels] -> [Predicted Labels] (Probabilities)

Sample 1:
Text: Review: Check out their new website at www.example.com for great deals! Terrible place! Never going ...
True:  [np.int64(0), np.int64(1), np.int64(1)] -> Pred: [np.int64(0), np.int64(1), np.int64(1)]
Probabilities: [Spam: 0.047, Ad: 0.943, Rant: 0.927]

Sample 2:
Text: Review: Clean facilities and professional staff. Business: Shopping Mall Category: Healthcare Rating...
True:  [np.int64(0), np.int64(0), np.int64(0)] -> Pred: [np.int64(0), np.int64(0), np.int64(0)]
Probabilities: [Spam: 0.004, Ad: 0.005, Rant: 0.004]

Sample 3:
Text: Review: Call 555-1234 for special promotions and discounts! This business should be shut down! Horri...
True:  [np.int64(0), np.int64(1), np.int64(1)] -> Pred: [np.int64(0), n

## 11. Confusion Matrix and Classification Report


In [22]:
from sklearn.metrics import multilabel_confusion_matrix, classification_report

# Generate detailed classification report
y_true = np.array(y_test)
y_pred = pred_labels

print("=== DETAILED CLASSIFICATION REPORT ===")
target_names = ["is_spam", "is_advertisement", "is_rant_without_visit"]

# Individual classification reports for each label
for i, label_name in enumerate(target_names):
    print(f"\n{label_name.upper()} Classification Report:")
    print(
        classification_report(
            y_true[:, i], y_pred[:, i], target_names=["Negative", "Positive"], digits=4
        )
    )

# Multi-label confusion matrices
print("\n=== CONFUSION MATRICES ===")
cm_multilabel = multilabel_confusion_matrix(y_true, y_pred)

for i, label_name in enumerate(target_names):
    print(f"\n{label_name.upper()} Confusion Matrix:")
    tn, fp, fn, tp = cm_multilabel[i].ravel()
    print(f"True Negatives:  {tn:4d}   False Positives: {fp:4d}")
    print(f"False Negatives: {fn:4d}   True Positives:  {tp:4d}")

    # Calculate additional metrics
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    print(f"Specificity: {specificity:.4f}   Sensitivity: {sensitivity:.4f}")

=== DETAILED CLASSIFICATION REPORT ===

IS_SPAM Classification Report:


TypeError: list indices must be integers or slices, not tuple

## 12. Model Summary and Predictions Function


In [None]:
def predict_review_quality(
    text,
    business_name="Unknown",
    category="General",
    rating=3,
    avg_rating=3.5,
    num_reviews=50,
    price_level=1,
    has_pics=False,
):
    """Predict review quality labels for a single review"""

    # Prepare combined text
    combined_text = (
        f"Review: {text} "
        f"Business: {business_name} "
        f"Category: {category} "
        f"Rating: {rating}/5 "
        f"Avg Rating: {avg_rating}/5 "
        f"Reviews: {num_reviews} "
        f"Price: {price_level} "
        f"Has Photos: {has_pics}"
    )

    # Tokenize
    inputs = tokenizer(
        combined_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(device)

    # Predict
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.sigmoid(outputs.logits)

    # Convert to predictions
    probs = predictions.cpu().numpy()[0]
    labels = (probs > 0.5).astype(int)

    results = {
        "is_spam": {"prediction": bool(labels[0]), "confidence": float(probs[0])},
        "is_advertisement": {
            "prediction": bool(labels[1]),
            "confidence": float(probs[1]),
        },
        "is_rant_without_visit": {
            "prediction": bool(labels[2]),
            "confidence": float(probs[2]),
        },
    }

    return results


# Test the prediction function
print("=== TESTING PREDICTION FUNCTION ===")

test_cases = [
    {
        "text": "This place is absolutely amazing! Best food ever!",
        "business_name": "Joe's Diner",
        "category": "Restaurant",
        "rating": 5,
    },
    {
        "text": "Check out our website www.example.com for special deals!",
        "business_name": "Tech Store",
        "category": "Technology",
        "rating": 4,
    },
    {
        "text": "Terrible service! This place should be shut down!",
        "business_name": "Bad Restaurant",
        "category": "Restaurant",
        "rating": 1,
    },
]

for i, test_case in enumerate(test_cases):
    print(f"\nTest Case {i+1}:")
    print(f"Text: '{test_case['text']}'")

    result = predict_review_quality(**test_case)

    print("Predictions:")
    for label, pred in result.items():
        status = "YES" if pred["prediction"] else "NO"
        confidence = pred["confidence"]
        print(f"  {label}: {status} (confidence: {confidence:.3f})")

print("\n=== MODEL TRAINING COMPLETE ===")
print("The model is now ready to predict review quality labels!")
print("Use the predict_review_quality() function for new predictions.")