In [1]:
import os
import time

import pandas as pd
from datasets import Dataset
from huggingface_hub import login
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

In [2]:
# Authenticate with Hugging Face Hub.
# SECURITY: an access token must never be committed inside a notebook. The token is
# read from the HF_TOKEN environment variable instead. Any token that was previously
# pasted into this cell is exposed and should be revoked and regenerated.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Fail early with an actionable message rather than an opaque auth error later.
    raise RuntimeError(
        "HF_TOKEN is not set. Export a Hugging Face access token in the HF_TOKEN "
        "environment variable before running this notebook."
    )
login(token=hf_token)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
# Read the tab-separated training file into a DataFrame
file_path = "/kaggle/input/power-tr-train-2/power-tr-train.tsv"
data = pd.read_csv(file_path, delimiter="\t")

In [4]:
from sklearn.model_selection import train_test_split

# Draw a 10% sample stratified on the "label" column; the other 90% is discarded.
# The fixed random_state keeps the sample identical across runs.
data_subset, _ = train_test_split(
    data,
    test_size=0.9,
    stratify=data["label"],
    random_state=42,
)

# Ground-truth labels of the sampled rows, in the same order as data_subset
true_labels = data_subset["label"].to_list()



In [5]:
# Prefix every English speech with the classification instruction, store the result
# as a new column, and keep a plain list of the prompts for inference.
data_subset["modified_text"] = [
    f"Classify whether the speaker's party is governing (label 0) or in opposition (label 1): {speech}"
    for speech in data_subset["text_en"]
]
modified_text_list = data_subset["modified_text"].to_list()


In [6]:
# Load the Llama tokenizer and a 2-label sequence-classification model.
# `token=True` reuses the credentials stored by the earlier `login(...)` call; it
# replaces the deprecated `use_auth_token` argument.
# NOTE: the checkpoint provides no classification head, so `score.weight` is newly
# initialised on load (see the warning printed below this cell). Predictions from
# this model are not meaningful until the head has been trained on the task.
model_name = "meta-llama/Llama-3.2-1B"  # Replace with the actual model path or name if locally hosted
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForSequenceClassification.from_pretrained(model_name, token=True, num_labels=2)



tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Wrap the loaded model and tokenizer in a text-classification pipeline
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0,  # run on the first GPU
)

In [8]:
# Run inference chunk by chunk with a progress bar and a periodic ETA message.
# NOTE(review): the pipeline is called without `batch_size`, so the texts in a chunk
# appear to be scored one at a time (see the "pipelines sequentially on GPU" warning
# in the output). Enabling real batching presumably needs a pad token on the
# tokenizer - confirm before changing.
BATCH_SIZE = 8  # number of texts handed to the pipeline per call
ETA_PRINT_INTERVAL = 60  # minimum seconds between ETA messages

predictions = []
total_samples = len(modified_text_list)
start_time = time.time()
last_print_time = start_time
for i in tqdm(range(0, total_samples, BATCH_SIZE), desc="Running inference"):
    batch = modified_text_list[i:i + BATCH_SIZE]
    batch_predictions = classifier(batch)
    predictions.extend(batch_predictions)

    # Read the clock once so the elapsed time and the print gate share a timestamp.
    current_time = time.time()
    elapsed_time = current_time - start_time
    processed_samples = i + len(batch)
    # Linear extrapolation: average seconds per processed sample times the total count.
    estimated_total_time = (elapsed_time / processed_samples) * total_samples
    remaining_time = estimated_total_time - elapsed_time

    # Print the estimate at most once per ETA_PRINT_INTERVAL seconds
    if current_time - last_print_time >= ETA_PRINT_INTERVAL:
        print(f"\rEstimated time remaining: {remaining_time / 60:.2f} minutes")
        last_print_time = current_time

print()

Running inference:   5%|▍         | 10/218 [00:16<05:24,  1.56s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running inference:  15%|█▌        | 33/218 [01:01<06:42,  2.17s/it]

Estimated time remaining: 5.70 minutes


Running inference:  30%|██▉       | 65/218 [02:02<05:01,  1.97s/it]

Estimated time remaining: 4.80 minutes


Running inference:  45%|████▍     | 98/218 [03:04<03:24,  1.71s/it]

Estimated time remaining: 3.74 minutes


Running inference:  60%|█████▉    | 130/218 [04:05<02:13,  1.52s/it]

Estimated time remaining: 2.75 minutes


Running inference:  75%|███████▌  | 164/218 [05:07<01:37,  1.80s/it]

Estimated time remaining: 1.66 minutes


Running inference:  89%|████████▉ | 195/218 [06:08<00:47,  2.07s/it]

Estimated time remaining: 0.70 minutes


Running inference: 100%|██████████| 218/218 [06:48<00:00,  1.87s/it]







In [9]:
# Turn each predicted label string into an integer class id by parsing the text
# that follows its last underscore.
predicted_labels = [
    int(entry["label"].rpartition("_")[2])
    for entry in predictions
]

In [10]:
# Score the predictions against the ground-truth labels
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true=true_labels,
    y_pred=predicted_labels,
    average="binary",
)

# Report each metric on its own line, rounded to four decimal places
print(
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)


Accuracy: 0.5109
Precision: 0.5127
Recall: 0.9709
F1 Score: 0.6711


In [None]:
# Save the per-sample predictions to a CSV file (the aggregate metrics are only printed).
# The previously referenced `texts` variable was never defined, so this cell raised a
# NameError. The text column is now taken from the `text_en` column of the evaluated
# sample, which is in the same row order as true_labels and predicted_labels.
results = pd.DataFrame(
    {
        "text": data_subset["text_en"].tolist(),
        "true_label": true_labels,
        "predicted_label": predicted_labels,
    }
)
results.to_csv("inference_results_with_metrics.csv", index=False)

print("Inference completed. Results saved to 'inference_results_with_metrics.csv'.")