In [2]:
import os
import time

import pandas as pd
from huggingface_hub import login
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer


In [3]:
# Authenticate with Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable so it never
# lands in the notebook source or its saved outputs. When the variable is not
# set, `token=None` lets `login` fall back to its interactive prompt.
# NOTE(review): the token that used to be hardcoded in this cell had write
# permission and must be treated as leaked -- revoke it in the account settings.
hf_token = os.environ.get("HF_TOKEN")
login(token=hf_token)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
# Read the tab-separated training file into a DataFrame
file_path = "/kaggle/input/orientation-train-2/orientation-tr-train.tsv"
data = pd.read_table(file_path)

In [5]:
from sklearn.model_selection import train_test_split

# Keep a 20% sample of the rows: the 80% "test" side of the split is thrown
# away. Stratifying on the label column keeps the class proportions of the
# full dataset, and the fixed random_state makes the sample repeatable.
data_subset, _ = train_test_split(
    data,
    test_size=0.8,
    stratify=data["label"],
    random_state=42,
)

# Ground-truth labels, in the same row order as data_subset
true_labels = data_subset["label"].tolist()



In [6]:
def _as_prompt(x):
    """Prefix one English speech text with the left/right classification instruction."""
    return f"Classify whether the speaker’s party leans left (0) or right (1): {x}"


# Store the prompts as a new column and keep a plain list for the inference loop
data_subset["modified_text"] = data_subset["text_en"].apply(_as_prompt)
modified_text_list = data_subset["modified_text"].tolist()


In [7]:
# Load the Llama model and tokenizer for sequence classification.
# `token=True` reuses the credential stored by the earlier `login` call; it
# replaces the deprecated `use_auth_token` argument.
# NOTE(review): the checkpoint ships no classification head, so `score.weight`
# is newly initialised on load (see the warning this cell prints). Predictions
# from this model are not meaningful until it is fine-tuned on the task.
model_name = "meta-llama/Llama-3.2-1B"  # Replace with the actual model path or name if locally hosted
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForSequenceClassification.from_pretrained(model_name, token=True, num_labels=2)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Wrap the loaded model and tokenizer in a text-classification pipeline on device 0 (GPU)
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0,
)

In [9]:
# Feed the prompts to the classifier in slices of `chunk_size`, collecting every
# prediction. tqdm draws the progress bar; in addition, a time-remaining
# estimate is printed at most once per minute.
predictions = []
chunk_size = 8
total_samples = len(modified_text_list)
start_time = time.time()
last_print_time = start_time
for offset in tqdm(range(0, total_samples, chunk_size), desc="Running inference"):
    chunk = modified_text_list[offset:offset + chunk_size]
    predictions += classifier(chunk)

    # Extrapolate the total run time from the average time per processed sample
    elapsed = time.time() - start_time
    samples_done = offset + len(chunk)
    projected_total = (elapsed / samples_done) * total_samples
    time_left = projected_total - elapsed

    # Only report when at least 60 seconds have passed since the last report
    now = time.time()
    if now - last_print_time >= 60:
        print(f"\rEstimated time remaining: {time_left / 60:.2f} minutes")
        last_print_time = now

print()

Running inference:   2%|▏         | 10/404 [00:17<12:08,  1.85s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running inference:   9%|▉         | 38/404 [01:02<10:45,  1.76s/it]

Estimated time remaining: 10.00 minutes


Running inference:  18%|█▊        | 73/404 [02:03<09:08,  1.66s/it]

Estimated time remaining: 9.30 minutes


Running inference:  26%|██▌       | 105/404 [03:04<10:48,  2.17s/it]

Estimated time remaining: 8.72 minutes


Running inference:  35%|███▍      | 140/404 [04:04<08:55,  2.03s/it]

Estimated time remaining: 7.66 minutes


Running inference:  43%|████▎     | 175/404 [05:04<05:45,  1.51s/it]

Estimated time remaining: 6.63 minutes


Running inference:  52%|█████▏    | 210/404 [06:05<06:01,  1.86s/it]

Estimated time remaining: 5.61 minutes


Running inference:  60%|██████    | 244/404 [07:06<04:14,  1.59s/it]

Estimated time remaining: 4.64 minutes


Running inference:  69%|██████▉   | 279/404 [08:07<03:40,  1.77s/it]

Estimated time remaining: 3.62 minutes


Running inference:  77%|███████▋  | 312/404 [09:07<03:25,  2.24s/it]

Estimated time remaining: 2.67 minutes


Running inference:  85%|████████▍ | 342/404 [10:07<01:54,  1.85s/it]

Estimated time remaining: 1.82 minutes


Running inference:  93%|█████████▎| 377/404 [11:09<00:46,  1.72s/it]

Estimated time remaining: 0.78 minutes


Running inference: 100%|██████████| 404/404 [11:55<00:00,  1.77s/it]







In [10]:
# Turn each prediction's "label" string into an int by parsing the text that
# follows its last underscore
predicted_labels = []
for pred in predictions:
    label_suffix = pred["label"].rsplit("_", 1)[-1]
    predicted_labels.append(int(label_suffix))

In [11]:
# Score the predictions against the ground truth; precision, recall and F1
# use binary averaging
accuracy = accuracy_score(true_labels, predicted_labels)
prf_scores = precision_recall_fscore_support(true_labels, predicted_labels, average="binary")
precision, recall, f1 = prf_scores[:3]  # fourth element (support) is not used

# Report every metric with four decimal places
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")


Accuracy: 0.4819
Precision: 0.5468
Recall: 0.6411
F1 Score: 0.5902


In [None]:
# Save the per-sample predictions to a CSV file (the aggregate metrics are not
# written to this file).
# `texts` was never defined anywhere in the notebook, so it is built here from
# the same sampled frame that produced `true_labels`, keeping rows aligned.
texts = data_subset["text_en"].tolist()
results = pd.DataFrame({"text": texts, "true_label": true_labels, "predicted_label": predicted_labels})
results.to_csv("inference_results_with_metrics.csv", index=False)

print("Inference completed. Results saved to 'inference_results_with_metrics.csv'.")