In [1]:
import os
import time

import pandas as pd
import torch
from huggingface_hub import login
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer


In [2]:
# Authenticate with Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is ever stored in (or committed with) the notebook.
# SECURITY: a real token used to be hard-coded in this cell; treat it as leaked
# and revoke it in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set. Export your Hugging Face access token as the "
        "HF_TOKEN environment variable before running this notebook."
    )
login(token=hf_token)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
# Read the tab-separated training split into a DataFrame
file_path = "/kaggle/input/orientation-train-2/orientation-tr-train.tsv"
data = pd.read_table(file_path, sep="\t")

In [4]:
# Draw a 20% sample of the data, stratified on "label" so the class balance of
# the full dataset is preserved. `train_test_split` is imported in the top
# import cell; the 80% "test" part is discarded.
data_subset, _ = train_test_split(
    data,
    test_size=0.8,
    stratify=data["label"],
    random_state=42,  # fixed seed so the same rows are selected on every run
)

# Ground-truth labels, in the same row order as data_subset
true_labels = data_subset["label"].tolist()



In [5]:
# Prefix every speech with the Turkish instruction
# ("Classify whether the speaker's party leans left (0) or right (1): ").
PROMPT_PREFIX = "Konuşmacının partisinin sola (0) veya sağa (1) eğilimli olup olmadığını sınıflandırın: "

# `.assign` builds a new frame instead of writing a column into the object
# returned by train_test_split, which keeps this cell idempotent and avoids
# pandas' SettingWithCopyWarning.
data_subset = data_subset.assign(
    modified_text=data_subset["text"].map(lambda text: f"{PROMPT_PREFIX}{text}")
)
modified_text_list = data_subset["modified_text"].tolist()


In [6]:
# Load the Llama tokenizer and the model with a 2-class sequence-classification head.
# `token=True` reuses the credential stored by `login()`; it replaces the
# deprecated `use_auth_token` argument.
# NOTE: the checkpoint ships without a classification head, so `score.weight`
# is newly (randomly) initialised — see the warning printed by this cell. Until
# the model is fine-tuned, its predictions are not meaningful.
model_name = "meta-llama/Llama-3.2-1B"  # Replace with the actual model path or name if locally hosted
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    token=True,
    num_labels=2,
)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Initialize the pipeline for binary classification.
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU (device=-1) instead of failing on a machine without a GPU.
device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

In [8]:
# Run inference over the prompts in chunks of 8, showing a tqdm progress bar
# and printing a coarse time-remaining estimate about once per minute.
# NOTE(review): each chunk is handed to the pipeline as a plain list without a
# `batch_size` argument, and the captured output warns that the pipeline is
# being used sequentially on GPU — this looks like chunked rather than truly
# batched inference; confirm before relying on the throughput.
predictions = []
start_time = time.time()
last_print_time = start_time
total_samples = len(modified_text_list)
for i in tqdm(range(0, total_samples, 8), desc="Running inference"):
    batch = modified_text_list[i:i + 8]
    batch_predictions = classifier(batch)
    predictions += batch_predictions

    # Linear extrapolation: average seconds per processed sample times the
    # total number of samples gives the projected total run time.
    elapsed_time = time.time() - start_time
    processed_samples = i + len(batch)
    estimated_total_time = total_samples * (elapsed_time / processed_samples)
    remaining_time = estimated_total_time - elapsed_time

    # Throttle the estimate to at most one print per 60 seconds
    current_time = time.time()
    if current_time - last_print_time < 60:
        continue
    print(f"\rEstimated time remaining: {remaining_time / 60:.2f} minutes")
    last_print_time = current_time

print()

Running inference:   2%|▏         | 10/404 [00:23<16:03,  2.44s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running inference:   7%|▋         | 29/404 [01:00<10:36,  1.70s/it]

Estimated time remaining: 13.06 minutes


Running inference:  14%|█▍        | 57/404 [02:02<14:25,  2.49s/it]

Estimated time remaining: 12.40 minutes


Running inference:  22%|██▏       | 87/404 [03:04<10:16,  1.94s/it]

Estimated time remaining: 11.19 minutes


Running inference:  27%|██▋       | 110/404 [04:06<12:29,  2.55s/it]

Estimated time remaining: 10.94 minutes


Running inference:  35%|███▍      | 140/404 [05:09<11:28,  2.61s/it]

Estimated time remaining: 9.71 minutes


Running inference:  41%|████▏     | 167/404 [06:12<10:09,  2.57s/it]

Estimated time remaining: 8.78 minutes


Running inference:  49%|████▉     | 197/404 [07:15<11:51,  3.44s/it]

Estimated time remaining: 7.60 minutes


Running inference:  55%|█████▌    | 223/404 [08:15<06:06,  2.02s/it]

Estimated time remaining: 6.68 minutes


Running inference:  62%|██████▏   | 250/404 [09:16<06:34,  2.56s/it]

Estimated time remaining: 5.69 minutes


Running inference:  69%|██████▉   | 278/404 [10:18<04:33,  2.17s/it]

Estimated time remaining: 4.65 minutes


Running inference:  75%|███████▌  | 305/404 [11:18<03:53,  2.36s/it]

Estimated time remaining: 3.65 minutes


Running inference:  81%|████████  | 328/404 [12:19<03:19,  2.62s/it]

Estimated time remaining: 2.83 minutes


Running inference:  87%|████████▋ | 353/404 [13:20<02:22,  2.80s/it]

Estimated time remaining: 1.90 minutes


Running inference:  94%|█████████▍| 380/404 [14:21<01:05,  2.71s/it]

Estimated time remaining: 0.88 minutes


Running inference: 100%|██████████| 404/404 [15:12<00:00,  2.26s/it]







In [9]:
# Convert each pipeline result into an integer class id by taking the part of
# its "label" string after the last underscore.
predicted_labels = []
for pred in predictions:
    label_suffix = pred["label"].rsplit("_", 1)[-1]
    predicted_labels.append(int(label_suffix))

In [10]:
# Score the predictions against the ground truth.
# average="binary" reports precision/recall/F1 for the positive class only.
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels,
    predicted_labels,
    average="binary",
)
accuracy = accuracy_score(true_labels, predicted_labels)

# Report every metric rounded to four decimal places
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.5823
Precision: 0.5821
Recall: 1.0000
F1 Score: 0.7359


In [None]:
# Save the per-sample predictions to a CSV file.
# `texts` was previously undefined (NameError); take the original speeches from
# data_subset, which is in the same row order as true_labels and
# predicted_labels. Only predictions are written here — the aggregate metrics
# are printed by the metrics cell and are not part of this file.
texts = data_subset["text"].tolist()
results = pd.DataFrame(
    {
        "text": texts,
        "true_label": true_labels,
        "predicted_label": predicted_labels,
    }
)
results.to_csv("inference_results_with_metrics.csv", index=False)

print("Inference completed. Results saved to 'inference_results_with_metrics.csv'.")