In [2]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
import numpy as np
import torch.nn.functional as F

df = pd.read_csv("test.csv")
df = df.dropna(subset=["body"])

model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()

label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Updated classification function to include Neutral
def classify_sentiment(text):
    if not isinstance(text, str) or len(text.strip()) == 0:
        return "Neutral"  # or any default fallback

    # Tokenize input
    inputs = tokenizer(text[:512], return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1).squeeze().cpu().numpy()

    label_id = np.argmax(probs)
    return label_map[label_id]

# Apply classification
tqdm.pandas(desc="Classifying with Neutral")
df["sentiment"] = df["body"].progress_apply(classify_sentiment)

df.to_csv("sentiment_practice_output.csv", index=False)
print("Done.")
print(df["sentiment"].value_counts())


Classifying with Neutral:   0%|          | 0/2191 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Classifying with Neutral: 100%|██████████| 2191/2191 [02:17<00:00, 15.96it/s]

Done.
sentiment
Neutral     1485
Positive     558
Negative     148
Name: count, dtype: int64



