# Sentiment Classification Lab

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from pandarallel import pandarallel

# Configure pandarallel once, before any parallel_apply call: 18 worker
# processes, library log output silenced, progress bar enabled.
pandarallel.initialize(
    nb_workers=18,
    verbose=False,
    progress_bar=True,
)

In [2]:
# Path of the review dataset to classify. The ".parquet" suffix indicates the
# file is stored in Parquet format (presumably produced by the data-prep
# stage named in the path; verify against that pipeline).
FP = "workspace/dev/dataset/01_dataprep/appvocai_discover-01_dataprep-03_tqa-review-dataset.parquet"

## Load Data

In [None]:
# FP points at a ".parquet" file, so it has to be read with the Parquet
# reader. pd.read_csv would try to decode the binary file as delimited text
# and either raise or return garbage.
df = pd.read_parquet(FP)

## Load Model and Tokenizer

In [4]:
# Checkpoint identifier shared by the classifier and its tokenizer.
model_name = "tabularisai/robust-sentiment-analysis"

# Both objects are loaded from the same checkpoint so that the token ids the
# tokenizer produces match what the model expects.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

## Create Classifier

In [5]:
# Function to predict sentiment
def predict_sentiment(text):
    """Classify the sentiment of a single piece of text.

    The text is lower-cased, tokenized (truncated to at most 512 tokens) and
    passed through the module-level ``model``. The index of the
    highest-scoring class is then mapped to a human-readable label.

    Args:
        text: The text to classify. Expected to be a ``str``.

    Returns:
        One of ``"Very Negative"``, ``"Negative"``, ``"Neutral"``,
        ``"Positive"`` or ``"Very Positive"``; ``None`` when ``text`` is not
        a string (for example ``None`` or the float ``NaN`` that pandas uses
        for missing values).
    """
    # Missing values arrive here as None/NaN when the function is applied to
    # a DataFrame column. They have no ``.lower()`` and would otherwise abort
    # the whole apply run with an AttributeError.
    if not isinstance(text, str):
        return None

    inputs = tokenizer(
        text.lower(), return_tensors="pt", truncation=True, padding=True, max_length=512
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax is monotonic, so the arg-max of the raw logits is the same
    # class as the arg-max of the probabilities; the softmax is skipped.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()

    sentiment_map = {
        0: "Very Negative",
        1: "Negative",
        2: "Neutral",
        3: "Positive",
        4: "Very Positive",
    }
    return sentiment_map[predicted_class]

## Perform Inference

In [None]:
# Run predict_sentiment on every value of the "content" column through
# pandarallel's parallel_apply and store the returned labels in the
# "sentiment" column.
df["sentiment"] = df["content"].parallel_apply(predict_sentiment)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=4817), Label(value='0 / 4817'))), …

## Check Results

In [None]:
# Quick sanity check: show the first rows of review text next to the
# predicted sentiment label.
df[["content", "sentiment"]].head()