In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification


In [2]:
# Load Emotion model
emotion_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert_emotion_model")
emotion_model = DistilBertForSequenceClassification.from_pretrained("distilbert_emotion_model")
emotion_model.eval()

# Load Sarcasm model
sarcasm_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert_sarcasm_model")
sarcasm_model = DistilBertForSequenceClassification.from_pretrained("distilbert_sarcasm_model")
sarcasm_model.eval()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [3]:
emotion_labels = ["anger", "fear", "joy", "love", "sadness", "surprise"]
sarcasm_labels = ["not_sarcastic", "sarcastic"]


In [4]:
def predict_emotion_with_sarcasm(text):
    # ----- Emotion -----
    e_inputs = emotion_tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    )
    with torch.no_grad():
        e_outputs = emotion_model(**e_inputs)
        e_probs = F.softmax(e_outputs.logits, dim=1)[0]
    e_idx = int(torch.argmax(e_probs))
    emotion = emotion_labels[e_idx]
    emotion_conf = float(e_probs[e_idx]) * 100

    # ----- Sarcasm -----
    s_inputs = sarcasm_tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=128
    )
    with torch.no_grad():
        s_outputs = sarcasm_model(**s_inputs)
        s_probs = F.softmax(s_outputs.logits, dim=1)[0]
    s_idx = int(torch.argmax(s_probs))
    sarcasm = sarcasm_labels[s_idx]
    sarcasm_conf = float(s_probs[s_idx]) * 100

    return {
        "emotion": emotion,
        "emotion_confidence": round(emotion_conf, 2),
        "sarcasm": sarcasm,
        "sarcasm_confidence": round(sarcasm_conf, 2)
    }


In [5]:
predict_emotion_with_sarcasm(
    "Oh wow, just GREAT üòí another meeting that could‚Äôve been an email"
)


{'emotion': 'joy',
 'emotion_confidence': 99.29,
 'sarcasm': 'sarcastic',
 'sarcasm_confidence': 69.75}

In [6]:
result = predict_emotion_with_sarcasm(text)

if result["sarcasm"] == "sarcastic":
    print("‚ö†Ô∏è Emotion may be masked due to sarcasm")
else:
    print("Emotion is reliable")


NameError: name 'text' is not defined

In [7]:
text = "Oh wow, just GREAT üòí another meeting that could‚Äôve been an email"


In [8]:
result = predict_emotion_with_sarcasm(text)
result


{'emotion': 'joy',
 'emotion_confidence': 99.29,
 'sarcasm': 'sarcastic',
 'sarcasm_confidence': 69.75}

In [9]:
text = "I am feeling very low and tired today üòû"
predict_emotion_with_sarcasm(text)


{'emotion': 'sadness',
 'emotion_confidence': 99.55,
 'sarcasm': 'not_sarcastic',
 'sarcasm_confidence': 63.2}

In [11]:
from transformers import DistilBertForSequenceClassification

MODEL_PATH = "distilbert_emotion_model"

model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
model.eval()


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [13]:
NameError                                 Traceback (most recent call last)
Cell In[12], line 10
      7 preds = []
      8 true_labels

SyntaxError: invalid syntax (3887097613.py, line 1)

In [14]:
# -----------------------------
# Imports
# -----------------------------
import torch
import numpy as np
import pandas as pd

from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification
)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# -----------------------------
# Load cleaned labeled dataset
# (same dataset used in Phase-1)
# -----------------------------
df = pd.read_csv("train.txt", sep=";", names=["text", "label"])
df = df.dropna()

# -----------------------------
# Encode labels
# -----------------------------
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["label"].values)

X = df["text"].astype(str).values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -----------------------------
# Tokenizer
# -----------------------------
MODEL_PATH = "distilbert_emotion_model"

tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_PATH)

train_encodings = tokenizer(
    list(X_train),
    truncation=True,
    padding=True,
    max_length=128
)

test_encodings = tokenizer(
    list(X_test),
    truncation=True,
    padding=True,
    max_length=128
)

# -----------------------------
# Dataset class
# -----------------------------
class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

eval_dataset = EmotionDataset(test_encodings, y_test)

# -----------------------------
# Load model
# -----------------------------
model = DistilBertForSequenceClassification.from_pretrained(
    MODEL_PATH,
    num_labels=len(label_encoder.classes_)
)

model.eval()

# -----------------------------
# Predictions
# -----------------------------
preds = []
true_labels = []

for batch in eval_dataset:
    with torch.no_grad():
        inputs = {
            "input_ids": batch["input_ids"].unsqueeze(0),
            "attention_mask": batch["attention_mask"].unsqueeze(0)
        }
        outputs = model(**inputs)
        pred = torch.argmax(outputs.logits, dim=1).item()

    preds.append(pred)
    true_labels.append(batch["labels"].item())

# -----------------------------
# Metrics
# -----------------------------
print("Accuracy:", accuracy_score(true_labels, preds))
print(
    classification_report(
        true_labels,
        preds,
        target_names=label_encoder.classes_
    )
)


Accuracy: 0.926875
              precision    recall  f1-score   support

       anger       0.96      0.90      0.93       432
        fear       0.91      0.89      0.90       387
         joy       0.94      0.94      0.94      1072
        love       0.82      0.84      0.83       261
     sadness       0.94      0.98      0.96       933
    surprise       0.85      0.81      0.83       115

    accuracy                           0.93      3200
   macro avg       0.90      0.89      0.90      3200
weighted avg       0.93      0.93      0.93      3200

