<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/Paper(A_Prompt_Based_Topic_Modeling_Method_for_Depression_Detection_on_Low_Resource_Data)_Bangla.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install transformers openpyxl

# Import libraries
import pandas as pd
import torch
from transformers import BertTokenizer, BertForNextSentencePrediction
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the spreadsheet and keep only the two columns used below:
# 'Tweets_english' (the text to score) and 'Labels' (the target labels).
df = pd.read_excel('spanish_translated.xlsx')  # Replace with your file path
df = df.loc[:, ['Tweets_english', 'Labels']]  # Raises KeyError if either column is missing

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Prompt-based emotion detection function
def prompt_emotion(text, tokenizer, model, positive_prompt="I am feeling good.", negative_prompt="I am feeling bad."):
    """Score how much more plausibly a negative prompt follows ``text`` than a positive one.

    ``text`` is paired once with ``positive_prompt`` and once with
    ``negative_prompt``; each pair is fed to ``model`` and the softmax
    probability of class index 0 is read from the returned logits. For
    ``BertForNextSentencePrediction`` index 0 is the "sequence B continues
    sequence A" class.

    Args:
        text: The sentence to score (first segment of each pair).
        tokenizer: Callable tokenizer; called as
            ``tokenizer(text, prompt, return_tensors='pt', truncation=True)``
            and expected to return a mapping with a ``.to(device)`` method.
        model: Model called with the tokenizer output as keyword arguments;
            its result must expose ``.logits`` with the classes on dim 1.
        positive_prompt: Second segment expressing a positive feeling.
        negative_prompt: Second segment expressing a negative feeling.

    Returns:
        float: ``P(negative prompt follows) - P(positive prompt follows)``.
        A value above 0 means the negative prompt is the better continuation.
    """
    # Send the inputs to wherever the model's weights actually live instead of
    # relying on a module-level ``device`` variable being in sync with it.
    model_device = next(model.parameters()).device

    inputs_pos = tokenizer(text, positive_prompt, return_tensors='pt', truncation=True).to(model_device)
    inputs_neg = tokenizer(text, negative_prompt, return_tensors='pt', truncation=True).to(model_device)

    # Pure inference: without no_grad() every call records an autograd graph,
    # which wastes memory and time when scoring a whole dataset row by row.
    with torch.no_grad():
        output_pos = model(**inputs_pos).logits
        output_neg = model(**inputs_neg).logits

    # Probability of class 0 for the single example in each batch
    prob_pos = torch.softmax(output_pos, dim=1)[0][0].item()
    prob_neg = torch.softmax(output_neg, dim=1)[0][0].item()

    # Higher value => the negative prompt fits better (used as depression signal)
    return prob_neg - prob_pos

# Zero Training Data Setting - Test Multiple PLMs
def test_plm_on_zero_shot(plm_name):
    """Evaluate one pretrained checkpoint on the dataset without any training.

    Loads the tokenizer and next-sentence-prediction model named by
    ``plm_name``, scores every row of the module-level ``df`` with
    ``prompt_emotion``, predicts label 1 when the score is above 0, and prints
    accuracy, precision, recall and F1 against ``df['Labels']``.

    Side effects:
        Adds the columns ``emotion_score_<plm_name>`` and
        ``prediction_<plm_name>`` to the module-level ``df``.

    Args:
        plm_name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        dict: ``{"accuracy", "precision", "recall", "f1"}`` so that several
        checkpoints can be compared programmatically (the original version
        only printed the numbers).
    """
    # Local import: tqdm is not part of the file's top-level imports.
    from tqdm import tqdm

    tokenizer = BertTokenizer.from_pretrained(plm_name)
    model = BertForNextSentencePrediction.from_pretrained(plm_name).to(device)
    model.eval()  # make inference mode explicit (no dropout)

    # Register DataFrame/Series.progress_apply so scoring shows a progress bar
    tqdm.pandas()

    score_col = f'emotion_score_{plm_name}'
    pred_col = f'prediction_{plm_name}'

    # Calculate emotion scores for each entry in the dataset
    df[score_col] = df['Tweets_english'].progress_apply(lambda x: prompt_emotion(x, tokenizer, model))

    # Positive score => negative prompt fits better => predict class 1
    df[pred_col] = (df[score_col] > 0).astype(int)

    # Calculate metrics
    y_true = df['Labels']
    y_pred = df[pred_col]
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    print(f"PLM: {plm_name}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print("")

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Checkpoints to evaluate in the zero-training-data setting
plm_names = ["bert-base-uncased"]

# Run the zero-shot evaluation once per listed checkpoint
for plm_name in plm_names:
    test_plm_on_zero_shot(plm_name)




 50%|████▉     | 1092/2186 [00:24<00:28, 38.20it/s]

In [5]:

# Few-Shot Setting (Onefold Training) with Adaptive Fusion Model
# Define Fusion Model for Few-Shot Adaptive Voting
class FusionModel(nn.Module):
    """One linear layer followed by a sigmoid.

    Maps ``input_dim`` input features to a single value in (0, 1).
    """

    def __init__(self, input_dim):
        """Create the linear layer taking ``input_dim`` features to one output."""
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(fc(x))`` for the input tensor ``x``."""
        logits = self.fc(x)
        return logits.sigmoid()

# Use a specific PLM (e.g., bert-base-uncased) for few-shot setting
plm_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(plm_name)
model = BertForNextSentencePrediction.from_pretrained(plm_name).to(device)

# Calculate emotion scores using prompt-emotion method
from tqdm import tqdm
tqdm.pandas()  # Initialize tqdm for use with pandas

# The column names must match the ones kept when the dataset was loaded
# ('Tweets_english' / 'Labels'); the lowercase spellings used previously do
# not exist in that frame and raise KeyError.
df['emotion_score'] = df['Tweets_english'].progress_apply(lambda x: prompt_emotion(x, tokenizer, model))

# Convert emotion scores and labels to (n, 1) float tensors
X = torch.tensor(df['emotion_score'].values).float().view(-1, 1).to(device)
y = torch.tensor(df['Labels'].values).float().view(-1, 1).to(device)

# Split into onefold train, validation, and test sets (60% / 20% / 20%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Initialize and train FusionModel for adaptive voting
fusion_model = FusionModel(1).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(fusion_model.parameters(), lr=0.001)
epochs = 10
# NOTE(review): each epoch is a single full-batch step; in the recorded run the
# loss only moves from 0.5717 to 0.5711, so the layer is barely trained.
# Consider more epochs or a larger learning rate.

# Training loop for the fusion model
for epoch in range(epochs):
    fusion_model.train()
    optimizer.zero_grad()
    outputs = fusion_model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


def _binary_metrics(targets, predictions):
    """Return (accuracy, precision, recall, F1) for two (n, 1) tensors of 0/1 values."""
    # sklearn expects 1-D label arrays on the host, so move to CPU and flatten
    y_true = targets.cpu().numpy().ravel()
    y_hat = predictions.cpu().numpy().ravel()
    return (
        accuracy_score(y_true, y_hat),
        precision_score(y_true, y_hat),
        recall_score(y_true, y_hat),
        f1_score(y_true, y_hat),
    )


# Validation and Test Metrics
fusion_model.eval()
with torch.no_grad():
    # Validate on validation set
    val_outputs = fusion_model(X_val)
    val_loss = criterion(val_outputs, y_val)
    val_pred = (val_outputs >= 0.5).float()  # threshold probabilities at 0.5
    val_accuracy, val_precision, val_recall, val_f1 = _binary_metrics(y_val, val_pred)

    print("\nValidation Results:")
    print(f"Validation Loss: {val_loss.item():.4f}")
    print(f"Accuracy: {val_accuracy:.4f}")
    print(f"Precision: {val_precision:.4f}")
    print(f"Recall: {val_recall:.4f}")
    print(f"F1-Score: {val_f1:.4f}")

    # Test on test set
    test_outputs = fusion_model(X_test)
    test_pred = (test_outputs >= 0.5).float()
    test_accuracy, test_precision, test_recall, test_f1 = _binary_metrics(y_test, test_pred)

    print("\nTest Results:")
    print(f"Accuracy: {test_accuracy:.4f}")
    print(f"Precision: {test_precision:.4f}")
    print(f"Recall: {test_recall:.4f}")
    print(f"F1-Score: {test_f1:.4f}")




Epoch [1/10], Loss: 0.5717
Epoch [2/10], Loss: 0.5716
Epoch [3/10], Loss: 0.5715
Epoch [4/10], Loss: 0.5715
Epoch [5/10], Loss: 0.5714
Epoch [6/10], Loss: 0.5713
Epoch [7/10], Loss: 0.5713
Epoch [8/10], Loss: 0.5712
Epoch [9/10], Loss: 0.5711
Epoch [10/10], Loss: 0.5711

Validation Results:
Validation Loss: 0.5319
Accuracy: 0.7522
Precision: 0.4000
Recall: 0.2690
F1-Score: 0.3217

Test Results:
Accuracy: 0.7407
Precision: 0.4455
Recall: 0.2565
F1-Score: 0.3256
