In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
data=pd.read_csv('Cleaned_Data.csv')

In [2]:
data = data.drop_duplicates(subset='statement').reset_index(drop=True)
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['status'])
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
X_train, X_test, y_train, y_test = train_test_split(
    data['statement'], data['label'], test_size=0.2, random_state=42, stratify=data['label']
)
print(label_mapping)
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")


{'Anxiety': np.int64(0), 'Bipolar': np.int64(1), 'Depression': np.int64(2), 'Normal': np.int64(3), 'Personality disorder': np.int64(4), 'Stress': np.int64(5), 'Suicidal': np.int64(6)}
Training samples: 40244, Testing samples: 10061


In [3]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")

def tokenize_texts(texts):
    return tokenizer(
        list(texts),
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt"
    )

train_encodings = tokenize_texts(X_train)
test_encodings = tokenize_texts(X_test)


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/225k [00:00<?, ?B/s]

In [4]:
import torch
from transformers import AutoModelForSequenceClassification

num_labels = data['status'].nunique()

model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
    num_labels=num_labels
)

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")

label2id = {label: idx for idx, label in enumerate(data['status'].unique())}
id2label = {idx: label for label, idx in label2id.items()}

data['label'] = data['status'].map(label2id)

train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['statement'].tolist(),
    data['label'].tolist(),
    test_size=0.2,
    stratify=data['label'],
    random_state=42
)

class PsychDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt"
        )
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

train_dataset = PsychDataset(train_texts, train_labels, tokenizer)
val_dataset = PsychDataset(val_texts, val_labels, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

In [9]:
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_labels = len(label2id)

model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
    num_labels=num_labels
).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
from transformers import get_scheduler
from torch.utils.data import DataLoader
from tqdm import tqdm

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*3
)

for epoch in range(3):
    model.train()
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for batch in loop:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        loop.set_description(f"Epoch {epoch+1}")
        loop.set_postfix(loss=loss.item())
        
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1} completed with average loss: {avg_loss}")

model.save_pretrained("./psychbert_model")
tokenizer.save_pretrained("./psychbert_model")

Epoch 1: 100%|██████████| 2516/2516 [28:25<00:00,  1.47it/s, loss=0.706] 


Epoch 1 completed with average loss: 0.6244616079616504


Epoch 2: 100%|██████████| 2516/2516 [28:30<00:00,  1.47it/s, loss=0.13]  


Epoch 2 completed with average loss: 0.41772713388548793


Epoch 3: 100%|██████████| 2516/2516 [28:39<00:00,  1.46it/s, loss=0.13]  


Epoch 3 completed with average loss: 0.3206699542332565


('./psychbert_model\\tokenizer_config.json',
 './psychbert_model\\special_tokens_map.json',
 './psychbert_model\\vocab.txt',
 './psychbert_model\\added_tokens.json',
 './psychbert_model\\tokenizer.json')

In [11]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForSequenceClassification.from_pretrained("./psychbert_model").to(device)
tokenizer = AutoTokenizer.from_pretrained("./psychbert_model")

explanations = {
    "Stress": (
        "You're showing signs of stress. This often stems from acute stressors like deadlines, interpersonal conflicts, or overwhelming responsibilities. "
        "It triggers the 'fight-or-flight' response, leading to irritability, tension, and fatigue. "
        "Practicing mindfulness, progressive muscle relaxation, and scheduled self-care breaks can help your mind decompress."
    ),
    
    "Anxiety": (
        "This indicates anxiety, which is a heightened state of worry or fear of potential threats — often even when no immediate danger exists. "
        "Your brain's amygdala may be overactive, causing hypervigilance and restlessness. "
        "Cognitive reframing, breathing exercises (like the 4-7-8 method), and journaling about worries can help calm this response."
    ),
    
    "Depression": (
        "You're showing signs of depression — a persistent state of low mood, hopelessness, and lack of motivation. "
        "It often arises from prolonged stress, unresolved trauma, or neurochemical imbalances (like low serotonin or dopamine). "
        "Maintaining structure, seeking connection with others, and engaging in small rewarding activities (behavioral activation) can ease the heaviness. "
        "Professional therapy is highly encouraged if these feelings persist."
    ),
    
    "Suicidal": (
        "⚠️ This indicates suicidal ideation, often a result of overwhelming psychological pain and a sense of hopelessness. "
        "The brain may feel 'cognitive constriction' — a tunnel vision where problems seem insurmountable. "
        "Please know: help is available, and these feelings can pass. Contact a mental health helpline immediately or talk to someone who can support you. "
        "You are not alone, and you matter deeply."
    ),
    
    "Bipolar": (
        "This suggests symptoms resembling bipolar disorder, characterized by alternating periods of emotional highs (mania) and lows (depression). "
        "During manic phases, impulsivity and overconfidence can dominate; during lows, feelings of despair and lethargy may arise. "
        "Mood regulation strategies and long-term professional guidance are essential. Awareness is the first step to stability."
    ),
    
    "Personality disorder": (
        "Signs indicate traits associated with personality disorders — persistent patterns of thinking, feeling, and behaving that can impact relationships and well-being. "
        "Examples include borderline tendencies (emotional instability), or narcissistic traits (sensitivity to criticism). "
        "Developing emotional intelligence and working with a therapist can help in understanding and reshaping these patterns."
    ),
    
    "Neutral": (
        "Your mental state seems balanced and stable. You're showing signs of psychological well-being, emotional regulation, and resilience. "
        "Keep nurturing this state through self-awareness, positive affirmations, and healthy routines!"
    ),
}

def analyze_input(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probs = F.softmax(logits, dim=1)
    predicted_class = torch.argmax(probs, dim=1).item()

    id2label = {v: k for k, v in label2id.items()}

    label = id2label[predicted_class]
    confidence = probs[0][predicted_class].item()
    
    explanation = explanations.get(label, "Stay mindful and keep taking care of yourself.")

    print("\n🧠 Psychological Analysis Report:")
    print(f"Detected Mental State: **{label}**")
    print(f"Confidence: {confidence*100:.2f}%")
    print(f"Why do I feel this way? {explanation}")
    
while True:
    user_input = input("\nEnter your thoughts (or type 'exit' to quit): ")
    if user_input.lower() == "exit":
        print("Take care! Stay mindful. 💙")
        break
    analyze_input(user_input)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



🧠 Psychological Analysis Report:
Detected Mental State: **Normal**
Confidence: 93.27%
Why do I feel this way? Stay mindful and keep taking care of yourself.

🧠 Psychological Analysis Report:
Detected Mental State: **Normal**
Confidence: 96.27%
Why do I feel this way? Stay mindful and keep taking care of yourself.

🧠 Psychological Analysis Report:
Detected Mental State: **Normal**
Confidence: 81.04%
Why do I feel this way? Stay mindful and keep taking care of yourself.

🧠 Psychological Analysis Report:
Detected Mental State: **Suicidal**
Confidence: 89.90%
Why do I feel this way? ⚠️ This indicates suicidal ideation, often a result of overwhelming psychological pain and a sense of hopelessness. The brain may feel 'cognitive constriction' — a tunnel vision where problems seem insurmountable. Please know: help is available, and these feelings can pass. Contact a mental health helpline immediately or talk to someone who can support you. You are not alone, and you matter deeply.

🧠 Psycholo