# 🧠 Mental Health Tweet Classification with BERT
Fine-tunes `bert-base-uncased` for tweet classification using the Hugging Face Transformers `Trainer` API (PyTorch backend).

In [None]:

# Install all necessary packages
!pip install -U transformers datasets --quiet


In [None]:

# Imports
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments


In [None]:

# Load the dataset from the Hugging Face Hub and convert its 'train' split
# to a pandas DataFrame.
dataset = load_dataset("btwitssayan/sentiment-analysis-for-mental-health")
# to_pandas() converts the underlying table in one step instead of having
# pandas iterate over the dataset row by row.
df = dataset['train'].to_pandas()
# Last expression of the cell: rendered as a rich table by the notebook.
df.head()


In [None]:

# Guard against missing values: a row without a statement cannot be tokenized
# and a row without a status cannot be encoded as a class label.
df = df.dropna(subset=['statement', 'status']).reset_index(drop=True)

# Encode labels: map each distinct 'status' value to an integer class id.
# `le` is kept so class ids can be mapped back to names when reporting.
le = LabelEncoder()
df['label'] = le.fit_transform(df['status'])

# Train/test split: 80/20, stratified on the label so both splits keep the
# same class proportions; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    df['statement'],
    df['label'],
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)


In [None]:

# Tokenize
MAX_LENGTH = 128  # every sequence is truncated / padded to this many tokens
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_fn(example):
    """Tokenize the 'text' field, truncating and padding to MAX_LENGTH tokens.

    Applied through ``Dataset.map(..., batched=True)`` below, so ``example``
    is a batch whose 'text' entry holds the raw statements.
    Returns whatever the tokenizer call produces for that input.
    """
    return tokenizer(
        example['text'],
        padding='max_length',
        truncation=True,
        max_length=MAX_LENGTH,
    )

def _build_torch_dataset(texts, labels):
    """Wrap texts/labels in a Dataset, tokenize it, and expose torch tensors.

    Only 'input_ids', 'attention_mask' and 'label' are exposed as torch
    columns; the raw 'text' column stays out of the formatted output.
    """
    split_ds = Dataset.from_dict({'text': texts.tolist(), 'label': labels.tolist()})
    split_ds = split_ds.map(tokenize_fn, batched=True)
    split_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
    return split_ds


# Same pipeline for both splits: build -> tokenize -> torch format.
train_ds = _build_torch_dataset(X_train, y_train)
test_ds = _build_torch_dataset(X_test, y_test)


In [None]:

# Load model
# Seed torch before building the model: the classification head is not part
# of the pretrained checkpoint and is randomly initialised, so without a
# fixed seed its starting weights differ from run to run.
torch.manual_seed(42)
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(le.classes_),  # one output per class known to the label encoder
)


In [None]:

# Set training args
training_args = TrainingArguments(
    output_dir='./results',
    # `eval_strategy` is the current name of this option; releases of
    # transformers older than 4.41 call it `evaluation_strategy`.
    eval_strategy='epoch',
    # load_best_model_at_end requires checkpoints to be saved on the same
    # schedule as evaluation; the default save strategy ('steps') does not
    # match 'epoch' and makes TrainingArguments raise a ValueError.
    save_strategy='epoch',
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    load_best_model_at_end=True
)


In [None]:

# Initialize trainer
# NOTE(review): eval_dataset is the same split that is scored in the final
# evaluation cell, and training_args sets load_best_model_at_end=True, so the
# checkpoint is selected on the data it is later reported on. Consider carving
# a separate validation split out of the training data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    # `processing_class` replaces the deprecated `tokenizer` argument in
    # current transformers releases; it is saved alongside the checkpoints.
    processing_class=tokenizer,
)

# Run fine-tuning for the number of epochs configured in training_args.
trainer.train()


In [None]:

# Evaluate: run the trained model over the test split and print a per-class report.
preds = trainer.predict(test_ds)

# Take the highest-scoring column of each prediction row as the class id.
y_pred = preds.predictions.argmax(axis=1)

# target_names maps the integer class ids back to the original status names.
report = classification_report(y_test, y_pred, target_names=le.classes_)
print(report)
