In [1]:
#Download Required Libraries
#!pip install transformers datasets
!pip install -U datasets



In [3]:
#Load Required Libraries
from pathlib import Path
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from transformers import Trainer, TrainingArguments

In [8]:
## Step 2: Load Pretrained Model and Tokenizer
model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# ✅ Fix: Set pad_token to eos_token for padding compatibility
tokenizer.pad_token = tokenizer.eos_token


# Re-create the notebook file after kernel reset

# Mental Health Conversational Model using DialoGPT (Hugging Face Transformers)

## Step 1: Install Required Libraries









# You can use datasets like EmpatheticDialogues or custom data







In [9]:
# Chat
input_text = "User: I'm feeling a bit anxious today. <|sep|> Bot:"
input_ids = tokenizer.encode(input_text, return_tensors="pt")
output_ids = model.generate(input_ids, max_length=100, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

User: I'm feeling a bit anxious today. <|sep|> Bot: I'm feeling anxious too.


In [23]:
## Step 3: Prepare Data (Format: prompt-response pairs)
# Example format:
# "User: I feel really anxious today. <|sep|> Bot: I'm sorry you're feeling that way. I'm here if you need to talk."


dataset = load_dataset("nbertagnolli/counsel-chat", download_mode="force_redownload")


def build_prompt_response(example):
    if example["questionText"] is None or example["answerText"] is None:
        return {"input_text": "", "response_text": ""}  # or skip later
    prompt = f"User: {example['questionText'].strip()} <|sep|> Bot:"
    response = example['answerText'].strip()
    return {"input_text": prompt, "response_text": response}


formatted_dataset = dataset["train"].map(build_prompt_response)

Repo card metadata block was not found. Setting CardData to empty.


20220401_counsel_chat.csv:   0%|          | 0.00/4.13M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2775 [00:00<?, ? examples/s]

Map:   0%|          | 0/2775 [00:00<?, ? examples/s]

In [24]:
## Step 4: Tokenize
def tokenize_function(example):
    input_ids = tokenizer.encode(example["input_text"] + " " + example["response_text"], truncation=True, padding="max_length", max_length=128)
    return {"input_ids": input_ids, "labels": input_ids}

tokenized_dataset = formatted_dataset.map(tokenize_function)

Map:   0%|          | 0/2775 [00:00<?, ? examples/s]

In [26]:
## Step 5: Fine-Tuning Setup


training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="no"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

In [None]:
## Step 6: Train
trainer.train()



Step,Training Loss
10,3.5405
20,3.1823
30,3.0124
40,2.5292


In [None]:
## Step 7: Inference
input_text = "User: I've been feeling very down lately. <|sep|> Bot:"
input_ids = tokenizer.encode(input_text, return_tensors="pt")
output = trainer.model.generate(input_ids, max_length=100, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output[0], skip_special_tokens=True))

# Save to file
#output_path = Path("/mnt/data/mental_health_chatbot_dialoGPT.py")
#output_path.write_text(notebook_content.strip())

#output_path