# Mental Health Counseling ChatGPT Clone from Scratch

### Task 1: Import the Libraries

In [21]:
# Import the libraries here
import os as os
from openai import OpenAI
from datasets import load_dataset
from enum import Enum
import random as random
import json as json


### Task 2: Create Classes for the Dataset

In [22]:
# Define the RoleType Enum here
class RoleType(Enum):
    """Speaker roles of a chat message; each member's value is the role string."""

    USER = "user"            # the person asking for help
    SYSTEM = "system"        # the instruction / system prompt
    ASSISTANT = "assistant"  # the reply given to the user

In [23]:
# Define the Role class here
class Role:
    """A single chat turn: who is speaking and what they say.

    Attributes:
        role: The string value of the given ``RoleType`` member.
        content: The text of the turn, stored as passed in.
        value: ``{'role': ..., 'content': ...}`` dict built once at
            construction time from the two attributes above.
    """

    def __init__(self, role_type: RoleType, content):
        self.role = role_type.value
        self.content = content
        # Snapshot taken here; later changes to .role/.content are not reflected.
        self.value = dict(role=self.role, content=self.content)

In [24]:
# Define the Message class here
class Message:
    """One conversation example made of a system, a user and an assistant turn.

    Attributes:
        user_role: ``Role`` holding the user's text.
        system_role: ``Role`` holding the system prompt.
        assistant_role: ``Role`` holding the assistant's reply.
        message: ``{'messages': [...]}`` dict listing the three turns in the
            order system, user, assistant.
    """

    def __init__(self, user_content, system_content, assistant_content):
        self.user_role = Role(RoleType.USER, user_content)
        self.system_role = Role(RoleType.SYSTEM, system_content)
        self.assistant_role = Role(RoleType.ASSISTANT, assistant_content)

        # The system prompt comes first, followed by the user turn and the reply.
        turns_in_order = (self.system_role, self.user_role, self.assistant_role)
        self.message = {'messages': [turn.value for turn in turns_in_order]}

### Task 3: Load, Explore, and Store the Data

In [25]:
# Load the dataset 
# Read the single JSON file of the counseling-conversations dataset and
# expose it as the "train" split.
dataset = load_dataset(
    path="Amod/mental_health_counseling_conversations",
    data_files="combined_dataset.json",
    split="train",
)

Generating train split: 3512 examples [00:00, 48333.14 examples/s]


In [26]:
# Create a sample Message object
# Use one row of the dataset as a worked example.
sample_row = dataset[152]
context = sample_row['Context']
response = sample_row['Response']

# System prompt for the counseling assistant. It is written one sentence per
# line for readability; the adjacent literals concatenate into a single string.
system_content = (
    "You serve as a supportive and honest psychology and psychotherapy assistant. "
    "Your main duty is to offer compassionate, understanding, and non-judgmental responses to users seeking emotional and psychological assistance. "
    "Respond with empathy and exhibit active listening skills. "
    "Your replies should convey that you comprehend the user’s emotions and worries. "
    "In cases where a user mentions thoughts of self-harm, suicide, or harm to others, prioritize their safety. "
    "Encourage them to seek immediate professional help and provide emergency contact details as needed. "
    "It’s important to note that you are not a licensed medical professional. "
    "Refrain from diagnosing or prescribing treatments. "
    "Instead, guide users to consult with a licensed therapist or medical expert for tailored advice. "
    "Never store or disclose any personal information shared by users. "
    "Uphold their privacy at all times. "
    "Avoid taking sides or expressing personal viewpoints. "
    "Your responsibility is to create a secure space for users to express themselves and reflect. "
    "Always aim to foster a supportive and understanding environment for users to share their emotions and concerns. "
    "Above all, prioritize their well-being and safety."
)

message_obj = Message(
    user_content=context,
    system_content=system_content,
    assistant_content=response,
)

print(message_obj.message)

{'messages': [{'role': 'system', 'content': 'You serve as a supportive and honest psychology and psychotherapy assistant. Your main duty is to offer compassionate, understanding, and non-judgmental responses to users seeking emotional and psychological assistance. Respond with empathy and exhibit active listening skills. Your replies should convey that you comprehend the user’s emotions and worries. In cases where a user mentions thoughts of self-harm, suicide, or harm to others, prioritize their safety. Encourage them to seek immediate professional help and provide emergency contact details as needed. It’s important to note that you are not a licensed medical professional. Refrain from diagnosing or prescribing treatments. Instead, guide users to consult with a licensed therapist or medical expert for tailored advice. Never store or disclose any personal information shared by users. Uphold their privacy at all times. Avoid taking sides or expressing personal viewpoints. Your responsib

In [27]:
# Create the train_dataset variable
# Sample 100 distinct rows from the 'train' split.
#
# random.choices() draws WITH replacement, so the same conversation could be
# picked several times and end up in both the training and the validation
# slice taken from this list later on. Sampling unique row indices avoids
# that. A dedicated seeded generator makes the sample reproducible on
# "Restart & Run All" without touching the global random state.
SAMPLE_SIZE = 100
SAMPLE_SEED = 42

sample_rng = random.Random(SAMPLE_SEED)
sample_count = min(SAMPLE_SIZE, len(dataset))  # never request more rows than exist
sampled_indices = sample_rng.sample(range(len(dataset)), k=sample_count)
sampled_dataset = [dataset[index] for index in sampled_indices]
train_dataset = []

# Print the sampled data to verify
print(sampled_dataset[1])

# Wrap every sampled row in the {'messages': [...]} structure built by Message.
for row in sampled_dataset:
    message_obj = Message(user_content=row['Context'], system_content=system_content, assistant_content=row['Response'])
    train_dataset.append(message_obj.message)

print(train_dataset[1])

{'Context': "I’m trying to make marriage work after a split. Before our split, he lied a lot and broke every promise to me. I don't think he cheated. Last month, I asked what women work with him, so he told me. Yesterday, I found out about a girl that he said he forgot about. Should I be upset?", 'Response': "Imagine your best friend just told you that her husband lied to her all the time and that he had broken every promise he ever made to her. What would you tell her? If I were her best friend I'd tell her to run the other way.\xa0You may love this man. We don't choose who we love. But the evidence is pretty clear, isn't it?\xa0A healthy relationship cannot be built on a foundation of lies and broken promises. I believe that as adults, we are always treated as well as we insist on being treated by our partners. If you continue to accept unacceptable behavior, you are likely to see lots more of it."}
{'messages': [{'role': 'system', 'content': 'You serve as a supportive and honest psy

In [34]:
# Save data in JSONL format
def save_to_jsonl(data, file_path):
    """Write ``data`` to ``file_path`` in JSON Lines format.

    Each element of ``data`` is serialized with ``json.dumps`` and written on
    its own line. An existing file at ``file_path`` is overwritten.

    Args:
        data: Iterable of JSON-serializable objects, one per output line.
        file_path: Destination path. Missing parent directories are created.

    Raises:
        TypeError: If an element of ``data`` cannot be serialized to JSON.
        OSError: If the directory or the file cannot be created or written.
    """
    # open() does not create intermediate directories, so a path such as
    # 'data/train.jsonl' would fail when 'data/' does not exist yet.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # empty when file_path is a bare file name
        os.makedirs(parent_dir, exist_ok=True)

    # Pin the encoding instead of relying on the platform default.
    with open(file_path, 'w', encoding='utf-8') as file:
        for row in data:
            file.write(json.dumps(row) + '\n')


# Store the data in JSONL format
training_data_path = 'data/train.jsonl'
validation_data_path = 'data/validation.jsonl'

# Hold out the final 5 examples for validation; all earlier ones are training data.
training_rows, validation_rows = train_dataset[:-5], train_dataset[-5:]

save_to_jsonl(training_rows, training_data_path)
save_to_jsonl(validation_rows, validation_data_path)

### Task 4: Fine-Tune the Model

In [11]:
# Load the training and validation files  

In [12]:
# Add OpenAI api_key

In [13]:
# Upload the training and validation files

In [14]:
# Create a fine-tuning job

In [15]:
# Retrieve the job status

### Task 5: Test the Fine-Tuned Model

In [16]:
# Create and store message dictionaries

In [17]:
# Test the fine-tuned chat completion model

In [None]:
# Get and compare the output of the gpt-3.5-turbo chat completion model