In [1]:
!pip uninstall -y llama-cpp-python
!CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=86" pip install llama-cpp-python --no-cache-dir
!pip install openai
!pip install tenacity
!pip install -U "huggingface_hub[cli]"
!pip install pandas

[0mCollecting llama-cpp-python
  Downloading llama_cpp_python-0.3.1.tar.gz (63.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 MB[0m [31m238.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting typing-extensions>=4.5.0 (from llama-cpp-python)
  Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m237.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Building wheels for collected packages: llama-cpp-py

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import time
import logging
from datetime import datetime
from openai import OpenAI
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
import re
from os import getenv
from llama_cpp import Llama
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.notebook import tqdm
import numpy as np
from queue import Queue
from threading import Lock
import time
import pandas as pd
import sys

# Per-run logging setup: one general log (file + notebook output) and a separate
# file that log_response() appends the raw conversation turns to.
log_directory = "logs"
os.makedirs(log_directory, exist_ok=True)

# Take the timestamp once so both files of a run share the same suffix and can be
# paired by name (two separate datetime.now() calls can straddle a second boundary).
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

log_file = os.path.join(log_directory, f"cbt_simulation_base_{run_timestamp}.log")
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ],
    # basicConfig does nothing if the root logger already has handlers (e.g. this
    # cell was run before in the same kernel). force=True replaces them so messages
    # actually go to the newly named file.
    force=True,
)

response_log_file = os.path.join(log_directory, f"responses_{run_timestamp}.log")

class RateLimiter:
    """Thread-safe limiter that spaces calls at least `60 / requests_per_minute` seconds apart.

    Callers invoke `wait()` immediately before the rate-limited operation. The
    lock is held while sleeping, so concurrent callers are released one at a
    time, each one interval after the previous.

    Args:
        requests_per_minute: Maximum number of calls allowed per minute. Must be positive.

    Raises:
        ValueError: If `requests_per_minute` is not positive.
    """

    def __init__(self, requests_per_minute=20):
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be positive")
        self.requests_per_minute = requests_per_minute
        self.interval = 60.0 / requests_per_minute
        # Monotonic clock: unaffected by system clock adjustments, which with
        # time.time() could produce a negative elapsed time and an over-long sleep.
        self.last_request = time.monotonic()
        self.lock = Lock()

    def wait(self):
        """Block until at least `self.interval` seconds have passed since the previous call."""
        with self.lock:
            remaining = self.interval - (time.monotonic() - self.last_request)
            if remaining > 0:
                time.sleep(remaining)
            self.last_request = time.monotonic()

# Shared limiter; the retry_with_validation wrapper calls rate_limiter.wait()
# before every attempt of a decorated function.
rate_limiter = RateLimiter(requests_per_minute=20)

# The API key is read from the environment instead of being embedded in the
# notebook, where it would leak through version control and shared copies.
# Any key that was previously committed in this cell should be treated as
# compromised and revoked.
OPENROUTER_API_KEY = getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Export it in the environment "
        "before running this cell."
    )

openrouter_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# Stop sequences passed as `stop=STOP_TOKENS` to the local model in get_cbt_response.
# The entries are prompt-template markers ("### Instruction:", "### Response:") and the
# many ways a "Patient:" / "Therapist:" speaker label can appear in generated text,
# presumably so generation halts before the model starts writing the next turn itself.
# NOTE(review): several entries are longer variants of "Patient:" (with leading
# punctuation, newlines, or a word); whether they change where output is cut depends on
# how the model runtime resolves overlapping stop sequences — confirm before pruning.
STOP_TOKENS = [
    "### Instruction:",
    "### Response:",
    "Patient:",
    "? Patient:",
    "?\n\n### Patient:",
    "### Patient:",
    "\nPatient:",
    "\n### Response:",
    "?\nPatient:",
    "situations\nPatient:",
    "\n\nPatient:",
    "?\n\nPatient:",
    ".\nPatient:",
    "\n\n### Response:",
    ".Patient:",
    "\n\n### Patient:",
    "\nTherapist:",
    "\n\n### Patient:\n"
]

# Simulated patient personas. The whole dict is interpolated into the patient
# role-play system prompt in generate_patient_response, and "name" is used for the
# scripted opening message.
# Each "background" is written as adjacent string literals (implicitly concatenated)
# because a double-quoted literal cannot contain a raw line break.
patient_profiles = [
    {
        "name": "Alicia Rodriguez",
        "age": "28",
        "gender": "Female",
        "ethnicity": "Hispanic",
        "education": "Master's Degree",
        "occupation": "Software Developer",
        "symptom_severity": "Moderate",
        "engagement_level": "Medium",
        "life_events": ["Recent job promotion", "Death of a loved one", "Moving to a new city"],
        "family_background": "First-generation immigrant",
        "hobbies_interests": ["Coding", "Salsa dancing", "Reading sci-fi novels"],
        "social_support": "Limited social connections",
        "personality_traits": ["Perfectionist", "Introverted", "Analytical"],
        "coping_mechanisms": ["Overworking", "Problem-solving", "Social withdrawal"],
        "background": (
            "Alicia is a first-generation Hispanic American, born to parents who immigrated from Mexico when she was just two years old. Growing up in a close-knit community in East Los Angeles, she excelled academically, driven by her parents' emphasis on education as a path to success. Alicia's passion for technology led her to pursue a computer science degree, graduating summa cum laude from UCLA before earning her Master's from Stanford. Now working as a software developer at TechNova, a rapidly growing AI startup in San Francisco, Alicia has recently been promoted to team lead. This promotion, while a significant achievement, has intensified her struggle with imposter syndrome. As one of the few women, and even fewer Latinas, in a leadership position, she feels constant pressure to prove her worth and justify her rapid rise within the company.The unexpected loss of her grandmother, Maria, three months ago has profoundly affected Alicia. Maria, who helped raise her while her parents worked multiple jobs, was Alicia's primary source of emotional support and connection to her cultural roots. This loss has left Alicia feeling unmoored and intensely lonely in a city where she's lived for only a year and has yet to form deep friendships. Alicia's perfectionistic tendencies, instilled by the high expectations of her immigrant parents and reinforced by her competitive academic and professional environments, have made it difficult for her to acknowledge her emotional struggles or seek help. She often works 12-hour days, taking on additional projects and staying late to review her team's code, using work as a distraction from her grief and anxiety. Her depression manifests in various ways: persistent fatigue that coffee can't seem to cure, difficulty concentrating during important meetings, and a pervasive sense of worthlessness that contradicts her impressive resume. "
            "Alicia finds herself withdrawing from the few social connections she has, canceling salsa classes and declining invitations from coworkers. At night, she often loses herself in complex coding projects or science fiction novels, temporary escapes that ultimately reinforce her isolation. Despite outward appearances of success, Alicia's internal struggle is taking a toll. She's experiencing frequent headaches, disrupted sleep patterns, and has started to make uncharacteristic errors in her work. The contrast between her professional achievements and her emotional turmoil has left Alicia feeling increasingly disconnected from herself and unsure of her future path."
        )
    },
    {
        "name": "Marcus Thompson",
        "age": "42",
        "gender": "Male",
        "ethnicity": "African American",
        "education": "Master's Degree",
        "occupation": "High School Teacher",
        "symptom_severity": "Moderate to Severe",
        "engagement_level": "High",
        "life_events": ["Divorce", "Career change", "Caring for an ill family member"],
        "family_background": "Single-parent household",
        "hobbies_interests": ["Community activism", "Jazz music", "Basketball", "Reading historical biographies"],
        "social_support": "Active in community groups",
        "personality_traits": ["Empathetic", "Ambitious", "Sensitive"],
        "coping_mechanisms": ["Exercise", "Helping others", "Occasional stress eating"],
        "background": (
            "Marcus is a 42-year-old African American high school history teacher and community activist in inner-city Chicago. He's been teaching at Lincoln Park High School for the past 15 years, where he's known for his engaging lessons on civil rights and social movements. Marcus is deeply passionate about education and social justice, often organizing after-school programs and weekend workshops to help at-risk students. However, he's increasingly overwhelmed by the systemic issues affecting his students, such as poverty, gang violence, and lack of resources. Three years ago, Marcus went through a difficult divorce from his wife of 12 years, Sarah. They have two children: Jamal (10) and Aisha (8). The divorce was primarily due to growing apart and communication breakdown, exacerbated by Marcus's long work hours and community commitments. He's now learning to co-parent while dealing with significant financial pressures, including alimony payments and the cost of maintaining two households. Adding to his stress, Marcus's mother, Gloria (68), was recently diagnosed with early-stage Alzheimer's. As an only child from a single-parent household, Marcus feels a strong responsibility to care for her, often spending weekends at her home in the suburbs, which further strains his time and emotions. Marcus's depression manifests as irritability, difficulty concentrating during lesson planning, and persistent feelings of guilt about not being able to do more for his students, children, and mother. He often lies awake at night, ruminating on perceived failures and the overwhelming challenges faced by his community. Growing up in the Englewood neighborhood with a single mother who worked multiple jobs, Marcus developed a strong work ethic and determination to succeed. He was the first in his family to attend college, earning a scholarship to Northwestern University where he completed both his bachelor's and master's degrees in Education and African American Studies. "
            "This background fuels his determination to be present for his own children despite the challenges of co-parenting and his busy schedule. To cope with his depression, Marcus tries to maintain a rigorous exercise routine, including early morning runs along Lake Michigan and weekend basketball games with a local community league. He's also heavily involved in his local church, leading youth mentorship programs. However, he often finds himself stress eating late at night, particularly favoring his mother's recipe for sweet potato pie, which brings both comfort and guilt. Marcus's high engagement in therapy is driven by his desire to be a positive role model for both his children and students. He's aware of the stigma surrounding mental health in his community and hopes that by addressing his own struggles, he can encourage others to seek help. Despite his challenges, Marcus remains committed to making a difference in his community, embodying the change he wishes to see in the world."
        )
    },
    {
        "name": "Samantha Chen",
        "age": "35",
        "gender": "Female",
        "ethnicity": "East Asian",
        "education": "Bachelor's Degree",
        "occupation": "Entrepreneur",
        "symptom_severity": "Moderate",
        "engagement_level": "Medium",
        "life_events": ["Career change", "Financial difficulties", "Identity crisis"],
        "family_background": "High-achieving family with pressure to succeed",
        "hobbies_interests": ["Sustainable fashion design", "Yoga", "Organic gardening"],
        "social_support": "Supportive partner, strained family relationships",
        "personality_traits": ["Creative", "Anxious", "Perfectionist"],
        "coping_mechanisms": ["Mindfulness meditation", "Journaling", "Emotional eating"],
        "background": (
            "Samantha is a second-generation Chinese American born and raised in San Francisco. Her parents immigrated from Guangzhou, China in the 1980s and worked tirelessly to establish a successful import-export business. Growing up, Samantha excelled academically, graduating as valedictorian from her high school and earning a degree in Business Administration from UC Berkeley. For the past decade, Samantha climbed the corporate ladder at a prestigious tech company, reaching a senior management position by age 32. However, she increasingly felt unfulfilled and at odds with the company's environmental practices. After months of internal struggle and heated discussions with her parents, Samantha quit her job to launch 'GreenThread', an eco-friendly clothing line. While passionate about her new venture, Samantha grapples with severe anxiety about the business's uncertain future. She's invested a significant portion of her savings and taken out loans, adding financial stress to her already fragile mental state. The guilt of disappointing her parents, who view her career change as reckless and ungrateful, weighs heavily on her. Their relationship has become strained, with tense weekly family dinners and frequent arguments about her choices. Samantha's depression manifests as chronic insomnia, often lying awake until 3 or 4 AM, ruminating over business decisions and family conflicts. She's lost 15 pounds in the past three months due to a near-complete loss of appetite, surviving mostly on coffee and occasional meal replacement shakes. Once outgoing, she now frequently cancels plans with friends and has stopped attending her weekly book club. Her marriage to Michael, a supportive software engineer, is showing signs of strain. While he tries to be understanding, he's frustrated by Samantha's emotional distance and her refusal to seek professional help until now. "
            "They haven't been intimate in months, and their communication has devolved into brief, practical exchanges about household matters. Samantha's perfectionism, once an asset in her corporate career, now paralyzes her with indecision in her new role as an entrepreneur. She obsesses over every detail of her clothing designs and business plan, often redoing work multiple times and missing deadlines. While she attempts to cope through daily meditation and journaling, Samantha often finds herself binge-eating late at night, particularly favoring sugary cereals and ice cream - comfort foods from her childhood. She feels shame about this behavior, seeing it as a personal failure and further proof of her inability to control her life. Samantha's engagement in therapy is inconsistent. Some sessions she's fully present and eager to work on herself, while in others she's distant and defensive, especially when discussing her family or the possibility of medication. Her reluctance partly stems from the stigma around mental health in her community, where seeking therapy is often seen as a sign of weakness or failure."
        )
    },
    {
        "name": "Derek Olsen",
        "age": "55",
        "gender": "Male",
        "ethnicity": "Caucasian",
        "education": "High School",
        "occupation": "Unemployed (Former Construction Worker)",
        "symptom_severity": "Severe",
        "engagement_level": "Low",
        "life_events": ["Chronic illness diagnosis", "Job loss", "Financial difficulties"],
        "family_background": "Blue-collar family with history of substance abuse",
        "hobbies_interests": ["Woodworking", "Fishing", "Watching sports"],
        "social_support": "Estranged from family",
        "personality_traits": ["Reserved", "Pessimistic", "Resilient"],
        "coping_mechanisms": ["Substance use", "Social withdrawal", "Anger or aggression"],
        "background": (
            "Derek Olsen is a 55-year-old Caucasian male who spent 30 years as a skilled construction worker, specializing in commercial building projects. His career came to an abrupt halt three years ago when he suffered a severe back injury on a job site, resulting in multiple herniated discs and chronic, debilitating pain. The injury forced him onto disability, a transition he's struggled to accept. Growing up in a blue-collar family in a small Midwest town, Derek was the middle child of five. His father, a factory worker, battled alcoholism, while his mother worked part-time as a waitress to make ends meet. This upbringing instilled in Derek a strong work ethic but also normalized substance use as a coping mechanism. He dropped out of high school in his junior year to start working full-time in construction, eventually earning his GED at 28. Derek married his high school sweetheart, Sarah, at 22, and they had two children: Emma (now 30) and Jake (now 27). The marriage ended in divorce after 15 years, largely due to Derek's workaholic tendencies and growing alcohol dependency. Post-divorce, Derek maintained a close relationship with his children until his injury, when his depression and substance abuse worsened, causing a rift. The chronic pain from his injury is a constant in Derek's life, affecting his sleep, mood, and ability to perform even simple tasks. He's been prescribed various pain medications, which he sometimes misuses to cope with both physical and emotional pain. The loss of his career has hit Derek hard financially and emotionally. Once the breadwinner and a respected craftsman, he now struggles with feelings of worthlessness and a loss of identity. Derek's depression manifests in prolonged periods of isolation, where he can go days without leaving his small, cluttered apartment. He's lost interest in his former hobbies of woodworking and fishing, with his tools and fishing gear gathering dust in the garage. "
            "His social circle, once comprised of coworkers and drinking buddies, has dwindled to almost nothing. He spends most days watching TV, particularly sports, which serves as his primary connection to the outside world. Financially, Derek is barely staying afloat on disability payments and occasional under-the-table odd jobs. The stress of potential eviction and mounting medical bills exacerbates his mental state. He's behind on child support payments, which further strains his relationship with his children. Derek's resistance to therapy stems from a deeply ingrained belief that men should be self-reliant and that seeking help is a sign of weakness. This attitude, combined with his skepticism about mental health treatment, makes engaging in therapy challenging. When he does attend sessions, he's often guarded, deflecting with sarcasm or responding with minimal information. Despite his gruff exterior, Derek harbors deep-seated feelings of guilt and shame about his current situation and how it's affected his relationship with his children. He occasionally experiences passive suicidal ideation, though he's never made a plan or attempt. His resilience, a trait that served him well in his physically demanding career, now manifests as a stubborn determination to handle his problems on his own, even as evidence mounts that this approach isn't working."
        )
    },
    {
        "name": "Naomi Patel",
        "age": "23",
        "gender": "Non-binary",
        "ethnicity": "South Asian",
        "education": "Some College",
        "occupation": "Graduate Student",
        "symptom_severity": "Moderate to Severe",
        "engagement_level": "High",
        "life_events": ["Coming out as LGBTQ+", "Moving to a new city", "Academic struggles"],
        "family_background": "Conservative immigrant family",
        "hobbies_interests": ["Environmental activism", "Poetry writing", "Indie music", "Vegetarian cooking"],
        "social_support": "LGBTQ+ support network",
        "personality_traits": ["Sensitive", "Ambitious", "Creative"],
        "coping_mechanisms": ["Journaling", "Seeking social support", "Occasional self-harm"],
        "background": (
            "Naomi is a 23-year-old South Asian American graduate student pursuing a Master's degree in Environmental Science at UC Berkeley. Born and raised in a small town in New Jersey, they moved to California for their studies, marking their first time living away from their close-knit family. Naomi's parents immigrated from Gujarat, India in the late 1990s and have instilled strong cultural values and high academic expectations in their children. Growing up, Naomi excelled academically, graduating as valedictorian from their high school. However, they've always felt a disconnect between their internal identity and the external expectations placed upon them. This internal struggle intensified during their undergraduate years at Rutgers University, where they first encountered concepts of gender fluidity and non-binary identities. Naomi's decision to come out as non-binary six months ago has been met with mixed reactions. While their younger sister has been supportive, their parents struggle to understand, often still using feminine pronouns and expressing disappointment about Naomi's choice to cut their hair short and adopt a more androgynous style. This family tension has exacerbated Naomi's feelings of isolation and self-doubt. In their graduate program, Naomi is passionate about researching the impact of climate change on coastal communities, inspired by their family's roots in Gujarat's coastal region. However, the rigorous academic environment, combined with being in a new city and navigating their identity, has led to increased stress and depressive symptoms. Naomi often finds themselves overwhelmed by deadlines, experiencing chronic fatigue, and struggling to concentrate during lectures and while writing papers. Naomi has found some solace in Berkeley's vibrant LGBTQ+ community, attending support group meetings and participating in climate change protests. "
            "They've also started exploring vegetarian cooking as a way to connect with their cultural roots while aligning with their environmental values. Despite these positive outlets, Naomi has resorted to self-harm on three occasions in the past two months when feeling particularly overwhelmed. Recognizing the need for professional help, Naomi has sought therapy, showing high engagement in sessions. They're motivated to develop healthier coping mechanisms, build resilience, and find ways to bridge the gap between their identity and cultural background. Naomi hopes that through therapy, they can improve their academic performance, strengthen their relationships, and cultivate a stronger sense of self-acceptance."
        )
    }
]

def log_response(speaker, response):
    """
    Append a single timestamped entry to the response log file.

    The entry is a "[timestamp] speaker:" header line, the response text, and an
    80-dash separator line. A failure to write is reported through `logging.error`
    and is not raised to the caller.
    """
    separator_line = "-" * 80 + "\n"
    try:
        with open(response_log_file, "a", encoding="utf-8") as log_handle:
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            entry = f"\n[{stamp}] {speaker}:\n{response}\n"
            log_handle.write(entry)
            log_handle.write(separator_line)
    except IOError as e:
        logging.error(f"Error writing to response log: {e}")

def verify_gpu_setup(models_to_test=None):
    """
    Print GPU diagnostics and time a short test inference on each model.

    Args:
        models_to_test: Optional iterable of callables invoked as
            `model(prompt, max_tokens=10)`. When omitted, the module-level
            `models` variable is used if it has been defined; if it has not
            (or it is empty), the inference test is skipped instead of
            failing with a NameError.
    """
    try:
        import torch
    except ImportError:
        print("PyTorch not available")
    else:
        cuda_available = torch.cuda.is_available()
        print(f"CUDA available: {cuda_available}")
        if cuda_available:
            print(f"Current GPU Device: {torch.cuda.get_device_name(0)}")
            print("GPU Memory Usage:")
            print(f"Allocated: {torch.cuda.memory_allocated(0)//1024**2}MB")
            print(f"Cached: {torch.cuda.memory_reserved(0)//1024**2}MB")

    if models_to_test is None:
        # Fall back to the notebook-level `models` list, which is defined in a later cell.
        models_to_test = globals().get("models")
    if not models_to_test:
        print("No models loaded; skipping test inference")
        return

    test_prompt = "This is a test."
    # perf_counter is the clock intended for measuring short durations.
    start_time = time.perf_counter()
    for model in models_to_test:
        model(test_prompt, max_tokens=10)
    end_time = time.perf_counter()
    print(f"Test inference time: {end_time - start_time:.2f} seconds")

def count_words(text):
    """
    Count the whitespace-separated words in `text`.
    """
    words = text.split()
    return len(words)

def detect_session_end(text):
    """
    Identifies if the text contains phrases that indicate the end of a therapy session.

    Args:
        text: The message to scan. `None` or an empty string is treated as
            containing no closing phrase.

    Returns:
        A tuple `(found, pattern)`. `found` is True when any closing phrase
        matches (case-insensitively, on word boundaries); `pattern` is the
        regex string of the first phrase in the list that matched, or "" when
        nothing matched.
    """
    goodbye_phrases = [
        r"\bgoodbye\b",
        r"\bbye\b",
        r"\bsee you next time\b",
        r"\buntil our next session\b",
        r"\bthat's all for today\b",
        r"\bwe're out of time\b",
        r"\bsee you next week\b",
        r"\btake care\b",
        r"\bhave a good week\b",
        r"\bmain topics discussed\b",
        r"\bmain points discussed\b",
        r"\bour time is up\b",
        r"\buntil next time\b",
        r"\bsee you soon\b",
        r"\bI'll see you at our next appointment\b",
        r"\bdon't hesitate to reach out\b"
    ]

    if not text:
        return False, ""

    # Generated text often uses typographic apostrophes; the patterns are written
    # with straight ones, so normalize before matching or phrases such as
    # "that's all for today" are missed.
    normalized = text.replace("\u2019", "'").replace("\u2018", "'")

    for phrase in goodbye_phrases:
        if re.search(phrase, normalized, re.IGNORECASE):
            return True, phrase

    return False, ""

def retry_with_validation(func):
    """
    Decorator that retries a function with exponential backoff and validates its response.

    Every attempt first waits on the shared `rate_limiter`, then calls `func`.
    The result is rejected (and the call retried) when it is None, is blank or
    only a placeholder ('.' or '...') after stripping, or has fewer than two
    words. Any exception raised by `func` also triggers a retry. Up to 10
    attempts are made with a random exponential wait between 1 and 60 seconds.

    Returns:
        The wrapped function. On success it returns `func`'s original,
        unstripped response.

    Raises:
        Once the attempts are exhausted, tenacity raises its own error
        (RetryError by default, since `reraise` is not set here).
    """
    # Function-scope import keeps this block self-contained.
    import functools

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_random_exponential(min=1, max=60),
        # ValueError is a subclass of Exception, so Exception alone covers both.
        retry=retry_if_exception_type(Exception)
    )
    @functools.wraps(func)  # keep func's __name__ and docstring on the wrapper
    def wrapper(*args, **kwargs):
        rate_limiter.wait()
        response = func(*args, **kwargs)

        if response is None:
            raise ValueError("Null response received")

        cleaned_response = response.strip()

        # After strip() only these placeholders can occur; whitespace-only
        # responses have already collapsed to ''.
        invalid_responses = ('', '.', '...')
        if cleaned_response in invalid_responses:
            logging.warning(f"Invalid response detected: '{cleaned_response}'")
            raise ValueError(f"Invalid response detected: '{cleaned_response}'")

        if len(cleaned_response.split()) < 2:
            logging.warning(f"Response too short: '{cleaned_response}'")
            raise ValueError(f"Response too short: '{cleaned_response}'")

        return response
    return wrapper

@retry(
    stop=stop_after_attempt(3),
    wait=wait_random_exponential(min=1, max=60),
    retry=retry_if_exception_type((Exception, ValueError))
)
def get_cbt_response(conversation_history, model):
    """
    Produce the therapist's next turn from the local model.

    The conversation history is flattened into "Patient: ..." / "Therapist: ..."
    lines ("user" messages are the patient), wrapped in a chat-markup prompt and
    sent to `model` with `STOP_TOKENS` as stop sequences. The returned text is
    stripped, a leading "Therapist:" label is removed, a closing '.' is added if
    the text does not already end in '.', '!' or '?', and the result is logged
    via `log_response`.

    Raises:
        ValueError: if the model returns no choices, a blank/placeholder text,
            or fewer than three words. Any exception is logged and re-raised,
            which triggers the retry decorator (3 attempts).
    """
    try:
        instruction = "You are an AI CBT therapist. Respond appropriately in the following conversation:"

        # One "Speaker: text" line per message.
        transcript = "\n".join(
            f"{'Patient' if turn['role'] == 'user' else 'Therapist'}: {turn['content'].strip()}"
            for turn in conversation_history
        )

        prompt = (
            f"<|im_start|>system\n{instruction}<|im_end|>\n"
            f"<|im_start|>user\n{transcript}<|im_end|>\n"
            "<|im_start|>assistant"
        )

        generated = model(prompt=prompt, max_tokens=600, stop=STOP_TOKENS)

        has_choices = generated is not None and 'choices' in generated and generated['choices']
        if not has_choices:
            raise ValueError("Empty response from model")

        generated_response = generated['choices'][0]['text'].strip()

        if generated_response in ('', '.', '...', '\n', ' '):
            raise ValueError(f"Invalid response generated: '{generated_response}'")

        # Drop a speaker label the model may have echoed at the start.
        reply = re.sub(r"^(Therapist:?\s*)", "", generated_response)

        ends_with_punctuation = reply.endswith(('.', '!', '?'))
        if not ends_with_punctuation:
            reply = reply + '.'

        word_total = len(reply.split())
        if word_total < 3:
            raise ValueError("Response too short")

        log_response("Therapist", reply)
        return reply

    except Exception as e:
        logging.error(f"Error generating CBT response: {e}")
        raise

@retry_with_validation
def generate_patient_response(conversation_history, patient_profile, session_number, previous_summary, is_first_message=False, model_index=0):
    """
    Produces a patient response based on their profile and session context.

    Args:
        conversation_history: List of {"role", "content"} messages; "user" is
            the patient and any other role is the therapist. Only the last 10
            messages are sent to the model.
        patient_profile: Profile dict; the whole dict is interpolated into the
            system prompt and `patient_profile['name']` is used by name.
        session_number: Number of the current session, quoted in the prompt.
        previous_summary: Summary of the previous session, or a falsy value
            when there is none.
        is_first_message: When True, return a scripted opening line without
            calling the remote model.
        model_index: Not used by this function; kept for caller compatibility.

    Returns:
        The patient's message with any leading "Patient:" label removed.

    Raises:
        ValueError: if the model returns nothing usable or fewer than three
            words. Any exception is logged and re-raised so the decorator can retry.
    """
    try:
        # The session opener is scripted, so return it before building the prompt.
        if is_first_message:
            if previous_summary:
                return f"Hi, I'm {patient_profile['name']}. This is my {session_number} CBT session. Last time we talked about: {previous_summary}"
            return f"Hi, I'm {patient_profile['name']}. This is my first CBT session. I'm here because I've been feeling down lately."

        system_message = (
            f"You are roleplaying as a patient named {patient_profile['name']} in a text-based CBT session with an AI therapist named Nova. "
            f"Your detailed profile: {patient_profile}. "
            f"This is session number {session_number}. "
            f"Respond in character based on your profile, adopting speech patterns and vocabulary appropriate for your age, background, and personality traits. Most importantly be SPECIFIC and detailed with your responses and interaction. Don't be vague and generic. Make your story and responses lifelike and realistic. "
            f"Keep your responses short, ideally 1-5 sentences. "
            f"You are not psychologically minded and may need extra explanation to understand CBT concepts. Don't be afraid to ask for clarification and more details. If you need an example to understand please ask. Let the therapist lead the session, put the onus on them. "
            f"If the therapist mentions something about you that is incorrect, that you haven't told them or that doesn't align with your experience, you should correct them. Your therapist is a LLM so they can hallucinate, be very watchful for instances where the therapist references things that did not happen and always point them out. "
            f"Use the summary of the previous session to inform your responses in the current session. "
            f"AVOID REPETITION in your responses. "
            f"When the therapist says something, you don't have to respond to every detail they mentioned. It's okay to respond to only one or two things in therapist's message because real people don't programmatically respond to every aspect of a statement. "
        )

        # The prompt above tells the model to use the previous session's summary, but
        # the summary otherwise only appears in the opening message, which drops out
        # of the 10-message window below. Supply it explicitly.
        if previous_summary:
            system_message += f"Summary of the previous session: {previous_summary} "

        messages = [{"role": "system", "content": system_message}]

        # Roles are flipped: the patient model speaks as "assistant", so the
        # patient's earlier turns ("user" in the history) become "assistant".
        relevant_history = conversation_history[-10:]
        for message in relevant_history:
            role = "assistant" if message["role"] == "user" else "user"
            messages.append({"role": role, "content": message["content"]})

        response = openrouter_client.chat.completions.create(
            model="deepseek/deepseek-chat",
            messages=messages,
            max_tokens=300,
            temperature=1
        )

        # Guard against a completion with no choices or no text so the failure is
        # reported clearly instead of as an IndexError/AttributeError.
        if not response.choices or response.choices[0].message.content is None:
            raise ValueError("Empty patient response from model")

        generated_response = response.choices[0].message.content.strip()

        if not generated_response or generated_response in ['.', '...', '\n', ' ']:
            raise ValueError(f"Invalid patient response generated: '{generated_response}'")

        cleaned_response = re.sub(r"^(Patient:?\s*)", "", generated_response)

        if len(cleaned_response.split()) < 3:
            raise ValueError("Patient response too short")

        log_response("Patient", cleaned_response)
        return cleaned_response

    except Exception as e:
        logging.error(f"Error generating patient response: {e}")
        raise

def validate_response_quality(response):
    """
    Assesses the quality of a response based on predefined criteria.

    A response passes when it is non-empty, has at least three
    whitespace-separated words after stripping, and contains at least one of
    '.', '!' or '?'.

    Returns:
        True if the response meets all criteria, otherwise False.
    """
    if not response:
        return False

    cleaned = response.strip()

    # The three-word minimum also rejects blank and placeholder responses
    # ('.', '...'), so no separate placeholder check is needed.
    if len(cleaned.split()) < 3:
        return False

    return any(mark in cleaned for mark in '.!?')

def _fallback_session_summary(patient_profile, session_number):
    """
    Builds the canned summary text used when no valid summary was produced.
    """
    return (
        f"Session {session_number} covered various therapeutic topics and exchanges between "
        f"the therapist and {patient_profile['name']}. The discussion included exploration of "
        f"the patient's thoughts, feelings, and experiences. Several therapeutic techniques were "
        f"discussed and practiced during the session. The patient showed engagement with the "
        f"therapeutic process and the therapist provided appropriate guidance and support throughout "
        f"the session. Key themes included mental health management and coping strategies."
    )


@retry_with_validation
def generate_session_summary(conversation_history, patient_profile, session_number):
    """
    Produces a written summary of one therapy session.

    The conversation is flattened into a transcript ('Me' for entries whose
    role is "user", 'Therapist' for everything else) and sent to the
    summarisation model. A summary is accepted when it has at least 80 words;
    otherwise the request is repeated, up to 10 attempts in total. If the last
    attempt is still invalid or raises, a generic fallback summary is returned
    instead.

    Args:
        conversation_history: list of {"role": ..., "content": ...} dicts.
        patient_profile: dict; only its 'name' entry is read (for the fallback).
        session_number: number of the session, included in the prompt.

    Returns:
        str: the summary text prefixed with "Session Summary:\\n\\n".
    """
    system_message = (
        "Concisely summarize the key points of the therapy session focusing on the following aspects:\n"
        "1. Main topics discussed\n"
        "2. Techniques or exercises that were introduced or practiced\n"
        "3. Things that appeared helpful or important\n"
        "4. Any homework or tasks assigned\n"
        "5. Any insights or realizations that appeared during the session\n"
    )

    transcript_lines = []
    for msg in conversation_history:
        speaker = 'Me' if msg['role'] == 'user' else 'Therapist'
        transcript_lines.append(f"{speaker}: {msg['content']}")
    transcript = "\n".join(transcript_lines)

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Session number: {session_number}\nFull conversation history:\n" + transcript},
    ]

    max_attempts = 10
    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            completion = openrouter_client.chat.completions.create(
                model="anthropic/claude-3.5-sonnet",
                messages=messages,
                temperature=0.3,
            )
            summary = completion.choices[0].message.content.strip()

            # Having 80+ words already guarantees the text is non-empty,
            # not whitespace-only and not a lone '.'.
            if len(summary.split()) >= 80:
                return "Session Summary:\n\n" + summary

            if is_last_attempt:
                return "Session Summary:\n\n" + _fallback_session_summary(patient_profile, session_number)
            logging.warning(f"Invalid summary generated (attempt {attempt + 1}), retrying...")

        except Exception as e:
            logging.error(f"Error generating session summary: {e}")
            if is_last_attempt:
                return "Session Summary:\n\n" + _fallback_session_summary(patient_profile, session_number)

def initialize_llama_model(num_instances=2):
    """
    Loads `num_instances` separate copies of the Qwen2.5-7B-Instruct GGUF model.

    Every copy is created with the same options through Llama.from_pretrained.

    Args:
        num_instances: how many independent model objects to create.

    Returns:
        list: the loaded model objects, one per requested instance.
    """
    # NOTE(review): temperature / repeat_penalty / top_k / top_p / min_p /
    # stop / max_tokens are sampling options handed over at load time here.
    # Confirm the installed llama-cpp-python actually applies them from the
    # constructor rather than silently ignoring them.
    load_options = dict(
        repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
        filename="Qwen2.5-7B-Instruct-Q8_0.gguf",
        verbose=False,
        temperature=1,
        repeat_penalty=1.1,
        top_k=40,
        top_p=0.95,
        min_p=0.05,
        n_ctx=10000,
        n_batch=10000,
        n_gpu_layers=35,
        offload_kqv=True,
        f16_kv=True,
        stop=STOP_TOKENS,
        max_tokens=600,
    )
    return [Llama.from_pretrained(**load_options) for _ in range(num_instances)]

def _call_with_retries(operation, description, max_retries=10, delay_seconds=2):
    """
    Calls `operation()` until it yields a truthy result.

    An attempt counts as failed when it raises or returns a falsy value.
    After each failed attempt except the last, a warning is logged and the
    call sleeps for `delay_seconds`.

    Args:
        operation: zero-argument callable to invoke.
        description: short label used in the warning messages.
        max_retries: maximum number of attempts (must be >= 1).
        delay_seconds: pause between attempts.

    Returns:
        The first truthy value returned by `operation`.

    Raises:
        ValueError: if `max_retries` is below 1, or if the final attempt
            returned a falsy value.
        Exception: whatever the final attempt raised.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    last_error = None
    for attempt in range(max_retries):
        try:
            result = operation()
            if result:
                return result
            # Treat an empty result as a failure so a stale or unbound value
            # is never used by the caller.
            last_error = ValueError(f"Empty result for {description}")
        except Exception as e:
            last_error = e

        if attempt == max_retries - 1:
            break
        logging.warning(f"Attempt {attempt + 1} failed for {description}: {last_error}")
        time.sleep(delay_seconds)

    raise last_error


def conduct_session(patient_profile, session_number, previous_summary, model):
    """
    Executes a single therapy session, managing conversation flow and logging.

    The patient speaks first; therapist and patient then alternate until the
    therapist's message is detected as a session ending, the running word
    count reaches 5000, or the turn count reaches 50. A summary of the session
    is generated afterwards and appended to the transcript.

    Args:
        patient_profile: dict describing the patient; 'name' is used in logs.
        session_number: number of this session within the course.
        previous_summary: summary of the prior session, or None.
        model: local model object handed to get_cbt_response.

    Returns:
        tuple[str, str]: (transcript text, session summary). If an error
        occurs, the transcript gathered so far (with the error appended as
        termination details) and a summary of the form
        "Session terminated due to error: <error>" are returned instead of
        raising.
    """
    max_turns = 50
    max_words = 5000
    max_retries = 10

    conversation_history = []
    summary_conversation_history = []
    full_conversation = f"Session Number: {session_number}\n\n"
    turn_count = 0
    word_count = 0
    termination_reason = None
    termination_details = None

    def drop_trailing_therapist_turn():
        # The summary input should not end on a therapist message.
        if summary_conversation_history and summary_conversation_history[-1]["role"] == "assistant":
            summary_conversation_history.pop()

    try:
        patient_message = _call_with_retries(
            lambda: generate_patient_response(
                conversation_history,
                patient_profile,
                session_number,
                previous_summary,
                is_first_message=True
            ),
            "initial patient message",
            max_retries,
        )

        # The opening message goes into the live history only, not into the
        # history used for the summary.
        conversation_history.append({"role": "user", "content": patient_message})
        full_conversation += f"Patient: {patient_message}\n\n"
        turn_count += 1
        word_count += count_words(patient_message)

        while turn_count < max_turns and word_count < max_words:
            cbt_response = _call_with_retries(
                lambda: get_cbt_response(conversation_history, model),
                "CBT response",
                max_retries,
            )

            conversation_history.append({"role": "assistant", "content": cbt_response})
            # The therapist message of the final allowed exchange is left out
            # of the summary input.
            if turn_count < max_turns - 2:
                summary_conversation_history.append({"role": "assistant", "content": cbt_response})
            full_conversation += f"Therapist: {cbt_response}\n\n"
            turn_count += 1
            word_count += count_words(cbt_response)

            session_end, end_phrase = detect_session_end(cbt_response)
            if session_end:
                termination_reason = "Natural Session End"
                termination_details = f"Detected end phrase: '{end_phrase}'"
                drop_trailing_therapist_turn()
                logging.info(f"Session {session_number} for {patient_profile['name']} terminated naturally with phrase: '{end_phrase}'")
                break

            patient_message = _call_with_retries(
                lambda: generate_patient_response(
                    conversation_history,
                    patient_profile,
                    session_number,
                    previous_summary
                ),
                "patient response",
                max_retries,
            )

            conversation_history.append({"role": "user", "content": patient_message})
            summary_conversation_history.append({"role": "user", "content": patient_message})
            full_conversation += f"Patient: {patient_message}\n\n"
            turn_count += 1
            word_count += count_words(patient_message)

            if word_count >= max_words:
                termination_reason = "Word Limit Exceeded"
                termination_details = f"Word count: {word_count}/{max_words}"
                logging.info(f"Session {session_number} for {patient_profile['name']} terminated due to word limit: {word_count} words")
                full_conversation += f"****Session Terminated: {termination_reason} - {termination_details}****\n"
                break

        # Only report the turn limit when nothing else already ended the
        # session; otherwise a natural end or word-limit stop on the last
        # turn would be overwritten.
        if termination_reason is None and turn_count >= max_turns:
            termination_reason = "Turn Limit Exceeded"
            termination_details = f"Turn count: {turn_count}/{max_turns}"
            drop_trailing_therapist_turn()
            logging.info(f"Session {session_number} for {patient_profile['name']} terminated due to turn limit: {turn_count} turns")
            full_conversation += f"****Session Terminated: {termination_reason} - {termination_details}****\n"

        session_summary = _call_with_retries(
            lambda: generate_session_summary(summary_conversation_history, patient_profile, session_number),
            "session summary",
            max_retries,
        )

        if termination_reason:
            full_conversation += f"\nTermination Details:\n"
            full_conversation += f"Reason: {termination_reason}\n"
            full_conversation += f"Details: {termination_details}\n\n"

        full_conversation += f"\n{session_summary}\n"

        logging.info(f"Session {session_number} completed for {patient_profile['name']}:")
        logging.info(f"- Total turns: {turn_count}")
        logging.info(f"- Total words: {word_count}")
        logging.info(f"- Termination reason: {termination_reason}")
        logging.info(f"- Termination details: {termination_details}")

        return full_conversation, session_summary

    except Exception as e:
        error_msg = f"Critical error in session {session_number} for {patient_profile['name']}: {str(e)}"
        logging.error(error_msg)
        # Best effort: hand back whatever transcript exists so it can still
        # be saved. The transcript always holds at least the session header.
        termination_reason = "Critical Error"
        termination_details = str(e)
        error_conversation = full_conversation + f"\nTermination Details:\n"
        error_conversation += f"Reason: {termination_reason}\n"
        error_conversation += f"Details: {termination_details}\n"
        return error_conversation, f"Session terminated due to error: {str(e)}"

def conduct_therapy_course(patient_profile, model):
    """
    Manages a series of therapy sessions for a single patient.

    Runs 20 sessions in order. Each transcript is written to a timestamped
    text file inside a per-patient output directory, and each session's
    summary is passed to the next session as context. A session that fails is
    logged and the course continues with the next one.

    Args:
        patient_profile: dict describing the patient; 'name' is used for the
            output directory and in log messages.
        model: local model object forwarded to conduct_session.
    """
    output_dir = f"CBT_Depression_Simulations_{patient_profile['name']}"
    os.makedirs(output_dir, exist_ok=True)

    num_sessions = 20
    previous_summary = None
    # conduct_session returns a placeholder summary with this prefix when a
    # session aborts on an error.
    error_summary_prefix = "Session terminated due to error:"

    for session_number in range(1, num_sessions + 1):
        logging.info(f"Starting session {session_number} for {patient_profile['name']}")
        try:
            conversation, session_summary = conduct_session(patient_profile, session_number, previous_summary, model)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"session_{session_number}_{timestamp}.txt"
            filepath = os.path.join(output_dir, filename)

            with open(filepath, "w", encoding="utf-8") as file:
                file.write(conversation)
            logging.info(f"Session {session_number} for {patient_profile['name']} saved to: {filepath}")

            if isinstance(session_summary, str) and session_summary.startswith(error_summary_prefix):
                # Keep the last genuine summary: the error placeholder would
                # otherwise be quoted in the next session's opening message.
                logging.warning(
                    f"Session {session_number} for {patient_profile['name']} ended with an error; "
                    f"keeping the previous summary for the next session"
                )
            else:
                previous_summary = session_summary

        except Exception as e:
            logging.error(f"Error in session {session_number} for {patient_profile['name']}: {e}")

def conduct_parallel_sessions(patient_profiles, num_concurrent=2):
    """
    Executes therapy courses for multiple patients simultaneously.

    At most `num_concurrent` courses run at a time, and no more model
    instances are loaded than there are patients to process. Every profile
    in `patient_profiles` is processed; when there are more profiles than
    workers, the extra ones wait for a model to become free.

    Args:
        patient_profiles: sequence of patient profile dicts.
        num_concurrent: upper bound on simultaneously running courses.
    """
    if not patient_profiles:
        logging.warning("No patient profiles supplied; nothing to process")
        return

    worker_count = max(1, min(num_concurrent, len(patient_profiles)))
    models = initialize_llama_model(worker_count)

    # Workers borrow a model from the pool and hand it back when done, so a
    # model instance is never used by two courses at the same time.
    model_pool = Queue()
    for model in models:
        model_pool.put(model)

    def run_course(profile):
        model = model_pool.get()
        try:
            conduct_therapy_course(profile, model)
        finally:
            model_pool.put(model)

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        futures = [executor.submit(run_course, profile) for profile in patient_profiles]

        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing patients"):
            try:
                future.result()
            except Exception as e:
                logging.error(f"Error in parallel session: {e}")

def main():
    """
    Orchestrates the CBT simulation for all patient profiles, managing batching and concurrency.

    The batch size is derived from the total memory of GPU 0 (one slot per
    15 GiB, clamped to the range 1-3). If that lookup fails for any reason,
    a batch size of 2 is used. Profiles from the module-level
    `patient_profiles` are processed batch by batch with a 30 second pause
    between batches.
    """
    try:
        import torch
        available_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        batch_size = max(1, min(3, int(available_vram / 15)))
    except Exception as e:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit
        # still propagate, and record why the fallback was taken.
        logging.warning(f"Could not determine GPU memory ({e}); falling back to batch size 2")
        batch_size = 2

    logging.info(f"Using batch size of {batch_size} for parallel processing")

    for i in range(0, len(patient_profiles), batch_size):
        batch = patient_profiles[i:i+batch_size]
        conduct_parallel_sessions(batch, num_concurrent=batch_size)
        if i + batch_size < len(patient_profiles):
            time.sleep(30)

    logging.info("All CBT simulations completed")

# Script entry point: load models, run the GPU check, then start the simulation.
if __name__ == "__main__":
    # NOTE(review): `models` is not referenced again in this block, and
    # conduct_parallel_sessions loads its own instances through
    # initialize_llama_model. Presumably verify_gpu_setup() relies on this
    # global — confirm; otherwise these two instances only occupy GPU memory.
    models = initialize_llama_model(2)
    verify_gpu_setup()

    main()


Qwen2.5-7B-Instruct-Q8_0.gguf:   0%|          | 0.00/8.10G [00:00<?, ?B/s]

CUDA available: True
Current GPU Device: NVIDIA A40
GPU Memory Usage:
Allocated: 0MB
Cached: 0MB


2024-11-15 18:17:57,780 - INFO - Using batch size of 2 for parallel processing


Test inference time: 0.56 seconds


2024-11-15 18:18:01,147 - INFO - Starting session 1 for Alicia Rodriguez
2024-11-15 18:18:01,147 - INFO - Starting session 1 for Marcus Thompson


Processing patients:   0%|          | 0/2 [00:00<?, ?it/s]

2024-11-15 18:18:07,294 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:10,313 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:16,830 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:19,842 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:34,521 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:42,412 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:18:55,440 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:19:11,652 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 18:19:15,365 - INFO - HTTP Request: POST https://openrouter.a

Processing patients:   0%|          | 0/2 [00:00<?, ?it/s]

2024-11-15 19:07:40,129 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:07:43,118 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:07:49,714 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:07:52,924 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:08:05,254 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:08:15,309 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:08:31,554 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:08:54,735 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 19:09:00,317 - INFO - HTTP Request: POST https://openrouter.a

Processing patients:   0%|          | 0/1 [00:00<?, ?it/s]

2024-11-15 20:06:34,193 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 20:06:54,794 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 20:07:24,880 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 20:07:53,100 - INFO - Session 1 for Naomi Patel terminated naturally with phrase: '\bdon't hesitate to reach out\b'
2024-11-15 20:07:53,232 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-15 20:07:58,983 - INFO - Session 1 completed for Naomi Patel:
2024-11-15 20:07:58,986 - INFO - - Total turns: 8
2024-11-15 20:07:58,987 - INFO - - Total words: 1613
2024-11-15 20:07:58,988 - INFO - - Termination reason: Natural Session End
2024-11-15 20:07:58,990 - INFO - - Termination details: Detected end phrase: '\bdon't hesitate to reach out\b'
2024-11-15 20:07:58,994 - INFO - Session 1 for Nao