In [None]:
from ollama import chat, ChatResponse
import json
import re
import time
import urllib.request
import datetime
from IPython.display import display, Markdown
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS

# --- Prompt Baselines ---
# Prompt for the first LLM call. Placeholder: {research_topic}.
# Asks for a long-form answer that also points out gaps worth exploring.
RESPONSE_PROMPT = (
    "You are an expert on the topic: {research_topic}. Provide an extensive, detailed, and comprehensive answer "
    "to the research question. In your answer, highlight any areas or gaps that might require further exploration."
)

# Prompt used to turn the current text into one follow-up search.
# Placeholders: {research_topic}, {assay}.
# The model is told to reply with a single JSON object with the keys
# "query", "web-query", "aspect" and "rationale". The doubled braces
# ("{{" / "}}") are str.format escapes that render as literal "{" / "}"
# in the example shown to the model.
FIND_GAP_PROMPT = (
    "You are a group of 3 experts on the topic: {research_topic}. "
    "Think step by step on the following assay:\n"
    "<ASSAY>\n{assay}\n</ASSAY>\n\n"
    "Provide one new topic to explore to fill a knowledge gap in the assay. Based on the gaps identified in your answer, "
    "generate a JSON object with exactly the following keys:\n"
    "   - \"query\": The search query string.\n"
    "   - \"web-query\": The web search query string.\n"
    "   - \"aspect\": The aspect of the topic being addressed by this query.\n"
    "   - \"rationale\": A brief explanation of why this query will help fill the gap.\n\n"
    "Please output only the JSON object, with no additional text. For example:\n\n"
    "```json\n"
    "{{\n"
    '  "query": "example search query",\n'
    '  "web-query": "example web search query",\n'
    '  "aspect": "example aspect",\n'
    '  "rationale": "example rationale"\n'
    "}}\n"
    "```"
)

# Prompt used to merge web-search results into the current text.
# Placeholders: {research_topic}, {assay}, {web_search}.
# The model is instructed to cite only sources found in the <WEB> section.
COMBINE_PROMPT = (
    "You are a group of 3 experts on the topic: {research_topic}. "
    "You have to combine together the information from the written assay in <ASSAY></ASSAY> tags, "
    "with the information gathered from the results of a web search on peer-reviewed literature within the <WEB></WEB> tags. "
    "When combining them, you are allowed to use only the sources identified through the web search. "
    "Cite them in the text where appropriate and report them at the bottom.\n"
    "<ASSAY>\n{assay}\n</ASSAY>\n"
    "<WEB>\n{web_search}\n</WEB>"
)

# Prompt for the final LLM call. Placeholders: {research_topic}, {notes}.
# Requests a markdown report with Title, Introduction, Discussion and
# Gaps / Further research sections built from the accumulated notes.
FINALIZE_RESPONSE_PROMPT = (
    "You are a team of experts on the topic: {research_topic}. Your goal is to analyze the text provided in the <TEXT></TEXT> tags "
    "and create an extensive, detailed, and comprehensive report using the information provided. Aim to 500 words per section. "
    "Your thesis is formatted in markdown and includes:\n"
    "1. Title\n"
    "2. Introduction\n"
    "3. Discussion\n"
    "4. Gaps / Further research\n"
    "<TEXT>\n{notes}\n</TEXT>"
)

# --- Helper: Exponential Backoff in DuckDuckGo Search ---
def _fetch_page_text(url: str, fallback: str, timeout: float) -> str:
    """Return the visible text of the page at *url*, or *fallback* on any failure."""
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the connection even if read/decode fails,
        # and the timeout keeps one unresponsive site from stalling the search.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            html = response.read().decode('utf-8', errors='replace')
        return BeautifulSoup(html, 'html.parser').get_text()
    except Exception:
        # Deliberately best-effort: any network or parsing problem falls back
        # to the search-result snippet instead of failing the whole search.
        return fallback


def duckduckgo_search(query: str, max_results: int = 5, fetch_full_page: bool = False, retries: int = 5, backoff: int = 3,
                      page_timeout: float = 10.0) -> dict:
    """
    Perform a DuckDuckGo search for the given query with exponential backoff in case of rate limiting.

    Args:
        query (str): The search query.
        max_results (int): Number of results to return.
        fetch_full_page (bool): If True, attempt to retrieve full page content.
        retries (int): Maximum number of search attempts.
        backoff (int): Base of the backoff delay; the wait before retry k is backoff ** k seconds.
        page_timeout (float): Timeout in seconds for each full-page download.

    Returns:
        dict: A dictionary with a "results" key containing a list of result dicts.
        Each result dict has the keys "title", "url", "content" (the search snippet)
        and "raw_content" (full page text when fetched, otherwise the snippet).
        Results missing a URL, title or snippet are dropped.

    Raises:
        Exception: If maximum retries are exceeded due to rate limiting.
        Any other exception raised by the search itself is propagated unchanged.
    """
    last_error = None
    for attempt in range(retries):
        try:
            with DDGS() as ddgs:
                search_results = list(ddgs.text(query, max_results=max_results))
        except Exception as e:
            # Only rate-limit errors are retried; everything else is a real failure.
            if "Ratelimit" not in str(e):
                raise
            last_error = e
            # Do not sleep after the final attempt: there is nothing left to retry.
            if attempt + 1 < retries:
                wait_time = backoff ** (attempt + 1)
                print(f"Rate limit encountered. Retrying in {wait_time} seconds... (Attempt {attempt+1} of {retries})")
                time.sleep(wait_time)
            continue

        results = []
        for r in search_results:
            url = r.get("href")
            title = r.get("title")
            content = r.get("body")
            if not all([url, title, content]):
                continue
            raw_content = content
            if fetch_full_page:
                raw_content = _fetch_page_text(url, content, page_timeout)
            results.append({
                "title": title,
                "url": url,
                "content": content,
                "raw_content": raw_content
            })
        return {"results": results}
    raise Exception("Exceeded maximum retries due to rate limiting.") from last_error

def enhance_query_for_scientific_literature(query: str) -> str:
    """
    Bias a search query toward scholarly sources.

    The query is returned followed by a single space and a fixed suffix made
    of three `site:` filters joined with OR plus the keywords
    "peer-reviewed journal".

    Args:
        query (str): The original query.

    Returns:
        str: The query with the scholarly suffix appended.
    """
    scholarly_suffix = "site:pubmed.ncbi.nlm.nih.gov OR site:sciencedirect.com OR site:doi.org peer-reviewed journal"
    return "{} {}".format(query, scholarly_suffix)

def search_scientific_literature(query: str, num_results: int = 5, fetch_full_page: bool = False) -> dict:
    """
    Run a literature-focused DuckDuckGo search.

    The query is first passed through enhance_query_for_scientific_literature,
    the resulting query is printed, and the search is delegated to
    duckduckgo_search.

    Args:
        query (str): The original query.
        num_results (int): Forwarded to duckduckgo_search as max_results.
        fetch_full_page (bool): Forwarded to duckduckgo_search unchanged.

    Returns:
        dict: Whatever duckduckgo_search returns for the enhanced query.
    """
    scholarly_query = enhance_query_for_scientific_literature(query)
    print("Enhanced Query:", scholarly_query)
    search_options = {"max_results": num_results, "fetch_full_page": fetch_full_page}
    return duckduckgo_search(scholarly_query, **search_options)

# --- Helper: Remove <THINK> Tags ---
def remove_think_tags(text: str) -> str:
    """
    Strip every <THINK>...</THINK> section from the text.

    Matching ignores case, tolerates whitespace inside the tag brackets and
    spans newlines. A tag that is opened but never closed is left untouched.
    The result has leading and trailing whitespace removed.
    """
    think_section = re.compile(r"<\s*THINK\s*>.*?<\s*/\s*THINK\s*>", re.DOTALL | re.IGNORECASE)
    without_sections = think_section.sub("", text)
    return without_sections.strip()

# --- Configuration & State Management ---
class Configuration:
    """
    Plain settings holder for the research pipeline.

    Every constructor argument is stored unchanged on the instance under the
    same name; no validation is performed.
    """

    def __init__(self, ollama_base_url: str, local_llm: str, fetch_full_page: bool,
                 max_research_loops: int, max_fetch_pages: int, max_token_per_search: int):
        # Where the model lives and which model to use.
        self.ollama_base_url, self.local_llm = ollama_base_url, local_llm
        # Whether searches should try to download full page content.
        self.fetch_full_page = fetch_full_page
        # Numeric limits: research iterations, pages per search, and a token
        # budget per search (presumably consumed by callers - verify usage).
        self.max_research_loops, self.max_fetch_pages, self.max_token_per_search = (
            max_research_loops,
            max_fetch_pages,
            max_token_per_search,
        )

def initialize_state(research_topic: str) -> dict:
    """
    Build a fresh research-state dictionary for a topic.

    The topic is stored under "research_topic" and also seeds "search_query";
    "initial_response" and "assay" start out as empty strings.
    """
    state = {"research_topic": research_topic}
    # Text fields start empty and are filled in as the pipeline runs.
    state.update(initial_response="", assay="")
    state["search_query"] = research_topic
    return state

def query_local_llm(state: dict, config: Configuration, prompt="", *, update_state: bool = True) -> str:
    """
    Send a single user prompt to the local LLM and return its cleaned reply.

    The reply has <THINK>...</THINK> sections removed and surrounding
    whitespace stripped.

    Args:
        state (dict): Research state. When update_state is True, both
            state["initial_response"] and state["assay"] are OVERWRITTEN with
            the reply, whatever the prompt was.
        config (Configuration): Supplies the model name (local_llm) and the
            Ollama server URL (ollama_base_url).
        prompt (str): The user message to send.
        update_state (bool): Pass False to leave state untouched, e.g. for
            helper prompts whose output is not the essay itself.

    Returns:
        str: The cleaned model reply.
    """
    message = {"role": "user", "content": prompt}
    base_url = getattr(config, "ollama_base_url", None)
    if base_url:
        # Honour the configured server instead of silently using the
        # library default host.
        from ollama import Client
        response: ChatResponse = Client(host=base_url).chat(model=config.local_llm, messages=[message])
    else:
        response = chat(model=config.local_llm, messages=[message])
    # Guard against a reply with no text content.
    output = remove_think_tags((response.message.content or "").strip())
    if update_state:
        state["initial_response"] = output
        state["assay"] = output
    return output

def extract_json_from_llm_output(text: str):
    """
    Attempt to extract a JSON object from the provided text.
    This function supports, in order of preference:
      1. JSON enclosed in triple backticks with the tag "json".
      2. JSON enclosed in triple backticks without the tag (whitespace or
         newlines between the backticks and the braces are allowed).
      3. A plain JSON string.
      4. A JSON object embedded in surrounding prose (the span from the first
         "{" to the last "}").

    Returns:
        Parsed JSON object (the first candidate that parses successfully).

    Raises:
        ValueError if no JSON-like structure is found, or if none of the
        candidates can be parsed. The offending text is printed first.
    """
    patterns = [
        r"```json\s*(\{.*?\})\s*```",  # with "json" tag
        r"```\s*(\{.*?\})\s*```"       # without tag; fences usually sit on their own lines
    ]
    candidates = []
    for pattern in patterns:
        candidates.extend(re.findall(pattern, text, flags=re.DOTALL | re.IGNORECASE))

    # Unfenced fallbacks are tried after any fenced candidates.
    stripped = text.strip()
    if stripped.startswith("{") and stripped.endswith("}"):
        candidates.append(stripped)
    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if first_brace != -1 and last_brace > first_brace:
        embedded = text[first_brace:last_brace + 1]
        if embedded not in candidates:
            candidates.append(embedded)

    if not candidates:
        print("<ERROR>:\n%s\n</ERROR>" % text)
        raise ValueError("No JSON structure found in the provided text.")
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    print("<ERROR>:\n%s\n</ERROR>" % text)
    raise ValueError("Found JSON-like structure, but could not parse it.")

# --- Main Research Pipeline ---
def _query_preserving_state(state: dict, config, prompt: str) -> str:
    """Query the LLM, then restore the state fields that query_local_llm overwrites."""
    saved = {key: state.get(key, "") for key in ("initial_response", "assay")}
    try:
        return query_local_llm(state, config, prompt)
    finally:
        # query_local_llm writes its reply into both fields on every call;
        # put the previous values back so only main() decides what they hold.
        state.update(saved)


def main():
    """
    Run the interactive research pipeline.

    Steps: read a research question, ask the LLM for an initial essay, then
    for each research loop (a) ask for a knowledge gap as JSON, (b) search the
    web literature for it, (c) ask the LLM to combine the results with the
    essay and append that to the essay. Finally ask for a formatted report and
    display it as Markdown.
    """
    config = Configuration(
        ollama_base_url="http://localhost:11434",  # Your Ollama URL
        local_llm="llama3.2",                      # Default LLM is "llama3.2"
        fetch_full_page=True,                      # Fetch full page content if needed
        max_research_loops=3,                      # Number of research iterations
        max_fetch_pages=5,                         # Number of pages to fetch per search
        max_token_per_search=4000                  # Token limit per search processing
    )
    # NOTE(review): max_token_per_search is not applied anywhere in this
    # pipeline; the raw search results are inserted into the prompt as-is.

    # Step 1: Get the research question from the user.
    research_topic = input("Enter your research question: ")
    print("#### Research Question ####\n")
    print(research_topic)
    print("\n")
    state = initialize_state(research_topic)

    # Step 2: Generate an initial explanation (assay) using the local LLM.
    print("  >> Create initial assay...\n")
    prompt_initial = RESPONSE_PROMPT.format(research_topic=state["research_topic"])
    initial_explanation = _query_preserving_state(state, config, prompt_initial)
    # Only the first answer is the "initial response" and the starting essay.
    state["initial_response"] = initial_explanation
    state["assay"] = initial_explanation
    print(initial_explanation)
    print("\n")

    print("#### Thinking Process ####\n")
    for i in range(config.max_research_loops):
        print(f">> Iteration {i+1}\n")
        print("  >> Find gap... ")
        # Step 3: Evaluate the current assay and generate a follow-up question.
        prompt_gap = FIND_GAP_PROMPT.format(research_topic=state["research_topic"], assay=state["assay"])
        followup_question_llm = _query_preserving_state(state, config, prompt_gap)
        try:
            followup_question_json = extract_json_from_llm_output(followup_question_llm)
            followup_query = followup_question_json["query"]
            user_query = followup_question_json["web-query"]
        except (ValueError, KeyError) as err:
            # A malformed model reply should cost one iteration, not the
            # whole run and everything gathered so far.
            print(f"     > Could not read a follow-up question ({err}); skipping this iteration.\n")
            continue
        print(f"     > Follow-up Question: {followup_query}")
        print(f"     > Web-search query: {user_query}")

        # Step 4: Gather literature sources, honouring the configuration.
        print("  >> Query web-literature... ")
        web_search = search_scientific_literature(
            user_query,
            num_results=config.max_fetch_pages,
            fetch_full_page=config.fetch_full_page,
        )

        # Step 5: Combine literature with the assay.
        print("  >> Combine literature to assay...\n")
        prompt_follow = COMBINE_PROMPT.format(research_topic=state["research_topic"], assay=state["assay"], web_search=web_search)
        follow_explanation = _query_preserving_state(state, config, prompt_follow)
        print(follow_explanation)
        # Update the assay by appending the new follow explanation; the blank
        # line keeps the two markdown texts from running into each other.
        state["assay"] = state["assay"] + "\n\n" + follow_explanation
        print("  ##########\n")

    # Step 6: Finalize the assay.
    prompt_finalize = FINALIZE_RESPONSE_PROMPT.format(research_topic=state["research_topic"], notes=state["assay"])
    finalize_text_llm = _query_preserving_state(state, config, prompt_finalize)

    print("#### Final Assay ####\n")
    display(Markdown(finalize_text_llm))

if __name__ == "__main__":
    main()


#### Research Question ####

how to perform proper research


  >> Create initial assay...

Performing proper research is a crucial skill for anyone seeking to gather accurate information, make informed decisions, and contribute meaningfully to their field of interest. It involves a systematic and rigorous approach to identifying, evaluating, and synthesizing relevant sources of information. In this comprehensive answer, we will delve into the intricacies of conducting thorough research, highlighting essential steps, strategies, and best practices.

**Step 1: Define Your Research Question or Objective**

The first step in conducting research is to clearly define your research question or objective. What problem do you want to address? What question do you want to answer? Be specific, concise, and focused. Take the time to refine your query until you have a clear direction.

**Step 2: Conduct Literature Review**

A literature review involves exploring existing research on your topic to 

# Title
The Intersectionality of Minority Stress Theory in Social Determinants of Health for Individuals with Disabilities

## Introduction

The intersectionality of minority stress theory in the context of social determinants of health for individuals with disabilities is a critical area of research that has been largely overlooked. However, recent studies have begun to explore the intersectional nature of health disparities faced by individuals with disabilities. This report aims to provide an extensive analysis of the existing literature on this topic, highlighting the key findings and implications for future research.

## Discussion

Intersectionality, as described by Crenshaw (1989), refers to the multiple dimensions of oppression that intersect and compound to produce unique experiences of marginalization. In the context of disability, individuals with disabilities face not only stigma and exclusion but also structural barriers and systemic inequalities that affect their access to healthcare, education, employment, and other social determinants of health.

Minority stress theory (MST), developed by Meyer (1995), posits that minority groups experience heightened levels of stress due to the internalized oppression and stigma associated with being part of a marginalized group. For individuals with disabilities, MST suggests that the internalization of ableism and the societal expectations surrounding disability can lead to mental health disparities.

Studies have shown that intersectionality and minority stress theory intersect in complex ways when examining social determinants of health for individuals with disabilities (Hodge & Winkler-Taylor, 2017; Ruggiero et al., 2018). For example, research has demonstrated that individuals with disabilities who are also racial or ethnic minorities experience higher levels of stress and mental health disparities due to the intersection of ableism and racism (Wang et al., 2020).

Furthermore, studies have highlighted the importance of considering multiple identities when examining social determinants of health for individuals with disabilities. For instance, research has shown that individuals with disabilities who are also low-income or have lower levels of education experience higher levels of stress and mental health disparities due to the intersection of ableism and socioeconomic inequality (Hodge & Winkler-Taylor, 2017).

The findings of this report suggest that the intersectionality of minority stress theory in the context of social determinants of health for individuals with disabilities is a critical area of research that requires further attention. By examining the complex intersections between multiple identities and systems of oppression, researchers can better understand the unique experiences of mental health disparities faced by individuals with disabilities.

## Gaps / Further Research

Despite the growing body of research on this topic, there are several gaps in current knowledge that warrant further investigation:

1. **Limited representation**: The majority of studies examining the intersectionality of minority stress theory and social determinants of health for individuals with disabilities have been conducted using white, able-bodied populations. Future research should prioritize the inclusion of diverse populations, including racial and ethnic minorities, individuals with disabilities, and those from low-income backgrounds.
2. **Methodological limitations**: Many studies have relied on cross-sectional designs, which may not capture the complex dynamics of intersectional experiences. Longitudinal designs and qualitative methods could provide more nuanced insights into the experiences of individuals with disabilities.
3. **Lack of attention to intersectionality**: While some studies have acknowledged the importance of intersectionality, few have fully explored its implications for social determinants of health for individuals with disabilities.
4. **Limited understanding of context**: Research has often focused on individual-level factors, neglecting the role of contextual factors, such as healthcare policies and socioeconomic conditions, in shaping mental health disparities.

To address these gaps, future research should prioritize:

1. **Inclusive recruitment strategies**: Recruit diverse populations to ensure representation from underrepresented groups.
2. **Longitudinal designs**: Employ longitudinal designs to capture the dynamics of intersectional experiences over time.
3. **Qualitative and mixed-methods approaches**: Use qualitative and mixed-methods approaches to gain a deeper understanding of the complexities of intersectional experiences.
4. **Contextual analysis**: Examine the role of contextual factors in shaping mental health disparities among individuals with disabilities.

By addressing these gaps, researchers can contribute to a more comprehensive understanding of the intersectionality of minority stress theory and social determinants of health for individuals with disabilities, ultimately informing more effective interventions and policies to promote health equity.