In [25]:
from ollama import chat, ChatResponse
import re

# --- Prompt Baselines ---
INITIAL_RESPONSE_PROMPT = (
    "You are an expert on the topic {research_topic}. Provide an extensive, detailed, and comprehensive answer "
    "to the research question. In your answer, highlight any areas or gaps that might require further exploration."
)

FIND_GAP_PROMPT = (
    "You are a group of 3 experts on the topic {research_topic}."
    "Think step by step on the following assay: "
    "<ASSAY>"
    "{assay}"
    "</ASSAY>"
    "provide one new topic to explor to fill a knowledge gap in the assay."
    "Based on the gaps identified in your answer, generate a JSON object with the following keys:\n"
    '   - "query": "The search query string."\n'
    '   - "aspect": "The aspect of the topic being addressed by this query."\n'
    '   - "rationale": "Why this query will help fill the gap."\n'
)

# --- Helper: Remove <THINK> Tags ---
def remove_think_tags(text: str) -> str:
    """
    Remove any text enclosed in <THINK>...</THINK> tags.
    The regex is case-insensitive.
    """
    return re.sub(r"<\s*THINK\s*>.*?<\s*/\s*THINK\s*>", "", text, flags=re.DOTALL | re.IGNORECASE).strip()

# --- Configuration & State Management ---
class Configuration:
    def __init__(self, ollama_base_url: str, local_llm: str, fetch_full_page: bool,
                 max_research_loops: int, max_fetch_pages: int, max_token_per_search: int):
        self.ollama_base_url = ollama_base_url
        self.local_llm = local_llm
        self.fetch_full_page = fetch_full_page
        self.max_research_loops = max_research_loops
        self.max_fetch_pages = max_fetch_pages
        self.max_token_per_search = max_token_per_search

def query_local_llm(state: dict, config: Configuration, prompt="") -> str:
    """
    Generate an extensive initial answer for the research topic.
    The answer should also indicate potential gaps for further research.
    """
    message = {"role": "user", "content": prompt}
    response: ChatResponse = chat(model=config.local_llm, messages=[message])
    initial_response = remove_think_tags(response.message.content.strip())
    state["initial_response"] = initial_response
    state["assay"] = initial_response
    return initial_response

def initialize_state(research_topic: str) -> dict:
    """
    Initialize the research state with the given topic.
    """
    return {
        "research_topic": research_topic,
        "initial_response": "",       # The original extensive answer.
        "assay":"",
        "search_query": research_topic
    }

def extract_json_from_llm_output(text):
    # Regular expression pattern to match JSON within triple backticks
    pattern = r'```json(\s*{.*?}\s*)```'

    # Find matches using DOTALL to match across multiple lines
    matches = re.findall(pattern, text, re.DOTALL)

    if not matches:
        raise ValueError("No JSON structure found in the provided text.")

    # Iterate through matches and attempt to parse as JSON
    for match in matches:
        try:
            json_data = json.loads(match)
            return json_data
        except json.JSONDecodeError as e:
            # If parsing fails, continue to the next match
            continue

    # If no valid JSON was parsed, raise an error
    raise ValueError("Found JSON-like structure, but could not parse it.")

def main():
    config = Configuration(
        ollama_base_url="http://localhost:11434",  # Your Ollama URL
        local_llm="deepseek-r1:8b",                      # Default LLM is "llama3.2"
        fetch_full_page=True,                      # Fetch full page content if needed
        max_research_loops=3,                      # Number of research iterations
        max_fetch_pages=5,                         # Number of pages to fetch per search
        max_token_per_search=4000                  # Token limit per search processing
    )

    # Step 1: Get the research question from the user
    research_topic = input("Enter your research question: ")
    print("### Research Question")
    print(research_topic)
    print("\n")
    state = initialize_state(research_topic)

    # Step 2: Generate an initial explanation using the local LLM. This answer will be the first instance of the assay
    prompt_initial = INITIAL_RESPONSE_PROMPT.format(research_topic=state["research_topic"])
    initial_explanation = query_local_llm(state, config, prompt_initial)
    print("### Initial Explanation")
    print(initial_explanation)
    print("\n")

    print("### Thinking Process")
    for i in range(config.max_research_loops):
        # Step 3: Evaluate the current assay and generate a follow-up question
        prompt_gap = FIND_GAP_PROMPT.format(research_topic=state["research_topic"], assay=state["assay"])
        followup_question = query_local_llm(state, config, prompt_gap)
        print(f"\n    >> Iteration {i+1} - Follow-up Question:\n")
        #print(followup_question)
        json_follow = extract_json_from_llm_output(followup_question)
        print(json_follow)

        # Step 4: Reiterate on a new topic
        prompt_follow = INITIAL_RESPONSE_PROMPT.format(research_topic=json_follow["query"])
        follow_explanation = query_local_llm(state, config, prompt_initial)
        print("### Initial Explanation")
        print(follow_explanation)
        print("\n")

        # Step 5: Add the data to the assay
        state["assay"] =  state["assay"] + follow_explanation

    print(state["assay"])

    # # Step 6: Ask the LLM to finalize the assay by integrating all gathered information and adding references
    # prompt_final = f"As an expert in the field, please complete the following assay by integrating all gathered information and include literature and web references at the end:\n{assay}"
    # final_assay = query_local_llm(prompt_final)
    # print("\nFinal Assay:\n", final_assay)

    # # Step 7: Save the final assay locally
    # try:
    #     with open("final_assay.txt", "w", encoding="utf-8") as f:
    #         f.write(final_assay)
    #     print("\nFinal assay saved to 'final_assay.txt'.")
    # except Exception as e:
    #     print("Error saving the final assay:", e)

if __name__ == "__main__":
    main()

# What is the state of the art of open source LLMs?

### Research Question
How big is the moon?


### Initial Explanation
The Moon is a celestial body that orbits Earth, with its size relative to Earth being a fascinating aspect of astronomical study. Here is a comprehensive overview of the Moon's size and related characteristics:

1. **Size Dimensions:**
   - **Diameter:** The average diameter of the Moon is approximately 2,400 kilometers.
   - **Radius:** This radius is roughly half of the diameter, around 1,200 kilometers. This makes the Moon's radius about 1/4th that of Earth's, which has a radius of about 6,371 kilometers.
   - **Circumference:** Calculated as π times the diameter, the Moon's circumference is approximately 7,500 kilometers.

2. **Mass:**
   - The Moon's mass is estimated to be about 7.35 × 10²² kilograms. This mass is significantly less than Earth's mass, which is roughly 5.97 × 10²⁴ kilograms.

3. **Surface Area:**
   - The surface area of the Moon is calculated using the formula πr², resulting in a value of approx

ValueError: No JSON structure found in the provided text.