In [1]:
# enable reloading
%load_ext autoreload
# all the modules should be reloaded before executing the code
%autoreload 2

In [2]:
import asyncio
from pathlib import Path
from typing import Literal

from langchain.chat_models import init_chat_model
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.runnables import RunnableConfig
from langgraph.graph import END, START, StateGraph
from langgraph.types import Command


In [3]:
"""Research Agent Subgraph."""

# Prefer package-relative imports (when this code runs as part of the agent package);
# fall back to absolute ``src.agent`` imports after putting the project root on
# sys.path, which is the path taken when the code runs from a notebook.
try:
    from .configuration import Configuration

    # supervisor_tool calls researcher_subgraph, so it must be imported on this
    # branch as well; otherwise the relative-import path ends in a NameError.
    from .researcher_agent import researcher_subgraph
    from .states import ConductResearch, ResearchComplete, StatesKeys, SupervisorState
    from .utils import get_notes_from_tool_calls, is_token_limit_exceeded
except ImportError:
    import rootutils

    rootutils.setup_root(search_from=str(Path.cwd().parent), indicator=[".git", "pyproject.toml"], pythonpath=True)
    from src.agent.configuration import Configuration
    from src.agent.researcher_agent import researcher_subgraph
    from src.agent.states import ConductResearch, ResearchComplete, StatesKeys, SupervisorState
    from src.agent.utils import get_notes_from_tool_calls, is_token_limit_exceeded
    # from src.agent.prompts import PROJECT_RESEARCH_AGENT_PROMPT, SEARCH_INSTRUCTIONS


In [None]:
# Configurable chat model used throughout the agent. No concrete model is fixed
# here: "model", "max_tokens" and "api_key" are declared as configurable fields,
# and the supervisor node supplies "model" and "max_tokens" via with_config().
configurable_model = init_chat_model(
    configurable_fields=("model", "max_tokens", "api_key"),
)
# System prompt handed to each researcher sub-agent spawned by supervisor_tool.
# It is used verbatim (no str.format call), so it must not contain placeholders.
RESEARCH_SYSTEM_PROMPT = """You are a research assistant conducting deep research on the user's input topic. Use the tools and search methods provided to research the user's input topic.
<Task>
Your job is to use tools and search methods to find information that can answer the question that a user asks.
You can use any of the tools provided to you to find resources that can help answer the research question. You can call these tools in series or in parallel, your research is conducted in a tool-calling loop.
</Task>

<Tool Calling Guidelines>
- Make sure you review all of the tools you have available to you, match the tools to the user's request, and select the tool that is most likely to be the best fit.
- In each iteration, select the BEST tool for the job, this may or may not be general websearch.
- When selecting the next tool to call, make sure that you are calling tools with arguments that you have not already tried.
- Tool calling is costly, so be sure to be very intentional about what you look up. Some of the tools may have implicit limitations. As you call tools, feel out what these limitations are, and adjust your tool calls accordingly.
- This could mean that you need to call a different tool, or that you should call "ResearchComplete", e.g. it's okay to recognize that a tool has limitations and cannot do what you need it to.
- Don't mention any tool limitations in your output, but adjust your tool calls accordingly.

</Tool Calling Guidelines>

<Criteria for Finishing Research>
- In addition to tools for research, you will also be given a special "ResearchComplete" tool. This tool is used to indicate that you are done with your research.
- The user will give you a sense of how much effort you should put into the research. This does not translate ~directly~ to the number of tool calls you should make, but it does give you a sense of the depth of the research you should conduct.
- DO NOT call "ResearchComplete" unless you are satisfied with your research.
- One case where it's recommended to call this tool is if you see that your previous tool calls have stopped yielding useful information.
</Criteria for Finishing Research>

<Helpful Tips>
1. If you haven't conducted any searches yet, start with broad searches to get necessary context and background information. Once you have some background, you can start to narrow down your searches to get more specific information.
2. Different topics require different levels of research depth. If the question is broad, your research can be more shallow, and you may not need to iterate and call tools as many times.
3. If the question is detailed, you may need to be more stingy about the depth of your findings, and you may need to iterate and call tools more times to get a fully detailed answer.
</Helpful Tips>

<Critical Reminders>
- You MUST conduct research using web search or a different tool before you are allowed to call "ResearchComplete"! You cannot call "ResearchComplete" without conducting research first!
- Do not repeat or summarize your research findings unless the user explicitly asks you to do so. Your main job is to call tools. You should call tools until you are satisfied with the research findings, and then call "ResearchComplete".
</Critical Reminders>
"""


async def supervisor(state: SupervisorState, config: RunnableConfig) -> Command[Literal["supervisor_tool"]]:
    """Ask the lead research model for its next action and hand off to the tool node.

    Args:
        state: Supervisor state. The message history is read from the
            ``StatesKeys.SUPERVISOR_MSGS`` key (empty list when missing) and
            ``research_iterations`` is read with a default of 0.
        config: Runnable config from which the ``Configuration`` is derived.

    Returns:
        A ``Command`` routing to ``"supervisor_tool"`` whose update carries the
        model response under the supervisor-messages key and
        ``research_iterations`` incremented by one.
    """
    # Keep the RunnableConfig and the parsed Configuration under separate names
    # (same convention as supervisor_tool) instead of rebinding ``config``.
    configurable = Configuration.from_runnable_config(config)
    research_model_config = {
        "model": configurable.research_model,
        "max_tokens": configurable.research_model_max_tokens,
        # "api_key": configurable.research_model_api_key,
        "tags": ["langsmith:nostream"],
    }

    # The supervisor may only delegate research or declare it complete.
    lead_research_tool = [ConductResearch, ResearchComplete]
    research_model = (
        configurable_model.bind_tools(lead_research_tool)
        .with_retry(stop_after_attempt=configurable.max_structured_output_retries)
        .with_config(
            research_model_config,
        )
    )
    supervisor_message = state.get(StatesKeys.SUPERVISOR_MSGS.value, [])
    response = await research_model.ainvoke(supervisor_message)
    return Command(
        goto="supervisor_tool",
        update={
            StatesKeys.SUPERVISOR_MSGS.value: [response],
            "research_iterations": state.get("research_iterations", 0) + 1,
        },
    )


def _finish_research(state: SupervisorState, supervisor_messages: list) -> Command:
    """Build the Command that ends the supervisor loop.

    The update carries the notes that ``get_notes_from_tool_calls`` derives from
    the supervisor messages, plus the research brief already present in ``state``
    (empty string when absent).
    """
    return Command(
        goto=END,
        update={
            "notes": get_notes_from_tool_calls(supervisor_messages),
            "research_brief": state.get("research_brief", ""),
        },
    )


async def supervisor_tool(state: SupervisorState, config: RunnableConfig) -> Command[Literal["supervisor", "__end__"]]:
    """Run the supervisor's latest tool calls, or finish the research loop.

    Args:
        state: Supervisor state. Reads the supervisor message history,
            ``research_iterations`` (default 0) and ``research_brief``.
        config: Runnable config; parsed into ``Configuration`` and also forwarded
            to every researcher subgraph invocation.

    Returns:
        A ``Command`` that either goes back to ``"supervisor"`` with one
        ``ToolMessage`` per ``ConductResearch`` call plus the concatenated raw
        notes, or goes to ``END`` with the collected notes and research brief.
    """
    configurable = Configuration.from_runnable_config(config)
    supervisor_messages = state.get(StatesKeys.SUPERVISOR_MSGS.value, [])
    research_iterations = state.get("research_iterations", 0)

    # The history defaults to [] above, so guard before indexing: an empty
    # history has nothing to execute and would otherwise raise IndexError.
    if not supervisor_messages:
        return _finish_research(state, supervisor_messages)
    most_recent_message = supervisor_messages[-1]

    # Exit criteria (any one is sufficient):
    # 1. the research-iteration guardrail has been reached
    # 2. the supervisor made no tool calls
    # 3. any tool call in the most recent message is "ResearchComplete"
    exceeded_allowed_iterations = research_iterations >= configurable.max_research_iterations
    no_tool_calls = not most_recent_message.tool_calls
    research_complete_tool_call = any(
        tool_call["name"] == "ResearchComplete" for tool_call in most_recent_message.tool_calls
    )
    if exceeded_allowed_iterations or no_tool_calls or research_complete_tool_call:
        return _finish_research(state, supervisor_messages)

    # Otherwise, continue with research.
    try:
        all_conduct_research = [
            tool_call for tool_call in most_recent_message.tool_calls if tool_call["name"] == "ConductResearch"
        ]
        # Limit total concurrent research units/calls; the remainder is answered
        # with an error ToolMessage below instead of being executed.
        conduct_research_calls = all_conduct_research[: configurable.max_concurrent_research_units]
        overflow_conduct_research_calls = all_conduct_research[configurable.max_concurrent_research_units :]

        # One researcher subgraph run per accepted call, all awaited together.
        coros = [
            researcher_subgraph.ainvoke(
                {
                    "research_messages": [
                        SystemMessage(content=RESEARCH_SYSTEM_PROMPT),
                        HumanMessage(content=tool_call["args"]["research_topic"]),
                    ],
                    "research_topic": tool_call["args"]["research_topic"],
                },
                config,
            )
            for tool_call in conduct_research_calls
        ]
        tool_results = await asyncio.gather(*coros)

        # gather() preserves input order, so results pair up with their tool calls.
        tool_messages = [
            ToolMessage(
                content=observation.get(
                    "compressed_research",
                    "Error synthesizing research report: Maximum retries exceeded",
                ),
                name=tool_call["name"],
                tool_call_id=tool_call["id"],
            )
            for observation, tool_call in zip(tool_results, conduct_research_calls, strict=False)
        ]
        # Handle any tool calls made > max_concurrent_research_units
        for overflow_conduct_research_call in overflow_conduct_research_calls:
            tool_messages.append(
                ToolMessage(
                    content=f"Error: Did not run this research as you have already exceeded the maximum number of concurrent research units. Please try again with {configurable.max_concurrent_research_units} or fewer research units.",
                    name="ConductResearch",
                    tool_call_id=overflow_conduct_research_call["id"],
                ),
            )
        raw_notes_concat = "\n".join(["\n".join(observation.get("raw_notes", [])) for observation in tool_results])
        return Command(
            goto="supervisor",
            update={StatesKeys.SUPERVISOR_MSGS.value: tool_messages, "raw_notes": [raw_notes_concat]},
        )
    except Exception as e:
        # Best effort: report the failure and end the loop with whatever notes
        # have been gathered so far rather than propagating the error.
        if is_token_limit_exceeded(e, configurable.research_model):
            print(f"Token limit exceeded while reflecting: {e}")
        else:
            print(f"Other error in reflection phase: {e}")
        return _finish_research(state, supervisor_messages)


# Assemble the supervisor graph. Only the entry edge START -> "supervisor" is
# declared statically; both nodes return Command(goto=...) objects, so the
# supervisor <-> supervisor_tool routing is decided by the nodes at run time.
supervisor_builder = StateGraph(SupervisorState, config_schema=Configuration)
supervisor_builder.add_node("supervisor", supervisor)
supervisor_builder.add_node("supervisor_tool", supervisor_tool)
supervisor_builder.add_edge(START, "supervisor")
# NOTE(review): supervisor_tool already chooses between "supervisor" and END via
# Command(goto=...); this static edge to END looks redundant — confirm before removing.
supervisor_builder.add_edge("supervisor_tool", END)
supervisor_graph = supervisor_builder.compile(name="Supervisor")


In [5]:
# Sample clarification dialogue (assistant questions / user answers) about the
# note-taking project. The opening user message is currently commented out.
conversation = [
    #  HumanMessage(content=PROJECT_IDEA),
    AIMessage(""" 1. **Target Audience and Purpose:**
   - You mentioned this is a personal project for fun and learning, as well as to showcase your skills to potential interviewers. Is there a specific aspect of the project you want to highlight to interviewers (e.g., AI capabilities, UI design, integration skills)?
2. **Image Processing:**
   - For the feature that takes pictures of handwritten notes, do you have any specific requirements or preferences for the image capture process (e.g., camera integration, file upload)?
   - Are there any particular challenges you anticipate with processing equations and block diagrams, and do you have any existing tools or libraries in mind to handle these?
3. **Integration with Notion:**
   - Could you elaborate on how you envision the integration with Notion? For example, do you have a specific structure or organization in mind for the notes within Notion?
   - Are there any specific Notion API functionalities you plan to use or any constraints you are aware of?
4. **LangGraph and UI:**
   - You mentioned using LangGraph's prebuilt UI. Could you provide more details on how you plan to utilize it? Are there any customizations or additional features you want to add to the UI?
5. **Multi-Agent System:**
   - You are considering a multi-agent system. Could you describe how you envision the interaction between the agents, and what specific roles each agent will play?
6. **MVP and Timeline:**
   - For the MVP, you aim to convert images to text and format them properly within two weeks. Are there any specific milestones or deliverables you have in mind for this initial phase?
7. **Future Development:**
   - You mentioned the possibility of developing a simple Android app using Flutter later on. Do you have any specific features or functionalities in mind for the mobile version?
 """),
    HumanMessage(
        content="""
1. nothing specific, but good working app, because i am going to use it regularly
2. for the time being we can go with file upload and when i decide to make android app using flutter then we can integrate images directly from the camera
3. I am thinking to fetch all the pages and sub pages and find pages with name and create a sub page and add notes over there. for example if topic is about optimizer then i will find page with deep learning and create a new sub page and add notes over there.
4. No, first i want to use only langgraph UI, it is really nice and easy to use
5. I am also thinking to use multi agent system one agent for image to text conversion and 2nd agent for text to notion or formatting (markdown)
6. no nothing specific milestone or anything
7. no just simple camera app with little instruction/chat box for Agent if needed
""",
    ),
    AIMessage("""1. **App Usage and Quality:**
   - You want a reliable and well-functioning app since you'll be using it regularly. Are there any specific performance or usability criteria you have in mind to ensure it meets your needs?
2. **Image Capture and Processing:**
   - For now, you prefer to start with a file upload feature for images. When you transition to an Android app using Flutter, do you have any specific requirements for how the camera integration should work?
3. **Notion Integration:**
   - You plan to fetch pages and sub-pages in Notion, find specific pages by name, and create sub-pages to add notes. Could you clarify if you have a specific naming convention or criteria for identifying these pages?
   - How do you envision the notes being structured or formatted once they are added to Notion?
4. **LangGraph UI:**
   - You are satisfied with using LangGraph's UI as it is. Are there any specific features or interactions within the UI that are crucial for your workflow?
5. **Multi-Agent System:**
   - You plan to use one agent for image-to-text conversion and another for text formatting and integration with Notion. Could you describe any specific interactions or data flow between these agents?
6. **Milestones and Timeline:**
   - While you don't have specific milestones, do you have a rough timeline or order of priorities for implementing different features?
7. **Future Android App:**
   - For the future Android app, you mentioned a simple camera app with a chat box for agent instructions. Are there any additional features or user interactions you envision for this app?
Please let me know if there are any other aspects of the project you'd like to discuss or clarify further.
 """),
    HumanMessage(
        # Adjacent string literals are concatenated with nothing in between, so
        # each numbered answer ends with an explicit newline to keep them apart.
        content="1. no, i have not thought about it yet, we can check after first MVP is done\n"
        "2.i will use langgraph UI to upload the images for the time being\n"
        "3. simple chat ui with upload image button and text input box\n"
        "4. No not yet, because i have not use it. i will see once we reach to that point\n"
        "5. first agent will convert image to text and send this text information to 2nd agent and 2nd agent will format it and send it to notion\n"
        "6. no milestone or anything\n"
        "7.no nothing",
    ),
]
# Original project request. The pieces are joined by implicit string
# concatenation, so every piece except the last ends with a space to keep the
# numbered items from fusing together.
PROJECT_IDEA = (
    "create plan to develop Agentic AI note taking app using langgraph for my personal use (personal project for fun and learning) and i also want to show off my skills to my potential interviewer to get hired. it should do following "
    "1. take pictures of hand-written notes "
    "2. it will automatically format the hand-written notes (it might contains equations and block diagrams) "
    "3. find proper section (if section found then create sub page or create a new page) in my notion "
    "4. add this notes with proper format "
    "5. i want to use LangGraph's pre-build UI for interaction from PC "
    "6. for MVP (which can convert image to text and format it properly) in 2 weeks "
    "7. I am also thinking to use multi agent system one agent for image to text conversion and 2nd agent for text to notion or formatting (markdown)"
)


In [None]:
from src.agent.utils import get_today_str

# Hand-written research brief for the demo run. It is sent to the supervisor as
# the human message and also stored under the "research_brief" state key when
# the graph is streamed.
research_brief = """
How can I develop an agentic AI-powered note-taking application leveraging LangGraph for my personal use, with features including image-to-text conversion, formatting, and integration with Notion, while showcasing my skills? I am seeking to:
1. Employ a robust Optical Character Recognition (OCR) solution for converting handwritten notes, potentially containing equations and block diagrams, into digital text
   - Specific OCR tool preferences are currently open-ended; the research should explore both existing solutions and state-of-the-art alternatives.
2. Customize LangGraph's prebuilt UI for seamless interaction from a PC without any specific customization specifications, leaving room for creative design choices.
3. Develop a seamless interface with Notion, utilizing either existing APIs or custom integration methods.
   - The research should identify the best method for ensuring efficient data management and retrieval in Notion.
4. Implement a multi-agent system where one agent focuses on image-to-text conversion and another takes charge of text integration and formatting using markdown.
   - The interaction dynamics between agents require exploration to ensure efficiency and smooth operation.
5. Deliver an initial MVP capable of converting images to well-formatted text within 2 weeks.
6. Maintain an open architectural design, allowing flexibility in programming languages and frameworks, as no constraints have been specified.
7. Enhance the application to demonstrate my capabilities to potential employers, focusing on cutting-edge approach and effective solutions.
"""
# System-prompt template for the supervisor (lead researcher) model.
# Placeholders filled with str.format(): {date} and {max_concurrent_research_units}
# (the latter appears twice).
LEAD_RESEARCHER_PROMPT = """You are a research supervisor. Your job is to conduct research by calling the "ConductResearch" tool. For context, today's date is {date}.

<Task>
Your focus is to call the "ConductResearch" tool to conduct research against the overall research question passed in by the user.
When you are completely satisfied with the research findings returned from the tool calls, then you should call the "ResearchComplete" tool to indicate that you are done with your research.
</Task>

<Instructions>
1. When you start, you will be provided a research question from a user.
2. You should immediately call the "ConductResearch" tool to conduct research for the research question. You can call the tool up to {max_concurrent_research_units} times in a single iteration.
3. Each ConductResearch tool call will spawn a research agent dedicated to the specific topic that you pass in. You will get back a comprehensive report of research findings on that topic.
4. Reason carefully about whether all of the returned research findings together are comprehensive enough for a detailed report to answer the overall research question.
5. If there are important and specific gaps in the research findings, you can then call the "ConductResearch" tool again to conduct research on the specific gap.
6. Iteratively call the "ConductResearch" tool until you are satisfied with the research findings, then call the "ResearchComplete" tool to indicate that you are done with your research.
7. Don't call "ConductResearch" to synthesize any information you've gathered. Another agent will do that after you call "ResearchComplete". You should only call "ConductResearch" to research net new topics and get net new information.
</Instructions>


<Important Guidelines>
**The goal of conducting research is to get information, not to write the final report. Don't worry about formatting!**
- A separate agent will be used to write the final report.
- Do not grade or worry about the format of the information that comes back from the "ConductResearch" tool. It's expected to be raw and messy. A separate agent will be used to synthesize the information once you have completed your research.
- Only worry about if you have enough information, not about the format of the information that comes back from the "ConductResearch" tool.
- Do not call the "ConductResearch" tool to synthesize information you have already gathered.

**Parallel research saves the user time, but reason carefully about when you should use it**
- Calling the "ConductResearch" tool multiple times in parallel can save the user time.
- You should only call the "ConductResearch" tool multiple times in parallel if the different topics that you are researching can be researched independently in parallel with respect to the user's overall question.
- This can be particularly helpful if the user is asking for a comparison of X and Y, if the user is asking for a list of entities that each can be researched independently, or if the user is asking for multiple perspectives on a topic.
- Each research agent needs to be provided all of the context that is necessary to focus on a sub-topic.
- Do not call the "ConductResearch" tool more than {max_concurrent_research_units} times at once. This limit is enforced by the user. It is perfectly fine, and expected, that you return less than this number of tool calls.
- If you are not confident in how you can parallelize research, you can call the "ConductResearch" tool a single time on a more general topic in order to gather more background information, so you have more context later to reason about if it's necessary to parallelize research.
- Each parallel "ConductResearch" linearly scales cost. The benefit of parallel research is that it can save the user time, but carefully think about whether the additional cost is worth the benefit.
- For example, if you could search three clear topics in parallel, or break them each into two more subtopics to do six total in parallel, you should think about whether splitting into smaller subtopics is worth the cost. The researchers are quite comprehensive, so it's possible that you could get the same information with less cost by only calling the "ConductResearch" tool three times in this case.
- Also consider where there might be dependencies that cannot be parallelized. For example, if asked for details about some entities, you first need to find the entities before you can research them in detail in parallel.

**Different questions require different levels of research depth**
- If a user is asking a broader question, your research can be more shallow, and you may not need to iterate and call the "ConductResearch" tool as many times.
- If a user uses terms like "detailed" or "comprehensive" in their question, you may need to be more stingy about the depth of your findings, and you may need to iterate and call the "ConductResearch" tool more times to get a fully detailed answer.

**Research is expensive**
- Research is expensive, both from a monetary and time perspective.
- As you look at your history of tool calls, as you have conducted more and more research, the theoretical "threshold" for additional research should be higher.
- In other words, as the amount of research conducted grows, be more stingy about making even more follow-up "ConductResearch" tool calls, and more willing to call "ResearchComplete" if you are satisfied with the research findings.
- You should only ask for topics that are ABSOLUTELY necessary to research for a comprehensive answer.
- Before you ask about a topic, be sure that it is substantially different from any topics that you have already researched. It needs to be substantially different, not just rephrased or slightly different. The researchers are quite comprehensive, so they will not miss anything.
- When you call the "ConductResearch" tool, make sure to explicitly state how much effort you want the sub-agent to put into the research. For background research, you may want it to be a shallow or small effort. For critical topics, you may want it to be a deep or large effort. Make the effort level explicit to the researcher.
</Important Guidelines>


<Crucial Reminders>
- If you are satisfied with the current state of research, call the "ResearchComplete" tool to indicate that you are done with your research.
- Calling ConductResearch in parallel will save the user time, but you should only do this if you are confident that the different topics that you are researching are independent and can be researched in parallel with respect to the user's overall question.
- You should ONLY ask for topics that you need to help you answer the overall research question. Reason about this carefully.
- When calling the "ConductResearch" tool, provide all context that is necessary for the researcher to understand what you want them to research. The independent researchers will not get any context besides what you write to the tool each time, so make sure to provide all context to it.
- This means that you should NOT reference prior tool call results or the research brief when calling the "ConductResearch" tool. Each input to the "ConductResearch" tool should be a standalone, fully explained topic.
- Do NOT use acronyms or abbreviations in your research questions, be very clear and specific.
</Crucial Reminders>

With all of the above in mind, call the ConductResearch tool to conduct research on specific topics, OR call the "ResearchComplete" tool to indicate that you are done with your research.
"""

# Render the supervisor system prompt: {date} comes from get_today_str().
# NOTE(review): max_concurrent_research_units is hard-coded to 1 here, while
# supervisor_tool enforces configurable.max_concurrent_research_units — confirm
# that the limit stated in the prompt matches the configured one.
sys_msg = SystemMessage(content=LEAD_RESEARCHER_PROMPT.format(date=get_today_str(), max_concurrent_research_units=1))

In [7]:
async for event in supervisor_graph.astream(
    {
        StatesKeys.SUPERVISOR_MSGS.value: [sys_msg, HumanMessage(content=research_brief)],
        "research_brief": research_brief,
        # "research_topic": PROJECT_IDEA,
    },
    stream_mode="update",
):
    print(event)


CancelledError: 