In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

documents = []
dc_name = ["soybean_konw.pdf", "soybean2.pdf"]
for tmp_name in dc_name:
    # print(len(PyPDFLoader(tmp_name).load()))
    documents += PyPDFLoader(tmp_name).load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
texts = text_splitter.split_documents(documents[:])
for idx, text in enumerate(texts):
    text.metadata["id"] = idx
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

top_k = 10

model_name = '/mnt/workspace/.cache/modelscope/hub/maple77/zpoint_large_embedding_zh'
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
vectorstore = Chroma(persist_directory="soybean_db2", embedding_function=hf)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": top_k}
)

KeyboardInterrupt: 

In [2]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import tool

@tool
def exponentiate(x: float, y: float) -> float:
    """Raise 'x' to the 'y'."""
    return x**y

In [3]:
tools = [exponentiate]

In [4]:
from langchain import hub
from langchain_openai import ChatOpenAI

from langgraph.prebuilt import create_react_agent

# Get the prompt to use - you can modify this!
prompt = hub.pull("wfh/react-agent-executor")
prompt.pretty_print()

# Choose the LLM that will drive the agent
llm = ChatOpenAI(
    temperature=0.6,
    model="glm-4-0520",
    openai_api_key="661a7aa0aeb8ca129eb4647461123230.bl9w581QKpnMfBvs",
    openai_api_base="https://open.bigmodel.cn/api/paas/v4/"
)
agent_executor = create_react_agent(llm, tools, messages_modifier=prompt)


You are a helpful assistant.


[33;1m[1;3m{{messages}}[0m


In [5]:
agent_executor.invoke({"messages": [("user", "what is 3 ** 12?")]})

{'messages': [HumanMessage(content='what is 3 ** 12?', id='4d2ec882-4b70-4830-ac17-2d2f44976848'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8827320252104390940', 'function': {'arguments': '{"x": 3, "y": 12}', 'name': 'exponentiate'}, 'type': 'function', 'index': 0}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 136, 'total_tokens': 153}, 'model_name': 'glm-4-0520', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-e2fc84c7-a138-4932-9329-07b0df884633-0', tool_calls=[{'name': 'exponentiate', 'args': {'x': 3, 'y': 12}, 'id': 'call_8827320252104390940'}], usage_metadata={'input_tokens': 136, 'output_tokens': 17, 'total_tokens': 153}),
  ToolMessage(content='531441.0', name='exponentiate', id='88d8d3d8-489a-4ff5-8cb6-0446e9b2eff7', tool_call_id='call_8827320252104390940'),
  AIMessage(content='3 ** 12 is 531441.0.', response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 

In [6]:
import operator
from typing import Annotated, List, Tuple, TypedDict


class PlanExecute(TypedDict):
    input: str
    plan: List[str]
    past_steps: Annotated[List[List], operator.add]
    response: str

In [7]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

class Plan(BaseModel):
    """Plan to follow in future"""

    steps1: str = Field(
        ...,
        description="Step 1: The different steps to be followed should be arranged in order. If you feel that you no longer need to customize the steps, return to no."
    )
    steps2: str = Field(
        ...,
        description="Step 2: The different steps to be followed should be arranged in order. If you feel that you no longer need to customize the steps, return to no."
    )
    steps3: str = Field(
        ...,
        description="Step 3: The different steps to be followed should be arranged in order. If you feel that you no longer need to customize the steps, return to no."
    )
parser = PydanticOutputParser(pydantic_object=Plan)

In [8]:
from langchain_core.prompts import ChatPromptTemplate


planner_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """For the given objective, come up with a simple step by step plan. \
This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. \
The result of the final step should be the final answer. Make sure that each step has all the information needed - do not skip steps.
Answer the user query. Wrap the output in `json` tags\n{format_instructions}""",
        ),
        ("placeholder", "{messages}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

llm = ChatOpenAI(
    temperature=0.6,
    model="glm-4-0520",
    openai_api_key="661a7aa0aeb8ca129eb4647461123230.bl9w581QKpnMfBvs",
    openai_api_base="https://open.bigmodel.cn/api/paas/v4/"
)

planner = planner_prompt | llm | parser

In [10]:
planner.invoke(
    {
        "messages": [
            ("user", "Task decomposition and Load the iris dataset and write the code for sklearn machine learning regression?")
        ]
    }
)

Plan(steps1="Import the necessary libraries, including pandas for data manipulation, sklearn's datasets to load the iris data, train_test_split to split the dataset, and a regression model like LinearRegression.", steps2="Load the iris dataset using sklearn's datasets module.", steps3='Separate the features (X) from the target variable (y), and then split them into training and test sets using the train_test_split function.')

In [11]:
from typing import Union


class Response(BaseModel):
    """Response to user."""

    response: str


class Act(BaseModel):
    """Action to perform."""

    action: Union[Response, Plan] = Field(
        description="Action to perform. If you want to respond to user, use Response. "
        "If you need to further use tools to get the answer, use Plan."
    )
parser2 = PydanticOutputParser(pydantic_object=Act)

In [12]:
replanner_prompt = ChatPromptTemplate.from_template(
    """For the given objective, come up with a simple step by step plan. \
    This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. \
    The result of the final step should be the final answer. Make sure that each step has all the information needed - do not skip steps.

    Your objective was this:
    {input}

    Your original plan was this:
    {plan}

    You have currently done the follow steps:
    {past_steps}

    Update your plan accordingly. If no more steps are needed and you can return to the user, then respond with that. 
    Otherwise, fill out the plan. Only add steps to the plan that still NEED to be done. Do not return previously done steps as part of the plan.
    Complete the plan quickly and succinctly. Don't make too many plans.
    Answer the user query. Wrap the output in `json` tags\n{format_instructions}"""
).partial(format_instructions=parser2.get_format_instructions())


# replanner = replanner_prompt | ChatOpenAI(
#     model="gpt-4o", temperature=0
# ).with_structured_output(Act)


llm = ChatOpenAI(
    temperature=0.6,
    model="glm-4-0520",
    openai_api_key="661a7aa0aeb8ca129eb4647461123230.bl9w581QKpnMfBvs",
    openai_api_base="https://open.bigmodel.cn/api/paas/v4/"
)

replanner = replanner_prompt | llm | parser2

In [13]:
from typing import Literal


async def execute_step(state: PlanExecute):
    plan = state["plan"]
    plan_str = "\n".join(f"{i+1}. {step}" for i, step in enumerate(plan))
    # print(plan_str)
    task = plan[0]
    task_formatted = f"""For the following plan:
{plan_str}\n\nYou are tasked with executing step {1}, {task}."""
    agent_response = await agent_executor.ainvoke(
        {"messages": [("user", task_formatted)]}
    )
    return {
        "past_steps": [task, agent_response["messages"][-1].content],
    }


async def plan_step(state: PlanExecute):
    plan = await planner.ainvoke({"messages": [("user", state["input"])]})
    
    return {"plan": [plan.steps1, plan.steps2, plan.steps3]}


async def replan_step(state: PlanExecute):
    print('\n现在进行重新规划\n')
    # print(state)
    
    output = await replanner.ainvoke(state)
    print('\n现在输出重新规划结果\n')
    print(output)
    if isinstance(output.action, Response):
        return {"response": output.action.response}
    else:
        z = [output.action.steps1, output.action.steps2, output.action.steps3]
        return {"plan": [j for j in z if 'no' not in j.lower()]}


def should_end(state: PlanExecute) -> Literal["agent", "__end__"]:
    if "response" in state and state["response"]:
        return "__end__"
    else:
        return "agent"

In [14]:
from langgraph.graph import StateGraph, START

workflow = StateGraph(PlanExecute)

# Add the plan node
workflow.add_node("planner", plan_step)

# Add the execution step
workflow.add_node("agent", execute_step)

# Add a replan node
workflow.add_node("replan", replan_step)

workflow.add_edge(START, "planner")

# From plan we go to agent
workflow.add_edge("planner", "agent")

# From agent, we replan
workflow.add_edge("agent", "replan")

workflow.add_conditional_edges(
    "replan",
    # Next, we pass in the function that will determine which node is called next.
    should_end,
)

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile()

In [None]:
from IPython.display import Image, display

display(Image(app.get_graph(xray=False).draw_mermaid_png()))

In [16]:
config = {"recursion_limit": 15}
inputs = {"input": "Task decomposition and Load the iris dataset and write the code for sklearn machine learning regression?"}
async for event in app.astream(inputs, config=config):
    for k, v in event.items():
        if k != "__end__":
            print('————————————————————————————————————————————————当前的K')
            print(k)
            print('————————————————————————————————————————————————当前的V')
            print(v)

————————————————————————————————————————————————当前的K
planner
————————————————————————————————————————————————当前的V
{'plan': ['Define the individual tasks to answer the user query.', 'Arrange the tasks in a logical order.', 'Format the arranged tasks as a JSON object that fits the given schema.']}
————————————————————————————————————————————————当前的K
agent
————————————————————————————————————————————————当前的V
{'past_steps': ['Define the individual tasks to answer the user query.', "To execute step 1, we first need to understand the user query. However, you haven't provided the specific user query we need to answer. Could you please provide the user query that we need to break down into individual tasks?"]}

现在进行重新规划


现在输出重新规划结果

action=Plan(steps1='Import the necessary libraries.', steps2='Load the iris dataset using sklearn.', steps3='Create a machine learning regression model using sklearn.')
————————————————————————————————————————————————当前的K
replan
———————————————————————————————————

OutputParserException: Failed to parse Act from completion {"action": "Plan", "steps1": "Split the dataset into training and testing sets.", "steps2": "Create a Logistic Regression model and train it on the training set.", "steps3": "Evaluate the model on the testing set and output the performance metrics."}. Got: 2 validation errors for Act
action
  value is not a valid dict (type=type_error.dict)
action
  value is not a valid dict (type=type_error.dict)

{'past_steps': ['Split the dataset into training and testing sets.',
  "To execute step 1, you would typically use a programming language like Python and a library such as scikit-learn, which provides a convenient function to split datasets. However, since I can't execute code, I'll guide you through the process conceptually.\n\nHere's how you would generally split a dataset into training and testing sets using scikit-learn:\n\n```python\nfrom sklearn.model_selection import train_test_split\n\n# Assuming 'data' is your dataset and 'labels' are the corresponding labels\n\n# Define the size of the testing set (commonly 20% or 30% of the whole dataset)\ntest_size = 0.2\n\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=test_size, random_state=42)\n\n# 'X_train' and 'y_train' are the training data and labels\n# 'X_test' and 'y_test' are the testing data and labels\n```\n\nThe `train_test_split` function takes the 