## Imports

In [111]:
import re

from langchain.chains import LLMMathChain
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_core.tools import Tool
from langchain_experimental.plan_and_execute import (
    PlanAndExecute,
    load_agent_executor,
    load_chat_planner,
)
from langchain_experimental.plan_and_execute.schema import Plan, Step
from langchain_openai import ChatOpenAI, OpenAI

In [10]:
from langchain_community.utilities import SerpAPIWrapper

## Tools

In [11]:
# Backing services: a web-search wrapper and an LLM-powered math chain.
search = SerpAPIWrapper()
llm = OpenAI(temperature=0)
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)

# Each tool pairs a callable with the description the agent reads when
# deciding which tool to invoke.
search_tool = Tool(
    name="Search",
    func=search.run,
    description="useful for when you need to answer questions about current events",
)
calculator_tool = Tool(
    name="Calculator",
    func=llm_math_chain.run,
    description="useful for when you need to answer questions about math",
)
tools = [search_tool, calculator_tool]

In [47]:
from zxftools_dev.llms import get_llm

In [53]:
from llama_index.core.llms import ChatMessage, MessageRole

In [54]:
# Rebind `llm` to the model returned by the project helper `get_llm()`.
# From this cell on it replaces the `OpenAI(temperature=0)` instance bound in
# the Tools cell (`llm_math_chain` still holds the earlier instance).
llm = get_llm()

In [60]:

# Planner instructions, one list entry per sentence; the sentences are joined
# with single spaces to form the system prompt.
SYSTEM_PROMPT = " ".join(
    [
        "Let's first understand the problem and devise a plan to solve the problem.",
        "Please output the plan starting with the header 'Plan:' and then followed by a numbered list of steps.",
        "Please make the plan the minimum number of steps required to accurately complete the task.",
        "If the task is a question, the final step should almost always be 'Given the above steps taken, please respond to the users original question'.",
        "At the end of your plan, say '<END_OF_PLAN>'",
    ]
)

In [61]:
# Preview the system message built from SYSTEM_PROMPT; as the cell's last
# expression, its repr is displayed below.
ChatMessage(role=MessageRole.SYSTEM,content=SYSTEM_PROMPT)

ChatMessage(role=<MessageRole.SYSTEM: 'system'>, content="Let's first understand the problem and devise a plan to solve the problem. Please output the plan starting with the header 'Plan:' and then followed by a numbered list of steps. Please make the plan the minimum number of steps required to accurately complete the task. If the task is a question, the final step should almost always be 'Given the above steps taken, please respond to the users original question'. At the end of your plan, say '<END_OF_PLAN>'", additional_kwargs={})

In [65]:
# Ask the model for a plan: the planner instructions go in as the system
# message and the user's question as the user message.
planning_messages = [
    ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PROMPT),
    ChatMessage(role=MessageRole.USER, content='英国现任首相是谁?他们现在的年龄是多少?'),
]
aa = llm.chat(messages=planning_messages)

In [None]:
## plan

In [71]:
# Show the raw text of the model's reply (the generated plan).
print(aa.message.content)

Plan:
1. 找到英国现任首相的名字。
2. 查找他们的出生日期。
3. 计算他们现在的年龄。
4. 输出首相的名字和年龄。
5. 回答用户的问题。
<END_OF_PLAN>


In [77]:
def parse(text: str):
    """Turn a planner reply into a ``Plan`` of ``Step`` objects.

    The reply is expected to contain a numbered list ("1. ...", "2. ...").
    Anything before the first numbered item (for example a "Plan:" header) is
    discarded, and so is everything from the "<END_OF_PLAN>" marker onwards,
    so the marker no longer ends up inside the last step.

    Args:
        text: Raw text returned by the planner model.

    Returns:
        ``Plan(steps=[...])`` with one ``Step(value=...)`` per numbered item,
        in order; the step list is empty when no numbered item is found.
    """
    # Keep only the part of the reply that precedes the end-of-plan marker.
    plan_body = text.split("<END_OF_PLAN>", 1)[0]
    # Raw string avoids invalid-escape warnings; `(?:^|\n)` also accepts a
    # list that starts on the very first line of the reply.
    step_texts = re.split(r"(?:^|\n)\s*\d+\. ", plan_body)[1:]
    return Plan(steps=[Step(value=step.strip()) for step in step_texts])


In [79]:
# Keep the reply text under a short name for the splitting experiments below.
text = aa.message.content

In [83]:
# Split the reply on its numbered-list markers and drop the leading chunk
# (everything before the first "1. "). The pattern is a raw string so that
# `\s`, `\d` and `\.` reach the regex engine without invalid-escape warnings.
plans = re.split(r"\n\s*\d+\. ", text)[1:]

In [84]:
# Display the list of step strings extracted from the reply.
plans

['找到英国现任首相的名字。',
 '查找他们的出生日期。',
 '计算他们现在的年龄。',
 '输出首相的名字和年龄。',
 '回答用户的问题。\n<END_OF_PLAN>']

In [None]:
# Values for the three placeholders of the per-step prompt template.
# NOTE(review): the original cell left "current_step" empty (a syntax error);
# using the second plan step with the first as "previous" is an assumption.
inputs = {
    "previous_steps": plans[0],
    "current_step": plans[1],
    # No intermediate agent work exists yet for this step.
    "agent_scratchpad": "",
}

In [96]:
from zxftools_dev.rag import ReactAgentMaker

In [97]:
# Instantiate the project's agent factory with its default arguments.
maker = ReactAgentMaker()

In [98]:
# Build the agent used to execute individual plan steps.
agent = maker.create_agent()

In [99]:
# Per-step prompt with three `str.format` placeholders: `previous_steps`,
# `current_step` and `agent_scratchpad`, separated by blank lines.
HUMAN_MESSAGE_TEMPLATE = (
    "Previous steps: {previous_steps}\n"
    "\n"
    "Current objective: {current_step}\n"
    "\n"
    "{agent_scratchpad}"
)


In [None]:
# Fill every placeholder of the template before sending it: calling
# `.format()` with no arguments raises KeyError on the first placeholder.
# NOTE(review): which plan entries count as "previous" and "current" is an
# assumption; adjust the indices to the step being executed.
agent.chat(
    HUMAN_MESSAGE_TEMPLATE.format(
        previous_steps=plans[0],
        current_step=plans[1],
        agent_scratchpad="",
    )
)

In [103]:
# Plan-and-solve style prompt: the answer is seeded with an instruction to
# first devise a plan and then carry it out step by step.
result = llm.complete('''
Q: in a dance class of 20 students, 20% enrolled in contemporary dance, 25% of the remaining enrolled in jazz dance, 
    and the rest enrolled in hip-hop dance. what percentage of the entire students enrolled in hip-hop dance?
    
A: Let's first understand the problem and devise a plan to solve the problem. Then,let's carry out the plan and solve the problem step by step.


''')

In [106]:
# Show the completion produced for the plan-and-solve prompt.
print(result.text)

Understanding the problem:
- There are 20 students in the dance class.
- 20% of the students enrolled in contemporary dance.
- The remaining students did not enroll in contemporary dance.
- 25% of the remaining students enrolled in jazz dance.
- The rest of the students enrolled in hip-hop dance.
- We need to find the percentage of students who enrolled in hip-hop dance.

Devising a plan:
1. Calculate the number of students who enrolled in contemporary dance.
2. Calculate the number of students who did not enroll in contemporary dance.
3. Calculate the number of students who enrolled in jazz dance.
4. Calculate the number of students who enrolled in hip-hop dance.
5. Calculate the percentage of students who enrolled in hip-hop dance.

Carrying out the plan:
1. Number of students who enrolled in contemporary dance = 20% of 20 = 4.
2. Number of students who did not enroll in contemporary dance = 20 - 4 = 16.
3. Number of students who enrolled in jazz dance = 25% of 16 = 4.
4. Number of s

In [107]:
# Same question as the previous prompt, but seeded with the shorter
# "Let's think step by step." cue for comparison.
result2 = llm.complete('''
Q: in a dance class of 20 students, 20% enrolled in contemporary dance, 25% of the remaining enrolled in jazz dance, 
    and the rest enrolled in hip-hop dance. what percentage of the entire students enrolled in hip-hop dance?
    
A: Let's think step by step.

''')

In [110]:
# Show the completion produced for the "think step by step" prompt.
print(result2.text)

Step 1: Find the number of students who enrolled in contemporary dance.
20% of 20 students = 0.2 x 20 = 4 students

Step 2: Find the number of students who did not enroll in contemporary dance.
20 - 4 = 16 students

Step 3: Find the number of students who enrolled in jazz dance.
25% of 16 students = 0.25 x 16 = 4 students

Step 4: Find the number of students who enrolled in hip-hop dance.
16 - 4 = 12 students

Step 5: Find the percentage of the entire students who enrolled in hip-hop dance.
12/20 x 100% = 60%

Therefore, 60% of the entire students enrolled in hip-hop dance.


# Multi-Agent Simulated Environment: Petting Zoo

In this example, we show how to define multi-agent simulations with simulated environments. Like [our single-agent example with Gymnasium](https://python.langchain.com/en/latest/use_cases/agent_simulations/gymnasium.html), we create an agent-environment loop with an externally defined environment. The main difference is that we now implement this kind of interaction loop with multiple agents instead. We will use the [Petting Zoo](https://pettingzoo.farama.org/) library, which is the multi-agent counterpart to [Gymnasium](https://gymnasium.farama.org/).

## Install `pettingzoo` and other dependencies

In [1]:
!pip install pettingzoo pygame rlcard

## Import modules

In [2]:
import collections
import inspect

import tenacity
from langchain.output_parsers import RegexParser
from langchain.schema import (
    HumanMessage,
    SystemMessage,
)
from langchain_openai import ChatOpenAI

## `GymnasiumAgent`
Here we reproduce the same `GymnasiumAgent` defined in [our Gymnasium example](https://python.langchain.com/en/latest/use_cases/agent_simulations/gymnasium.html). If it does not produce a valid action after multiple retries, it simply takes a random action.

In [3]:
class GymnasiumAgent:
    """Chat-model-driven agent for an environment with a Gymnasium-like API.

    The agent keeps a message history (environment docs, instructions,
    observations and the model's replies), asks the model for an action after
    each observation, and falls back to a random action when the model's
    reply cannot be parsed into an integer.
    """

    @classmethod
    def get_docs(cls, env):
        """Return the docstring of the unwrapped environment object."""
        return env.unwrapped.__doc__

    def __init__(self, model, env):
        """Store the model and environment and prepare the prompt pieces.

        Args:
            model: Chat model exposing ``invoke(messages)``.
            env: Environment exposing ``unwrapped`` and ``action_space``.
        """
        self.model = model
        self.env = env
        # An environment without a docstring yields None; use an empty
        # description so the system message always receives a string.
        self.docs = self.get_docs(env) or ""

        self.instructions = """
Your goal is to maximize your return, i.e. the sum of the rewards you receive.
I will give you an observation, reward, terminiation flag, truncation flag, and the return so far, formatted as:

Observation: <observation>
Reward: <reward>
Termination: <termination>
Truncation: <truncation>
Return: <sum_of_rewards>

You will respond with an action, formatted as:

Action: <action>

where you replace <action> with your actual action.
Do nothing else but return the action.
"""
        self.action_parser = RegexParser(
            regex=r"Action: (.*)", output_keys=["action"], default_output_key="action"
        )

        self.message_history = []
        self.ret = 0

    def random_action(self):
        """Sample an action from the environment's action space."""
        return self.env.action_space.sample()

    def reset(self):
        """Start a new episode: fresh message history and a zeroed return."""
        self.message_history = [
            SystemMessage(content=self.docs),
            SystemMessage(content=self.instructions),
        ]
        # Without this the return reported to the model would keep
        # accumulating across episodes.
        self.ret = 0

    def observe(self, obs, rew=0, term=False, trunc=False, info=None):
        """Record an environment transition and return the message sent to the model.

        Args:
            obs: Observation to report.
            rew: Reward received; added to the running return.
            term: Termination flag.
            trunc: Truncation flag.
            info: Accepted for signature compatibility; not used.

        Returns:
            The formatted observation text that was appended to the history.
        """
        self.ret += rew

        obs_message = f"""
Observation: {obs}
Reward: {rew}
Termination: {term}
Truncation: {trunc}
Return: {self.ret}
        """
        self.message_history.append(HumanMessage(content=obs_message))
        return obs_message

    def _act(self):
        """Query the model once and parse its reply into an integer action.

        Raises:
            ValueError: When the parsed action text is not a valid integer
                (``act`` retries on this exception type).
        """
        act_message = self.model.invoke(self.message_history)
        # The reply is kept in the history even if parsing fails below.
        self.message_history.append(act_message)
        action = int(self.action_parser.parse(act_message.content)["action"])
        return action

    def act(self):
        """Return the model's action, or a random one after two failed attempts."""
        try:
            for attempt in tenacity.Retrying(
                stop=tenacity.stop_after_attempt(2),
                wait=tenacity.wait_none(),  # No waiting time between retries
                retry=tenacity.retry_if_exception_type(ValueError),
                before_sleep=lambda retry_state: print(
                    f"ValueError occurred: {retry_state.outcome.exception()}, retrying..."
                ),
            ):
                with attempt:
                    action = self._act()
        except tenacity.RetryError:
            # Every attempt raised ValueError: fall back to a random action.
            action = self.random_action()
        return action

## Main loop

In [4]:
def main(agents, env):
    """Run one episode, letting each agent act when the environment selects it.

    Args:
        agents: Mapping from agent name to an object exposing ``reset()``,
            ``observe(obs, rew, term, trunc, info)`` and ``act()``.
        env: Environment exposing ``reset()``, ``agent_iter()``, ``last()``,
            ``step(action)`` and ``close()``.

    The environment is closed even when an agent or the environment raises,
    so a failed run does not leave it open.
    """
    try:
        env.reset()

        for agent in agents.values():
            agent.reset()

        for agent_name in env.agent_iter():
            observation, reward, termination, truncation, info = env.last()
            current_agent = agents[agent_name]
            obs_message = current_agent.observe(
                observation, reward, termination, truncation, info
            )
            print(obs_message)
            # An agent whose episode has ended steps with `None`.
            action = None if termination or truncation else current_agent.act()
            print(f"Action: {action}")
            env.step(action)
    finally:
        env.close()

## `PettingZooAgent`

The `PettingZooAgent` extends the `GymnasiumAgent` to the multi-agent setting. The main differences are:
- `PettingZooAgent` takes in a `name` argument to identify it among multiple agents
- the function `get_docs` is implemented differently because the `PettingZoo` repo is structured differently from the `Gymnasium` repo

In [5]:
class PettingZooAgent(GymnasiumAgent):
    """`GymnasiumAgent` variant that is identified by name among several agents."""

    @classmethod
    def get_docs(cls, env):
        """Return the docstring of the module that defines the unwrapped environment."""
        env_module = inspect.getmodule(env.unwrapped)
        return env_module.__doc__

    def __init__(self, name, model, env):
        """Initialise the base agent, then remember this agent's name."""
        super().__init__(model, env)
        self.name = name

    def random_action(self):
        """Sample an action from this agent's own action space."""
        return self.env.action_space(self.name).sample()

## Rock, Paper, Scissors
We can now run a simulation of a multi-agent rock, paper, scissors game using the `PettingZooAgent`.

In [6]:
from pettingzoo.classic import rps_v2

# Rock-paper-scissors limited to three cycles, with one LLM-backed agent
# (each with its own chat model) per player name.
env = rps_v2.env(max_cycles=3, render_mode="human")
agents = dict(
    (player, PettingZooAgent(name=player, model=ChatOpenAI(temperature=1), env=env))
    for player in env.possible_agents
)
main(agents, env)


Observation: 3
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1

Observation: 3
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1

Observation: 1
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 2

Observation: 1
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1

Observation: 1
Reward: 1
Termination: False
Truncation: False
Return: 1
        
Action: 0

Observation: 2
Reward: -1
Termination: False
Truncation: False
Return: -1
        
Action: 0

Observation: 0
Reward: 0
Termination: False
Truncation: True
Return: 1
        
Action: None

Observation: 0
Reward: 0
Termination: False
Truncation: True
Return: -1
        
Action: None


## `ActionMaskAgent`

Some `PettingZoo` environments provide an `action_mask` to tell the agent which actions are valid. The `ActionMaskAgent` subclasses `PettingZooAgent` to use information from the `action_mask` to select actions.

In [7]:
class ActionMaskAgent(PettingZooAgent):
    """`PettingZooAgent` that uses the observation's `action_mask` to pick valid actions."""

    def __init__(self, name, model, env):
        """Initialise the base agent and a one-slot buffer for the latest observation."""
        super().__init__(name, model, env)
        self.obs_buffer = collections.deque(maxlen=1)

    def random_action(self):
        """Sample a random action, restricted by the latest `action_mask` when one is buffered."""
        # With nothing observed yet there is no mask to apply; sample unmasked
        # instead of failing with IndexError on the empty buffer.
        mask = self.obs_buffer[-1]["action_mask"] if self.obs_buffer else None
        action = self.env.action_space(self.name).sample(mask)
        return action

    def reset(self):
        """Reset the inherited episode state and forget the previous observation."""
        # Delegate instead of duplicating the parent's body so both stay in sync.
        super().reset()
        self.obs_buffer.clear()

    def observe(self, obs, rew=0, term=False, trunc=False, info=None):
        """Buffer the observation (for its mask) and record it via the base class."""
        self.obs_buffer.append(obs)
        return super().observe(obs, rew, term, trunc, info)

    def _act(self):
        """Remind the model to respect the action mask, then query it for an action."""
        valid_action_instruction = "Generate a valid action given by the indices of the `action_mask` that are not 0, according to the action formatting rules."
        self.message_history.append(HumanMessage(content=valid_action_instruction))
        return super()._act()

## Tic-Tac-Toe
Here is an example of a Tic-Tac-Toe game that uses the `ActionMaskAgent`.

In [8]:
from pettingzoo.classic import tictactoe_v3

# Tic-tac-toe with one mask-aware, LLM-backed agent (each with its own chat
# model) per player name.
env = tictactoe_v3.env(render_mode="human")
agents = dict(
    (player, ActionMaskAgent(name=player, model=ChatOpenAI(temperature=0.2), env=env))
    for player in env.possible_agents
)
main(agents, env)


Observation: {'observation': array([[[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]]], dtype=int8), 'action_mask': array([1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)}
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 0
     |     |     
  X  |  -  |  -  
_____|_____|_____
     |     |     
  -  |  -  |  -  
_____|_____|_____
     |     |     
  -  |  -  |  -  
     |     |     

Observation: {'observation': array([[[0, 1],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0]]], dtype=int8), 'action_mask': array([0, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)}
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1
     |     |     
  X  |  -  |  -  
_____|_____|_____
     |     |     
  O  |  -  |  -  
_____|_____|_____
     |     |     
  -  |  -  |  -  
     |     |     

Observation

## Texas Hold'em No Limit
Here is an example of a Texas Hold'em No Limit game that uses the `ActionMaskAgent`.

In [9]:
from pettingzoo.classic import texas_holdem_no_limit_v6

# Four-player no-limit hold'em with one mask-aware, LLM-backed agent (each
# with its own chat model) per player name.
env = texas_holdem_no_limit_v6.env(num_players=4, render_mode="human")
agents = dict(
    (player, ActionMaskAgent(name=player, model=ChatOpenAI(temperature=0.2), env=env))
    for player in env.possible_agents
)
main(agents, env)


Observation: {'observation': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 2.], dtype=float32), 'action_mask': array([1, 1, 0, 1, 1], dtype=int8)}
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1

Observation: {'observation': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 2.], dtype=float32), 'action_mask': array([1, 1, 0, 1, 1], dtype=int8)}
Reward: 0
Termination: False
Truncation: False
Return: 0
        
Action: 1

Observation: {'observation': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 