In [10]:
from typing import Self

from aviary.core import (
    Environment,
    Frame,
    Message,
    Messages,
    TaskDataset,
    Tool,
    ToolRequestMessage,
    ToolResponseMessage,
)
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Dict, Any, Optional, Literal

from ldp.agent import Agent
from ldp.graph import OpResult


In [2]:
class MySimpleAgentState(BaseModel):
    """Container for the tools an agent may call and its message history."""

    tools: list[Tool] = Field(default_factory=list)
    messages: list[Message] = Field(default_factory=list)

    def get_next_state(self, obs: list[Message] | None = None) -> Self:
        """Build a successor state with the observations appended to the history.

        The current state is left untouched; a new instance of the same class
        is created.

        Args:
            obs: Observation messages to append. ``None`` (or an empty list)
                appends nothing.

        Returns:
            A new state with the same tools and the extended message list.
        """
        new_messages = obs if obs else []
        history = self.messages + new_messages
        return type(self)(tools=self.tools, messages=history)

In [None]:
import os
from getpass import getpass

# Never store API keys in the notebook itself: reuse the key from the
# environment when it is already set, otherwise prompt for it without echo.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [111]:
from typing import Any

from ldp.graph import LLMCallOp, compute_graph


class MySimpleAgent(BaseModel, Agent[MySimpleAgentState]):
    """Minimal agent that asks a language model which tool to call next."""

    llm_model: dict[str, Any] = Field(
        default={
            "name": "gpt-4o-2024-08-06",
            "temperature": 0.1,
        },
        description="Configuration for the LLM object.",
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Op through which every LLM API call of this agent is made.
        self._llm_call_op = LLMCallOp()

    async def init_state(self, tools: list[Tool]) -> MySimpleAgentState:
        """Create the starting state: the given tools and an empty history."""
        return MySimpleAgentState(tools=tools)

    @compute_graph()
    async def get_asv(
        self, agent_state: MySimpleAgentState, obs: list[Message]
    ) -> tuple[OpResult[ToolRequestMessage], MySimpleAgentState, float]:
        """Pick the next action given the current state and new observations.

        Args:
            agent_state: State before the observations are taken into account.
            obs: Messages returned by the environment since the last action.

        Returns:
            Three-tuple of the LLM call result (the action), the agent state
            extended with the observations and the action, and a value
            estimate that is always 0.0.
        """
        # Fold the environment observation into a fresh state first.
        updated_state = agent_state.get_next_state(obs)
        llm_result = await self._llm_call_op(
            self.llm_model,
            msgs=updated_state.messages,
            tools=updated_state.tools,
        )

        # Record the model's tool request at the end of the history.
        updated_state.messages = updated_state.messages + [llm_result.value]

        return llm_result, updated_state, 0.0

In [15]:
class MDCrowEnvConfig(BaseModel):
    """Reward and termination settings read by MDCrowEnv."""

    # Unknown configuration keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Reward returned by submit_answer when the answer is within tolerance.
    correct_reward: float = 1.0
    # Reward returned by submit_answer when the answer parses but is wrong.
    incorrect_reward: float = 0.0
    # Reward for a failed tool use (no tool call, invalid call, tool error).
    tool_failure_reward: float = -1.0
    # Reward for a non-terminal tool call that succeeded.
    tool_success_reward: float = 0.0
    # Relative tolerance used by submit_answer when comparing to the answer.
    rel_tol: float = 1e-4

    # Whether a failed step (no/invalid tool call, calculator error) ends the episode.
    done_on_failure: bool = True




In [121]:
import requests 
import contextlib
import json 
from typing import Dict, Optional


class MDCrowEnv(Environment[None]):
    """Single-task environment offering a PDB download tool and a terminal answer tool.

    Every tool method returns a ``(response, reward, done)`` triple. ``step``
    decodes that triple from each tool response and aggregates rewards and
    done flags over all tool calls of one action.
    """

    # Seconds to wait for each RCSB HTTP request before treating it as failed.
    REQUEST_TIMEOUT_S: float = 30.0

    def __init__(
        self,
        problem_id: str,
        problem: str,
        answer: float,
        config: MDCrowEnvConfig | None = None,
    ):
        """Store the task definition.

        Args:
            problem_id: Identifier of the task.
            problem: Task text shown to the agent on reset.
            answer: Reference answer used by ``submit_answer``.
            config: Reward/termination settings; defaults to ``MDCrowEnvConfig()``.
        """
        # The problem is not part of the state because it is always the same.
        # Putting it in the state would imply it is somehow affected by .step()
        # or re-initialized by .reset().
        self.problem_id = problem_id
        self.problem = problem
        self.answer = float(answer)  # If passed in as a 0d tensor  # noqa: FURB123

        self.config = config if config is not None else MDCrowEnvConfig()

    @classmethod
    def from_task(cls, task: str) -> "MDCrowEnv":
        """Build an environment from a free-text task, with a placeholder answer of 0.0."""
        return cls(problem_id="task", problem=task, answer=0.0)

    async def reset(self) -> tuple[Messages, list[Tool]]:
        """Start a new episode.

        Returns:
            The initial observation (the problem text) and the available tools.
        """
        self.state = None  # this environment is effectively stateless
        self.tools = [
            Tool.from_function(self.download_pdb_file),
            Tool.from_function(self.submit_answer),
        ]
        return [Message(content=self.problem)], self.tools

    async def step(
        self, action: ToolRequestMessage
    ) -> tuple[Messages, float, bool, bool]:
        """Execute the tool calls of one agent action.

        Args:
            action: Tool request produced by the agent.

        Returns:
            Four-tuple of response messages, summed reward, done flag and
            truncated flag (always False).
        """
        if not action.tool_calls:
            # The agent answered without calling a tool: treat as a tool failure.
            return (
                [
                    Message(
                        content=(
                            "Must call one of the provided tools"
                            f" ({self.download_pdb_file.__name__} or"
                            f" {self.submit_answer.__name__})."
                        )
                    )
                ],
                self.config.tool_failure_reward,
                self.config.done_on_failure,
                False,
            )

        valid_action, invalid_action = self.filter_invalid_tool_calls(action)

        # Invalid calls still get an (empty) response each.
        invalid_response_msgs = [
            ToolResponseMessage.from_call(tool_call, content="")
            for tool_call in invalid_action.tool_calls
        ]

        if valid_action.tool_calls:
            # TODO: Just let exec_tool_calls handle invalid tool calls
            # once someone can take a closer look at what response, reward, done
            # would be in that case.
            results = await self.exec_tool_calls(
                valid_action, handle_invalid_tool_calls=False
            )
            response_msgs = []
            total_reward = 0.0
            any_done = False

            for tool_call, result in zip(valid_action.tool_calls, results, strict=True):
                # Each tool returns (response, reward, done), serialized in the content.
                response, reward, done = json.loads(result.content)

                response_msgs.append(
                    ToolResponseMessage.from_call(tool_call, content=str(response))
                )

                total_reward += reward
                any_done |= done

            return (  # type: ignore[return-value]
                response_msgs + invalid_response_msgs,
                total_reward,
                any_done,
                False,
            )

        # Only invalid tool calls were made: one failure reward per call.
        return (  # type: ignore[return-value]
            invalid_response_msgs,
            self.config.tool_failure_reward * len(invalid_response_msgs),
            self.config.done_on_failure,
            False,
        )

    def submit_answer(self, answer: str) -> tuple[bool, float, Literal[True]]:
        """Submit the proposed answer and check if it is correct. This action is terminal.

        Args:
            answer: Proposed answer.

        Returns:
            Three-tuple of if correct, associated reward (correct_reward if correct,
                tool_failure_reward if tool failure, otherwise incorrect_reward), and
                True indicating done.
        """
        try:
            # Relative error; rel_tol in the denominator avoids dividing by zero
            # when the reference answer is 0.
            correct: bool = (
                abs(float(answer) - self.answer)
                / (abs(self.answer) + self.config.rel_tol)
                < self.config.rel_tol
            )
            reward = (
                self.config.correct_reward if correct else self.config.incorrect_reward
            )
        except ValueError:
            # The answer could not be parsed as a number.
            return False, self.config.tool_failure_reward, True
        else:
            return correct, reward, True

    def calculator(self, expr: str) -> tuple[float | str, float, bool]:
        """Calculate a mathematical expression.

        Args:
            expr: A valid Python expression.

        Returns:
            A three-tuple where the first element is the float evaluation if successful,
                or a string containing the failure cause if unsuccessful, the second
                element is the reward associated with success or failure, and the third
                element is a boolean indicating if this action is terminal.
        """
        try:
            expr = expr.strip()
            # SECURITY NOTE(review): eval executes arbitrary Python. This method is
            # not registered as a tool in reset(); do not expose it to model- or
            # user-controlled input without sandboxing.
            result = eval(expr)  # noqa: S307  # pylint: disable=eval-used
            with contextlib.suppress(ValueError):  # If possible, downcast float to int
                if int(result) == result:
                    result = int(result)
        except Exception as exc:
            return (
                f"Error using calculator: {exc!r}.",
                self.config.tool_failure_reward,
                self.config.done_on_failure,
            )
        return result, self.config.tool_success_reward, False

    def download_pdb_file(self, query_string: str) -> tuple[str, float, bool]:
        """
        Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.

        Args:
            query_string (str): The search term for querying the PDB database.

        Returns:
            A three-tuple of a status message ("downloaded! <filename>" on success,
                "Failed! ..." otherwise), the associated reward (tool_success_reward
                or tool_failure_reward), and False because this action is never terminal.
        """
        # NOTE(review): the literal "?json={search-request}" suffix looks like a
        # documentation placeholder; the query itself is sent as the POST JSON
        # body. Kept unchanged -- confirm it can be dropped.
        url = "https://search.rcsb.org/rcsbsearch/v2/query?json={search-request}"
        query = {
            "query": {
                "type": "terminal",
                "service": "full_text",
                "parameters": {"value": query_string},
            },
            "return_type": "entry",
        }
        failure_reward = self.config.tool_failure_reward
        no_hit_message = f"Failed! No PDB entry found for query {query_string!r}."

        try:
            response = requests.post(url, json=query, timeout=self.REQUEST_TIMEOUT_S)
            # 204 (No Content) is handled as "no hits": there is no JSON body to parse.
            if response.status_code == 204:
                return no_hit_message, failure_reward, False
            response.raise_for_status()

            results = response.json().get("result_set", [])
            if not results:
                return no_hit_message, failure_reward, False

            # Download the best-scoring entry, as CIF only when the query asks for it.
            pdbid = max(results, key=lambda hit: hit["score"])["identifier"]
            filetype = "cif" if "cif" in query_string.lower() else "pdb"
            filename = f"{pdbid}.{filetype}"
            download_url = f"https://files.rcsb.org/download/{pdbid}.{filetype}"
            pdb_response = requests.get(download_url, timeout=self.REQUEST_TIMEOUT_S)
            # Without this check an HTTP error page would be saved as a structure file.
            pdb_response.raise_for_status()
        except (requests.RequestException, ValueError, KeyError) as exc:
            # Report network/HTTP/parsing problems to the agent instead of
            # crashing the rollout; the agent may retry with another query.
            return f"Failed! Error querying RCSB: {exc!r}", failure_reward, False

        # Write (or overwrite) the structure file under ./pdb_files.
        directory = "pdb_files"
        os.makedirs(directory, exist_ok=True)
        with open(os.path.join(directory, filename), "w", encoding="utf-8") as file:
            file.write(pdb_response.text)

        return "downloaded! " + filename, self.config.tool_success_reward, False

    def export_frame(self) -> Frame:
        """Export the static task description (id, problem text, reference answer)."""
        return Frame(
            state={
                "problem_id": self.problem_id,
                "problem": self.problem,
                "answer": self.answer,
            }
        )



In [122]:

# Module-level environment and agent; `main` below reads these globals
# instead of creating its own.
env = MDCrowEnv.from_task("Download fibronectin structure from PDB.")
agent = MySimpleAgent()

async def main(idx: int = 0, max_steps: int | None = None):
    """Run one rollout of the module-level ``agent`` in the module-level ``env``.

    Args:
        idx: Unused; kept so existing ``main(i)`` calls keep working.
        max_steps: Optional cap on the number of agent/environment steps.
            ``None`` (default) runs until the environment reports done.

    Returns:
        The agent state after the last agent step.
    """
    # Get initial question, available tools from the environment
    obs, tools = await env.reset()
    print(f"Question: {obs[0].content}")

    # Get initial agent state
    agent_state = await agent.init_state(tools=tools)
    print(agent_state, "\n", agent_state.messages)
    step = 1
    done = False
    while not done and (max_steps is None or step <= max_steps):
        action, agent_state, _ = await agent.get_asv(agent_state, obs)
        print(agent_state, "\n", agent_state.messages)
        obs, reward, done, _ = await env.step(action.value)
        print(
            f"Step {step} - {print_action_obs(action, obs)}, environment reward {reward}"
        )
        step += 1
    # The original code called `agent.get_asv(agent_state, obs)` here without
    # `await`: the coroutine never ran and only produced a "never awaited"
    # RuntimeWarning, so the call was removed.
    print("Finished! \n")
    return agent_state


def print_action_obs(
    action: "OpResult[ToolRequestMessage]", obs: "list[Message]"
) -> str:
    """Format the agent's tool calls next to the environment's responses.

    Args:
        action: Result of the agent step; ``action.value`` is the tool request.
        obs: Messages returned by the environment for that action.

    Returns:
        One "agent action: ..., environment answer: ... " fragment per tool
        call (or per observation when the action contains no tool call),
        concatenated into a single string.
    """
    tool_calls = action.value.tool_calls or []
    if not tool_calls:
        # The environment answers a tool-less action with a plain message;
        # pairing it with zero tool calls would otherwise fail.
        return "".join(
            f"agent action: <no tool call>, environment answer: {message.content} "
            for message in obs
        )

    # Prefer pairing by tool-call id: the environment lists responses to valid
    # calls before responses to invalid ones, so positions may not line up.
    answers_by_id = {}
    for message in obs:
        call_id = getattr(message, "tool_call_id", None)
        if call_id is not None:
            answers_by_id[call_id] = message

    msg = ""
    for position, tool_call in enumerate(tool_calls):
        tool_answer = answers_by_id.get(getattr(tool_call, "id", None))
        if tool_answer is None and position < len(obs):
            # Fall back to positional pairing when ids are unavailable.
            tool_answer = obs[position]
        answer_text = tool_answer.content if tool_answer is not None else "<no response>"
        tool_name = tool_call.function.name
        tool_args = tool_call.function.arguments
        msg += f"agent action: {tool_name}({tool_args}), environment answer: {answer_text} "
    return msg


# Run three rollouts. `main` does not use its index argument and reuses the
# same env/agent each time; only the state of the last rollout is kept.
for i in range(3):
    agent_state = await main(i)



Question: Download fibronectin structure from PDB.
tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))] messages=[] 
 []
tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=



Step 1 - agent action: download_pdb_file({'query_string': 'fibronectin'}), environment answer: downloaded! 5TFY.pdb , environment reward 0.0


  agent.get_asv(agent_state,obs)


tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))] messages=[Message(role='user', content='Download fibronectin structure from PDB.'), ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_In6tugbsgGDWyOsH4oRQfniy', type='function', function=ToolCallFunction(arguments={'query_string



Step 1 - agent action: download_pdb_file({'query_string': 'fibronectin'}), environment answer: downloaded! 5TFY.pdb , environment reward 0.0




tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))] messages=[Message(role='user', content='Download fibronectin structure from PDB.'), ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_9stqlXPs8visyoNaBKiXOGJK', type='function', function=ToolCallFunction(arguments={'query_string



Step 1 - agent action: download_pdb_file({'query_string': 'fibronectin'}), environment answer: downloaded! 5TFY.pdb , environment reward 0.0
tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))] messages=[Message(role='user', content='Download fibronectin structure from PDB.'), ToolRequestMessage(role='assistant', content=None, fun

In [123]:
agent_state.messages

[Message(role='user', content='Download fibronectin structure from PDB.'),
 ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_U7sW6qTYQTBvsKnrdzevUDZX', type='function', function=ToolCallFunction(arguments={'query_string': 'fibronectin'}, name='download_pdb_file'))]),
 ToolResponseMessage(role='tool', content='downloaded! 5TFY.pdb', name='download_pdb_file', tool_call_id='call_U7sW6qTYQTBvsKnrdzevUDZX'),
 ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_Ratd1206eJAklqbNJwQzhKeH', type='function', function=ToolCallFunction(arguments={'answer': '5TFY.pdb'}, name='submit_answer'))])]

In [53]:
agent.llm_model

{'model': 'gpt-4o-2024-08-06'}

In [52]:
from lmi import LiteLLMModel as LLMModel
# Call a fresh LLMCallOp directly (outside the agent) with the agent's LLM config.
# NOTE(review): the recorded run of this cell raised a TypeError about multiple
# values for 'model'; the displayed config at that time used a "model" key --
# confirm the cell still fails with the current "name"-keyed config.
llm__ops = LLMCallOp()
ans = await llm__ops(agent.llm_model, msgs=agent_state.messages, tools=agent_state.tools)

TypeError: ldp.graph.common_ops.LLMCallOp._call_single_and_maybe_validate() got multiple values for keyword argument 'model'

In [63]:
# Rebuild the initial agent state (tools only, no messages) and inspect its fields.
# Relies on `agent` and `tools` defined by other cells.
agent_state = await agent.init_state(tools=tools)

# agent_state.messages, agent_state.tools
agent_state.__dict__


{'tools': [Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))),
  Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))],
 'messages': []}

In [106]:
env = MDCrowEnv.from_task("Download fibronectin structure from PDB.")
agent = MySimpleAgent()
obs, tools = await env.reset()
agent_state = await agent.init_state(tools=tools)
print(agent_state)
print(100*'-')
agent_state = agent_state.get_next_state(obs)
print(agent_state)
print(100*'-')
#get asv
# result, agent_state, _ = await agent.get_asv(agent_state, obs)
#get LLM model
llm__ops = agent._llm_call_op
model = LLMModel(config=agent.llm_model)
answ = await llm__ops._call_single_and_maybe_validate(model, messages=agent_state.messages, tools=agent_state.tools, **agent.llm_model)
# ans = await llm__ops(agent.llm_model, msgs=agent_state.messages, tools=agent_state.tools)
# print(result)
print("this Ran")

agent_state = await agent.init_state(tools=tools)
print(100*'-')
print(agent_state)
print(100*'-')
obs, tools = await env.reset()
print(100*'-')
print(obs)
print(100*'-')
print(tools)


print(f"Before 1st call: {agent._llm_call_op}")
try:
    values = await agent.get_asv(agent_state, obs)
except:
     continue
print(f"After 1st call: {agent._llm_call_op}")

print(f"Before 2nd call: {agent._llm_call_op}")
await agent.get_asv(agent_state, obs)

# agent.

tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))] messages=[]
----------------------------------------------------------------------------------------------------
tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the

TypeError: LLMCallOp._call_single_and_maybe_validate() got multiple values for argument 'model'

In [80]:
answ

LLMResult(id=UUID('953e7eb5-6702-424e-9f42-97400845ca8e'), session_id=None, name=None, config=None, prompt=[Message(role='user', content='Download fibronectin structure from PDB.')], text=None, messages=[ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_SiraiHeDb8V0ulgDq6bX4KLW', type='function', function=ToolCallFunction(arguments={'query_string': 'fibronectin'}, name='download_pdb_file'))])], prompt_count=124, completion_count=20, model='gpt-4o-2024-11-20', date='2025-03-04T12:12:10.790708', seconds_to_first_token=0.0, seconds_to_last_token=1.2406637079984648, logprob=None, reasoning_content=None, cost=0.00051)

In [70]:
llm_model = LLMModel(config={"model":"gpt-4o-2024-08-06", "temperature":0.1})
answ = await llm__ops._call_single_and_maybe_validate(llm_model, messages=agent_state.messages, tools=agent_state.tools)

In [71]:
answ

LLMResult(id=UUID('3e5d1c3d-05f0-41bd-b1ab-0a707f997dd9'), session_id=None, name=None, config=None, prompt=[Message(role='user', content='Download fibronectin structure from PDB.')], text=None, messages=[ToolRequestMessage(role='assistant', content=None, function_call=None, tool_calls=[ToolCall(id='call_3vm31H5iE1advfdxJalglWTO', type='function', function=ToolCallFunction(arguments={'query_string': 'fibronectin'}, name='download_pdb_file'))])], prompt_count=124, completion_count=20, model='gpt-4o-2024-11-20', date='2025-03-04T12:12:10.790708', seconds_to_first_token=0.0, seconds_to_last_token=0.9108512500024517, logprob=None, reasoning_content=None, cost=0.00051)

In [50]:
len((agent.llm_model,agent_state.messages,agent_state.tools))

3

In [46]:
# The model is selected with the "name" key, matching MySimpleAgent.llm_model;
# a "model" key did not select the requested model in the recorded runs.
config = {
    "name": "gpt-4o-2024-08-06",
    "temperature": 0.1,
}
model = LLMModel(config=config)


In [40]:
# Reset the environment: `a` is the initial observation list (the problem
# message) and `tools` the tools the environment offers.
a, tools = await env.reset()
a, tools

([Message(role='user', content='Download fibronectin structure from PDB.')],
 [Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))),
  Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))])

In [47]:
agent.llm_model

{'model': 'gpt-4o-2024-08-06'}

In [64]:
# Create a fresh agent with the default LLM config and build its initial
# state from `tools` (defined by an earlier environment reset).
agent = MySimpleAgent()

agent_state = await agent.init_state(tools=tools)
agent_state

MySimpleAgentState(tools=[Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))), Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))], messages=[])

In [65]:
agent.__dict__

{'llm_model': {'model': 'gpt-4o-2024-08-06'},
 '_llm_call_op': LLMCallOp (name=_llm_call_op, id=5273353424)}

In [35]:
# One agent step on the initial observation `a` returned by env.reset().
# NOTE(review): the recorded run raised the TypeError shown in this cell's
# output -- confirm it is resolved with the current agent config.
ans = await agent.get_asv(agent_state, a)

TypeError: ldp.graph.common_ops.LLMCallOp._call_single_and_maybe_validate() got multiple values for keyword argument 'model'

In [22]:
env.__dict__

{'problem_id': 'task',
 'problem': 'Download fibronectin structure from PDB.',
 'answer': 0.0,
 'config': MDCrowEnvConfig(correct_reward=1.0, incorrect_reward=0.0, tool_failure_reward=-1.0, tool_success_reward=0.0, rel_tol=0.0001, done_on_failure=True),
 'state': None,
 'tools': [Tool(type='function', info=FunctionInfo(name='download_pdb_file', description="Searches RCSB's Protein Data Bank using the given query string and downloads the corresponding PDB or CIF file.", parameters=Parameters(type='object', properties={'query_string': {'description': 'The search term for querying the PDB database.', 'title': 'Query String', 'type': 'string'}}, required=['query_string']))),
  Tool(type='function', info=FunctionInfo(name='submit_answer', description='Submit the proposed answer and check if it is correct. This action is terminal.', parameters=Parameters(type='object', properties={'answer': {'description': 'Proposed answer.', 'title': 'Answer', 'type': 'string'}}, required=['answer'])))]}