In [8]:
from mdagent import MDAgent
import os 
import mdtraj as md

In [9]:
import sys
import os

# Make the directory two levels above the current working directory importable.
# robustness_prompts is imported right after, so it is presumably located there
# (TODO confirm against the repository layout).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from robustness_prompts import get_prompt

# Fetch the "descriptive" and "natural" variants of prompt number 3.
prompt_3_descriptive = get_prompt("descriptive", 3)
prompt_3_natural = get_prompt("natural", 3)

# Display both prompt strings as the cell output.
prompt_3_descriptive, prompt_3_natural

('Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Report the secondary structure assignments of the downloaded PDB structure. 3. Simulate for 1 ps at 300 K.',
 'Simulate 1LYZ for 1 ps at 300 K. Report the secondary structure assignments of the downloaded PDB structure.')

In [10]:
# Settings shared by both agent runs below.
tools = "all"  # forwarded to MDAgent as top_k_tools
llm_model = "claude-3-5-sonnet-20241022"  # forwarded to MDAgent as model

In [4]:
# Run 1: give the "descriptive" prompt to a Structured agent with memory disabled.
agent_1 = MDAgent(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
    use_memory=False,
)
# The value returned by run() is the cell output.
agent_1.run(prompt_3_descriptive)

Thought: Let's break this down into steps. First, we need to download the PDB file 1LYZ using the PDB downloader tool.

Action:
```
{
    "action": "PDBFileDownloader",
    "action_input": "1LYZ"
}
```

PDB file found with this ID: 1LYZ
Thought: Now that we have downloaded the PDB file, let's analyze its secondary structure using ComputeDSSP. Then we'll set up and run a simulation.

Action:
```
{
    "action": "ComputeDSSP",
    "action_input": {
        "traj_file": "1LYZ_213440",
        "target_frames": "first"
    }
}
```

Thought: Now that we have the secondary structure information, let's set up and run a short simulation at 300 K for 1 ps. We'll use the SetUpandRunFunction tool with appropriate parameters.

Action:
```
{
    "action": "SetUpandRunFunction",
    "action_input": {
        "pdb_id": "1LYZ_213440",
        "forcefield_files": ["amber14-all.xml", "amber14/tip3pfb.xml"],
        "save": true,
        "system_params": {
            "nonbondedMethod": "PME",
           

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [5]:
# Keep a handle on run 1's path registry; later cells use it to look up files.
registry_1 = agent_1.path_registry
# Print the registry listing with each comma turned into a line break.
print("\n".join(registry_1.list_path_names_and_descriptions().split(",")))

Files found in registry: 1LYZ_213440: PDB file downloaded from RSCB
 PDBFile ID: 1LYZ_213440
 rec0_213443: dssp values for trajectory with id: 1LYZ_213440
 1LYZ_213457: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_213502: Initial positions for simulation sim0_213502
 sim0_213502: Basic Simulation of Protein 1LYZ_213457
 rec0_213519: Simulation trajectory for protein 1LYZ_213457 and simulation sim0_213502
 rec1_213519: Simulation state log for protein 1LYZ_213457 and simulation sim0_213502
 rec2_213519: Simulation pdb frames for protein 1LYZ_213457 and simulation sim0_213502


In [5]:
# make sure pdb was downloaded
# NOTE: "1LYZ_213440" is the ID from the recorded run shown in the listing above.
pdb_path_1 = registry_1.get_mapped_path("1LYZ_213440")
assert os.path.exists(pdb_path_1)

In [6]:
# make sure dssp was computed correctly
# Re-runs the DSSP tool on the downloaded PDB (first frame) so its output can be
# compared by eye with what the agent reported.
from mdagent.tools.base_tools import ComputeDSSP

dssp = ComputeDSSP(registry_1)
dssp_request_1 = {"traj_file": "1LYZ_213440", "target_frames": "first"}
dssp._run(**dssp_request_1)

"{'residues in helix': 50, 'residues in strand': 14, 'residues in coil': 65, 'residues not assigned, not a protein residue': 101}"

In [7]:
# make sure trajectory and topology exist
# Resolve the trajectory and topology registry IDs to file paths, in that order.
traj_path_1, top_path_1 = (
    registry_1.get_mapped_path(file_id)
    for file_id in ("rec0_213519", "top_sim0_213502")
)

for required_path in (traj_path_1, top_path_1):
    assert os.path.exists(required_path)

In [11]:
# Run 2: same agent configuration as run 1, but given the "natural" prompt.
agent_2 = MDAgent(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
    use_memory=False,
)
# The value returned by run() is the cell output.
agent_2.run(prompt_3_natural)

Thought: Let me break this down into steps:
1. First, I need to download the PDB file for 1LYZ
2. Then I need to clean the structure
3. Run a simulation
4. Finally analyze the secondary structure

Let's start by downloading the PDB file.

Action:
```
{
    "action": "PDBFileDownloader",
    "action_input": "1LYZ"
}
```

PDB file found with this ID: 1LYZ
Thought: Now that I have the PDB file, I should clean it and then run a simulation. After that, I can analyze the secondary structure. Let's clean the structure first.

Action:
```
{
    "action": "CleaningToolFunction",
    "action_input": {
        "pdb_id": "1LYZ_221539",
        "replace_nonstandard_residues": true,
        "add_missing_atoms": true,
        "remove_heterogens": true,
        "remove_water": true,
        "add_hydrogens": true,
        "add_hydrogens_ph": 7.0
    }
}
```

file path: /Users/samanthacox/Desktop/md-agent/ckpt/ckpt_41/pdb/1LYZ_raw_20250122_221539.pdb
Thought: Now that I have the cleaned structure, I can

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [12]:
# Keep a handle on run 2's path registry; later cells use it to look up files.
registry_2 = agent_2.path_registry
# Print the registry listing with each comma turned into a line break.
print("\n".join(registry_2.list_path_names_and_descriptions().split(",")))

Files found in registry: 1LYZ_221539: PDB file downloaded from RSCB
 PDBFile ID: 1LYZ_221539
 1LYZ_221544: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_221550: Initial positions for simulation sim0_221550
 sim0_221550: Basic Simulation of Protein 1LYZ_221544
 rec0_221554: Simulation trajectory for protein 1LYZ_221544 and simulation sim0_221550
 rec1_221554: Simulation state log for protein 1LYZ_221544 and simulation sim0_221550
 rec2_221554: Simulation pdb frames for protein 1LYZ_221544 and simulation sim0_221550
 rec0_222323: dssp values for trajectory with id: rec2_221554


In [13]:
# make sure pdb was downloaded
# NOTE: "1LYZ_221539" is the ID from the recorded run shown in the listing above.
pdb_path_2 = registry_2.get_mapped_path("1LYZ_221539")
assert os.path.exists(pdb_path_2)

In [14]:
# make sure dssp was computed correctly
# Re-runs the DSSP tool on the downloaded PDB (first frame) so its output can be
# compared by eye with what the agent reported.
from mdagent.tools.base_tools import ComputeDSSP

dssp = ComputeDSSP(registry_2)
dssp_request_2 = {"traj_file": "1LYZ_221539", "target_frames": "first"}
dssp._run(**dssp_request_2)

"{'residues in helix': 50, 'residues in strand': 14, 'residues in coil': 65, 'residues not assigned, not a protein residue': 101}"

In [15]:
# make sure trajectory and topology exist
# The registry listing for this run describes "rec0_221554" as the simulation
# trajectory, while "sim0_221550" is the "Basic Simulation" entry. Look up the
# trajectory ID (mirroring the run-1 check, which uses its rec0_* entry) so the
# assertion really verifies that a trajectory file was written.
# NOTE: these IDs carry the timestamp of the recorded run and will differ if the
# agent is executed again.
traj_path_2 = registry_2.get_mapped_path("rec0_221554")
top_path_2 = registry_2.get_mapped_path("top_sim0_221550")

assert os.path.exists(traj_path_2)
assert os.path.exists(top_path_2)