In [1]:
from mdagent import MDAgent


In [2]:
# Task 8, descriptive variant: the workflow is spelled out as numbered steps
# (simulations at 300 K, 400 K and 500 K).
# The text is an experiment input, so it is kept verbatim -- spelling and
# spacing included -- and only split across adjacent literals for readability.
descriptive_prompt_8 = (
    "Complete the following MD tasks:  "
    "1.- Download the PDB file fpr 4RMB "
    "2.- Pre-process the file by adding missing atoms, hydrogens and replacing non-standard residues. "
    "3.- Simulate for 1ps at 300 K, 400K and 500K "
    "4.- Plot Radius of gyration for each simulation and "
    "5. Analyze the secondary structures of each simulation "
)
# Task 8, non-descriptive variant: a free-form request
# (simulations at 100 K, 200 K and 300 K).
non_descriptive_prompt_8 = (
    "Simulate 4RMB at 100K, 200K, and 300K for 1 ps each.. "
    "Then plot the radius of gyration over time for all three simulations. "
    "Lastly, compare the change in secondary structure for the three analyses throughout the simulation. "
)

In [3]:
# Settings shared by both agents built in the next cell.
# `tools` is forwarded as MDAgent's top_k_tools argument.
tools = "all"
# Model identifier forwarded as MDAgent's `model` argument.
llm_model = "gpt-4-turbo-2024-04-09"

In [4]:
# Build two separately constructed agents with identical settings:
# agent_1 is used for the descriptive prompt, agent_2 for the non-descriptive one.
agent_1, agent_2 = (
    MDAgent(agent_type="Structured", model=llm_model, top_k_tools=tools)
    for _ in range(2)
)

In [5]:
# Run the step-by-step (descriptive) prompt through agent_1.
# NOTE(review): the output recorded for this cell breaks off after the
# CleaningToolFunction action with
# "ValueError: too many values to unpack (expected 2)" -- the run did not
# complete here; investigate before relying on results from this cell.
agent_1.run(descriptive_prompt_8)

Thought: The task involves multiple steps starting from downloading a PDB file, preprocessing it, running simulations at different temperatures, and analyzing the results. The first step is to download the PDB file for the protein with the PDB ID "4RMB".

Action:
```json
{
    "action": "PDBFileDownloader",
    "action_input": {
        "query": "4RMB"
    }
}
```PDB file found with this ID: 4RMB
The next step is to preprocess the downloaded PDB file by adding missing atoms, replacing non-standard residues, and adding hydrogens. This will prepare the protein structure for accurate simulation.

Action:
```json
{
    "action": "CleaningToolFunction",
    "action_input": {
        "pdb_id": "4RMB_213807",
        "add_missing_atoms": true,
        "replace_nonstandard_residues": true,
        "add_hydrogens": true,
        "remove_heterogens": true,
        "remove_water": true
    }
}
```file path: /Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_438/pdb/4RMB_raw_20240930_213807.pdb
The nex

ValueError: too many values to unpack (expected 2)

In [6]:
# Run the free-form (non-descriptive) prompt through agent_2.
# The call is the cell's last expression, so whatever run() returns is echoed
# as the cell output (the recorded output is truncated).
agent_2.run(non_descriptive_prompt_8)

Thought: To address the question, I need to first set up and run molecular dynamics simulations for the protein with PDB ID 4RMB at three different temperatures (100K, 200K, and 300K) for 1 picosecond each. After running these simulations, I will calculate and plot the radius of gyration over time for each simulation. Finally, I will compare the changes in secondary structure across the three simulations. I will start by setting up and running the first simulation at 100K.

Action:
```json
{
    "action": "SetUpandRunFunction",
    "action_input": {
        "pdb_id": "4RMB",
        "forcefield_files": ["amber14-all.xml", "amber14/tip3p.xml"],
        "system_params": {
            "nonbondedMethod": "PME",
            "nonbondedCutoff": "1 * nanometers",
            "constraints": "HBonds",
            "rigidWater": true,
            "solvate": true
        },
        "integrator_params": {
            "integrator_type": "LangevinMiddle",
            "Temperature": "100 * kelvin",
   

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

<Figure size 640x480 with 0 Axes>

In [None]:
import mdagent
from mdagent import MDAgent
import os
import mdtraj as md

# Re-open an existing checkpoint so the files registered by earlier runs can
# be inspected without re-running the agents.
# NOTE(review): the output recorded earlier in this notebook points at
# ckpt_438, while this cell opens ckpt_192 with a different model string --
# confirm this is the checkpoint that holds the runs analysed below.
agent = MDAgent(
    agent_type="Structured",
    model="gpt-4o-2024-05-13",
    top_k_tools="all",
    ckpt_dir="ckpt_192",
)
registry = agent.path_registry

# Show the registry listing with every comma turned into a line break.
print("\n".join(registry.list_path_names_and_descriptions().split(",")))

In [None]:
# Descriptive prompt: resolve the trajectory and topology entries of the three
# simulations through the path registry.
traj_1 = registry.get_mapped_path("rec0_195318")
top_1 = registry.get_mapped_path("top_sim0_195316")

traj_2 = registry.get_mapped_path("rec0_205734")
top_2 = registry.get_mapped_path("top_sim0_205733")

traj_3 = registry.get_mapped_path("rec0_215557")
top_3 = registry.get_mapped_path("top_sim0_215556")

# Fail early if any resolved path is missing on disk. The assertion message
# names the variable and shows the value the registry returned, so a failure
# points at the offending entry instead of a bare AssertionError.
for checked_name, checked_path in (
    ("traj_1", traj_1),
    ("top_1", top_1),
    ("traj_2", traj_2),
    ("top_2", top_2),
    ("traj_3", traj_3),
    ("top_3", top_3),
):
    assert os.path.exists(checked_path), (
        f"{checked_name}: no file found at {checked_path!r}"
    )


In [None]:
# Load each simulation's trajectory together with its matching topology file.
traj1 = md.load(traj_1, top=top_1)
traj2 = md.load(traj_2, top=top_2)
traj3 = md.load(traj_3, top=top_3)

# One summary line per trajectory: atoms, residues, chains, frames.
for loaded_traj in (traj1, traj2, traj3):
    print(
        loaded_traj.n_atoms,
        loaded_traj.top.n_residues,
        loaded_traj.top.n_chains,
        loaded_traj.n_frames,
    )

In [None]:
from IPython.display import Image
# Resolve the registry entry "fig0_225424" to a file path and render it inline.
# This cell sits in the descriptive-prompt section; presumably the entry is
# that run's radius-of-gyration plot -- TODO confirm against the registry listing.
rgypath = registry.get_mapped_path("fig0_225424")
# Image(...) is the cell's last expression, so the picture is displayed.
Image(filename=rgypath)

In [None]:
# Non-descriptive prompt: resolve the trajectory and topology entries of the
# three simulations through the path registry.
# Note: this rebinds traj_1..top_3 from the descriptive-prompt section.
traj_1 = registry.get_mapped_path("rec0_225454")
top_1 = registry.get_mapped_path("top_sim0_225454")

traj_2 = registry.get_mapped_path("rec0_225532")
top_2 = registry.get_mapped_path("top_sim0_225532")

traj_3 = registry.get_mapped_path("rec0_225608")
top_3 = registry.get_mapped_path("top_sim0_225608")

# Fail early if any resolved path is missing on disk. The assertion message
# names the variable and shows the value the registry returned, so a failure
# points at the offending entry instead of a bare AssertionError.
for checked_name, checked_path in (
    ("traj_1", traj_1),
    ("top_1", top_1),
    ("traj_2", traj_2),
    ("top_2", top_2),
    ("traj_3", traj_3),
    ("top_3", top_3),
):
    assert os.path.exists(checked_path), (
        f"{checked_name}: no file found at {checked_path!r}"
    )

In [None]:
# Load each simulation's trajectory together with its matching topology file.
# Note: this rebinds traj1..traj3 from the descriptive-prompt section.
traj1 = md.load(traj_1, top=top_1)
traj2 = md.load(traj_2, top=top_2)
traj3 = md.load(traj_3, top=top_3)

# One summary line per trajectory: atoms, residues, chains, frames.
for loaded_traj in (traj1, traj2, traj3):
    print(
        loaded_traj.n_atoms,
        loaded_traj.top.n_residues,
        loaded_traj.top.n_chains,
        loaded_traj.n_frames,
    )

In [None]:
# Resolve the registry entry "fig0_225644" to a file path; the next cell
# displays it. Rebinds rgypath from the descriptive-prompt section.
# Presumably this is the non-descriptive run's radius-of-gyration plot -- TODO confirm.
rgypath = registry.get_mapped_path("fig0_225644")

In [None]:
from IPython.display import Image
# Render the image file at rgypath inline; as the cell's last expression the
# Image object is displayed as the cell output.
Image(filename=rgypath)

In [None]:
import mdtraj as md

# Get DSSP secondary-structure codes for traj1 (simplified code set).
number_of_chains = traj1.n_chains
secondary_structure = md.compute_dssp(traj1, simplified=True)
print("Number of chains: ", number_of_chains)
# Only element 0 of the DSSP result is tallied -- per the mdtraj docs the
# array is indexed (frame, residue), so this is the first frame only.
# Each figure is the number of residues carrying that code ('E', 'H', 'C'),
# not a count of distinct sheets or helices.
print("Number of sheets: ", sum(1 for code in secondary_structure[0] if code == 'E'))
print("Number of helices: ", sum(1 for code in secondary_structure[0] if code == 'H'))
print("Number of coils: ", sum(1 for code in secondary_structure[0] if code == 'C'))

In [None]:
# DSSP secondary-structure codes for traj2 (simplified code set).
number_of_chains = traj2.n_chains
secondary_structure = md.compute_dssp(traj2, simplified=True)
print("Number of chains: ", number_of_chains)
# Only element 0 of the DSSP result is tallied -- per the mdtraj docs the
# array is indexed (frame, residue), so this is the first frame only.
# Each figure is the number of residues carrying that code ('E', 'H', 'C'),
# not a count of distinct sheets or helices.
print("Number of sheets: ", sum(1 for code in secondary_structure[0] if code == 'E'))
print("Number of helices: ", sum(1 for code in secondary_structure[0] if code == 'H'))
print("Number of coils: ", sum(1 for code in secondary_structure[0] if code == 'C'))



In [None]:
# DSSP secondary-structure codes for traj3 (simplified code set).
number_of_chains = traj3.n_chains
secondary_structure = md.compute_dssp(traj3, simplified=True)
print("Number of chains: ", number_of_chains)
# Only element 0 of the DSSP result is tallied -- per the mdtraj docs the
# array is indexed (frame, residue), so this is the first frame only.
# Each figure is the number of residues carrying that code ('E', 'H', 'C'),
# not a count of distinct sheets or helices.
print("Number of sheets: ", sum(1 for code in secondary_structure[0] if code == 'E'))
print("Number of helices: ", sum(1 for code in secondary_structure[0] if code == 'H'))
print("Number of coils: ", sum(1 for code in secondary_structure[0] if code == 'C'))
