In [1]:
from mdagent import MDAgent

In [2]:
#todays date and time
import datetime

start = datetime.datetime.now()
date = start.strftime("%Y-%m-%d")
print("date:",date)
time = start.strftime("%H:%M:%S")
print("time:",time)

date: 2024-09-30
time: 21:39:45


In [3]:
descriptive_prompt_7 = "Complete the following MD tasks: 1. Download the PDB file for 1MBN. 2. Preprocess the file by adding missing atoms, hydrogens and replacing non-standard residues. 3. Simulate for 1 ps at 300 K and 400 K. 4. Calculate the RMSD for each simulation. 5. Analyze the secondary structure for each simulation."
non_descriptive_prompt_7 = "Simulate PDB ID 1MBN at two different temperatures: 300K, 400K for 1 ps each. Plot RMSD of both over time, and compare the final secondary structures at the end of the simulations. "

In [4]:
llm_model = "gpt-3.5-turbo-0125"
tools = "all"

In [5]:
agent_1 = MDAgent(agent_type="Structured", model=llm_model, top_k_tools=tools)

##### descriptive prompt

In [6]:
start1 = datetime.datetime.now()
agent_1.run(descriptive_prompt_7)

Thought: The task involves multiple steps in molecular dynamics, including downloading a PDB file, preprocessing it, simulating at different temperatures, calculating RMSD, and analyzing secondary structure. I will need to use a combination of tools to accomplish each step efficiently.

Action:
```
{
    "action": "PDBFileDownloader",
    "action_input": "1MBN"
}
```PDB file found with this ID: 1MBN
Thought: The next step is to preprocess the downloaded PDB file by adding missing atoms, hydrogens, and replacing non-standard residues to prepare it for simulation.

Action:
```
{
    "action": "CleaningToolFunction",
    "action_input": {
        "pdb_id": "1MBN_213950",
        "replace_nonstandard_residues": true,
        "add_missing_atoms": true,
        "remove_heterogens": true,
        "remove_water": true,
        "add_hydrogens": true,
        "add_hydrogens_ph": 7.0
    }
}
```file path: /Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_445/pdb/1MBN_raw_20240930_213950.pdb
Action:
`

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [7]:
end = datetime.datetime.now()
elapsed = end - start1
print(f"agent_1 duration: {elapsed.total_seconds()/60:.2f} minutes")

agent_1 duration: 1.73 minutes


In [8]:
registry = agent_1.path_registry
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 1MBN_154901, 1MBN_154906, top_sim0_154913, sim0_154913, rec0_154914, rec1_154914, rec2_154914


In [9]:
paths_and_descriptions = registry.list_path_names_and_descriptions()
print("\n".join(paths_and_descriptions.split(",")))

Files found in registry: 1MBN_154901: PDB file downloaded from RSCB
 PDBFile ID: 1MBN_154901
 1MBN_154906: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_154913: Initial positions for simulation sim0_154913
 sim0_154913: Basic Simulation of Protein 1MBN_154906
 rec0_154914: Simulation trajectory for protein 1MBN_154906 and simulation sim0_154913
 rec1_154914: Simulation state log for protein 1MBN_154906 and simulation sim0_154913
 rec2_154914: Simulation pdb frames for protein 1MBN_154906 and simulation sim0_154913


In [None]:
#plotting rmsd of both simulations
from IPython.display import Image
rmsd1ID = 'fig0_114210'
rmsd2ID = 'fig0_114213'
path1 = registry.get_mapped_path(rmsd1ID)
path2 = registry.get_mapped_path(rmsd2ID)

Image(filename=path1)

In [None]:
Image(filename=path2)

In [None]:
# note: it fails to do DSSP before it timed out, so we don't analyze those here

##### non-descriptive prompt

In [10]:
#todays date and time
import datetime

start = datetime.datetime.now()
date = start.strftime("%Y-%m-%d")
print("date:",date)
time = start.strftime("%H:%M:%S")
print("time:",time)

date: 2024-09-18
time: 15:50:41


In [11]:
agent_2 = MDAgent(agent_type="Structured", model=llm_model, top_k_tools=tools)

In [12]:
start2 = datetime.datetime.now()
agent_2.run(non_descriptive_prompt_7)

Thought: To simulate the PDB ID 1MBN at different temperatures and compare the RMSD and final secondary structures, I will need to set up and run short simulations at 300K and 400K for 1 ps each. Then, I will analyze the RMSD over time and compute the final secondary structures for comparison.

Action:
```
{
    "action": "SetUpandRunFunction",
    "action_input": {
        "pdb_id": "1MBN",
        "forcefield_files": ["amber14/protein.ff14SB.xml", "amber14/tip3p.xml"],
        "save": true,
        "system_params": {
            "nonbondedMethod": "NoCutoff",
            "constraints": "HBonds",
            "rigidWater": true
        },
        "integrator_params": {
            "integrator_type": "LangevinMiddle",
            "Temperature": "300 * kelvin",
            "Friction": "1.0 / picoseconds",
            "Timestep": "0.002 * picoseconds",
            "Pressure": "1.0 * bar"
        },
        "simulation_params": {
            "Ensemble": "NVT",
            "Number of Steps"

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [13]:
end = datetime.datetime.now()
elapsed = end - start2
print(f"agent_2 duration: {elapsed.total_seconds()/60:.2f} minutes")

agent_2 duration: 0.28 minutes


In [15]:
registry = agent_2.path_registry
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 1MBN_143018, 1MBN_143035, top_sim0_143055, sim0_143055, rec0_143101, rec1_143101, rec2_143101, top_sim0_144011, sim0_144011, rec0_144016, rec1_144016, rec2_144016


In [16]:
paths_and_descriptions = registry.list_path_names_and_descriptions()
print("\n".join(paths_and_descriptions.split(",")))

Files found in registry: 1MBN_143018: PDB file downloaded from RSCB
 PDBFile ID: 1MBN_143018
 1MBN_143035: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_143055: Initial positions for simulation sim0_143055
 sim0_143055: Basic Simulation of Protein 1MBN_143035
 rec0_143101: Simulation trajectory for protein 1MBN_143035 and simulation sim0_143055
 rec1_143101: Simulation state log for protein 1MBN_143035 and simulation sim0_143055
 rec2_143101: Simulation pdb frames for protein 1MBN_143035 and simulation sim0_143055
 top_sim0_144011: Initial positions for simulation sim0_144011
 sim0_144011: Basic Simulation of Protein 1MBN_143035
 rec0_144016: Simulation trajectory for protein 1MBN_143035 and simulation sim0_144011
 rec1_144016: Simulation state log for protein 1MBN_143035 and simulation sim0_144011
 rec2_144016: Simulation pdb frames for protein 1MBN_1430

In [None]:
#plotting rmsd of both simulations
from IPython.display import Image
rmsd1ID = 'fig0_062655'
rmsd2ID = 'fig0_062658'
path1 = registry.get_mapped_path(rmsd1ID)
path2 = registry.get_mapped_path(rmsd2ID)

Image(filename=path1)

In [None]:
Image(filename=path2)

In [None]:
import mdtraj as md
import numpy as np

traj_path = registry.get_mapped_path("rec0_062543")
top_path = registry.get_mapped_path("top_sim0_062542")
traj = md.load(traj_path, top=top_path)

# Compute the secondary structure of the trajectory
dssp_final = md.compute_dssp(traj[-1], simplified=True)
dssp_initial = md.compute_dssp(traj[0], simplified=True)

print('Computing DSSP for 1MBN at 300K')

print('=== BEFORE ===')
print("Number of initial sheets: ",len([i for i in dssp_initial[0] if i == 'E']))
print("Number of initial helices: ",len([i for i in dssp_initial[0] if i == 'H']))
print("Number of initial coils: ",len([i for i in dssp_initial[0] if i == 'C']))

print('=== AFTER ===')
print("Number of final sheets: ",len([i for i in dssp_final[0] if i == 'E']))
print("Number of final helices: ",len([i for i in dssp_final[0] if i == 'H']))
print("Number of final coils: ",len([i for i in dssp_final[0] if i == 'C']))

In [None]:
import mdtraj as md
import numpy as np

traj_path = registry.get_mapped_path("rec0_062614")
top_path = registry.get_mapped_path("top_sim0_062613")
traj = md.load(traj_path, top=top_path)

# Compute the secondary structure of the trajectory
dssp_final = md.compute_dssp(traj[-1], simplified=True)
dssp_initial = md.compute_dssp(traj[0], simplified=True)

print('Computing DSSP for 1MBN at 400K')

print('=== BEFORE ===')
print("Number of initial sheets: ",len([i for i in dssp_initial[0] if i == 'E']))
print("Number of initial helices: ",len([i for i in dssp_initial[0] if i == 'H']))
print("Number of initial coils: ",len([i for i in dssp_initial[0] if i == 'C']))

print('=== AFTER ===')
print("Number of final sheets: ",len([i for i in dssp_final[0] if i == 'E']))
print("Number of final helices: ",len([i for i in dssp_final[0] if i == 'H']))
print("Number of final coils: ",len([i for i in dssp_final[0] if i == 'C']))