In [1]:
from mdagent import MDAgent

In [2]:
# Record today's date and the wall-clock time at which this cell was run.
import datetime

start = datetime.datetime.now()
# Both strings are derived from the same `start` instant so they stay consistent.
date = format(start, "%Y-%m-%d")
time = format(start, "%H:%M:%S")
print(f"date: {date}")
print(f"time: {time}")

date: 2024-09-30
time: 21:37:23


In [3]:
# Step-by-step version of the task: each numbered step is spelled out for the agent.
descriptive_prompt_7 = (
    "Complete the following MD tasks: "
    "1. Download the PDB file for 1MBN. "
    "2. Preprocess the file by adding missing atoms, hydrogens and replacing non-standard residues. "
    "3. Simulate for 1 ps at 300 K and 400 K. "
    "4. Calculate the RMSD for each simulation. "
    "5. Analyze the secondary structure for each simulation."
)
# Free-form version of the same task (the trailing space is part of the original prompt).
non_descriptive_prompt_7 = (
    "Simulate PDB ID 1MBN at two different temperatures: 300K, 400K for 1 ps each. "
    "Plot RMSD of both over time, and compare the final secondary structures "
    "at the end of the simulations. "
)

In [4]:
# Settings shared by both agents created in this notebook.
# `tools` is passed to MDAgent as top_k_tools.
tools = "all"
# `llm_model` is passed to MDAgent as model.
llm_model = "gpt-4-turbo-2024-04-09"

In [5]:
# Agent used for the descriptive prompt; model and tool selection come from the settings cell.
agent_1 = MDAgent(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
)

##### descriptive prompt

In [6]:
# Timestamp taken immediately before the run; the duration cell below subtracts it.
start1 = datetime.datetime.now()
# Kept as the cell's last expression so the notebook displays whatever run() returns.
agent_1.run(descriptive_prompt_7)

Thought: The task involves multiple steps starting with downloading a PDB file, preprocessing it, running simulations at different temperatures, and then analyzing the results. I will begin by downloading the PDB file for the protein with ID 1MBN.

Action:
```json
{
    "action": "PDBFileDownloader",
    "action_input": {
        "query": "1MBN"
    }
}
```PDB file found with this ID: 1MBN
Now that the PDB file for 1MBN has been successfully downloaded, the next step is to preprocess this file. This involves adding missing atoms, replacing non-standard residues, and adding hydrogens. I will use the CleaningToolFunction to perform these tasks.

Action:
```json
{
    "action": "CleaningToolFunction",
    "action_input": {
        "pdb_id": "1MBN_213753",
        "add_missing_atoms": true,
        "replace_nonstandard_residues": true,
        "add_hydrogens": true,
        "add_hydrogens_ph": 7.0,
        "remove_heterogens": false,
        "remove_water": false
    }
}
```file path: /Use

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [7]:
# Report how long the agent_1 run took, in minutes.
# `end` is taken when THIS cell executes, so any idle time between the run cell
# finishing and this cell being run is included in the figure.
end = datetime.datetime.now()
elapsed = end - start1
print("agent_1 duration: {:.2f} minutes".format(elapsed.total_seconds() / 60))

agent_1 duration: 12.32 minutes


In [8]:
# Bind `registry` to agent_1's path registry; the cells below look files up through it.
registry = agent_1.path_registry
# Fetch and print the registry's listing of stored names.
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 1MBN_213753, 1MBN_213802, 1MBN_213834, top_sim0_213848, sim0_213848, rec0_213853, rec1_213853, rec2_213853, top_sim0_214415, sim0_214415, rec0_214427, rec1_214427, rec2_214427, rmsd_213848, fig0_214856, rmsd_214415, fig0_214903, rec0_214918, rec0_214933


In [9]:
# Fetch the registry's combined listing of names and descriptions.
paths_and_descriptions = registry.list_path_names_and_descriptions()
# Print one comma-separated piece per line. Every comma becomes a line break,
# including commas that occur inside a description.
print(paths_and_descriptions.replace(",", "\n"))

Files found in registry: 1MBN_213753: PDB file downloaded from RSCB
 PDBFile ID: 1MBN_213753
 1MBN_213802: Cleaned File:  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 1MBN_213834: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_213848: Initial positions for simulation sim0_213848
 sim0_213848: Basic Simulation of Protein 1MBN_213834
 rec0_213853: Simulation trajectory for protein 1MBN_213834 and simulation sim0_213848
 rec1_213853: Simulation state log for protein 1MBN_213834 and simulation sim0_213848
 rec2_213853: Simulation pdb frames for protein 1MBN_213834 and simulation sim0_213848
 top_sim0_214415: Initial positions for simulation sim0_214415
 sim0_214415: Basic Simulation of Protein 1MBN_213834
 rec0_214427: Simulation trajectory for protein 1MBN_213834 and simulation sim0_214415


In [None]:
# Display the RMSD plots produced during the agent_1 run.
from IPython.display import Image

# Figure IDs copied from the registry listing printed above for agent_1
# ("... rmsd_213848, fig0_214856, rmsd_214415, fig0_214903 ...").
# The IDs previously hard-coded here (fig0_114210 / fig0_114213) do not appear
# in that listing, so they could not refer to figures from this run.
# Registry IDs carry a per-run timestamp suffix: update them after any re-run.
# NOTE(review): going by listing order, fig0_214856 presumably belongs to the
# first simulation (sim0_213848) and fig0_214903 to the second (sim0_214415);
# which of them is 300 K vs 400 K is not visible here - confirm.
rmsd1ID = 'fig0_214856'
rmsd2ID = 'fig0_214903'
path1 = registry.get_mapped_path(rmsd1ID)
path2 = registry.get_mapped_path(rmsd2ID)

# Last expression of the cell, so the notebook renders the first figure.
Image(filename=path1)

In [None]:
# Render the second RMSD figure; `path2` is assigned in the preceding plotting cell.
Image(filename=path2)

In [None]:
# Note: the agent did not complete the DSSP analysis before it timed out, so the secondary structure is not analyzed here.

##### non-descriptive prompt

In [10]:
# Record the date and wall-clock time at which the non-descriptive run section starts.
import datetime

start = datetime.datetime.now()
# ISO formatting yields the same YYYY-MM-DD / HH:MM:SS text for a current timestamp.
date = start.date().isoformat()
print("date:", date)
time = start.time().isoformat(timespec="seconds")
print("time:", time)

date: 2024-09-18
time: 13:13:03


In [11]:
# Separate agent instance for the non-descriptive prompt, built with the same settings as agent_1.
agent_2 = MDAgent(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
)

In [13]:
# Timestamp taken immediately before the run; the duration cell below subtracts it.
start2 = datetime.datetime.now()
# Kept as the cell's last expression so the notebook displays whatever run() returns.
agent_2.run(non_descriptive_prompt_7)

Thought: To address the question, I need to simulate the protein with PDB ID 1MBN at two different temperatures (300K and 400K) for 1 picosecond each. After the simulations, I will compute the RMSD over time for both temperatures and compare the final secondary structures. I will start by setting up and running the simulations for both temperatures.

Action:
```json
{
    "action": "SetUpandRunFunction",
    "action_input": {
        "pdb_id": "1MBN",
        "forcefield_files": ["amber99sb.xml"],
        "system_params": {
            "nonbondedMethod": "PME",
            "nonbondedCutoff": "1 * nanometers",
            "constraints": "HBonds",
            "rigidWater": true,
            "solvate": true
        },
        "integrator_params": {
            "integrator_type": "LangevinMiddle",
            "Temperature": "300 * kelvin",
            "Friction": "1.0 / picoseconds",
            "Timestep": "0.002 * picoseconds"
        },
        "simulation_params": {
            "Ensemb



System built successfully
<openmm.openmm.System; proxy of <Swig Object of type 'OpenMM::System *' at 0x30a3f5410> >
Setting up integrator...
Creating simulation...
simulation set!
Performing energy minimization...
Minimization complete!
Initial Positions saved to initial_positions.pdb
Equilibrating...
Simulating...
Done!
Standalone simulation script written to /Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_364/simulations/NVT_1MBN_143035_20240918_143055.py
['TRAJ_sim0_143055_1MBN_143035_20240918_143101.dcd', 'prompt8.ipynb', 'LOG_sim0_143055_1MBN_143035_20240918_143101.txt', 'prompt2.ipynb', 'prompt6.ipynb', 'TOP_sim0_143055_1MBN_143035_20240918_143101.pdb', 'prompt4.ipynb', 'prompt3.ipynb', 'prompt1.ipynb', 'prompt5.ipynb', 'prompt7.ipynb']
Now that the simulation at 300K has been successfully completed, I need to set up and run the simulation at 400K using the same cleaned PDB file. After completing both simulations, I will compute the RMSD over time for each temperature and compare t

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [14]:
# Report how long the agent_2 run took, in minutes.
# `end` is taken when THIS cell executes, so any idle time between the run cell
# finishing and this cell being run is included in the figure.
end = datetime.datetime.now()
elapsed = end - start2
print("agent_2 duration: %.2f minutes" % (elapsed.total_seconds() / 60))

agent_2 duration: 17.86 minutes


In [15]:
# Rebind `registry` to agent_2's path registry: from here on, lookups through
# `registry` resolve against agent_2's files, not agent_1's.
registry = agent_2.path_registry
# Fetch and print the registry's listing of stored names.
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 1MBN_143018, 1MBN_143035, top_sim0_143055, sim0_143055, rec0_143101, rec1_143101, rec2_143101, top_sim0_144011, sim0_144011, rec0_144016, rec1_144016, rec2_144016


In [16]:
# Fetch the registry's combined listing of names and descriptions.
paths_and_descriptions = registry.list_path_names_and_descriptions()
# Print one comma-separated piece per line. Every comma becomes a line break,
# including commas that occur inside a description.
print(*paths_and_descriptions.split(","), sep="\n")

Files found in registry: 1MBN_143018: PDB file downloaded from RSCB
 PDBFile ID: 1MBN_143018
 1MBN_143035: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_143055: Initial positions for simulation sim0_143055
 sim0_143055: Basic Simulation of Protein 1MBN_143035
 rec0_143101: Simulation trajectory for protein 1MBN_143035 and simulation sim0_143055
 rec1_143101: Simulation state log for protein 1MBN_143035 and simulation sim0_143055
 rec2_143101: Simulation pdb frames for protein 1MBN_143035 and simulation sim0_143055
 top_sim0_144011: Initial positions for simulation sim0_144011
 sim0_144011: Basic Simulation of Protein 1MBN_143035
 rec0_144016: Simulation trajectory for protein 1MBN_143035 and simulation sim0_144011
 rec1_144016: Simulation state log for protein 1MBN_143035 and simulation sim0_144011
 rec2_144016: Simulation pdb frames for protein 1MBN_1430

In [None]:
# Display the RMSD plots for both simulations of the agent_2 run.
# NOTE(review): neither 'fig0_062655' nor 'fig0_062658' appears in the names
# printed above from agent_2's registry, and that listing contains no fig* or
# rmsd* entries at all. These IDs look stale - confirm them, or produce the
# RMSD figures first, before relying on this cell.
from IPython.display import Image
rmsd1ID = 'fig0_062655'
rmsd2ID = 'fig0_062658'
path1 = registry.get_mapped_path(rmsd1ID)
path2 = registry.get_mapped_path(rmsd2ID)

# Last expression of the cell, so the notebook renders the first figure.
Image(filename=path1)

In [None]:
# Render the second RMSD figure; `path2` is assigned in the preceding plotting cell.
Image(filename=path2)

In [None]:
# Compare secondary-structure content of the first and last frame of the
# 300 K simulation from the agent_2 run.
import mdtraj as md
import numpy as np

# Registry IDs copied from the agent_2 listing printed above: rec0_143101 is the
# trajectory of simulation sim0_143055 and top_sim0_143055 holds its initial
# positions. The agent log shows this first simulation was the 300 K run.
# The IDs previously used here (rec0_062543 / top_sim0_062542) are not in that
# listing. Registry IDs carry a per-run timestamp: update them after a re-run.
traj_path = registry.get_mapped_path("rec0_143101")
top_path = registry.get_mapped_path("top_sim0_143055")
traj = md.load(traj_path, top=top_path)

# Compute the secondary structure of the last and first frames.
# Each slice holds a single frame, hence the [0] row index below.
dssp_final = md.compute_dssp(traj[-1], simplified=True)
dssp_initial = md.compute_dssp(traj[0], simplified=True)

print('Computing DSSP for 1MBN at 300K')

# Simplified DSSP codes counted below, in the order they are reported.
ss_classes = (("sheets", "E"), ("helices", "H"), ("coils", "C"))

print('=== BEFORE ===')
for label, code in ss_classes:
    print(f"Number of initial {label}: ", np.count_nonzero(dssp_initial[0] == code))

print('=== AFTER ===')
for label, code in ss_classes:
    print(f"Number of final {label}: ", np.count_nonzero(dssp_final[0] == code))

In [None]:
# Compare secondary-structure content of the first and last frame of the
# second simulation from the agent_2 run.
import mdtraj as md
import numpy as np

# Registry IDs copied from the agent_2 listing printed above: rec0_144016 is the
# trajectory of simulation sim0_144011 and top_sim0_144011 holds its initial
# positions. The IDs previously used here (rec0_062614 / top_sim0_062613) are
# not in that listing. Registry IDs carry a per-run timestamp: update them
# after a re-run.
# NOTE(review): the agent log says the 400 K simulation was to be run after the
# 300 K one, so this second simulation is presumably the 400 K run - confirm.
traj_path = registry.get_mapped_path("rec0_144016")
top_path = registry.get_mapped_path("top_sim0_144011")
traj = md.load(traj_path, top=top_path)

# Compute the secondary structure of the last and first frames.
# Each slice holds a single frame, hence the [0] row index below.
dssp_final = md.compute_dssp(traj[-1], simplified=True)
dssp_initial = md.compute_dssp(traj[0], simplified=True)

print('Computing DSSP for 1MBN at 400K')

# Simplified DSSP codes counted below, in the order they are reported.
ss_classes = (("sheets", "E"), ("helices", "H"), ("coils", "C"))

print('=== BEFORE ===')
for label, code in ss_classes:
    print(f"Number of initial {label}: ", np.count_nonzero(dssp_initial[0] == code))

print('=== AFTER ===')
for label, code in ss_classes:
    print(f"Number of final {label}: ", np.count_nonzero(dssp_final[0] == code))