In [None]:
from mdagent import MDAgent
import os

In [None]:
# Step-by-step version of the task: every sub-task is numbered and spelled out.
descriptive_prompt_10 = (
    "Complete all of the following tasks: "
    "1. Download the PDB file 1LYZ. "
    "2. Tell me how many residues are in the chains and beta sheets. "
    "3. Simulate for 1 ps at 300 K. "
    "4. Compute the RMSD of the simulation at 300 K. "
    "5. Compute the radius of gyration over time for the simulation at 300 K. "
    "6. Compute the SASA (solvent accessible surface area) for the simulation at 300 K. "
    "7. Simulate for 1 ps at 400 K. "
    "8. Compute the RMSD of the simulation at 400 K. "
    "9. Compute the radius of gyration over time for the simulation at 400 K. "
    "10. Compute the SASA (solvent accessible surface area) for the simulation at 400 K."
)

# Condensed version of the same task, phrased as a single free-form request.
non_descriptive_prompt_10 = (
    "Simulate 1LYZ for 1 ps at 300K and 400K. "
    "Tell me how many residues are in the chains and beta sheets, "
    "and compute the RMSD, SASA, and radius of gyration of both simulations."
)

In [None]:
# Run configuration shared by both agents created below.
tools = "all"  # forwarded to MDAgent as top_k_tools
llm_model = "gpt-4o-2024-08-06"  # forwarded to MDAgent as model

In [None]:
# Record today's date and the current time for this run.
import datetime

now = datetime.datetime.now()
date = f"{now:%Y-%m-%d}"
time = f"{now:%H:%M:%S}"
print(f"date: {date}")
print(f"time: {time}")

# descriptive prompt

In [None]:
# Fresh agent for the descriptive prompt, built from llm_model and tools.
agent_1 = MDAgent(
    top_k_tools=tools,
    model=llm_model,
    agent_type="Structured",
)

In [None]:
# Run the first agent on the fully enumerated (descriptive) task prompt.
agent_1.run(descriptive_prompt_10)

In [None]:
# Show which checkpoint directory agent_1 used, then print every registered
# path entry on its own line (the listing is split on commas).
registry = agent_1.path_registry
print(f"ckpt_dir: {os.path.basename(registry.ckpt_dir)}")
paths_and_descriptions = registry.list_path_names_and_descriptions()
print(*paths_and_descriptions.split(","), sep="\n")

In [None]:
import re
import os

# Trajectory records (rec0_<digits>) found in the registry listing.
# dict.fromkeys drops repeated mentions of the same ID while keeping first-seen
# order, so traj_id1 and traj_id2 can never point at the same file.
matches = list(dict.fromkeys(re.findall(r"rec0_\d+", paths_and_descriptions)))
# Fail with a readable message instead of a bare IndexError when the agent
# registered fewer than two trajectories.
assert len(matches) >= 2, f"Expected at least 2 trajectory IDs (rec0_*), found {len(matches)}: {matches}"
traj_id1, traj_id2 = matches[:2]
traj_path1 = registry.get_mapped_path(traj_id1)
traj_path2 = registry.get_mapped_path(traj_id2)
assert os.path.exists(traj_path1), f"Trajectory file for {traj_id1} does not exist: {traj_path1}"
assert os.path.exists(traj_path2), f"Trajectory file for {traj_id2} does not exist: {traj_path2}"

# Topology files (top_sim0_<digits>), resolved the same way.
# NOTE(review): pairing traj_id1 with top_id1 relies on both ID lists appearing
# in the same simulation order in the listing — confirm against the printed output.
matches = list(dict.fromkeys(re.findall(r"top_sim0_\d+", paths_and_descriptions)))
assert len(matches) >= 2, f"Expected at least 2 topology IDs (top_sim0_*), found {len(matches)}: {matches}"
top_id1, top_id2 = matches[:2]
top_path1 = registry.get_mapped_path(top_id1)
top_path2 = registry.get_mapped_path(top_id2)
assert os.path.exists(top_path1), f"Topology file for {top_id1} does not exist: {top_path1}"
assert os.path.exists(top_path2), f"Topology file for {top_id2} does not exist: {top_path2}"
print(f"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist")

In [None]:
# this checks DSSP for the first frame, change to the last frame if needed
import mdtraj as md

# Re-resolve the paths from their registry IDs so this cell only needs the IDs.
traj_path1 = registry.get_mapped_path(traj_id1)
top_path1 = registry.get_mapped_path(top_id1)
traj_path2 = registry.get_mapped_path(traj_id2)
top_path2 = registry.get_mapped_path(top_id2)

for label, traj_path, top_path in (
    ("First simulation:", traj_path1, top_path1),
    ("Second simulation:", traj_path2, top_path2),
):
    print(label)
    traj = md.load(traj_path, top=top_path)[0] # first frame
    # compute_dssp returns one row of codes per frame, shape (n_frames, n_residues).
    # Take row 0 so the counts below run over residues; comparing a whole row to
    # a letter yields an array, which cannot be used as an `if` condition.
    secondary_structure = md.compute_dssp(traj, simplified=True)[0]
    print("Number of chains: ", traj.n_chains)
    # Simplified DSSP codes: 'E' = strand, 'H' = helix, 'C' = coil.
    print("Number of residues in sheets: ", int((secondary_structure == 'E').sum()))
    print("Number of residues in helices: ", int((secondary_structure == 'H').sum()))
    print("Number of residues in coils: ", int((secondary_structure == 'C').sum()))

In [None]:
from IPython.display import Image

# Gather every figure ID (fig0_<digits>) from the registry listing; the cells
# that follow index into `matches` to show the remaining figures.
matches = re.compile(r"fig0_\d+").findall(paths_and_descriptions)

# First figure: resolve its path, make sure the file is there, then display it.
fig_id1 = matches[0]
fig_path1 = registry.get_mapped_path(fig_id1)
assert os.path.exists(fig_path1), f"This figure {fig_id1} does not exist."
Image(filename=fig_path1)

In [None]:
# Second figure ID collected from the registry listing.
fig_id2 = matches[1]
fig_path2 = registry.get_mapped_path(fig_id2)
# Verify this figure's own path (not fig_path1) so a missing file is caught here
# with the matching ID in the message.
assert os.path.exists(fig_path2), f"This figure {fig_id2} does not exist."
Image(filename=fig_path2)

In [None]:
# Third figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id3 = matches[2]
fig_path3 = registry.get_mapped_path(fig_id3)
assert os.path.exists(fig_path3), f"This figure {fig_id3} does not exist."
Image(filename=fig_path3)

In [None]:
# Fourth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id4 = matches[3]
fig_path4 = registry.get_mapped_path(fig_id4)
assert os.path.exists(fig_path4), f"This figure {fig_id4} does not exist."
Image(filename=fig_path4)

In [None]:
# Fifth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id5 = matches[4]
fig_path5 = registry.get_mapped_path(fig_id5)
assert os.path.exists(fig_path5), f"This figure {fig_id5} does not exist."
Image(filename=fig_path5)

In [None]:
# Sixth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id6 = matches[5]
fig_path6 = registry.get_mapped_path(fig_id6)
assert os.path.exists(fig_path6), f"This figure {fig_id6} does not exist."
Image(filename=fig_path6)

# non-descriptive prompt

In [None]:
# Second, independent agent for the non-descriptive prompt, built from
# llm_model and tools like the first one.
agent_2 = MDAgent(
    top_k_tools=tools,
    model=llm_model,
    agent_type="Structured",
)

In [None]:
# Run the second agent on the condensed (non-descriptive) task prompt.
agent_2.run(non_descriptive_prompt_10)

In [None]:
# Point `registry` at agent_2's files from here on, show which checkpoint
# directory it used, and print every registered path entry on its own line.
registry = agent_2.path_registry
print(f"ckpt_dir: {os.path.basename(registry.ckpt_dir)}")
paths_and_descriptions = registry.list_path_names_and_descriptions()
print(*paths_and_descriptions.split(","), sep="\n")

In [None]:
import re
import os

# Trajectory records (rec0_<digits>) found in the registry listing.
# dict.fromkeys drops repeated mentions of the same ID while keeping first-seen
# order, so traj_id1 and traj_id2 can never point at the same file.
matches = list(dict.fromkeys(re.findall(r"rec0_\d+", paths_and_descriptions)))
# Fail with a readable message instead of a bare IndexError when the agent
# registered fewer than two trajectories.
assert len(matches) >= 2, f"Expected at least 2 trajectory IDs (rec0_*), found {len(matches)}: {matches}"
traj_id1, traj_id2 = matches[:2]
traj_path1 = registry.get_mapped_path(traj_id1)
traj_path2 = registry.get_mapped_path(traj_id2)
assert os.path.exists(traj_path1), f"Trajectory file for {traj_id1} does not exist: {traj_path1}"
assert os.path.exists(traj_path2), f"Trajectory file for {traj_id2} does not exist: {traj_path2}"

# Topology files (top_sim0_<digits>), resolved the same way.
# NOTE(review): pairing traj_id1 with top_id1 relies on both ID lists appearing
# in the same simulation order in the listing — confirm against the printed output.
matches = list(dict.fromkeys(re.findall(r"top_sim0_\d+", paths_and_descriptions)))
assert len(matches) >= 2, f"Expected at least 2 topology IDs (top_sim0_*), found {len(matches)}: {matches}"
top_id1, top_id2 = matches[:2]
top_path1 = registry.get_mapped_path(top_id1)
top_path2 = registry.get_mapped_path(top_id2)
assert os.path.exists(top_path1), f"Topology file for {top_id1} does not exist: {top_path1}"
assert os.path.exists(top_path2), f"Topology file for {top_id2} does not exist: {top_path2}"
print(f"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist")

In [None]:
# this checks DSSP for the first frame, change to the last frame if needed
import mdtraj as md

# Re-resolve the paths from their registry IDs so this cell only needs the IDs.
traj_path1 = registry.get_mapped_path(traj_id1)
top_path1 = registry.get_mapped_path(top_id1)
traj_path2 = registry.get_mapped_path(traj_id2)
top_path2 = registry.get_mapped_path(top_id2)

for label, traj_path, top_path in (
    ("First simulation:", traj_path1, top_path1),
    ("Second simulation:", traj_path2, top_path2),
):
    print(label)
    traj = md.load(traj_path, top=top_path)[0] # first frame
    # compute_dssp returns one row of codes per frame, shape (n_frames, n_residues).
    # Take row 0 so the counts below run over residues; comparing a whole row to
    # a letter yields an array, which cannot be used as an `if` condition.
    secondary_structure = md.compute_dssp(traj, simplified=True)[0]
    print("Number of chains: ", traj.n_chains)
    # Simplified DSSP codes: 'E' = strand, 'H' = helix, 'C' = coil.
    print("Number of residues in sheets: ", int((secondary_structure == 'E').sum()))
    print("Number of residues in helices: ", int((secondary_structure == 'H').sum()))
    print("Number of residues in coils: ", int((secondary_structure == 'C').sum()))

In [None]:
from IPython.display import Image

# Gather every figure ID (fig0_<digits>) from the registry listing; the cells
# that follow index into `matches` to show the remaining figures.
matches = re.compile(r"fig0_\d+").findall(paths_and_descriptions)

# First figure: resolve its path, make sure the file is there, then display it.
fig_id1 = matches[0]
fig_path1 = registry.get_mapped_path(fig_id1)
assert os.path.exists(fig_path1), f"This figure {fig_id1} does not exist."
Image(filename=fig_path1)

In [None]:
# Second figure ID collected from the registry listing.
fig_id2 = matches[1]
fig_path2 = registry.get_mapped_path(fig_id2)
# Verify this figure's own path (not fig_path1) so a missing file is caught here
# with the matching ID in the message.
assert os.path.exists(fig_path2), f"This figure {fig_id2} does not exist."
Image(filename=fig_path2)

In [None]:
# Third figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id3 = matches[2]
fig_path3 = registry.get_mapped_path(fig_id3)
assert os.path.exists(fig_path3), f"This figure {fig_id3} does not exist."
Image(filename=fig_path3)

In [None]:
# Fourth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id4 = matches[3]
fig_path4 = registry.get_mapped_path(fig_id4)
assert os.path.exists(fig_path4), f"This figure {fig_id4} does not exist."
Image(filename=fig_path4)

In [None]:
# Fifth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id5 = matches[4]
fig_path5 = registry.get_mapped_path(fig_id5)
assert os.path.exists(fig_path5), f"This figure {fig_id5} does not exist."
Image(filename=fig_path5)

In [None]:
# Sixth figure ID from `matches`: resolve its path through the registry, check
# the file exists, and display it as the cell output.
fig_id6 = matches[5]
fig_path6 = registry.get_mapped_path(fig_id6)
assert os.path.exists(fig_path6), f"This figure {fig_id6} does not exist."
Image(filename=fig_path6)