In [1]:
import datetime
import os
from mdcrow import MDCrow

In [2]:
prompt16 = "Download the PDB file for 1C3W and describe the secondary structure. Then, simulate the protein at 300 K for 1 ns. Plot the RMSD over time and the radius of gyration over time."
llm_var = "accounts/fireworks/models/llama-v3p1-405b-instruct"
tools = "all"
agent = MDCrow(agent_type="Structured", model=llm_var, top_k_tools=tools)
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date and time:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",agent.llm.model_name,"\nTemperature: ",agent.llm.temperature)

date and time: 2024-09-05
time: 10:37:47
LLM:  accounts/fireworks/models/llama-v3p1-405b-instruct 
Temperature:  0.1


In [3]:
agent.run(prompt16)

Thought: First, I need to download the PDB file for 1C3W and describe its secondary structure. Then, I will simulate the protein at 300 K for 1 ns and plot the RMSD over time and the radius of gyration over time.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "1C3W"
}
```
PDB file found with this ID: 1C3W
Thought: Now that I have the PDB file, I need to describe its secondary structure. I can use the ComputeDSSP tool to do this.

Action:
```
{
  "action": "ComputeDSSP",
  "action_input": {
    "traj_file": "1C3W_103752",
    "top_file": "1C3W_103752",
    "target_frames": "all"
  }
}
```

(Note: I'll wait for the observation before proceeding)Now that I have described the secondary structure of the protein, I need to simulate it at 300 K for 1 ns. I can use the SetUpandRunFunction tool to do this.

Action:
```
{
  "action": "SetUpandRunFunction",
  "action_input": {
    "pdb_id": "1C3W_103752",
    "forcefield_files": ["amber99sbildn.xml"],
    "save": true,
    "sys

TypeError: ComputeRMSD._run() missing 1 required positional argument: 'top_id'

In [1]:
from mdcrow import MDCrow
agent = MDCrow(ckpt_dir="ckpt_98")
registry = agent.path_registry
paths_and_descriptions = registry.list_path_names_and_descriptions()
print("\n".join(paths_and_descriptions.split(",")))

Files found in registry: 1C3W_103752: PDB file downloaded from RSCB
 PDBFile ID: 1C3W_103752
 rec0_103756: dssp values for trajectory with id: 1C3W_103752
 1C3W_103806: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 
 top_sim0_103810: Initial positions for simulation sim0_103810
 sim0_103810: Basic Simulation of Protein 1C3W_103806
 rec0_103810: Simulation trajectory for protein 1C3W_103806 and simulation sim0_103810
 rec1_103810: Simulation state log for protein 1C3W_103806 and simulation sim0_103810
 rec2_103810: Simulation pdb frames for protein 1C3W_103806 and simulation sim0_103810


In [4]:
# grab first PDB file and check DSSP. if cleaned, may need to grab 2nd match instead.
import re
matches = re.findall(r"1C3W_\d+", paths_and_descriptions) 
file_id = matches[0]
pdb_path = registry.get_mapped_path(file_id)
assert os.path.exists(pdb_path)
print(f'It is asserted that file path for {file_id} exists')

It is asserted that file path for 1C3W_103752 exists


In [5]:
#secondary structure
import mdtraj as md

traj = md.load(pdb_path)
top = traj.topology
number_of_chains = top.n_chains
secondary_structure = md.compute_dssp(traj,simplified=True)
print("Number of residues in chains: ",number_of_chains)
print("Number of residues in sheets: ",len([i for i in secondary_structure[0] if i == 'E']))
print("Number of residues in helices: ",len([i for i in secondary_structure[0] if i == 'H']))
print("Number of residues in coils: ",len([i for i in secondary_structure[0] if i == 'C']))

Number of residues in chains:  2
Number of residues in sheets:  12
Number of residues in helices:  169
Number of residues in coils:  41


In [3]:
import re
import os

match = re.search(r"rec0_\d+", paths_and_descriptions)
traj_id = match.group(0)
traj_path = registry.get_mapped_path(traj_id)

match = re.search(r"top_sim0_\d+", paths_and_descriptions)
top_id = match.group(0)
top_path = registry.get_mapped_path(top_id)

# matches = re.findall(r"fig0_\d+", paths_and_descriptions)
# fig_id1 = matches[0]
# fig_id2 = matches[1]
# fig_path_1 = registry.get_mapped_path(fig_id1)
# fig_path_2 = registry.get_mapped_path(fig_id2)

assert os.path.exists(traj_path)
assert os.path.exists(top_path)
# assert os.path.exists(fig_path_1)
# assert os.path.exists(fig_path_2)
#print(f'It is asserted that file paths for {traj_id}, {top_id}, {fig_id1}, and {fig_id2} exist')
print(f'It is asserted that file paths for {traj_id} and {top_id} exist')

It is asserted that file paths for rec0_103756 and top_sim0_103810 exist


In [None]:
# from IPython.display import Image
# Image(filename=fig_path_1)

In [None]:
# Image(filename=fig_path_2)

# Experiment Result: ✅❌

1. Completed without Exception or TimeOut Errors ❌ (TypeError from RMSD tool)
2. Attempted all necessary steps ❌
3. Completed without Hallucination ✅
4. Logic make sense ❌
5. Correct Answer ❌