In [1]:
import datetime
import os
from mdcrow import MDCrow

In [2]:
# Task prompt and agent configuration for this experiment run.
prompt21 = "Download and clean protein 1A3N."
llm_var = "accounts/fireworks/models/llama-v3p1-405b-instruct"
tools = "all"
agent = MDCrow(agent_type="Structured", model=llm_var, top_k_tools=tools)

# Record when the run started. The date and the time are printed on separate
# lines, so each label names only the value it actually shows.
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

# Echo the model settings the agent is really using (read back from the agent,
# not from the local variables above).
print("LLM: ",agent.llm.model_name,"\nTemperature: ",agent.llm.temperature)

date and time: 2024-09-05
time: 11:10:50
LLM:  accounts/fireworks/models/llama-v3p1-405b-instruct 
Temperature:  0.1


In [3]:
# Run the agent on the download-and-clean prompt. The value returned by run()
# is the last expression of the cell, so it is echoed in the cell output.
agent.run(prompt21)

Thought: First, I need to download the protein 1A3N. Then, I will clean the protein by removing heterogens, adding missing atoms and hydrogens, replacing nonstandard residues, and removing water.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "1A3N"
}
```
PDB file found with this ID: 1A3N
Now that I have downloaded the protein 1A3N, I need to clean it by removing heterogens, adding missing atoms and hydrogens, replacing nonstandard residues, and removing water.

Action:
```
{
  "action": "CleaningToolFunction",
  "action_input": {
    "pdb_id": "1A3N_111056",
    "replace_nonstandard_residues": true,
    "add_missing_atoms": true,
    "remove_heterogens": true,
    "remove_water": true,
    "add_hydrogens": true,
    "add_hydrogens_ph": 7.0
  }
}
```file path: /gpfs/fs2/scratch/qcampbe2/repos/md-agent/ckpt/ckpt_102/pdb/1A3N_raw_20240905_111056.pdb
Thought: I have successfully downloaded and cleaned the protein 1A3N. Now, I can provide the final answer.

Final Answer:

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [4]:
# Timestamp taken after the agent run has finished. The date and the time are
# printed on separate lines, so each label names only the value it shows.
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

# Show which checkpoint directory the agent used and what it registered there.
registry = agent.path_registry
print('ckpt_dir:',os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
# NOTE(review): splitting on every comma puts one entry per line, but it also
# breaks up descriptions that themselves contain commas - confirm whether the
# registry offers a structured listing that could be used instead.
print("\n".join(paths_and_descriptions.split(",")))

date and time: 2024-09-05
time: 11:11:35
ckpt_dir: ckpt_102
Files found in registry: 1A3N_111056: PDB file downloaded from RSCB
 PDBFile ID: 1A3N_111056
 1A3N_111132: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 


In [8]:
import re
import os

# Pull every 1A3N registry ID out of the listing. The same ID can be mentioned
# more than once in that string, so de-duplicate (keeping first-seen order)
# instead of relying on a hard-coded position in the raw match list.
matches = re.findall(r"1A3N_\d+", paths_and_descriptions)
unique_ids = list(dict.fromkeys(matches))
assert len(unique_ids) >= 2, (
    f"Expected at least two distinct 1A3N file IDs in the registry, found: {unique_ids}"
)

# First distinct ID -> first registered file, second distinct ID -> next one.
file_id1, file_id2 = unique_ids[0], unique_ids[1]
file_path1 = registry.get_mapped_path(file_id1)
file_path2 = registry.get_mapped_path(file_id2)

# Both files must exist on disk and must be two different files.
assert os.path.exists(file_path1), f"Path does not exist: {file_path1}"
assert os.path.exists(file_path2), f"Path does not exist: {file_path2}"
assert file_path1 != file_path2, "Both file IDs map to the same path"
print(f"It's asserted that file paths for {file_id1} and {file_id2} exist")

It's asserted that file paths for 1A3N_111056 and 1A3N_111132 exist


In [10]:
import mdtraj as md

# Load the two registered structure files (first ID, then second ID) so their
# topologies can be compared in the next cell.
raw_file, clean_file = (md.load(path) for path in (file_path1, file_path2))

In [11]:
# Compare the residue names present before and after cleaning.
# Use the residue's own `name` attribute rather than slicing its string form:
# slicing to three characters truncates longer names and, for shorter names,
# pulls in digits from the sequence number that follows the name.
residues_before = {res.name for res in raw_file.top.residues}
residues_after = {res.name for res in clean_file.top.residues}
print("Residues before:",residues_before)
print("Residues after:",residues_after)
print("Residues removed:",residues_before - residues_after)

Residues before: {'LEU', 'GLY', 'ASN', 'GLU', 'HEM', 'LYS', 'ASP', 'VAL', 'PRO', 'PHE', 'HIS', 'TRP', 'SER', 'ARG', 'HOH', 'THR', 'MET', 'ALA', 'TYR', 'CYS', 'GLN'}
Residues after: {'LEU', 'GLY', 'ASN', 'GLU', 'LYS', 'ASP', 'VAL', 'PRO', 'PHE', 'HIS', 'TRP', 'SER', 'ARG', 'THR', 'MET', 'ALA', 'TYR', 'CYS', 'GLN'}
Residues removed: {'HOH', 'HEM'}


# Experiment Result: ✅❌

1. Completed without Exception or TimeOut Errors  ✅
2. Attempted all necessary steps  ✅
3. Completed without Hallucination  ✅
4. Logic makes sense  ✅
5. Correct Answer  ✅