In [1]:
import datetime
import os
from mdcrow import MDCrow

In [2]:
# --- Run configuration: task prompt, model identifier, and tool selection ---
prompt21 = "Download and clean protein 1A3N."
llm_var = "accounts/fireworks/models/llama-v3p1-70b-instruct"
tools = "all"
agent = MDCrow(
    agent_type="Structured",
    model=llm_var,
    top_k_tools=tools,
)

# --- Provenance: capture one timestamp and report the date and time from it ---
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
time = now.strftime("%H:%M:%S")
print("date and time:",date)
print("time:",time)

# Report the model name and sampling temperature the agent's LLM is using.
print("LLM: ",agent.llm.model_name,"\nTemperature: ",agent.llm.temperature)

date and time: 2024-09-08
time: 17:34:44
LLM:  accounts/fireworks/models/llama-v3p1-70b-instruct 
Temperature:  0.1


In [3]:
# Run the agent on the prompt defined above and keep its return value in `answer`.
answer = agent.run(prompt21)

Thought:

 To download

 and clean protein 

1A3N, I need

 to first download

 the protein structure from

 the Protein Data Bank (P

DB) and then clean

 it by removing any unnecessary

 atoms or residues.

Action:


```
{
  "action": "

PDBFileDownloader",
  "action

_input": "1A3N

"
}
```


PDB file found with this ID: 1A3N


Thought:

 Now that I have downloaded the protein structure

, I need to clean it by removing any unnecessary atoms

 or residues. I will use the CleaningToolFunction to

 remove heterogens, add missing

 atoms and hydrogens, replace non

standard residues, and remove

 water.

Action:
```
{
  "action

": "CleaningToolFunction",
  "action

_input": {
    "pdb_id":

 "1A3N_173446",


    "replace_nonstandard_residues

": true,
    "add_missing_atoms

": true,
    "remove_h

eterogens": true,
    "remove

_water": true,
    "add_hydro

gens": true,
    "add

_hydrogens_ph

": 7.0
  }


}
```

file path: /gpfs/fs2/scratch/qcampbe2/repos/md-agent/ckpt/ckpt_122/pdb/1A3N_raw_20240908_173446.pdb


Thought:

 Now that I have

 cleaned the

 protein structure,

 I need to confirm that

 the cleaning process

 was successful

 and the protein is

 ready for further analysis

.

Action:
```
{
 

 "action": "Final Answer

",
 

 "action_input": "Protein

 1A3N has

 been successfully downloaded

 and cleaned. The cleaned file

 ID is 1A3

N_173503

."
}
```

In [4]:
# Timestamp taken after the agent run (same format as the start-of-run stamp).
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
time = now.strftime("%H:%M:%S")
print("date and time:",date)
print("time:",time)

# Report the checkpoint directory name and the registry's listing of paths.
registry = agent.path_registry
ckpt_name = os.path.basename(registry.ckpt_dir)
print('ckpt_dir:',ckpt_name)
paths_and_descriptions = registry.list_path_names_and_descriptions()
# Put each comma-separated fragment of the listing on its own line.
print(paths_and_descriptions.replace(",", "\n"))

date and time: 2024-09-08
time: 17:35:05
ckpt_dir: ckpt_122
Files found in registry: 1A3N_173446: PDB file downloaded from RSCB
 PDBFile ID: 1A3N_173446
 1A3N_173503: Cleaned File:  Removed Heterogens
 and Water Removed.  Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. 


In [5]:
import re

# The registry listing may mention the same file ID more than once (in the
# recorded output the raw download's ID appears both as the entry name and
# inside its description), so selecting IDs by a fixed position in the raw
# match list is fragile. Deduplicate while preserving first-seen order; the
# first distinct ID is taken as the raw download and the second as the
# cleaned file.
matches = re.findall(r"1A3N_\d+", paths_and_descriptions)
unique_ids = list(dict.fromkeys(matches))
assert len(unique_ids) >= 2, (
    f"Expected at least two distinct 1A3N file IDs in the registry, found {unique_ids}"
)
file_id1, file_id2 = unique_ids[:2]

# Resolve both IDs to filesystem paths and check they are distinct, existing files.
file_path1 = registry.get_mapped_path(file_id1)
file_path2 = registry.get_mapped_path(file_id2)
assert os.path.exists(file_path1), "Path does not exist"
assert os.path.exists(file_path2), "Path does not exist"
assert file_path1 != file_path2
print(f"It's asserted that file paths for {file_id1} and {file_id2} exist")

It's asserted that file paths for 1A3N_173446 and 1A3N_173503 exist


In [6]:
import mdtraj as md

# Load the raw download first, then the cleaned structure, for comparison.
raw_file, clean_file = (md.load(path) for path in (file_path1, file_path2))

In [7]:
# Compare the set of residue names present before and after cleaning.
# Read each residue's `name` attribute instead of slicing str(res): the string
# form also carries the residue number, so a fixed 3-character slice is only
# correct when the name is exactly three characters long.
residues_before = {res.name for res in raw_file.top.residues}
residues_after = {res.name for res in clean_file.top.residues}
print("Residues before:",residues_before)
print("Residues after:",residues_after)
# Names that appear in the raw structure but not in the cleaned one.
print("Residues removed:",residues_before - residues_after)

Residues before: {'LEU', 'HIS', 'THR', 'ALA', 'MET', 'CYS', 'GLN', 'HOH', 'LYS', 'SER', 'HEM', 'GLY', 'PHE', 'GLU', 'ASN', 'TRP', 'TYR', 'PRO', 'ARG', 'ASP', 'VAL'}
Residues after: {'LEU', 'HIS', 'THR', 'ALA', 'MET', 'CYS', 'GLN', 'LYS', 'SER', 'GLY', 'PHE', 'GLU', 'ASN', 'TRP', 'TYR', 'PRO', 'ARG', 'ASP', 'VAL'}
Residues removed: {'HEM', 'HOH'}


# Experiment Result: ✅❌

1. Completed without Exception or TimeOut Errors ✅
2. Attempted all necessary steps ✅
3. Completed without Hallucination ✅
4. Logic makes sense ✅
5. Correct Answer ✅