In [7]:
from mdcrow import MDCrow
import os

In [2]:
#todays date and time
import datetime

start = datetime.datetime.now()
date = start.strftime("%Y-%m-%d")
print("date:",date)
time = start.strftime("%H:%M:%S")
print("time:",time)

date: 2024-08-19
time: 15:48:43


In [3]:
model="accounts/fireworks/models/llama-v3p1-405b-instruct"

agent = MDCrow(
    agent_type='Structured', 
    model=model, 
    top_k_tools="all"
)
print("LLM: ", agent.llm.model_name,"\nTemperature: ", agent.llm.temperature)

LLM:  accounts/fireworks/models/llama-v3p1-405b-instruct 
Temperature:  0.1


In [4]:
prompt11 = "Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structures of the two proteins, including the number of atoms, secondary structures, number of chains, etc."
answer = agent.run(prompt11)

Thought: First, I need to download the PDB files for 8PFK and 8PFQ. Then, I can compare their secondary structures.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFK"
}
```
PDB file found with this ID: 8PFK
Now that I have downloaded the PDB file for 8PFK, I need to download the PDB file for 8PFQ.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFQ"
}
```

(I will wait for the observation before proceeding)PDB file found with this ID: 8PFQ
Now that I have downloaded the PDB files for 8PFK and 8PFQ, I can compare their secondary structures.

Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFK_154847",
    "top_file": "8PFK_154847"
  }
}
```
(I will wait for the observation before proceeding)Now that I have summarized the protein structure for 8PFK, I need to summarize the protein structure for 8PFQ.

Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFQ_154

In [5]:
end = datetime.datetime.now()
elapsed = end - start
print(f"duration: {elapsed.total_seconds()/60:.2f} minutes")

duration: 0.31 minutes


In [8]:
registry = agent.path_registry
print(os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
print("\n".join(paths_and_descriptions.split(",")))

ckpt_92
Files found in registry: 8PFK_154847: PDB file downloaded from RSCB
 PDBFile ID: 8PFK_154847
 8PFQ_154850: PDB file downloaded from RSCB
 PDBFile ID: 8PFQ_154850
 rec0_154856: dssp values for trajectory with id: 8PFK_154847
 rec0_154859: dssp values for trajectory with id: 8PFQ_154850


In [10]:
import re
import os
match = re.search(r"8PFK_\d+", paths_and_descriptions)
file_id1 = match.group(0)
path1 = registry.get_mapped_path(file_id1)
assert os.path.exists(path1)

match = re.search(r"8PFQ_\d+", paths_and_descriptions)
file_id2 = match.group(0)
path2 = registry.get_mapped_path(file_id2)
assert os.path.exists(path2)
print('It is asserted that pdb files for 8PFK and 8PFQ exist')

It is asserted that pdb files for 8PFK and 8PFQ exist


In [13]:
import mdtraj as md
traj = md.load(path1)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('protein 8PFK')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)
secondary_structure = md.compute_dssp(traj,simplified=True)
print("Number of residues in sheets: ",len([i for i in secondary_structure[0] if i == 'E']))
print("Number of residues in helices: ",len([i for i in secondary_structure[0] if i == 'H']))
print("Number of residues in coils: ",len([i for i in secondary_structure[0] if i == 'C']))

traj = md.load(path2)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('\nprotein 8PFQ')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)
secondary_structure = md.compute_dssp(traj,simplified=True)
print("Number of residues in sheets: ",len([i for i in secondary_structure[0] if i == 'E']))
print("Number of residues in helices: ",len([i for i in secondary_structure[0] if i == 'H']))
print("Number of residues in coils: ",len([i for i in secondary_structure[0] if i == 'C']))

protein 8PFK
Number of chains:  2
Number of atoms:  487
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0

protein 8PFQ
Number of chains:  6
Number of atoms:  950
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0


# Experiment Result: ✅❌

1. Completed without Exception or TimeOut Errors ✅
2. Attempted all necessary steps ✅
3.  Completed without Hallucination ✅
4. Logic make sense ✅
5.  Correct Answer ✅