In [1]:
from mdcrow import MDCrow

In [2]:
# Capture the wall-clock start of this run; `start` is the reference point
# that the duration cell subtracts from once the agent has finished.
import datetime

start = datetime.datetime.now()

# Derive the calendar date and the clock time from the same timestamp.
date = f"{start:%Y-%m-%d}"
time = f"{start:%H:%M:%S}"

print("date:", date)
print("time:", time)

date: 2024-09-09
time: 10:37:20


In [3]:
# The same model identifier is passed for both the agent and its tools.
model = "gpt-4-turbo-2024-04-09"

mda = MDCrow(
    agent_type="Structured",
    model=model,
    tools_model=model,
    use_memory=False,
    top_k_tools="all",
)

# Report the model name and temperature exposed by the agent's LLM object.
print("LLM: ", mda.llm.model_name, "\nTemperature: ", mda.llm.temperature)

LLM:  gpt-4-turbo-2024-04-09 
Temperature:  0.1


In [4]:
# Task handed to the agent: download both structures, then compare them.
prompt = (
    'Download the PDB files for 8PFK and 8PFQ. '
    'Then, compare the secondary structures of the two proteins, '
    'including the number of atoms, secondary structures, number of chains, etc.'
)
answer = mda.run(prompt)

Thought: To address the question, I need to first download the PDB files for the proteins 8PFK and 8PFQ. After obtaining these files, I will analyze their secondary structures and other requested properties.

Action:
```json
{
    "action": "PDBFileDownloader",
    "action_input": {
        "query": "8PFK"
    }
}
```PDB file found with this ID: 8PFK
Now that I have the PDB file for 8PFK, I need to download the PDB file for 8PFQ to proceed with the comparison of their secondary structures and other properties.

Action:
```json
{
    "action": "PDBFileDownloader",
    "action_input": {
        "query": "8PFQ"
    }
}
```PDB file found with this ID: 8PFQ
Now that I have the PDB files for both 8PFK and 8PFQ, I will proceed to analyze their secondary structures and summarize their properties such as the number of atoms, secondary structures, number of chains, etc. I will use the SummarizeProteinStructure tool for both PDB files.

Action:
```json
{
    "action": "SummarizeProteinStructure",

In [5]:
# Wall-clock time elapsed since `start`, reported in minutes.
end = datetime.datetime.now()
elapsed = end - start
elapsed_minutes = elapsed.total_seconds() / 60
print(f"duration: {elapsed_minutes:.2f} minutes")

duration: 0.75 minutes


# Confirm the downloaded files and their structures

In [6]:
# Fetch the agent's path registry and print the names it reports.
registry = mda.path_registry
# The recorded output of this cell is a single line of text, so `all_names`
# is presumably a str rather than a list — TODO confirm against the registry API.
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 8PFK_103726, 8PFQ_103730, rec0_103744, rec0_103750


In [7]:
# Verify that both downloads were registered and that the mapped files exist.
# Each assertion carries a message so a failure says which entry or path was
# missing instead of raising an empty AssertionError.
import re
import os

# Each pattern looks for the PDB id followed by an underscore and digits.
pattern1 = re.compile(r"8PFK_\d+")
pattern2 = re.compile(r"8PFQ_\d+")
match1 = pattern1.search(all_names)
match2 = pattern2.search(all_names)
assert match1, f"no 8PFK entry found in path registry: {all_names!r}"
assert match2, f"no 8PFQ entry found in path registry: {all_names!r}"
print('It is asserted that 8PFK and 8PFQ are in path_registry')

# Resolve each matched registry name to its mapped path and check it exists.
path1 = registry.get_mapped_path(match1.group(0))
path2 = registry.get_mapped_path(match2.group(0))
assert os.path.exists(path1), f"8PFK path does not exist: {path1!r}"
assert os.path.exists(path2), f"8PFQ path does not exist: {path2!r}"
print('It is asserted these pdb files physically exist')

It is asserted that 8PFK and 8PFQ are in path_registry
It is asserted these pdb files physically exist


In [8]:
# Load each downloaded structure with MDTraj and print its chain and atom
# counts. The second heading starts with a newline to separate the two reports.
import mdtraj as md

for heading, pdb_path in (('protein 8PFK', path1), ('\nprotein 8PFQ', path2)):
    traj = md.load(pdb_path)
    top = traj.topology
    number_of_chains = top.n_chains
    number_of_atoms = top.n_atoms
    print(heading)
    print('Number of chains: ', number_of_chains)
    print('Number of atoms: ', number_of_atoms)

protein 8PFK
Number of chains:  2
Number of atoms:  487

protein 8PFQ
Number of chains:  6
Number of atoms:  950


# Experiment Result:

1. Completed without Exception or TimeOut Errors ✅
2. Attempted all necessary steps ✅
3. No hallucination ✅
4. Logic makes sense ✅
5. Correct Answer ✅
6. Number of steps completed: 4
7. % steps completed: 100%