In [1]:
from mdagent import MDAgent

In [2]:
#todays date and time
import datetime

start = datetime.datetime.now()
date = start.strftime("%Y-%m-%d")
print("date:",date)
time = start.strftime("%H:%M:%S")
print("time:",time)

date: 2024-07-11
time: 11:12:26


In [3]:
model = "gpt-3.5-turbo-0125"
mda = MDAgent(
    agent_type='Structured', 
    model=model, 
    tools_model=model,
    use_memory=False, 
    top_k_tools="all"
)
print("LLM: ", mda.llm.model_name,"\nTemperature: ", mda.llm.temperature)

LLM:  gpt-3.5-turbo-0125 
Temperature:  0.1


In [4]:
prompt = '''Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structure of the two proteins, 
including number atoms, secondary structure, number of chains, etc..'''
answer = mda.run(prompt)

Thought: To compare the secondary structure of the proteins 8PFK and 8PFQ, I need to download their PDB files and analyze the secondary structure information.

Action: PDBFileDownloader
Action Input: 
```
{
  "query": "8PFK"
}
```

```
{
  "action": "PDBFileDownloader",
  "action_input": {
    "query": "8PFK"
  }
}
```PDB file found with this ID: 8PFK


2024-07-11 11:12:29.492 
  command:

    streamlit run /Users/jorgemedina/miniforge3/envs/mdagent2/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": {
    "query": "8PFQ"
  }
}
```PDB file found with this ID: 8PFQ
Action:
```
{
  "action": "ComputeDSSP",
  "action_input": {
    "traj_file": "8PFK_111229",
    "top_file": null
  }
}
```Action:
```
{
  "action": "ComputeDSSP",
  "action_input": {
    "traj_file": "8PFQ_111231",
    "top_file": null
  }
}
```Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFK_111229",
    "top_file": null
  }
}
```Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFQ_111231",
    "top_file": null
  }
}
```Action:
```
{
  "action": "Final Answer",
  "action_input": "The secondary structure comparison between proteins 8PFK and 8PFQ is as follows:\n\n8PFK:\n- Number of atoms: 487\n- Number of residues: 109\n- Number of chains: 2\n- Secondary structure: No helices, strands, or coils identified\n\n8PFQ:\n- Number of atoms: 950\n- Number of residues: 194\

In [5]:
end = datetime.datetime.now()
elapsed = end - start
print(f"duration: {elapsed.total_seconds()/60:.2f} minutes")

duration: 0.21 minutes


# Confirm the downloaded files and their structures

In [6]:
registry = mda.path_registry
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 8PFK_111229, 8PFQ_111231, rec0_111232, rec0_111233


In [7]:
import re
import os
pattern1 = re.compile(r"8PFK_\d+")
pattern2 = re.compile(r"8PFQ_\d+")
match1 = pattern1.search(all_names)
match2 = pattern2.search(all_names)
assert match1
assert match2
print('It is asserted that 8PFK and 8PFQ are in path_registry')
path1 = registry.get_mapped_path(match1.group(0))
path2 = registry.get_mapped_path(match2.group(0))
assert os.path.exists(path1)
assert os.path.exists(path2)
print('It is asserted these pdb files physically exist')

It is asserted that 8PFK and 8PFQ are in path_registry
It is asserted these pdb files physically exist


In [8]:
import mdtraj as md
traj = md.load(path1)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('protein 8PFK')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)
traj = md.load(path2)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('\nprotein 8PFQ')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)

protein 8PFK
Number of chains:  2
Number of atoms:  487

protein 8PFQ
Number of chains:  6
Number of atoms:  950


In [13]:
#get the secondary structure of the proteins
traj = md.load(path1)
ss = md.compute_dssp(traj)
print('protein 8PFK')

print("Number of sheets: ",len([i for i in ss.flatten() if i == 'E']))
print("Number of helices: ",len([i for i in ss.flatten() if i == 'H']))
print("Number of coils: ",len([i for i in ss.flatten() if i == 'C']))

traj = md.load(path2)
print('\nprotein 8PFQ')
ss = md.compute_dssp(traj)
print("Number of sheets: ",len([i for i in ss.flatten() if i == 'E']))
print("Number of helices: ",len([i for i in ss.flatten() if i == 'H']))
print("Number of coils: ",len([i for i in ss.flatten() if i == 'C']))
      

protein 8PFK
Number of sheets:  0
Number of helices:  0
Number of coils:  0

protein 8PFQ
Number of sheets:  0
Number of helices:  0
Number of coils:  0


# Experiment Result:
### Completed without Exception or TimeOut Errors ✅
### Attempted all necessary steps ✅
### Logic make sense ✅
### Correct Answer  ✅