In [1]:
from mdagent import MDAgent

In [2]:
# Record the moment the notebook run begins; `start` is reused further down
# to report how long the run took.
import datetime

start = datetime.datetime.now()
date, time = start.strftime("%Y-%m-%d"), start.strftime("%H:%M:%S")
print("date:", date)
print("time:", time)

date: 2024-07-10
time: 19:37:56


In [3]:
# The same model name is used for the agent itself and for its tools.
model = 'gpt-4o'

# Collect the constructor arguments in one place so they are easy to scan/tune.
agent_settings = {
    "agent_type": 'Structured',
    "model": model,
    "tools_model": model,
    "use_memory": False,
    "top_k_tools": "all",
}
mda = MDAgent(**agent_settings)

# Echo the LLM actually configured on the agent, together with its temperature.
print("LLM: ", mda.llm.model_name, "\nTemperature: ", mda.llm.temperature)

LLM:  gpt-4o 
Temperature:  0.1


In [4]:
# Task handed to the agent: download the two PDB entries, then compare them.
# The text below is passed to the agent exactly as written, so it is kept verbatim.
prompt = '''Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structure of the two proteins, 
including number atoms, secondary structure, number of chains, etc..'''
# Run the agent on the prompt; whatever `run` returns is kept in `answer`.
answer = mda.run(prompt)

Thought: To address the question, I need to first download the PDB files for the proteins 8PFK and 8PFQ. Then, I will analyze and compare their secondary structures, including the number of atoms, secondary structure, number of chains, etc.

Action: 
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFK"
}
```PDB file found with this ID: 8PFK


2024-07-10 19:37:58.741 
  command:

    streamlit run /scratch/qcampbe2/myenvs/mdagent/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


Thought: I need to download the PDB file for the second protein, 8PFQ, before proceeding with the analysis and comparison.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFQ"
}
```PDB file found with this ID: 8PFQ
Now that I have successfully downloaded the PDB files for both proteins, 8PFK and 8PFQ, I will proceed with analyzing their secondary structures, including the number of atoms, secondary structure, number of chains, etc.

Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFK_193758"
  }
}
```
To complete the analysis, I need to summarize the protein structure for the second protein, 8PFQ, and then compare the results.

Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFQ_193800"
  }
}
```Now that I have the structural summaries for both proteins, I need to analyze their secondary structures to provide a comprehensive comparison. I will use the ComputeDSSP tool to dete

In [5]:
# Wall-clock time since `start` was recorded at the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
elapsed_minutes = elapsed.total_seconds() / 60
print(f"duration: {elapsed_minutes:.2f} minutes")

duration: 0.33 minutes


# Confirm the downloaded files and their structures

In [8]:
# Take the agent's path registry and print the names registered in it.
# `registry` and `all_names` are reused by the checks in the following cells.
registry = mda.path_registry
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 8PFK_193758, 8PFQ_193800, rec0_193809, rec0_193810


In [13]:
import re
import os

# Registry names in the listing printed above look like "<PDB ID>_<digits>"
# (e.g. 8PFK_193758), so search for that shape instead of a fixed name.
pattern1 = re.compile(r"8PFK_\d+")
pattern2 = re.compile(r"8PFQ_\d+")
match1 = pattern1.search(all_names)
match2 = pattern2.search(all_names)

# Each assertion carries a message so a failure says which entry was missing
# and what was actually searched, instead of a bare AssertionError.
assert match1, f"no 8PFK_<digits> entry found in path_registry names: {all_names!r}"
assert match2, f"no 8PFQ_<digits> entry found in path_registry names: {all_names!r}"
print('It is asserted that 8PFK and 8PFQ are in path_registry')

# Resolve the matched registry names to paths; `path1`/`path2` are used by the
# structure-loading cell that follows.
path1 = registry.get_mapped_path(match1.group(0))
path2 = registry.get_mapped_path(match2.group(0))
assert os.path.exists(path1), f"registry entry {match1.group(0)} maps to a missing file: {path1}"
assert os.path.exists(path2), f"registry entry {match2.group(0)} maps to a missing file: {path2}"
print('It is asserted these pdb files physically exist')

It is asserted that 8PFK and 8PFQ are in path_registry
It is asserted these pdb files physically exist


In [20]:
import mdtraj as md

# Load each downloaded structure in turn and report its chain and atom counts.
# The headings are the exact strings printed before; the second one starts with
# a newline so a blank line separates the two reports.
for heading, pdb_path in (('protein 8PFK', path1), ('\nprotein 8PFQ', path2)):
    traj = md.load(pdb_path)
    top = traj.topology
    number_of_chains = top.n_chains
    number_of_atoms = top.n_atoms
    print(heading)
    print('Number of chains: ', number_of_chains)
    print('Number of atoms: ', number_of_atoms)

protein 8PFK
Number of chains:  2
Number of atoms:  487

protein 8PFQ
Number of chains:  6
Number of atoms:  950
