In [1]:
from mdcrow import MDCrow
import os

In [2]:
prompt11 = "Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structures of the two proteins, including the number of atoms, secondary structures, number of chains, etc."
llm_var = "claude-3-5-sonnet-20240620"
tools = "all"
agent = MDCrow(agent_type="Structured", model=llm_var, top_k_tools=tools, use_memory=False)
print(llm_var)

claude-3-5-sonnet-20240620


In [3]:
answer = agent.run(prompt11)

Thought: To answer this question, I need to first download the PDB files for 8PFK and 8PFQ, and then analyze their structures. Let's start by downloading the PDB file for 8PFK.

Action:
```
{
    "action": "PDBFileDownloader",
    "action_input": "8PFK"
}
```PDB file found with this ID: 8PFK
Thought: Great, we've successfully downloaded the PDB file for 8PFK. Now, let's download the PDB file for 8PFQ.

Action:
```
{
    "action": "PDBFileDownloader",
    "action_input": "8PFQ"
}
```PDB file found with this ID: 8PFQ
Thought: Now that we have both PDB files downloaded, we can analyze their structures. Let's start by summarizing the protein structure for 8PFK.

Action:
```
{
    "action": "SummarizeProteinStructure",
    "action_input": {
        "traj_file": "8PFK_214903",
        "top_file": "8PFK_214903"
    }
}
```Thought: We've successfully summarized the protein structure for 8PFK. Now, let's do the same for 8PFQ to compare them.

Action:
```
{
    "action": "SummarizeProteinStructu

In [4]:
registry = agent.path_registry
print(os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
print("\n".join(paths_and_descriptions.split(",")))

ckpt_26
Files found in registry: 8PFK_214903: PDB file downloaded from RSCB
 PDBFile ID: 8PFK_214903
 8PFQ_214905: PDB file downloaded from RSCB
 PDBFile ID: 8PFQ_214905
 rec0_214912: dssp values for trajectory with id: 8PFK_214903
 rec0_214914: dssp values for trajectory with id: 8PFQ_214905


In [5]:
import re
import os
match = re.search(r"8PFK_\d+", paths_and_descriptions)
file_id1 = match.group(0)
path1 = registry.get_mapped_path(file_id1)
assert os.path.exists(path1)

match = re.search(r"8PFQ_\d+", paths_and_descriptions)
file_id2 = match.group(0)
path2 = registry.get_mapped_path(file_id2)
assert os.path.exists(path2)

In [6]:
import mdtraj as md
traj = md.load(path1)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('protein 8PFK')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)
secondary_structure = md.compute_dssp(traj,simplified=True)
print("Number of residues in sheets: ",len([i for i in secondary_structure[0] if i == 'E']))
print("Number of residues in helices: ",len([i for i in secondary_structure[0] if i == 'H']))
print("Number of residues in coils: ",len([i for i in secondary_structure[0] if i == 'C']))

traj = md.load(path2)
top = traj.topology
number_of_chains = top.n_chains
number_of_atoms = top.n_atoms
print('\nprotein 8PFQ')
print('Number of chains: ', number_of_chains)
print('Number of atoms: ', number_of_atoms)
secondary_structure = md.compute_dssp(traj,simplified=True)
print("Number of residues in sheets: ",len([i for i in secondary_structure[0] if i == 'E']))
print("Number of residues in helices: ",len([i for i in secondary_structure[0] if i == 'H']))
print("Number of residues in coils: ",len([i for i in secondary_structure[0] if i == 'C']))

protein 8PFK
Number of chains:  2
Number of atoms:  487
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0

protein 8PFQ
Number of chains:  6
Number of atoms:  950
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0
