In [1]:
from mdcrow import MDCrow
import os

In [2]:
# Model and tool selection for this run.
llm_var = "claude-3-opus-20240229"
tools = "all"

# Task handed to the agent. Adjacent string literals are concatenated by the
# parser, so this is a single prompt string.
prompt11 = (
    "Download the PDB files for 8PFK and 8PFQ. "
    "Then, compare the secondary structures of the two proteins, "
    "including the number of atoms, secondary structures, number of chains, etc."
)

agent = MDCrow(
    agent_type="Structured",
    model=llm_var,
    top_k_tools=tools,
    use_memory=False,
)
print(llm_var)

claude-3-opus-20240229


In [3]:
# Run the agent on the prompt defined above; the return value is kept in `answer`.
answer = agent.run(prompt11)

Thought: To compare the secondary structures of 8PFK and 8PFQ, I first need to download the PDB files for each protein. I can use the PDBFileDownloader tool to do this.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFK"
}
```

PDB file found with this ID: 8PFK
Thought: Now that I have downloaded the PDB file for 8PFK, I need to download the PDB file for 8PFQ as well using the PDBFileDownloader tool.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "8PFQ"
}
```

PDB file found with this ID: 8PFQ
Thought: Now that I have downloaded the PDB files for both 8PFK and 8PFQ, I can use the SummarizeProteinStructure tool to get information about the number of atoms, residues, chains, frames, and bonds in each protein. I'll start with 8PFK.

Action:
```
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "8PFK_232021",
    "top_file": "8PFK_232021"
  }
}
```

Thought: Now that I have summarized the structure of 8PFK, I should do the

In [4]:
# Print the name of the checkpoint directory used by this run, then the
# registry listing with each comma-separated field on its own line.
registry = agent.path_registry
ckpt_name = os.path.basename(registry.ckpt_dir)
print(ckpt_name)
paths_and_descriptions = registry.list_path_names_and_descriptions()
print(paths_and_descriptions.replace(",", "\n"))

ckpt_39
Files found in registry: 8PFK_232021: PDB file downloaded from RSCB
 PDBFile ID: 8PFK_232021
 8PFQ_232027: PDB file downloaded from RSCB
 PDBFile ID: 8PFQ_232027
 rec0_232050: dssp values for trajectory with id: 8PFK_232021
 rec0_232058: dssp values for trajectory with id: 8PFQ_232027


In [11]:
import re

from mdcrow.tools.base_tools.analysis_tools.secondary_structure import SummarizeProteinStructure

summarizer = SummarizeProteinStructure(registry)

# Look the file IDs up in the registry listing instead of hard-coding them.
# The numeric suffix is not fixed (it is matched with `\d+` elsewhere in this
# notebook), so literal IDs such as "8PFK_232021" go stale on a fresh run.
for pdb_code in ("8PFK", "8PFQ"):
    id_match = re.search(rf"{pdb_code}_\d+", paths_and_descriptions)
    if id_match is None:
        raise LookupError(f"No registry entry found for {pdb_code}")
    print (summarizer._run(id_match.group(0)))

{'n_atoms': 487, 'n_residues': 109, 'n_chains': 2, 'n_frames': 1, 'n_bonds': 420}
{'n_atoms': 950, 'n_residues': 194, 'n_chains': 6, 'n_frames': 1, 'n_bonds': 833}


In [5]:
import re
import os
# Resolve the registry ID and on-disk path of each downloaded PDB file.
# re.search returns None when the listing has no matching entry, so check
# for that explicitly rather than failing with an AttributeError on .group().
match = re.search(r"8PFK_\d+", paths_and_descriptions)
if match is None:
    raise LookupError("No 8PFK file ID found in the registry listing")
file_id1 = match.group(0)
path1 = registry.get_mapped_path(file_id1)
assert os.path.exists(path1), f"Registry path for {file_id1} does not exist: {path1}"

match = re.search(r"8PFQ_\d+", paths_and_descriptions)
if match is None:
    raise LookupError("No 8PFQ file ID found in the registry listing")
file_id2 = match.group(0)
path2 = registry.get_mapped_path(file_id2)
assert os.path.exists(path2), f"Registry path for {file_id2} does not exist: {path2}"

In [6]:
import mdtraj as md
# Report chain/atom counts and a simplified DSSP breakdown for both structures.
# One loop replaces the two copy-pasted stanzas; the variable names are kept so
# that, as before, they hold the values for the second structure afterwards.
for header, pdb_path in (("protein 8PFK", path1), ("\nprotein 8PFQ", path2)):
    traj = md.load(pdb_path)
    top = traj.topology
    number_of_chains = top.n_chains
    number_of_atoms = top.n_atoms
    print(header)
    print('Number of chains: ', number_of_chains)
    print('Number of atoms: ', number_of_atoms)
    secondary_structure = md.compute_dssp(traj,simplified=True)
    # Only the first frame is summarised.
    frame_codes = secondary_structure[0].tolist()
    print("Number of residues in sheets: ", frame_codes.count('E'))
    print("Number of residues in helices: ", frame_codes.count('H'))
    print("Number of residues in coils: ", frame_codes.count('C'))
    # mdtraj labels residues that are not protein residues (e.g. water) as
    # 'NA'. Reporting them explains a case where sheets/helices/coils are all 0.
    print("Number of residues not assigned (non-protein): ", frame_codes.count('NA'))

protein 8PFK
Number of chains:  2
Number of atoms:  487
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0

protein 8PFQ
Number of chains:  6
Number of atoms:  950
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0
