In [1]:
from mdcrow import MDCrow
import os

In [2]:
# Two phrasings of the same task for PDB entry 1LYZ:
# an explicitly enumerated ("descriptive") prompt and a terse one.
descriptive_prompt_2 = (
    "Complete all of the following tasks: "
    "1. Download the PDB file 1LYZ. "
    "2. Tell me how many residues are in the chains and beta sheets."
)
non_descriptive_prompt_2 = (
    "Download the PDB file 1LYZ. "
    "Tell me how many residues are in the chains and beta sheets."
)

In [3]:
# Settings shared by both agent runs in this notebook.
# `tools` is forwarded to MDCrow as `top_k_tools`; presumably "all" exposes
# every available tool -- confirm against the MDCrow documentation.
tools = "all"
# Model identifier forwarded to MDCrow as `model`.
llm_model = "gpt-4o-2024-08-06"

In [4]:
# Record the date and time at which this notebook run started.
# NOTE: the names `date` and `time` are kept as in the original cell; they
# would shadow same-named stdlib imports if those were ever added.
import datetime
now = datetime.datetime.now()
date, time = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
print("date:", date)
print("time:", time)

date: 2024-10-18
time: 00:50:38


# descriptive prompt

In [5]:
# Agent used for the descriptive prompt, built from the shared
# `llm_model` / `tools` settings.
agent_1 = MDCrow(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
)

In [6]:
# Run the agent on the descriptive prompt. The call is left as the cell's
# last expression so that its return value is displayed in the cell output.
agent_1.run(descriptive_prompt_2)

Thought: To complete the tasks, I need to first download the PDB file for 1LYZ. After downloading, I will analyze the structure to determine the number of residues in the chains and beta sheets.

Action:
```json
{
  "action": "PDBFileDownloader",
  "action_input": "1LYZ"
}
```PDB file found with this ID: 1LYZ
To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file for 1LYZ.

Action:
```json
{
  "action": "SummarizeProteinStructure",
  "action_input": {
    "traj_file": "1LYZ_005041"
  }
}
```Action:
```json
{
  "action": "ComputeDSSP",
  "action_input": {
    "traj_file": "1LYZ_005041",
    "target_frames": "all"
  }
}
```To complete the tasks, I need to provide the number of residues in the chains and beta sheets based on the observations from the DSSP analysis.

Final Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. According to the DSSP analysis, there are 14 residues in beta sheets (strands).

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [7]:
# Inspect what agent_1 saved: its checkpoint directory name and the
# files it registered.
registry = agent_1.path_registry
print("ckpt_dir:", os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
# The listing is a single comma-separated string; show one entry per line.
print(paths_and_descriptions.replace(",", "\n"))

ckpt_dir: ckpt_21
Files found in registry: 1LYZ_005041: PDB file downloaded from RSCB
 PDBFile ID: 1LYZ_005041
 rec0_005045: dssp values for trajectory with id: 1LYZ_005041


In [8]:
import mdtraj as md
import os
import re
from collections import Counter

# Independent check of agent_1's answer: locate the PDB file the agent
# registered, reload it with mdtraj and recompute the secondary structure.

# The registry listing contains an ID of the form "1LYZ_<digits>".
match = re.search(r"1LYZ_\d+", paths_and_descriptions)
if match is None:
    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `match.group`.
    raise ValueError("No 1LYZ file ID found in the path registry listing")
file_id = match.group(0)
pdb_path = registry.get_mapped_path(file_id)
assert os.path.exists(pdb_path)
print(f'It is asserted that file path for {file_id} exists')

traj = md.load(pdb_path)
number_of_chains = traj.n_chains
# Simplified DSSP assigns one code per residue per frame
# ('H' helix, 'E' strand, 'C' coil).
secondary_structure = md.compute_dssp(traj,simplified=True)
# Tally the codes of the first frame in a single pass; a code that never
# occurs counts as 0.
dssp_counts = Counter(map(str, secondary_structure[0]))
print("Number of chains: ",number_of_chains)
# NOTE: despite the labels, the three values below are counts of RESIDUES
# assigned to each DSSP class, not counts of distinct sheets/helices/coils.
print("Number of sheets: ",dssp_counts['E'])
print("Number of helices: ",dssp_counts['H'])
print("Number of coils: ",dssp_counts['C'])

It is asserted that file path for 1LYZ_005041 exists
Number of chains:  2
Number of sheets:  14
Number of helices:  50
Number of coils:  65


# non-descriptive prompt

In [9]:
# Agent used for the non-descriptive prompt, built from the shared
# `llm_model` / `tools` settings.
agent_2 = MDCrow(
    agent_type="Structured",
    model=llm_model,
    top_k_tools=tools,
)

In [10]:
# Run the agent on the non-descriptive prompt. The call is left as the cell's
# last expression so that its return value is displayed in the cell output.
agent_2.run(non_descriptive_prompt_2)

Thought: To answer the question, I need to first download the PDB file for 1LYZ. Then, I will analyze the structure to determine the number of residues in the chains and beta sheets. I will start by downloading the PDB file.

Action:
```json
{
  "action": "PDBFileDownloader",
  "action_input": "1LYZ"
}
```PDB file found with this ID: 1LYZ
To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file. I will use the tool to get the number and location of turns, beta sheets, and helices for the protein.

Action:
```json
{
  "action": "GetTurnsBetaSheetsHelices",
  "action_input": {
    "query": "1LYZ",
    "primary_accession": ""
  }
}
```To accurately determine the number of residues in the chains and beta sheets, I need to analyze the PDB file directly for structural information. I will summarize the protein structure to get the number of residues in the chains and then specifically look for beta sheets.

Action:
```json
{
  "action": "Sum

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [11]:
# Inspect what agent_2 saved: its checkpoint directory name and the
# files it registered. This rebinds `registry` and `paths_and_descriptions`
# to agent_2's values.
registry = agent_2.path_registry
print("ckpt_dir:", os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
# The listing is a single comma-separated string; show one entry per line.
print(paths_and_descriptions.replace(",", "\n"))

ckpt_dir: ckpt_22
Files found in registry: 1LYZ_005049: PDB file downloaded from RSCB
 PDBFile ID: 1LYZ_005049
 rec0_005057: dssp values for trajectory with id: 1LYZ_005049


In [12]:
import mdtraj as md
import os
import re
from collections import Counter

# Independent check of agent_2's answer: locate the PDB file the agent
# registered, reload it with mdtraj and recompute the secondary structure.

# The registry listing contains an ID of the form "1LYZ_<digits>".
match = re.search(r"1LYZ_\d+", paths_and_descriptions)
if match is None:
    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `match.group`.
    raise ValueError("No 1LYZ file ID found in the path registry listing")
file_id = match.group(0)
pdb_path = registry.get_mapped_path(file_id)
assert os.path.exists(pdb_path)
print(f'It is asserted that file path for {file_id} exists')

traj = md.load(pdb_path)
number_of_chains = traj.n_chains
# Simplified DSSP assigns one code per residue per frame
# ('H' helix, 'E' strand, 'C' coil).
secondary_structure = md.compute_dssp(traj,simplified=True)
# Tally the codes of the first frame in a single pass; a code that never
# occurs counts as 0.
dssp_counts = Counter(map(str, secondary_structure[0]))
print("Number of chains: ",number_of_chains)
# NOTE: despite the labels, the three values below are counts of RESIDUES
# assigned to each DSSP class, not counts of distinct sheets/helices/coils.
print("Number of sheets: ",dssp_counts['E'])
print("Number of helices: ",dssp_counts['H'])
print("Number of coils: ",dssp_counts['C'])

It is asserted that file path for 1LYZ_005049 exists
Number of chains:  2
Number of sheets:  14
Number of helices:  50
Number of coils:  65
