In [1]:
import datetime
import os
from mdcrow import MDCrow

In [2]:
# Task given to the agent: download 1GZX, run a secondary-structure analysis,
# and look up the gene names.
prompt3 = (
    "Download the PDB file for protein 1GZX. "
    "Then, analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. "
    "Get the gene names for this protein."
)

# Agent configuration: model identifier and tool selection passed to MDCrow.
llm_var = "accounts/fireworks/models/llama-v3p1-405b-instruct"
tools = "all"
agent = MDCrow(
    agent_type="Structured",
    model=llm_var,
    top_k_tools=tools,
)

# Record when the run started. The first label reads "date and time:" but
# only the date is printed there; the clock time follows on its own line.
now = datetime.datetime.now()
date = format(now, "%Y-%m-%d")
time = format(now, "%H:%M:%S")
print("date and time:", date)
print("time:", time)

# Echo the model name and temperature actually held by the agent's LLM object.
print(
    "LLM: ",
    agent.llm.model_name,
    "\nTemperature: ",
    agent.llm.temperature,
)

date and time: 2024-09-28
time: 11:34:52
LLM:  accounts/fireworks/models/llama-v3p1-405b-instruct 
Temperature:  0.1


In [3]:
# Execute the agent on prompt3; as the last expression in the cell, the value
# returned by run() is what the notebook displays beneath the agent's log.
agent.run(prompt3)

Thought: First, I need to download the PDB file for protein 1GZX. Then, I can analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. Finally, I will get the gene names for this protein.

Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": "1GZX"
}
```
PDB file found with this ID: 1GZX
Now that I have the PDB file, I can analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present.

Action:
```
{
  "action": "ComputeDSSP",
  "action_input": {
    "traj_file": "1GZX_113456",
    "top_file": "1GZX_113456",
    "target_frames": "all"
  }
}
```

(I will wait for the result of this action before proceeding)Now that I have the secondary structure analysis, I can get the gene names for this protein.

Action:
```
{
  "action": "GetGeneNames",
  "action_input": {
    "query": "1GZX",
    "primary_accession": ""
  }
}
```

(I will wait

({'input': '\n    You are an expert molecular dynamics scientist, and\n    your task is to respond to the question or\n    solve the problem to the best of your ability using\n    the provided tools.\n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress and decide what to do next)\n    Action:\n    ```\n    {\n        "action": (the action name, it should be the name of a tool),\n        "action_input": (the input string for the action)\n    }\n    \'\'\'\n\n    OR\n\n    Final Answer: (the final response to the original input\n    question, once all steps are complete)\n\n    You are required to use the tools provided,\n    using the most specific tool\n    available for each action.\n    Your final answer should contain all information\n    necessary to answer the question and its subquestions.\n    Before you finish, reflect on your pro

In [4]:
# Second timestamp, taken after the agent run, so the elapsed wall-clock time
# can be read off by comparing with the timestamp printed before the run.
# The "date and time:" label prints only the date; the time is printed next.
now = datetime.datetime.now()
date = format(now, "%Y-%m-%d")
time = format(now, "%H:%M:%S")
print("date and time:", date)
print("time:", time)

date and time: 2024-09-28
time: 11:35:02


In [5]:
# Inspect what the agent registered during the run.
registry = agent.path_registry

# Only the final path component of the checkpoint directory is shown.
ckpt_name = os.path.basename(registry.ckpt_dir)
print('ckpt:', ckpt_name)

# The registry listing is split on commas; print one entry per line by
# turning every comma into a newline (text after each comma is left as-is).
paths_and_descriptions = registry.list_path_names_and_descriptions()
print(paths_and_descriptions.replace(",", "\n"))

ckpt: ckpt_12
Files found in registry: 1GZX_113456: PDB file downloaded from RSCB
 PDBFile ID: 1GZX_113456
 rec0_113459: dssp values for trajectory with id: 1GZX_113456


In [7]:
import mdtraj as md

# Independently recompute DSSP on the file the agent downloaded, to compare
# against what the agent reported.
path = registry.get_mapped_path("1GZX_113456")
traj = md.load(path)
top = traj.topology

secondary_structure = md.compute_dssp(traj, simplified=True)

# Only the first frame is tallied. Each message is paired with the simplified
# DSSP code it counts ('E', 'H', 'C').
first_frame_codes = secondary_structure[0]
for dssp_message, dssp_code in (
    ("Number of residues in sheets: ", 'E'),
    ("Number of residues in helices: ", 'H'),
    ("Number of residues in coils: ", 'C'),
):
    print(dssp_message, sum(1 for assigned in first_frame_codes if assigned == dssp_code))

Number of residues in sheets:  0
Number of residues in helices:  444
Number of residues in coils:  130
