In [1]:
from mdcrow import MDCrow
import os

In [2]:
# Capture the moment this run begins; `start` is reused later to report the
# total duration, and the date/time are echoed so they appear in the output.
import datetime

start = datetime.datetime.now()
date = f"{start:%Y-%m-%d}"
print("date:", date)
time = f"{start:%H:%M:%S}"
print("time:", time)

date: 2024-09-08
time: 17:41:15


In [3]:
# Identifier of the LLM that backs the agent for this experiment.
model = "accounts/fireworks/models/llama-v3p1-70b-instruct"

# Build the agent under test with the 'Structured' agent type and
# top_k_tools set to "all".
agent = MDCrow(agent_type="Structured", model=model, top_k_tools="all")

# Echo the model name and temperature the agent reports, so the settings
# are recorded alongside the results.
print("LLM: ", agent.llm.model_name, "\nTemperature: ", agent.llm.temperature)

LLM:  accounts/fireworks/models/llama-v3p1-70b-instruct 
Temperature:  0.1


In [4]:
# Task given to the agent. The literal is split across lines for readability;
# adjacent string literals are concatenated into the same single prompt.
prompt11 = (
    "Download the PDB files for 8PFK and 8PFQ. "
    "Then, compare the secondary structures of the two proteins, "
    "including the number of atoms, secondary structures, number of chains, etc."
)
answer = agent.run(prompt11)

Thought:

 To solve this problem, I need

 to first download the PDB files for 8

PFK and 8PFQ using

 the PDBFileDownloader tool. Then,

 I can use the SummarizePro

teinStructure tool to compare

 the secondary structures of the two proteins.

Action:


```
{
  "action": "P

DBFileDownloader",
  "action_input":

 "8PFK"
}
```


PDB file found with this ID: 8PFK


Thought:

 Now that I have downloaded the PDB

 file for 8PFK, I

 need to download the PDB file for

 8PFQ using the PDB

FileDownloader

 tool.

Action:
```
{
 

 "action": "PDBFileDownloader",
 

 "action_input": "8PFQ

"
}
```


PDB file found with this ID: 8PFQ


Thought:

 Now that I have downloaded the PDB

 files for both 8PFK and 

8PFQ,

 I can use the SummarizePro

teinStructure tool to compare the secondary structures of the

 two proteins.

Action:
```
{
 

 "action": "SummarizePro

teinStructure",
  "action_input": {


    "traj_file": "8

PFK_174117",
    "top

_file": "8PFK_174117"


  }
}
```



Thought:

 Now that I have the summary of

 the protein structure for 8PFK,

 I need to get the summary of the protein structure

 for 8PFQ using the Summar

izeProteinStructure tool.

Action:
``

`
{
  "action": "Summar

izeProteinStructure",
  "action_input": {
   

 "traj_file": "8PF

Q_174119",
    "top

_file": "8PFQ_174

119"
  }
}
```



Thought:

 Now that I have

 the summaries of the protein structures for both 8

PFK and 8PFQ,

 I can compare the secondary structures of the

 two proteins.

Action:
```
{
 

 "action": "ComputeDSS

P",
  "action_input":

 {
   

 "traj_file": "8PF

K_174117",
    "top_file

": "8PFK_174117",


    "target_frames": "all"
  }
}


```



Thought:

 Now that I have the DSS

P assignments for 8PFK

, I need to get the DSSP assignments for

 8PFQ using the ComputeD

SSP tool.

Action:
```
{
  "action

": "ComputeDSSP",
  "action

_input": {
    "traj_file

": "8PFQ_174119",


    "top_file": "8PFQ

_174119",


    "target_frames": "all

"
  }
}
```

Thought:

 Now that I have the DSSP assignments for

 both 8PFK and 8

PFQ, I can compare the secondary structures

 of the two proteins.

Action:
``

`
{
  "action": "Final Answer

",
  "action_input": "The

 PDB files for 8PFK

 and 8PFQ have been downloaded

. The summaries of the protein structures for both proteins

 are: 8PFK

 - {'n_atoms': 487,

 'n_residues':

 109, 'n_chains': 

2, 'n_frames': 1

, 'n_bonds': 420}

 and 8PFQ - {'

n_atoms': 950, 'n_resid

ues': 194, 'n_ch

ains': 6, 'n

_frames': 1, '

n_bonds': 833}. The

 DSSP assignments for both

 proteins are

: 8

PFK - {'

residues in helix': 

0, 'residues in

 strand':

 0

, 'residues in coil': 

0, 'residues not assigned

, not a protein residue':

 109} and 8PFQ

 - {'residues in helix

': 0, 'residues

 in strand': 0, 'resid

ues in coil': 0

, '

residues not assigned, not

 a protein residue': 194}."


}
```

In [5]:
# Report how long the run took, measured from the `start` timestamp that was
# recorded before the agent was created.
end = datetime.datetime.now()
elapsed = end - start
elapsed_minutes = elapsed.total_seconds()/60
print(f"duration: {elapsed_minutes:.2f} minutes")

duration: 0.20 minutes


In [6]:
# Inspect what the agent left behind: the checkpoint directory name and the
# files recorded in its path registry.
registry = agent.path_registry
print("ckpt_dir", os.path.basename(registry.ckpt_dir))
paths_and_descriptions = registry.list_path_names_and_descriptions()
# Break the listing at every comma so each piece is shown on its own line.
print(paths_and_descriptions.replace(",", "\n"))

ckpt_dir ckpt_125
Files found in registry: 8PFK_174117: PDB file downloaded from RSCB
 PDBFile ID: 8PFK_174117
 8PFQ_174119: PDB file downloaded from RSCB
 PDBFile ID: 8PFQ_174119
 rec0_174122: dssp values for trajectory with id: 8PFK_174117
 rec0_174124: dssp values for trajectory with id: 8PFQ_174119


In [7]:
import re
import os


def _locate_registered_pdb(pdb_id, descriptions, path_registry):
    """Find the registry entry for ``pdb_id`` and check its file exists.

    Args:
        pdb_id: PDB identifier to look for (e.g. ``"8PFK"``).
        descriptions: Text listing of the registry contents; the entry is
            expected to appear in it as ``<pdb_id>_<digits>``.
        path_registry: Object exposing ``get_mapped_path(file_id)``.

    Returns:
        A ``(file_id, path)`` tuple for the first matching entry.

    Raises:
        AssertionError: If no entry for ``pdb_id`` is listed, or if the path
            mapped to the entry does not exist on disk.
    """
    match = re.search(rf"{re.escape(pdb_id)}_\d+", descriptions)
    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.group`.
    assert match is not None, f"no registry entry found for PDB ID {pdb_id}"
    file_id = match.group(0)
    path = path_registry.get_mapped_path(file_id)
    assert os.path.exists(path), f"path registered for {file_id} does not exist: {path}"
    return file_id, path


# Both structures must have been downloaded by the agent for the comparison
# in the following cell to be possible.
file_id1, path1 = _locate_registered_pdb("8PFK", paths_and_descriptions, registry)
file_id2, path2 = _locate_registered_pdb("8PFQ", paths_and_descriptions, registry)
print(f'It is asserted that pdb files for {file_id1} and {file_id2} exist')

It is asserted that pdb files for 8PFK_174117 and 8PFQ_174119 exist


In [8]:
import mdtraj as md


def _report_structure(label, pdb_path):
    """Print chain, atom and simplified-DSSP residue counts for one structure.

    Args:
        label: Name shown in the ``protein <label>`` header line.
        pdb_path: Path of the structure file to load with ``md.load``.

    Only residues whose simplified DSSP code is 'E', 'H' or 'C' are tallied.
    NOTE(review): residues given any other code are not counted by any of the
    three tallies, so three zeros do not by themselves show that the structure
    has no residues -- confirm which codes mdtraj emits for non-protein
    residues before relying on these numbers.
    """
    traj = md.load(pdb_path)
    top = traj.topology
    print(f'protein {label}')
    print('Number of chains: ', top.n_chains)
    print('Number of atoms: ', top.n_atoms)
    # Only the first frame's assignments are inspected.
    first_frame_codes = md.compute_dssp(traj, simplified=True)[0]
    for element_name, dssp_code in (("sheets", 'E'), ("helices", 'H'), ("coils", 'C')):
        n_matching = sum(1 for code in first_frame_codes if code == dssp_code)
        print(f"Number of residues in {element_name}: ", n_matching)


# Independent check of the agent's answer, run on the files it downloaded.
for position, (label, pdb_path) in enumerate((('8PFK', path1), ('8PFQ', path2))):
    if position:
        # Blank line between the two reports.
        print()
    _report_structure(label, pdb_path)

protein 8PFK
Number of chains:  2
Number of atoms:  487
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0

protein 8PFQ
Number of chains:  6
Number of atoms:  950
Number of residues in sheets:  0
Number of residues in helices:  0
Number of residues in coils:  0


# Experiment Result: ✅❌

1. Completed without Exception or TimeOut Errors ✅
2. Attempted all necessary steps ✅
3. Completed without Hallucination ✅
4. Logic makes sense ✅
5. Correct Answer ✅