In [1]:
from mdcrow import MDCrow

In [2]:
# Record when this run began; `start` is reused later to report the duration.
import datetime

start = datetime.datetime.now()
# Format the same instant twice: once as a calendar date, once as a clock time.
date, time = (start.strftime(fmt) for fmt in ("%Y-%m-%d", "%H:%M:%S"))
print("date:", date)
print("time:", time)

date: 2024-09-09
time: 12:32:32


In [3]:
# Build the agent: the same LLM drives both the agent and its tools,
# memory is off, every tool is exposed, and results go to checkpoint "ckpt_55".
model = "gpt-4-1106-preview"
agent_settings = {
    "agent_type": "Structured",
    "model": model,
    "tools_model": model,
    "use_memory": False,
    "top_k_tools": "all",
    "ckpt_dir": "ckpt_55",
}
mda = MDCrow(**agent_settings)
# Echo the model name and temperature the agent actually ended up with.
print("LLM: ", mda.llm.model_name, "\nTemperature: ", mda.llm.temperature)

LLM:  gpt-4-1106-preview 
Temperature:  0.1


In [4]:
# Task given to the agent, written as adjacent literals that join into one string.
prompt = (
    'Download the PDB files for 8PFK and 8PFQ. '
    'Then, compare the secondary structures of the two proteins, '
    'including the number of atoms, secondary structures, number of chains, etc.'
)
# NOTE(review): the agent call is disabled here; the cells below read results
# from the existing checkpoint instead — confirm this is intentional.
# answer = mda.run(prompt)

### Final Answer
Final Answer:
The PDB files for proteins 8PFK and 8PFQ were successfully downloaded and cleaned to prepare for secondary structure analysis. However, the analysis using the ComputeDSSP tool revealed that both proteins have no residues in helix or strand secondary structures, and all residues are not assigned, suggesting that they may not be protein residues. This is an unusual result for protein structures and may indicate an issue with the PDB files or the analysis process. The summaries of the proteins before cleaning were as follows:

- 8PFK: 487 atoms, 109 residues, 2 chains, 1 frame, 420 bonds.
- 8PFQ: 950 atoms, 194 residues, 6 chains, 1 frame, 833 bonds.

After cleaning, the DSSP analysis still showed no secondary structures for either protein, with 11 and 22 residues not assigned for 8PFK and 8PFQ, respectively. These findings suggest that the PDB files may not contain typical protein structures or that there may be a limitation in the analysis method used. Further investigation would be required to determine the nature of these structures and why they do not exhibit typical protein secondary structures.

Checkpint directory:  /gpfs/fs2/scratch/jmedina9/mdcrow/md-agent/ckpt/ckpt_55

In [5]:
# Wall-clock time since `start` was recorded, reported in minutes.
end = datetime.datetime.now()
elapsed = end - start
elapsed_minutes = elapsed.total_seconds() / 60
print(f"duration: {elapsed_minutes:.2f} minutes")

duration: 0.01 minutes


# Confirm the downloaded files and their structures

In [6]:
# Grab the agent's path registry and print the names of everything it tracks.
# `all_names` is regex-searched as text in a later cell, so list_path_names()
# evidently returns a single string rather than a list.
registry = mda.path_registry
all_names = registry.list_path_names()
print(all_names)

Names found in registry: 8PFK_113023, 8PFQ_113027, rec0_113048, rec0_113055, 8PFK_113107, 8PFQ_113117, rec0_113122, rec0_113130


In [9]:
# Rebuild every registered path underneath the current `registry.ckpt_dir`:
# keep only the part of each stored path that follows the checkpoint folder
# name and prepend the checkpoint directory of this session.
# (Presumably needed because stored paths can come from another machine or
# working directory — TODO confirm.)
#
# A new list is built instead of overwriting the list returned by the private
# `_list_all_paths()` in place, and no loop variables are left in the kernel.
# `maxsplit=1` keeps the full remainder even if the marker text happens to
# appear again later in the path (e.g. inside a file name).
CKPT_MARKER = 'ckpt_55'  # must match the ckpt_dir passed to MDCrow above
all_paths = [
    registry.ckpt_dir + stored_path.split(CKPT_MARKER, 1)[1]
    for stored_path in registry._list_all_paths()
]

In [10]:
import re
import os
# Both PDB IDs must show up (followed by "_<digits>") among the registry names.
pattern1, pattern2 = re.compile(r"8PFK_\d+"), re.compile(r"8PFQ_\d+")
match1, match2 = pattern1.search(all_names), pattern2.search(all_names)
assert match1 and match2
print('It is asserted that 8PFK and 8PFQ are in path_registry')

# The first two registered paths must exist on disk.
for idx in (0, 1):
    assert os.path.exists(all_paths[idx])
print('It is asserted these pdb files physically exist')

It is asserted that 8PFK and 8PFQ are in path_registry
It is asserted these pdb files physically exist


In [11]:
# Show the rebuilt paths; entries 0 and 1 are the files checked for existence above.
# NOTE(review): this prints absolute local paths — consider clearing the output before sharing.
print(all_paths)

['/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/pdb/8PFK_raw_20240822_113023.pdb', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/pdb/8PFQ_raw_20240822_113027.pdb', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/records/dssp_20240822_113048.npy', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/records/dssp_20240822_113055.npy', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/pdb/8PFK_Clean_20240822_113107.pdb', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/pdb/8PFQ_Clean_20240822_113117.pdb', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/records/dssp_20240822_113122.npy', '/Users/jorgemedina/Desktop/md-agent/ckpt/ckpt_55/records/dssp_20240822_113130.npy']


In [13]:
import mdtraj as md
# Load the first two registered files and report chain and atom counts for each.
# The second heading starts with a newline so the two reports are separated
# by a blank line.
for heading, path_index in (('protein 8PFK', 0), ('\nprotein 8PFQ', 1)):
    traj = md.load(all_paths[path_index])
    top = traj.topology
    number_of_chains = top.n_chains
    number_of_atoms = top.n_atoms
    print(heading)
    print('Number of chains: ', number_of_chains)
    print('Number of atoms: ', number_of_atoms)

protein 8PFK
Number of chains:  2
Number of atoms:  487

protein 8PFQ
Number of chains:  6
Number of atoms:  950


# Experiment Result:
### Completed without Exception or TimeOut Errors ✅
### Attempted all necessary steps ✅
### No Hallucination ✅
### Logic makes sense ✅
### Correct Answer  ✅