In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
# Placeholder task prompt; the next cell reassigns `prompt` before it is used.
prompt = "Write me a code to run simulation of fibronectin."
# Model identifier handed to `_make_llm` when the chat client is created.
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
# Task prompt for this run; it replaces the placeholder value assigned earlier.
# NOTE(review): the "# Parameters" header looks like a tool-injected
# parameters cell (e.g. papermill) — confirm before editing by hand.
prompt = "Download the PDB file for 1AEE. Then tell me how many chains and atoms are present in the protein."


In [4]:
# Build the chat client for the configured model.
llm = _make_llm(model, temp=0.1, streaming=True)

# One entry per sentence of the system prompt. The sentences are joined with
# single spaces and the prompt keeps its trailing space.
# NOTE(review): "RSCB" is presumably meant to be "RCSB"; it is left untouched
# so the prompt stays identical to the one used for the recorded output.
_prompt_sentences = [
    "You are an expert molecular dynamics scientist, and your task is to "
    "respond to the question or solve the problem in its entirety to the "
    "best of your ability.",
    "If any part of the task requires you to perform an action that you are "
    "not capable of completing, please write a runnable Python script for "
    "that step and move on.",
    "For literature papers, use and process papers from the "
    "`paper_collection` folder.",
    "For .pdb files, download them from the RSCB website using `requests`.",
    "To preprocess PDB files, you will use PDBFixer.",
    "To get information about proteins, retrieve data from the UniProt database.",
    "For anything related to simulations, you will use OpenMM, and for "
    "anything related to analyses, you will use MDTraj.",
    "At the end, combine any scripts into one script.",
]
system_prompt = " ".join(_prompt_sentences) + " "

# Conversation sent to the model: instructions first, then the task.
messages = [("system", system_prompt), ("human", prompt)]

# Stamp the run with the wall-clock date/time and the model settings.
now = datetime.datetime.now()
date = f"{now:%Y-%m-%d}"
time = f"{now:%H:%M:%S}"
print("date:", date)
print("time:", time)
print("LLM: ", llm.model_name, "\nTemperature: ", llm.temperature)

date: 2024-10-16
time: 20:43:54
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
# Send the system + human messages to the model and keep the reply object.
ai_msg = llm.invoke(messages)
# Print the reply text in one piece.
# NOTE(review): the recorded output shows the reply token-by-token first and
# then again in full, presumably because the client was built with
# streaming=True — confirm whether both copies are wanted in the notebook.
print(ai_msg.content)

To

 determine

 the

 number

 of

 chains

 and

 atoms

 in

 the

 protein

 structure

 for

 P

DB

 ID

 

1

A

EE

,

 we

 need

 to

 download

 the

 P

DB

 file

 and

 analyze

 its

 contents

.

 Below

 is

 a

 Python

 script

 that

 accomplish

es

 this

 task

:



```

python




import

 requests




from

 pdb

fix

er

 import

 P

DB

Fix

er




from

 io

 import

 String

IO





#

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

A

EE




p

db

_id

 =

 "

1

A

EE

"


url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


response

 =

 requests

.get

(url

)



if

 response

.status

_code

 ==

 

200

:


   

 pdb

_data

 =

 response

.text




else

:


   

 raise

 Exception

(f

"

Failed

 to

 download

 P

DB

 file

 for

 {

p

db

_id

}")



#

 Step

 

2

:

 Use

 P

DB

Fix

er

 to

 parse

 the

 P

DB

 file




fix

er

 =

 P

DB

Fix

er

(p

db

file

=

String

IO

(p

db

_data

))



#

 Step

 

3

:

 Count

 the

 number

 of

 chains




chains

 =

 list

(f

ixer

.top

ology

.ch

ains

())


num

_ch

ains

 =

 len

(ch

ains

)



#

 Step

 

4

:

 Count

 the

 number

 of

 atoms




atoms

 =

 list

(f

ixer

.top

ology

.at

oms

())


num

_atoms

 =

 len

(at

oms

)



#

 Output

 the

 results




print

(f

"P

DB

 ID

:

 {

p

db

_id

}")


print

(f

"

Number

 of

 chains

:

 {

num

_ch

ains

}")


print

(f

"

Number

 of

 atoms

:

 {

num

_atoms

}")


``

`



This

 script

 will

 download

 the

 P

DB

 file

 for

 

1

A

EE

,

 parse

 it

 using

 P

DB

Fix

er

,

 and

 then

 count

 the

 number

 of

 chains

 and

 atoms

 in

 the

 structure

.

 You

 can

 run

 this

 script

 in

 a

 Python

 environment

 to

 get

 the

 desired

 information

.

To determine the number of chains and atoms in the protein structure for PDB ID 1AEE, we need to download the PDB file and analyze its contents. Below is a Python script that accomplishes this task:

```python
import requests
from pdbfixer import PDBFixer
from io import StringIO

# Step 1: Download the PDB file for 1AEE
pdb_id = "1AEE"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
response = requests.get(url)

if response.status_code == 200:
    pdb_data = response.text
else:
    raise Exception(f"Failed to download PDB file for {pdb_id}")

# Step 2: Use PDBFixer to parse the PDB file
fixer = PDBFixer(pdbfile=StringIO(pdb_data))

# Step 3: Count the number of chains
chains = list(fixer.topology.chains())
num_chains = len(chains)

# Step 4: Count the number of atoms
atoms = list(fixer.topology.atoms())
num_atoms = len(atoms)

# Output the results
print(f"PDB ID: {pdb_id}")
print(f"Number of chains: {num_chains}")
print(f"Number of atoms: {num_atoms}")
```

This script will downl

In [6]:
# Stamp the end of the run with the current wall-clock date and time.
now = datetime.datetime.now()
date, time = f"{now:%Y-%m-%d}", f"{now:%H:%M:%S}"
print("date:", date)
print("time:", time)

date: 2024-10-16
time: 20:43:59


In [1]:
# TEST THE CODE
# This cell executes the script from the model's reply above so its result can
# be checked. The statements are kept exactly as generated; only review notes
# have been added.
import requests
from pdbfixer import PDBFixer
from io import StringIO

# Step 1: Download the PDB file for 1AEE
pdb_id = "1AEE"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
# NOTE(review): no `timeout` is passed, so this call can block indefinitely if
# the server does not respond.
response = requests.get(url)

# Keep the downloaded text in memory only; nothing in this cell writes it to
# disk, which is consistent with the later existence check printing False.
if response.status_code == 200:
    pdb_data = response.text
else:
    raise Exception(f"Failed to download PDB file for {pdb_id}")

# Step 2: Use PDBFixer to parse the PDB file
# The text is wrapped in StringIO so it can be passed as a file-like object.
fixer = PDBFixer(pdbfile=StringIO(pdb_data))

# Step 3: Count the number of chains
chains = list(fixer.topology.chains())
num_chains = len(chains)

# Step 4: Count the number of atoms
# NOTE(review): this counts every atom in the parsed topology; whether that
# includes non-protein entries (waters, ligands) depends on the file — confirm
# against the question being asked.
atoms = list(fixer.topology.atoms())
num_atoms = len(atoms)

# Output the results
print(f"PDB ID: {pdb_id}")
print(f"Number of chains: {num_chains}")
print(f"Number of atoms: {num_atoms}")

PDB ID: 1AEE
Number of chains: 2
Number of atoms: 2992


In [2]:
import os

# Check whether a copy of the structure file was saved in the working directory.
pdb_id = "1AEE"
pdb_on_disk = os.path.exists(f'{pdb_id}.pdb')
print(f'PDB file for {pdb_id} exists:', pdb_on_disk)

PDB file for 1AEE exists: False


In [1]:
# List the working directory to see which files are present after the run.
!ls

exp_9.ipynb
