In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
# Default inputs for this run: the task text sent to the model and the model identifier.
# `prompt` is reassigned by the "# Parameters" cell that follows, so this default is not
# the one actually sent in the recorded run.
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
# Overrides the default `prompt` assigned in the previous cell; `model` is left unchanged.
# NOTE(review): presumably injected by a notebook-parameterization tool — confirm.
prompt = "Download and clean protein 1A3N."


In [4]:
# Build the LLM object through the project helper, using the `model` name set in an earlier cell.
# NOTE(review): the recorded output of the invoke cell shows the reply once token-by-token and
# then again in full; presumably streaming=True makes the helper echo tokens to stdout while
# the later print() repeats the whole message — confirm against `_make_llm`.
llm = _make_llm(model, temp=0.1, streaming=True)

# System prompt: sets the persona and names the tools the model is told to use
# (requests, PDBFixer, UniProt, OpenMM, MDTraj).
# NOTE(review): "RSCB" below looks like a typo for "RCSB". It is left untouched here because
# the literal is sent to the model verbatim and editing it would change the run's input.
system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)
# Conversation as (role, content) pairs: the system prompt followed by the user's task.
messages = [
    ("system", system_prompt),
    ("human", prompt),
]

# Print a wall-clock timestamp and the model settings; the LLM call itself happens in the next cell.
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",llm.model_name,"\nTemperature: ",llm.temperature)

date: 2024-10-16
time: 20:48:28
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
# Send the (system, human) message pairs to the model and print the text of its reply.
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To

 download

 and

 clean

 the

 protein

 structure

 for

 

1

A

3

N

,

 we

 will

 use

 the

 P

DB

Fix

er

 library

 to

 handle

 missing

 residues

,

 atoms

,

 and

 other

 common

 issues

 in

 P

DB

 files

.

 Below

 is

 a

 Python

 script

 that

 performs

 these

 tasks

:



```

python




import

 requests




from

 pdb

fix

er

 import

 P

DB

Fix

er




from

 open

mm

.app

 import

 P

DB

File





#

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

A

3

N




p

db

_id

 =

 "

1

A

3

N

"


url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


response

 =

 requests

.get

(url

)



#

 Save

 the

 P

DB

 file

 locally




with

 open

(f

"{

p

db

_id

}.

p

db

",

 "

w

")

 as

 file

:


   

 file

.write

(response

.text

)



#

 Step

 

2

:

 Use

 P

DB

Fix

er

 to

 clean

 the

 P

DB

 file




fix

er

 =

 P

DB

Fix

er

(filename

=f

"{

p

db

_id

}.

p

db

")



#

 Find

 missing

 residues

 and

 atoms




fix

er

.find

Missing

Resid

ues

()


fix

er

.find

Missing

Atoms

()


fix

er

.add

Missing

Atoms

()


fix

er

.add

Missing

Hyd

rog

ens

()



#

 Step

 

3

:

 Save

 the

 cleaned

 P

DB

 file




with

 open

(f

"{

p

db

_id

}_

clean

.p

db

",

 "

w

")

 as

 file

:


   

 P

DB

File

.write

File

(f

ixer

.top

ology

,

 fixer

.positions

,

 file

)



print

(f

"

Clean

ed

 P

DB

 file

 saved

 as

 {

p

db

_id

}_

clean

.p

db

")


``

`



This

 script

 performs

 the

 following

 steps

:


1

.

 Downloads

 the

 P

DB

 file

 for

 the

 protein

 with

 ID

 

1

A

3

N

 from

 the

 R

CS

B

 P

DB

 website

.


2

.

 Saves

 the

 downloaded

 P

DB

 file

 locally

.


3

.

 Uses

 P

DB

Fix

er

 to

 identify

 and

 fix

 common

 issues

 such

 as

 missing

 residues

 and

 atoms

,

 and

 adds

 missing

 hyd

rog

ens

.


4

.

 Saves

 the

 cleaned

 P

DB

 structure

 to

 a

 new

 file

.



You

 can

 run

 this

 script

 in

 a

 Python

 environment

 with

 the

 necessary

 libraries

 installed

.

 Make

 sure

 you

 have

 `

requests

`,

 `

p

db

fix

er

`,

 and

 `

open

mm

`

 installed

 in

 your

 Python

 environment

.

To download and clean the protein structure for 1A3N, we will use the PDBFixer library to handle missing residues, atoms, and other common issues in PDB files. Below is a Python script that performs these tasks:

```python
import requests
from pdbfixer import PDBFixer
from openmm.app import PDBFile

# Step 1: Download the PDB file for 1A3N
pdb_id = "1A3N"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
response = requests.get(url)

# Save the PDB file locally
with open(f"{pdb_id}.pdb", "w") as file:
    file.write(response.text)

# Step 2: Use PDBFixer to clean the PDB file
fixer = PDBFixer(filename=f"{pdb_id}.pdb")

# Find missing residues and atoms
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()

# Step 3: Save the cleaned PDB file
with open(f"{pdb_id}_clean.pdb", "w") as file:
    PDBFile.writeFile(fixer.topology, fixer.positions, file)

print(f"Cleaned PDB file saved as {pdb_id}_clean.pdb")
```

This script performs the

In [6]:
# Print a second wall-clock timestamp, taken after the LLM call has returned.
now = datetime.datetime.now()
date, time = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
print("date:", date)
print("time:", time)

date: 2024-10-16
time: 20:48:34


In [1]:
# TEST THE CODE
# Verbatim copy of the script printed in the LLM reply above, executed here to check that it
# runs. It is kept unchanged on purpose: this cell evaluates the generated code as-is.

import requests
from pdbfixer import PDBFixer
from openmm.app import PDBFile

# Step 1: Download the PDB file for 1A3N
pdb_id = "1A3N"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
# NOTE(review): no timeout is passed and the HTTP status is never checked, so whatever body
# comes back is what gets written to disk below.
response = requests.get(url)

# Save the PDB file locally
with open(f"{pdb_id}.pdb", "w") as file:
    file.write(response.text)

# Step 2: Use PDBFixer to clean the PDB file
fixer = PDBFixer(filename=f"{pdb_id}.pdb")

# Find missing residues and atoms
# NOTE(review): none of the calls below removes waters or heterogens; the residue comparison
# later in this notebook reports "Residues removed: set()" with HOH and HEM still present.
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()

# Step 3: Save the cleaned PDB file
with open(f"{pdb_id}_clean.pdb", "w") as file:
    PDBFile.writeFile(fixer.topology, fixer.positions, file)

print(f"Cleaned PDB file saved as {pdb_id}_clean.pdb")

Cleaned PDB file saved as 1A3N_clean.pdb


In [2]:
# Confirm that both the downloaded and the cleaned PDB files are present in the working directory.
import os

for pdb_id in ("1A3N", "1A3N_clean"):
    print(f'PDB file for {pdb_id} exists:',os.path.exists(f'{pdb_id}.pdb'))

PDB file for 1A3N exists: True
PDB file for 1A3N_clean exists: True


In [4]:
# check the changes in cleaning
# Load the raw and the cleaned structure and compare the sets of residue names they contain.
import mdtraj as md
path_before = "1A3N.pdb"
path_after = "1A3N_clean.pdb"

raw_file = md.load(path_before)
clean_file = md.load(path_after)

# Read each residue's `name` attribute instead of slicing its string form: str(residue) is
# the name followed by the sequence number, so a fixed [:3] slice mislabels any residue whose
# name is not exactly three characters long (e.g. "NA" with number 1 would become "NA1").
residues_before = {res.name for res in raw_file.top.residues}
residues_after = {res.name for res in clean_file.top.residues}
print("Residues before:",residues_before)
print("Residues after:",residues_after)
# Names present in the raw file but absent from the cleaned one.
print("Residues removed:",residues_before - residues_after)

Residues before: {'HIS', 'TRP', 'GLU', 'ARG', 'PHE', 'THR', 'MET', 'GLN', 'PRO', 'HOH', 'ALA', 'LYS', 'TYR', 'ASN', 'ASP', 'CYS', 'VAL', 'LEU', 'SER', 'GLY', 'HEM'}
Residues after: {'HIS', 'TRP', 'GLU', 'ARG', 'PHE', 'THR', 'MET', 'GLN', 'PRO', 'HOH', 'ALA', 'LYS', 'TYR', 'ASN', 'ASP', 'CYS', 'VAL', 'LEU', 'SER', 'GLY', 'HEM'}
Residues removed: set()


In [2]:
import mdtraj as md

# Summarise the raw (pre-cleaning) structure: residue, atom and chain counts.
traj = md.load("1A3N.pdb")
# DSSP assignment for the last frame; not used again in the visible part of this notebook.
secondary_structure = md.compute_dssp(traj, simplified=True)[-1]
print("==== BEFORE ====")
for label, count in (
    ("Number of residues in total: ", traj.n_residues),
    ("Number of atoms in total: ", traj.n_atoms),
    ("Number of chains: ", traj.n_chains),
):
    print(label, count)

==== BEFORE ====
Number of residues in total:  1027
Number of atoms in total:  4993
Number of chains:  12


In [3]:
# Summarise the cleaned structure with the same three counts, for comparison with the raw file.
traj = md.load("1A3N_clean.pdb")
# DSSP assignment for the last frame; not used again in the visible part of this notebook.
secondary_structure = md.compute_dssp(traj, simplified=True)[-1]
print("==== AFTER =====")
for label, count in (
    ("Number of residues in total: ", traj.n_residues),
    ("Number of atoms in total: ", traj.n_atoms),
    ("Number of chains: ", traj.n_chains),
):
    print(label, count)

==== AFTER =====
Number of residues in total:  1029
Number of atoms in total:  10291
Number of chains:  12
