In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
prompt = "Simulate protein 1ZNI at 300 K for 1 ns and calculate the RMSD."


In [4]:
llm = _make_llm(model, temp=0.1, streaming=True)

system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)
messages = [
    ("system", system_prompt),
    ("human", prompt),
]

now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",llm.model_name,"\nTemperature: ",llm.temperature)

date: 2024-10-16
time: 20:44:10
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To

 simulate

 the

 protein

 

1

Z

NI

 at

 

300

 K

 for

 

1

 ns

 and

 calculate

 the

 RMS

D

,

 we

 will

 follow

 these

 steps

:



1

.

 Download

 the

 P

DB

 file

 for

 

1

Z

NI

.


2

.

 Pre

process

 the

 P

DB

 file

 using

 P

DB

Fix

er

 to

 ensure

 it

 is

 suitable

 for

 simulation

.


3

.

 Set

 up

 the

 molecular

 dynamics

 simulation

 using

 Open

MM

.


4

.

 Run

 the

 simulation

 for

 

1

 ns

.


5

.

 Analyze

 the

 trajectory

 to

 calculate

 the

 RMS

D

 using

 MDT

raj

.



Let's

 write

 the

 Python

 script

 to

 perform

 these

 tasks

:



```

python




import

 requests




from

 pdb

fix

er

 import

 P

DB

Fix

er




from

 open

mm

.app

 import

 *


from

 open

mm

 import

 *


from

 open

mm

.unit

 import

 *


import

 md

traj

 as

 md




import

 os





#

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

Z

NI




p

db

_id

 =

 "

1

Z

NI

"


p

db

_url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


p

db

_filename

 =

 f

"{

p

db

_id

}.

p

db

"



response

 =

 requests

.get

(p

db

_url

)


with

 open

(p

db

_filename

,

 '

wb

')

 as

 file

:


   

 file

.write

(response

.content

)



#

 Step

 

2

:

 Pre

process

 the

 P

DB

 file

 using

 P

DB

Fix

er




fix

er

 =

 P

DB

Fix

er

(filename

=p

db

_filename

)


fix

er

.find

Missing

Resid

ues

()


fix

er

.find

Missing

Atoms

()


fix

er

.add

Missing

Atoms

()


fix

er

.add

Missing

Hyd

rog

ens

()



#

 Save

 the

 fixed

 P

DB

 file




fixed

_p

db

_filename

 =

 f

"{

p

db

_id

}_

fixed

.p

db

"


with

 open

(f

ixed

_p

db

_filename

,

 '

w

')

 as

 file

:


   

 P

DB

File

.write

File

(f

ixer

.top

ology

,

 fixer

.positions

,

 file

)



#

 Step

 

3

:

 Set

 up

 the

 molecular

 dynamics

 simulation

 using

 Open

MM




p

db

 =

 P

DB

File

(f

ixed

_p

db

_filename

)


force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

p

.xml

')


system

 =

 force

field

.create

System

(p

db

.top

ology

,

 non

bond

ed

Method

=

PM

E

,

 non

bond

ed

Cut

off

=

1

*

nan

ometer

,

 constraints

=

HB

onds

)


integr

ator

 =

 Lange

vin

Integrator

(

300

*

kel

vin

,

 

1

/p

ic

ose

cond

,

 

0

.

002

*

pic

oseconds

)


simulation

 =

 Simulation

(p

db

.top

ology

,

 system

,

 integr

ator

)


simulation

.context

.set

Positions

(p

db

.positions

)



#

 Min

imize

 energy




simulation

.min

imize

Energy

()



#

 Step

 

4

:

 Run

 the

 simulation

 for

 

1

 ns




simulation

.report

ers

.append

(State

Data

Reporter

(stdout

,

 

100

0

,

 step

=True

,

 potential

Energy

=True

,

 temperature

=True

))


simulation

.report

ers

.append

(D

CD

Reporter

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 

100

0

))


simulation

.step

(

500

000

)

 

 #

 

1

 ns

 with

 

2

 fs

 time

 step





#

 Step

 

5

:

 Analyze

 the

 trajectory

 to

 calculate

 the

 RMS

D

 using

 MDT

raj




traj

 =

 md

.load

_d

cd

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 top

=f

ixed

_p

db

_filename

)


r

ms

d

 =

 md

.r

ms

d

(tr

aj

,

 traj

,

 

0

)

 

 #

 Calculate

 RMS

D

 with

 respect

 to

 the

 first

 frame





#

 Print

 RMS

D

 values




print

("

R

MS

D

 values

 (

nm

):

",

 rms

d

)



#

 Clean

 up




os

.remove

(p

db

_filename

)


os

.remove

(f

ixed

_p

db

_filename

)


``

`



This

 script

 will

 download

 the

 P

DB

 file

 for

 

1

Z

NI

,

 preprocess

 it

,

 set

 up

 and

 run

 a

 

1

 ns

 molecular

 dynamics

 simulation

 at

 

300

 K

,

 and

 finally

 calculate

 the

 RMS

D

 of

 the

 trajectory

.

 The

 RMS

D

 values

 are

 printed

 in

 nan

ometers

.

To simulate the protein 1ZNI at 300 K for 1 ns and calculate the RMSD, we will follow these steps:

1. Download the PDB file for 1ZNI.
2. Preprocess the PDB file using PDBFixer to ensure it is suitable for simulation.
3. Set up the molecular dynamics simulation using OpenMM.
4. Run the simulation for 1 ns.
5. Analyze the trajectory to calculate the RMSD using MDTraj.

Let's write the Python script to perform these tasks:

```python
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md
import os

# Step 1: Download the PDB file for 1ZNI
pdb_id = "1ZNI"
pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename = f"{pdb_id}.pdb"

response = requests.get(pdb_url)
with open(pdb_filename, 'wb') as file:
    file.write(response.content)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=pdb_filename)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtom

In [6]:
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

date: 2024-10-16
time: 20:44:20


In [2]:
# TEST THE CODE

import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md
import os

# Step 1: Download the PDB file for 1ZNI
pdb_id = "1ZNI"
pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename = f"{pdb_id}.pdb"

response = requests.get(pdb_url)
with open(pdb_filename, 'wb') as file:
    file.write(response.content)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=pdb_filename)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()

# Save the fixed PDB file
fixed_pdb_filename = f"{pdb_id}_fixed.pdb"
with open(fixed_pdb_filename, 'w') as file:
    PDBFile.writeFile(fixer.topology, fixer.positions, file)

# Step 3: Set up the molecular dynamics simulation using OpenMM
pdb = PDBFile(fixed_pdb_filename)
forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=PME, nonbondedCutoff=1*nanometer, constraints=HBonds)
integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
simulation = Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.positions)

# Minimize energy
simulation.minimizeEnergy()

# Step 4: Run the simulation for 1 ns
simulation.reporters.append(StateDataReporter(stdout, 1000, step=True, potentialEnergy=True, temperature=True))
simulation.reporters.append(DCDReporter(f'{pdb_id}_trajectory.dcd', 1000))
simulation.step(500000)  # 1 ns with 2 fs time step

# Step 5: Analyze the trajectory to calculate the RMSD using MDTraj
traj = md.load_dcd(f'{pdb_id}_trajectory.dcd', top=fixed_pdb_filename)
rmsd = md.rmsd(traj, traj, 0)  # Calculate RMSD with respect to the first frame

# Print RMSD values
print("RMSD values (nm):", rmsd)

# Clean up
os.remove(pdb_filename)
os.remove(fixed_pdb_filename)

ValueError: No template found for residue 103 (ZN).  The set of atoms matches ZN, but the bonds are different.  Perhaps the chain is missing a terminal group?  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template

In [1]:
!ls

1ZNI_fixed.pdb	1ZNI.pdb  exp_10.ipynb
