In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
prompt = "Perform a brief simulation of protein 1PQ2."


In [4]:
llm = _make_llm(model, temp=0.1, streaming=True)

system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)
messages = [
    ("system", system_prompt),
    ("human", prompt),
]

now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",llm.model_name,"\nTemperature: ",llm.temperature)

date: 2024-10-16
time: 20:08:15
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To perform a brief molecular dynamics simulation of the protein with PDB ID 1PQ2, we will follow these steps:

1. Download the PDB file for 1PQ2.
2. Preprocess the PDB file using PDBFixer to fix any issues such as missing atoms or residues.
3. Set up the simulation system using OpenMM.
4. Run a short molecular dynamics simulation.
5. Save the trajectory for analysis.

Let's write a Python script to accomplish these tasks:

```python
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Step 1: Download the PDB file for 1PQ2
pdb_id = "1PQ2"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
response = requests.get(url)
with open(f"{pdb_id}.pdb", "w") as file:
    file.write(response.text)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=f"{pdb_id}.pdb")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()
fixer.addSolven

 a

 molecular

 dynamics

 simulation

 of

 the

 protein

 with

 P

DB

 ID

 

1

PQ

2

,

 we

 will

 follow

 these

 steps

:



1

.

 **

Download

 the

 P

DB

 file

**

:

 Retrieve

 the

 P

DB

 file

 for

 

1

PQ

2

 from

 the

 R

CS

B

 Protein

 Data

 Bank

.


2

.

 **

Pre

process

 the

 P

DB

 file

**

:

 Prepare

 the

 protein

 structure

 for

 simulation

,

 which

 includes

 removing

 water

 molecules

,

 adding

 missing

 atoms

,

 and

 assigning

 proton

ation

 states

.


3

.

 **

Set

 up

 the

 simulation

 system

**

:

 Define

 the

 force

 field

,

 sol

vate

 the

 protein

,

 and

 add

 ions

 if

 necessary

.


4

.

 **

Run

 the

 simulation

**

:

 Perform

 a

 short

 molecular

 dynamics

 simulation

 using

 Open

MM

.


5

.

 **

Analyze

 the

 results

**

:

 Use

 MDT

raj

 to

 analyze

 the

 trajectory

.



Let's

 start

 by

 writing

 a

 Python

 script

 to

 download

 and

 preprocess

 the

 P

DB

 file

,

 set

 up

 the

 simulation

,

 and

 run

 it

 using

 Open

MM

.



```

python




import

 requests




from

 sim

tk

.open

mm

 import

 app




from

 sim

tk

 import

 open

mm

,

 unit




import

 md

traj

 as

 md





#

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

PQ

2




p

db

_id

 =

 "

1

PQ

2

"


p

db

_url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


p

db

_filename

 =

 f

"{

p

db

_id

}.

p

db

"



response

 =

 requests

.get

(p

db

_url

)


with

 open

(p

db

_filename

,

 '

w

')

 as

 file

:


   

 file

.write

(response

.text

)



#

 Step

 

2

:

 Pre

process

 the

 P

DB

 file




#

 Load

 the

 P

DB

 file

 using

 Open

MM

's

 P

DB

File




p

db

 =

 app

.P

DB

File

(p

db

_filename

)



#

 Use

 the

 Amber

99

SB

 force

 field




force

field

 =

 app

.F

orce

Field

('

amber

99

sb

.xml

',

 '

tip

3

p

.xml

')



#

 Create

 a

 modeller

 object

 to

 add

 hyd

rog

ens

 and

 sol

vate

 the

 protein




mod

eller

 =

 app

.Mod

eller

(p

db

.top

ology

,

 pdb

.positions

)


mod

eller

.add

Hyd

rog

ens

(force

field

)


mod

eller

.add

Sol

vent

(force

field

,

 model

='

tip

3

p

',

 padding

=

1

.

0

*

unit

.nan

ometers

)



#

 Step

 

3

:

 Set

 up

 the

 simulation

 system




system

 =

 force

field

.create

System

(mod

eller

.top

ology

,

 non

bond

ed

Method

=

app

.P

ME

,


                                

 non

bond

ed

Cut

off

=

1

.

0

*

unit

.nan

ometers

,

 constraints

=

app

.H

B

onds

)



#

 Integr

ator

 and

 simulation

 setup




integr

ator

 =

 open

mm

.L

ange

vin

Integrator

(

300

*

unit

.k

el

vin

,

 

1

.

0

/unit

.pic

oseconds

,

 

0

.

002

*

unit

.pic

oseconds

)


simulation

 =

 app

.S

imulation

(mod

eller

.top

ology

,

 system

,

 integr

ator

)


simulation

.context

.set

Positions

(mod

eller

.positions

)



#

 Min

imize

 energy




simulation

.min

imize

Energy

()



#

 Step

 

4

:

 Run

 the

 simulation




simulation

.report

ers

.append

(app

.D

CD

Reporter

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 

100

))


simulation

.report

ers

.append

(app

.State

Data

Reporter

(stdout

,

 

100

0

,

 step

=True

,

 potential

Energy

=True

,

 temperature

=True

))



#

 Run

 a

 short

 simulation

 of

 

100

00

 steps

 (

20

 ps

)


simulation

.step

(

100

00

)



#

 Step

 

5

:

 Analyze

 the

 results




#

 Load

 the

 trajectory




traj

 =

 md

.load

_d

cd

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 top

=p

db

_filename

)



#

 Print

 basic

 information

 about

 the

 trajectory




print

(tr

aj

)



#

 Calculate

 and

 print

 the

 RMS

D




r

ms

d

 =

 md

.r

ms

d

(tr

aj

,

 traj

,

 

0

)


print

("

R

MS

D

:",

 rms

d

)


``

`



This

 script

 will

 download

 the

 P

DB

 file

 for

 

1

PQ

2

,

 preprocess

 it

,

 set

 up

 a

 simulation

 system

 using

 the

 Amber

99

SB

 force

 field

,

 and

 run

 a

 short

 molecular

 dynamics

 simulation

.

 The

 trajectory

 is

 saved

 in

 a

 D

CD

 file

,

 and

 basic

 analysis

 such

 as

 RMS

D

 is

 performed

 using

 MDT

raj

.

 You

 can

 adjust

 the

 number

 of

 steps

 or

 other

 parameters

 as

 needed

 for

 your

 specific

 requirements

.

To perform a molecular dynamics simulation of the protein with PDB ID 1PQ2, we will follow these steps:

1. **Download the PDB file**: Retrieve the PDB file for 1PQ2 from the RCSB Protein Data Bank.
2. **Preprocess the PDB file**: Prepare the protein structure for simulation, which includes removing water molecules, adding missing atoms, and assigning protonation states.
3. **Set up the simulation system**: Define the force field, solvate the protein, and add ions if necessary.
4. **Run the simulation**: Perform a short molecular dynamics simulation using OpenMM.
5. **Analyze the results**: Use MDTraj to analyze the trajectory.

Let's start by writing a Python script to download and preprocess the PDB file, set up the simulation, and run it using OpenMM.

```python
import requests
from simtk.openmm import app
from simtk import openmm, unit
import mdtraj as md

# Step 1: Download the PDB file for 1PQ2
pdb_id = "1PQ2"
pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename

In [6]:
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

date: 2024-10-16
time: 20:08:24


In [7]:
# TEST THE CODE
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Step 1: Download the PDB file for 1PQ2
print("Step 1")
pdb_id = "1PQ2"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
response = requests.get(url)
with open(f"{pdb_id}.pdb", "w") as file:
    file.write(response.text)

# Step 2: Preprocess the PDB file using PDBFixer
print("Step 2")
fixer = PDBFixer(filename=f"{pdb_id}.pdb")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()
fixer.addSolvent(boxSize=Vec3(10, 10, 10)*nanometers)

# Step 3: Set up the simulation system using OpenMM
print("Step 3")
forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
system = forcefield.createSystem(fixer.topology, nonbondedMethod=PME, nonbondedCutoff=1*nanometer, constraints=HBonds)
integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
simulation = Simulation(fixer.topology, system, integrator)
simulation.context.setPositions(fixer.positions)

# Minimize energy
simulation.minimizeEnergy()

# Step 4: Run a short molecular dynamics simulation
print("Step 4")
simulation.reporters.append(StateDataReporter(stdout, 1000, step=True, potentialEnergy=True, temperature=True))
simulation.reporters.append(DCDReporter(f'{pdb_id}_trajectory.dcd', 1000))
simulation.step(10000)  # Run for 10,000 steps

# Step 5: Save the trajectory for analysis
print("Step 5")
# Load the trajectory using MDTraj for any further analysis
trajectory = md.load_dcd(f'{pdb_id}_trajectory.dcd', top=fixer.topology)
trajectory.save(f'{pdb_id}_trajectory.pdb')

print("Simulation complete. Trajectory saved as 1PQ2_trajectory.pdb.")

Step 1
Step 2
Step 3


ValueError: No template found for residue 953 (PO4).  This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template

In [1]:
!ls

1PQ2.pdb  exp_22.ipynb
