In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
# Default task prompt; the "# Parameters" cell that follows reassigns `prompt`,
# so this value is only used if that cell is absent or skipped.
prompt = "Write me a code to run simulation of fibronectin."
# Model identifier handed to `_make_llm` when the chat model is built.
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
# Task prompt for this run; it replaces the default `prompt` assigned in the
# previous cell. NOTE(review): the "# Parameters" header looks like an
# injected-parameters cell from a notebook runner - confirm before editing by hand.
prompt = "Simulate 1VII for 1 ns at a temperature of 300 K. Then, tell me if the secondary structure changed from the beginning of the simulation to the end of the simulation."


In [4]:
# Build the chat model from the `model` name chosen in an earlier cell.
llm = _make_llm(model, temp=0.1, streaming=True)

# System message: fixes the tool stack the model is told to use and asks for
# a single combined script at the end.
# NOTE(review): "RSCB" in the text below looks like a typo for "RCSB"; it is
# left untouched because the string is sent to the model verbatim.
system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)

# Conversation passed to the model: the system instructions, then the task.
messages = [("system", system_prompt), ("human", prompt)]

# Run banner: wall-clock date/time of this cell plus the model settings in use.
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
time = now.strftime("%H:%M:%S")
print(f"date: {date}")
print(f"time: {time}")
print(f"LLM:  {llm.model_name} \nTemperature:  {llm.temperature}")

date: 2024-10-21
time: 13:33:29
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
# Send the system + human messages to the model in one call and print the
# text of the reply; the full reply object stays available as `ai_msg`.
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To simulate the protein with PDB ID 1VII for 1 ns at a temperature of 300 K and analyze the secondary structure changes, we will follow these steps:

1. Download the PDB file for 1VII.
2. Preprocess the PDB file using PDBFixer to ensure it is ready for simulation.
3. Set up and run a molecular dynamics simulation using OpenMM.
4. Analyze the secondary structure at the beginning and end of the simulation using MDTraj.

Let's start by writing the Python script to perform these tasks:

```python
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md
import os

# Step 1: Download the PDB file for 1VII
pdb_id = "1VII"
pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename = f"{pdb_id}.pdb"

response = requests.get(pdb_url)
with open(pdb_filename, 'wb') as file:
    file.write(response.content)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=pdb_filename)
fixer.findM

 protein

 structure

 

1

X

Q

8

 for

 

1

 ns

 at

 

300

 K

 and

 analyze

 the

 secondary

 structure

 changes

,

 we

 will

 follow

 these

 steps

:



1

.

 Download

 the

 P

DB

 file

 for

 

1

X

Q

8

.


2

.

 Pre

process

 the

 P

DB

 file

 using

 P

DB

Fix

er

 to

 ensure

 it

 is

 ready

 for

 simulation

.


3

.

 Set

 up

 and

 run

 a

 molecular

 dynamics

 simulation

 using

 Open

MM

.


4

.

 Analyze

 the

 secondary

 structure

 at

 the

 beginning

 and

 end

 of

 the

 simulation

 using

 MDT

raj

.



Let's

 start

 by

 writing

 the

 Python

 script

 to

 perform

 these

 tasks

:



```

python




import

 requests




from

 pdb

fix

er

 import

 P

DB

Fix

er




from

 open

mm

.app

 import

 *


from

 open

mm

 import

 *


from

 open

mm

.unit

 import

 *


import

 md

traj

 as

 md





#

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

X

Q

8




p

db

_id

 =

 "

1

X

Q

8

"


url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


response

 =

 requests

.get

(url

)


with

 open

(f

"{

p

db

_id

}.

p

db

",

 "

wb

")

 as

 file

:


   

 file

.write

(response

.content

)



#

 Step

 

2

:

 Pre

process

 the

 P

DB

 file

 using

 P

DB

Fix

er




fix

er

 =

 P

DB

Fix

er

(filename

=f

"{

p

db

_id

}.

p

db

")


fix

er

.find

Missing

Resid

ues

()


fix

er

.find

Missing

Atoms

()


fix

er

.add

Missing

Atoms

()


fix

er

.add

Missing

Hyd

rog

ens

()



#

 Use

 the

 AM

BER

 force

 field




force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

p

.xml

')



#

 Create

 the

 system




system

 =

 force

field

.create

System

(f

ixer

.top

ology

,

 non

bond

ed

Method

=

PM

E

,

 non

bond

ed

Cut

off

=

1

*

nan

ometer

,

 constraints

=

HB

onds

)



#

 Step

 

3

:

 Set

 up

 and

 run

 a

 molecular

 dynamics

 simulation

 using

 Open

MM




integr

ator

 =

 Lange

vin

Integrator

(

300

*

kel

vin

,

 

1

/p

ic

ose

cond

,

 

0

.

002

*

pic

oseconds

)


simulation

 =

 Simulation

(f

ixer

.top

ology

,

 system

,

 integr

ator

)


simulation

.context

.set

Positions

(f

ixer

.positions

)



#

 Min

imize

 energy




simulation

.min

imize

Energy

()



#

 Equ

ilibr

ate




simulation

.context

.set

Vel

ocities

To

Temperature

(

300

*

kel

vin

)


simulation

.step

(

100

00

)

 

 #

 

20

 ps

 equil

ibration





#

 Run

 the

 simulation

 for

 

1

 ns




simulation

.report

ers

.append

(D

CD

Reporter

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 

100

0

))


simulation

.report

ers

.append

(State

Data

Reporter

(stdout

,

 

100

0

,

 step

=True

,

 potential

Energy

=True

,

 temperature

=True

))


simulation

.step

(

500

000

)

 

 #

 

1

 ns

 simulation





#

 Step

 

4

:

 Analyze

 the

 secondary

 structure

 using

 MDT

raj




#

 Load

 the

 trajectory




traj

 =

 md

.load

_d

cd

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 top

=

md

.Top

ology

.from

_open

mm

(f

ixer

.top

ology

))



#

 Analyze

 secondary

 structure




initial

_structure

 =

 md

.compute

_d

ssp

(tr

aj

[

0

])


final

_structure

 =

 md

.compute

_d

ssp

(tr

aj

[-

1

])



#

 Compare

 initial

 and

 final

 secondary

 structures




initial

_counts

 =

 {

k

:

 initial

_structure

.count

(k

)

 for

 k

 in

 set

(initial

_structure

)}


final

_counts

 =

 {

k

:

 final

_structure

.count

(k

)

 for

 k

 in

 set

(final

_structure

)}



print

("

Initial

 Secondary

 Structure

 Counts

:",

 initial

_counts

)


print

("

Final

 Secondary

 Structure

 Counts

:",

 final

_counts

)



#

 Determine

 if

 there

 was

 a

 change




if

 initial

_counts

 !=

 final

_counts

:


   

 print

("

The

 secondary

 structure

 changed

 during

 the

 simulation

.")


else

:


   

 print

("

The

 secondary

 structure

 did

 not

 change

 during

 the

 simulation

.")


``

`



This

 script

 will

 download

 the

 P

DB

 file

 for

 

1

X

Q

8

,

 preprocess

 it

,

 run

 a

 

1

 ns

 simulation

 at

 

300

 K

,

 and

 analyze

 the

 secondary

 structure

 at

 the

 beginning

 and

 end

 of

 the

 simulation

.

 The

 results

 will

 indicate

 whether

 the

 secondary

 structure

 changed

 during

 the

 simulation

.

To simulate the protein structure 1XQ8 for 1 ns at 300 K and analyze the secondary structure changes, we will follow these steps:

1. Download the PDB file for 1XQ8.
2. Preprocess the PDB file using PDBFixer to ensure it is ready for simulation.
3. Set up and run a molecular dynamics simulation using OpenMM.
4. Analyze the secondary structure at the beginning and end of the simulation using MDTraj.

Let's start by writing the Python script to perform these tasks:

```python
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Step 1: Download the PDB file for 1XQ8
pdb_id = "1XQ8"
url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
response = requests.get(url)
with open(f"{pdb_id}.pdb", "wb") as file:
    file.write(response.content)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=f"{pdb_id}.pdb")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms

In [6]:
# Timestamp for this cell, printed in the same "date:" / "time:" layout as
# the banner emitted before the model call.
now = datetime.datetime.now()
date, time = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
print(f"date: {date}")
print(f"time: {time}")

date: 2024-10-21
time: 13:33:47


In [None]:
# TESTING THE CODE
#
# End-to-end check of the generated workflow: download 1VII, repair it with
# PDBFixer, solvate and simulate it with OpenMM (20 ps equilibration + 1 ns
# production at 300 K), then compare the DSSP secondary structure of the
# first and last saved frames with MDTraj.

import collections

import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Step 1: Download the PDB file for 1VII
pdb_id = "1VII"
pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
pdb_filename = f"{pdb_id}.pdb"

# Bound the request and fail here on an HTTP error; otherwise an error page
# would be saved as the .pdb file and only surface later as a parse failure.
response = requests.get(pdb_url, timeout=60)
response.raise_for_status()
with open(pdb_filename, 'wb') as file:
    file.write(response.content)

# Step 2: Preprocess the PDB file using PDBFixer
fixer = PDBFixer(filename=pdb_filename)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens()

# Use the AMBER force field
forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')

# Step 3: Set up the simulation
# Solvate the repaired structure in a TIP3P water box with 1 nm padding.
modeller = Modeller(fixer.topology, fixer.positions)
modeller.addSolvent(forcefield, model='tip3p', padding=1.0*nanometer)

system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME,
                                 nonbondedCutoff=1.0*nanometer, constraints=HBonds)

# 300 K Langevin dynamics with a 2 fs time step.
integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
simulation = Simulation(modeller.topology, system, integrator)
simulation.context.setPositions(modeller.positions)

# Minimize energy
simulation.minimizeEnergy()

# Equilibrate
simulation.context.setVelocitiesToTemperature(300*kelvin)
simulation.step(10000)  # 10000 steps x 2 fs = 20 ps equilibration

# Step 4: Run the simulation for 1 ns
# Reporters are attached only now, so the equilibration steps are not recorded.
simulation.reporters.append(DCDReporter('trajectory.dcd', 1000))
simulation.reporters.append(StateDataReporter('output.log', 1000, step=True,
                                              potentialEnergy=True, temperature=True))

simulation.step(500000)  # 500000 steps x 2 fs = 1 ns simulation

# Step 5: Analyze the secondary structure
traj = md.load('trajectory.dcd', top=md.Topology.from_openmm(modeller.topology))

# The system was solvated above, so the topology also holds water (and any
# ions addSolvent added). Keep only protein atoms so that DSSP is computed,
# and counted, for protein residues alone.
protein_traj = traj.atom_slice(traj.topology.select('protein'))

# Get secondary structure at the beginning and end
# NOTE: frame 0 is the first frame written by the DCD reporter (production
# run), not the downloaded structure.
initial_structure = md.compute_dssp(protein_traj[0])
final_structure = md.compute_dssp(protein_traj[-1])

# Compare secondary structures
# compute_dssp returns one row per frame; row 0 is the only row here.
# .tolist() turns the codes into plain Python strings before counting.
initial_counts = dict(collections.Counter(initial_structure[0].tolist()))
final_counts = dict(collections.Counter(final_structure[0].tolist()))

print("Initial Secondary Structure Counts:", initial_counts)
print("Final Secondary Structure Counts:", final_counts)

# Check for changes
changes = {ss: final_counts.get(ss, 0) - initial_counts.get(ss, 0) for ss in set(initial_counts) | set(final_counts)}
print("Changes in Secondary Structure:", changes)

# Totals per class can stay equal while individual residues switch class, so
# also report how many residues have a different assignment in the last frame.
n_changed = int((initial_structure[0] != final_structure[0]).sum())
print("Residues whose assignment changed:", n_changed)