In [1]:
import datetime
from mdagent.utils import _make_llm

In [2]:
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
prompt = "Simulate oxygenated hemoglobin (1A3N) and deoxygenated hemoglobin (6BB5). Then analyze the RDF of both."


In [4]:
llm = _make_llm(model, temp=0.1, streaming=True)

system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)
messages = [
    ("system", system_prompt),
    ("human", prompt),
]

now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",llm.model_name,"\nTemperature: ",llm.temperature)

date: 2024-10-16
time: 20:08:04
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To simulate oxygenated hemoglobin (PDB ID: 1A3N) and deoxygenated hemoglobin (PDB ID: 6BB5), and analyze the radial distribution function (RDF) of both, we will follow these steps:

1. Download the PDB files for both structures.
2. Preprocess the PDB files using PDBFixer to ensure they are ready for simulation.
3. Set up and run molecular dynamics simulations using OpenMM.
4. Analyze the RDF using MDTraj.

Let's start by writing a Python script that performs these tasks:

```python
import os
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Function to download PDB files
def download_pdb(pdb_id, filename):
    url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
    response = requests.get(url)
    with open(filename, 'w') as file:
        file.write(response.text)

# Function to preprocess PDB files using PDBFixer
def preprocess_pdb(input_file, output_file):
    fixer = PDBFixer(filename=input_

ated

 hem

oglobin

 (

P

DB

 ID

:

 

1

A

3

N

)

 and

 de

oxygen

ated

 hem

oglobin

 (

P

DB

 ID

:

 

6

BB

5

),

 and

 analyze

 the

 radial

 distribution

 function

 (

R

DF

)

 of

 both

,

 we

 will

 follow

 these

 steps

:



1

.

 **

Download

 the

 P

DB

 files

**

 for

 both

 oxygen

ated

 and

 de

oxygen

ated

 hem

oglobin

.


2

.

 **

Pre

process

 the

 P

DB

 files

**

 to

 ensure

 they

 are

 suitable

 for

 simulation

.


3

.

 **

Set

 up

 and

 run

 molecular

 dynamics

 simulations

**

 using

 Open

MM

.


4

.

 **

Analyze

 the

 RDF

**

 using

 MDT

raj

.



Let's

 start

 by

 downloading

 the

 P

DB

 files

 and

 preprocessing

 them

.



###

 Step

 

1

:

 Download

 P

DB

 Files





We'll

 download

 the

 P

DB

 files

 for

 

1

A

3

N

 and

 

6

BB

5

 using

 the

 `

requests

`

 library

.



```

python




import

 requests





def

 download

_p

db

(p

db

_id

):


   

 url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


   

 response

 =

 requests

.get

(url

)


   

 if

 response

.status

_code

 ==

 

200

:


       

 with

 open

(f

"{

p

db

_id

}.

p

db

",

 "

w

")

 as

 file

:


           

 file

.write

(response

.text

)


       

 print

(f

"

Downloaded

 {

p

db

_id

}.

p

db

 successfully

.")


   

 else

:


       

 print

(f

"

Failed

 to

 download

 {

p

db

_id

}.

p

db

.")



download

_p

db

("

1

A

3

N

")


download

_p

db

("

6

BB

5

")


``

`



###

 Step

 

2

:

 Pre

process

 P

DB

 Files





Pre

processing

 involves

 removing

 water

 molecules

,

 adding

 missing

 hyd

rog

ens

,

 and

 ensuring

 the

 structure

 is

 suitable

 for

 simulation

.

 We

 will

 use

 Open

MM

's

 `

Mod

eller

`

 for

 this

.



```

python




from

 open

mm

.app

 import

 P

DB

File

,

 Mod

eller




from

 open

mm

.app

 import

 Force

Field





def

 preprocess

_p

db

(p

db

_id

):


   

 pdb

 =

 P

DB

File

(f

"{

p

db

_id

}.

p

db

")


   

 force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

p

.xml

')


   

 modeller

 =

 Mod

eller

(p

db

.top

ology

,

 pdb

.positions

)


    


   

 #

 Remove

 water

 molecules




   

 modeller

.delete

Water

()


    


   

 #

 Add

 hyd

rog

ens




   

 modeller

.add

Hyd

rog

ens

(force

field

)


    


   

 #

 Save

 the

 pre

processed

 P

DB




   

 with

 open

(f

"{

p

db

_id

}_

processed

.p

db

",

 "

w

")

 as

 file

:


       

 P

DB

File

.write

File

(mod

eller

.top

ology

,

 modeller

.positions

,

 file

)


   

 print

(f

"

Pre

processed

 {

p

db

_id

}.

p

db

 successfully

.")



pre

process

_p

db

("

1

A

3

N

")


pre

process

_p

db

("

6

BB

5

")


``

`



###

 Step

 

3

:

 Set

 Up

 and

 Run

 Sim

ulations





We'll

 set

 up

 and

 run

 molecular

 dynamics

 simulations

 using

 Open

MM

.



```

python




from

 open

mm

 import

 *


from

 open

mm

.app

 import

 *


from

 open

mm

.unit

 import

 *



def

 run

_sim

ulation

(p

db

_id

):


   

 pdb

 =

 P

DB

File

(f

"{

p

db

_id

}_

processed

.p

db

")


   

 force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

p

.xml

')


   

 system

 =

 force

field

.create

System

(p

db

.top

ology

,

 non

bond

ed

Method

=

PM

E

,

 non

bond

ed

Cut

off

=

1

.

0

*

nan

ometer

,

 constraints

=

HB

onds

)


    


   

 integr

ator

 =

 Lange

vin

Integrator

(

300

*

kel

vin

,

 

1

/p

ic

ose

cond

,

 

0

.

002

*

pic

oseconds

)


   

 simulation

 =

 Simulation

(p

db

.top

ology

,

 system

,

 integr

ator

)


   

 simulation

.context

.set

Positions

(p

db

.positions

)


    


   

 #

 Min

imize

 energy




   

 simulation

.min

imize

Energy

()


    


   

 #

 Equ

ilibr

ate




   

 simulation

.context

.set

Vel

ocities

To

Temperature

(

300

*

kel

vin

)


   

 simulation

.step

(

100

00

)

 

 #

 

20

 ps

 of

 equil

ibration


    


   

 #

 Production

 run




   

 simulation

.report

ers

.append

(D

CD

Reporter

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 

100

0

))


   

 simulation

.report

ers

.append

(State

Data

Reporter

(stdout

,

 

100

0

,

 step

=True

,

 potential

Energy

=True

,

 temperature

=True

))


   

 simulation

.step

(

500

00

)

 

 #

 

100

 ps

 of

 production


    


   

 print

(f

"

Simulation

 for

 {

p

db

_id

}

 completed

.")



run

_sim

ulation

("

1

A

3

N

")


run

_sim

ulation

("

6

BB

5

")


``

`



###

 Step

 

4

:

 Analyze

 RDF





Finally

,

 we

 will

 analyze

 the

 RDF

 using

 MDT

raj

.



```

python




import

 md

traj

 as

 md




import

 matplotlib

.pyplot

 as

 plt





def

 analyze

_r

df

(p

db

_id

):


   

 traj

 =

 md

.load

_d

cd

(f

'{

p

db

_id

}_

trajectory

.d

cd

',

 top

=f

'{

p

db

_id

}_

processed

.p

db

')


    


   

 #

 Calculate

 RDF




   

 atom

_indices

 =

 traj

.top

ology

.select

('

name

 O

')

 

 #

 Example

:

 RDF

 of

 oxygen

 atoms




   

 rdf

,

 r

 =

 md

.compute

_r

df

(tr

aj

,

 pairs

=

atom

_indices

)


    


   

 #

 Plot

 RDF




   

 plt

.plot

(r

,

 rdf

,

 label

=f

'{

p

db

_id

}')


   

 plt

.xlabel

('

Distance

 (

nm

)')


   

 plt

.ylabel

('

g

(r

)')


   

 plt

.title

('

Rad

ial

 Distribution

 Function

')


   

 plt

.legend

()


   

 plt

.show

()



an

alyze

_r

df

("

1

A

3

N

")


an

alyze

_r

df

("

6

BB

5

")


``

`



This

 script

 will

 download

 the

 P

DB

 files

,

 preprocess

 them

,

 run

 simulations

,

 and

 analyze

 the

 RDF

 for

 both

 oxygen

ated

 and

 de

oxygen

ated

 hem

oglobin

.

 Adjust

 the

 atom

 selection

 in

 the

 RDF

 analysis

 as

 needed

 for

 your

 specific

 analysis

 goals

.

To simulate oxygenated hemoglobin (PDB ID: 1A3N) and deoxygenated hemoglobin (PDB ID: 6BB5), and analyze the radial distribution function (RDF) of both, we will follow these steps:

1. **Download the PDB files** for both oxygenated and deoxygenated hemoglobin.
2. **Preprocess the PDB files** to ensure they are suitable for simulation.
3. **Set up and run molecular dynamics simulations** using OpenMM.
4. **Analyze the RDF** using MDTraj.

Let's start by downloading the PDB files and preprocessing them.

### Step 1: Download PDB Files

We'll download the PDB files for 1A3N and 6BB5 using the `requests` library.

```python
import requests

def download_pdb(pdb_id):
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url)
    if response.status_code == 200:
        with open(f"{pdb_id}.pdb", "w") as file:
            file.write(response.text)
        print(f"Downloaded {pdb_id}.pdb successfully.")
    else:
        print(f"Failed to download {pdb_id}.pdb."

In [6]:
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

date: 2024-10-16
time: 20:08:21


In [7]:
# TEST THE CODE
import os
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md

# Function to download PDB files
def download_pdb(pdb_id, filename):
    url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
    response = requests.get(url)
    with open(filename, 'w') as file:
        file.write(response.text)

# Function to preprocess PDB files using PDBFixer
def preprocess_pdb(input_file, output_file):
    fixer = PDBFixer(filename=input_file)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens()
    with open(output_file, 'w') as file:
        PDBFile.writeFile(fixer.topology, fixer.positions, file)

# Function to set up and run a simulation
def run_simulation(pdb_file, output_dcd, output_log):
    pdb = PDBFile(pdb_file)
    forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=PME, nonbondedCutoff=1*nanometer, constraints=HBonds)
    integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.004*picoseconds)
    simulation = Simulation(pdb.topology, system, integrator)
    simulation.context.setPositions(pdb.positions)
    simulation.minimizeEnergy()
    simulation.reporters.append(DCDReporter(output_dcd, 1000))
    simulation.reporters.append(StateDataReporter(output_log, 1000, step=True, potentialEnergy=True, temperature=True))
    simulation.step(10000)  # Run for 10,000 steps

# Function to analyze RDF using MDTraj
def analyze_rdf(dcd_file, pdb_file, output_rdf):
    traj = md.load(dcd_file, top=pdb_file)
    oxygen_atoms = traj.topology.select('name O')
    rdf, r = md.compute_rdf(traj, pairs=oxygen_atoms)
    with open(output_rdf, 'w') as file:
        for r_value, rdf_value in zip(r, rdf):
            file.write(f"{r_value}, {rdf_value}\n")

# Main script
if __name__ == "__main__":
    # Download PDB files
    print("Step 1")
    download_pdb('1A3N', '1A3N.pdb')
    download_pdb('6BB5', '6BB5.pdb')

    # Preprocess PDB files
    print("Step 2")
    preprocess_pdb('1A3N.pdb', '1A3N_fixed.pdb')
    preprocess_pdb('6BB5.pdb', '6BB5_fixed.pdb')

    # Run simulations
    print("Step 3")
    run_simulation('1A3N_fixed.pdb', '1A3N.dcd', '1A3N.log')
    run_simulation('6BB5_fixed.pdb', '6BB5.dcd', '6BB5.log')

    # Analyze RDF
    print("Step 4")
    analyze_rdf('1A3N.dcd', '1A3N_fixed.pdb', '1A3N_rdf.csv')
    analyze_rdf('6BB5.dcd', '6BB5_fixed.pdb', '6BB5_rdf.csv')

    print("Simulations and RDF analyses are complete.")

Step 1
Step 2
Step 3


ValueError: No template found for residue 575 (HEM).  This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template

In [1]:
!ls

1A3N_fixed.pdb	1A3N.pdb  6BB5_fixed.pdb  6BB5.pdb  exp_24.ipynb
