In [1]:
import datetime
from mdcrow.utils import _make_llm

In [2]:
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
prompt = "Analyze the RDF of the simulation of 1A3N solvated in water."


In [4]:
llm = _make_llm(model, temp=0.1, streaming=True)

system_prompt = (
    "You are an expert molecular dynamics scientist, and your "
    "task is to respond to the question or "
    "solve the problem in its entirety to the best of your ability. "
    "If any part of the task requires you to perform an action that "
    "you are not capable of completing, please write a runnable "
    "Python script for that step and move on. For literature papers, "
    "use and process papers from the `paper_collection` folder. "
    "For .pdb files, download them from the RSCB website using `requests`. "
    "To preprocess PDB files, you will use PDBFixer. "
    "To get information about proteins, retrieve data from the UniProt database. "
    "For anything related to simulations, you will use OpenMM, "
    "and for anything related to analyses, you will use MDTraj. "
    "At the end, combine any scripts into one script. "
)
messages = [
    ("system", system_prompt),
    ("human", prompt),
]

now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)
print("LLM: ",llm.model_name,"\nTemperature: ",llm.temperature)

date: 2024-10-16
time: 20:08:07
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [5]:
ai_msg = llm.invoke(messages)
print(ai_msg.content)

To analyze the radial distribution function (RDF) of a simulation of the protein 1A3N solvated in water, we need to perform the following steps:

1. **Download the PDB file for 1A3N.**
2. **Prepare the system for simulation:**
   - Solvate the protein in a water box.
   - Add ions if necessary to neutralize the system.
3. **Run a molecular dynamics simulation using OpenMM.**
4. **Analyze the trajectory to compute the RDF using MDTraj.**

Let's go through these steps with the necessary Python scripts.

### Step 1: Download the PDB file for 1A3N

```python
import requests

def download_pdb(pdb_id):
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url)
    if response.status_code == 200:
        with open(f"{pdb_id}.pdb", "w") as file:
            file.write(response.text)
    else:
        raise Exception(f"Failed to download PDB file for {pdb_id}")

download_pdb("1A3N")
```

### Step 2: Prepare the system for simulation

```python
from pdbfixer impor

 radial

 distribution

 function

 (

R

DF

)

 of

 a

 simulation

 of

 the

 protein

 

1

A

3

N

 solv

ated

 in

 water

,

 we

 need

 to

 follow

 these

 steps

:



1

.

 **

Download

 the

 P

DB

 file

 for

 

1

A

3

N

**

:

 We'll

 retrieve

 the

 P

DB

 file

 from

 the

 R

CS

B

 P

DB

 database

.


2

.

 **

Prepare

 the

 system

 for

 simulation

**

:

 This

 involves

 setting

 up

 the

 protein

 in

 a

 water

 box

,

 adding

 ions

 if

 necessary

,

 and

 defining

 the

 force

 field

 parameters

.


3

.

 **

Run

 the

 molecular

 dynamics

 simulation

**

:

 We'll

 use

 Open

MM

 to

 perform

 the

 simulation

.


4

.

 **

Analyze

 the

 RDF

**

:

 Use

 MDT

raj

 to

 calculate

 and

 plot

 the

 RDF

 of

 the

 system

.



Let's

 start

 by

 writing

 a

 Python

 script

 to

 download

 the

 P

DB

 file

 and

 set

 up

 the

 simulation

.



###

 Step

 

1

:

 Download

 the

 P

DB

 file

 for

 

1

A

3

N





```

python




import

 requests





def

 download

_p

db

(p

db

_id

):


   

 url

 =

 f

"https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

"


   

 response

 =

 requests

.get

(url

)


   

 if

 response

.status

_code

 ==

 

200

:


       

 with

 open

(f

"{

p

db

_id

}.

p

db

",

 "

w

")

 as

 file

:


           

 file

.write

(response

.text

)


       

 print

(f

"{

p

db

_id

}.

p

db

 downloaded

 successfully

.")


   

 else

:


       

 print

(f

"

Failed

 to

 download

 {

p

db

_id

}.

p

db

.")



download

_p

db

("

1

A

3

N

")


``

`



###

 Step

 

2

:

 Prepare

 the

 system

 for

 simulation





```

python




from

 open

mm

.app

 import

 *


from

 open

mm

 import

 *


from

 open

mm

.unit

 import

 *


import

 os





def

 prepare

_sim

ulation

(p

db

_file

):


   

 #

 Load

 the

 P

DB

 file




   

 pdb

 =

 P

DB

File

(p

db

_file

)


    


   

 #

 Define

 the

 force

 field




   

 force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

pf

b

.xml

')


    


   

 #

 Create

 a

 modeller

 object




   

 modeller

 =

 Mod

eller

(p

db

.top

ology

,

 pdb

.positions

)


    


   

 #

 Add

 solvent

 (

water

)


   

 modeller

.add

Sol

vent

(force

field

,

 model

='

tip

3

p

',

 padding

=

1

.

0

*

nan

ometer

)


    


   

 #

 Create

 the

 system




   

 system

 =

 force

field

.create

System

(mod

eller

.top

ology

,

 non

bond

ed

Method

=

PM

E

,

 


                                    

 non

bond

ed

Cut

off

=

1

.

0

*

nan

ometer

,

 constraints

=

HB

onds

)


    


   

 #

 Set

 up

 the

 integr

ator




   

 integr

ator

 =

 Lange

vin

Integrator

(

300

*

kel

vin

,

 

1

/p

ic

ose

cond

,

 

0

.

002

*

pic

oseconds

)


    


   

 #

 Create

 the

 simulation

 object




   

 simulation

 =

 Simulation

(mod

eller

.top

ology

,

 system

,

 integr

ator

)


   

 simulation

.context

.set

Positions

(mod

eller

.positions

)


    


   

 #

 Min

imize

 energy




   

 simulation

.min

imize

Energy

()


    


   

 #

 Equ

ilibr

ate




   

 simulation

.context

.set

Vel

ocities

To

Temperature

(

300

*

kel

vin

)


   

 simulation

.step

(

100

00

)

 

 #

 Run

 

10

,

000

 steps

 of

 equil

ibration


    


   

 #

 Save

 the

 state




   

 state

 =

 simulation

.context

.get

State

(get

Positions

=True

,

 get

Vel

ocities

=True

)


   

 with

 open

('

equ

ilibr

ated

_state

.xml

',

 '

w

')

 as

 f

:


       

 f

.write

(Xml

Serializer

.serialize

(state

))


    


   

 print

("

System

 prepared

 and

 equilibr

ated

.")



prepare

_sim

ulation

("

1

A

3

N

.p

db

")


``

`



###

 Step

 

3

:

 Run

 the

 molecular

 dynamics

 simulation





```

python




def

 run

_sim

ulation

():


   

 #

 Load

 the

 equilibr

ated

 state




   

 with

 open

('

equ

ilibr

ated

_state

.xml

',

 '

r

')

 as

 f

:


       

 state

 =

 Xml

Serializer

.deserialize

(f

.read

())


    


   

 #

 Create

 the

 simulation

 object

 again




   

 pdb

 =

 P

DB

File

("

1

A

3

N

.p

db

")


   

 force

field

 =

 Force

Field

('

amber

14

-all

.xml

',

 '

amber

14

/t

ip

3

pf

b

.xml

')


   

 modeller

 =

 Mod

eller

(p

db

.top

ology

,

 pdb

.positions

)


   

 modeller

.add

Sol

vent

(force

field

,

 model

='

tip

3

p

',

 padding

=

1

.

0

*

nan

ometer

)


   

 system

 =

 force

field

.create

System

(mod

eller

.top

ology

,

 non

bond

ed

Method

=

PM

E

,

 


                                    

 non

bond

ed

Cut

off

=

1

.

0

*

nan

ometer

,

 constraints

=

HB

onds

)


   

 integr

ator

 =

 Lange

vin

Integrator

(

300

*

kel

vin

,

 

1

/p

ic

ose

cond

,

 

0

.

002

*

pic

oseconds

)


   

 simulation

 =

 Simulation

(mod

eller

.top

ology

,

 system

,

 integr

ator

)


   

 simulation

.context

.set

State

(state

)


    


   

 #

 Run

 the

 simulation




   

 simulation

.report

ers

.append

(D

CD

Reporter

('

trajectory

.d

cd

',

 

100

0

))


   

 simulation

.report

ers

.append

(State

Data

Reporter

('

data

.log

',

 

100

0

,

 step

=True

,


                                                 

 potential

Energy

=True

,

 temperature

=True

))


   

 simulation

.step

(

500

00

)

 

 #

 Run

 

50

,

000

 steps

 of

 production


    


   

 print

("

Simulation

 completed

.")



run

_sim

ulation

()


``

`



###

 Step

 

4

:

 Analyze

 the

 RDF





```

python




import

 md

traj

 as

 md




import

 matplotlib

.pyplot

 as

 plt





def

 analyze

_r

df

():


   

 #

 Load

 the

 trajectory




   

 traj

 =

 md

.load

('

trajectory

.d

cd

',

 top

='

1

A

3

N

.p

db

')


    


   

 #

 Select

 atoms

 for

 RDF

 calculation

 (

e

.g

.,

 protein

 heavy

 atoms

 and

 water

 oxygen

)


   

 protein

_atoms

 =

 traj

.top

ology

.select

('

protein

 and

 name

 CA

')


   

 water

_

oxygen

 =

 traj

.top

ology

.select

('

water

 and

 name

 O

')


    


   

 #

 Calculate

 RDF




   

 rdf

,

 r

 =

 md

.compute

_r

df

(tr

aj

,

 pairs

=

md

.compute

_pairs

(tr

aj

,

 protein

_atoms

,

 water

_

oxygen

),

 r

_range

=(

0

.

0

,

 

1

.

5

))


    


   

 #

 Plot

 RDF




   

 plt

.plot

(r

,

 rdf

)


   

 plt

.xlabel

('

Distance

 (

nm

)')


   

 plt

.ylabel

('

g

(r

)')


   

 plt

.title

('

Rad

ial

 Distribution

 Function

')


   

 plt

.show

()



an

alyze

_r

df

()


``

`



This

 script

 will

 download

 the

 P

DB

 file

,

 prepare

 the

 system

,

 run

 the

 simulation

,

 and

 analyze

 the

 RDF

.

 Make

 sure

 you

 have

 the

 necessary

 dependencies

 installed

 (`

Open

MM

`,

 `

MD

Tra

j

`,

 `

mat

plotlib

`,

 etc

.)

 before

 running

 the

 script

.

To analyze the radial distribution function (RDF) of a simulation of the protein 1A3N solvated in water, we need to follow these steps:

1. **Download the PDB file for 1A3N**: We'll retrieve the PDB file from the RCSB PDB database.
2. **Prepare the system for simulation**: This involves setting up the protein in a water box, adding ions if necessary, and defining the force field parameters.
3. **Run the molecular dynamics simulation**: We'll use OpenMM to perform the simulation.
4. **Analyze the RDF**: Use MDTraj to calculate and plot the RDF of the system.

Let's start by writing a Python script to download the PDB file and set up the simulation.

### Step 1: Download the PDB file for 1A3N

```python
import requests

def download_pdb(pdb_id):
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url)
    if response.status_code == 200:
        with open(f"{pdb_id}.pdb", "w") as file:
            file.write(response.text)
        print(f"{pdb_id}.pdb dow

In [6]:
now = datetime.datetime.now()
date = now.strftime("%Y-%m-%d")
print("date:",date)
time = now.strftime("%H:%M:%S")
print("time:",time)

date: 2024-10-16
time: 20:08:35


In [7]:
# TEST THE CODE 
import requests
from pdbfixer import PDBFixer
from openmm.app import *
from openmm import *
from openmm.unit import *
import mdtraj as md
import matplotlib.pyplot as plt

def download_pdb(pdb_id):
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url)
    if response.status_code == 200:
        with open(f"{pdb_id}.pdb", "w") as file:
            file.write(response.text)
    else:
        raise Exception(f"Failed to download PDB file for {pdb_id}")

def prepare_system(pdb_file):
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens()

    forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
    modeller = Modeller(fixer.topology, fixer.positions)
    modeller.addSolvent(forcefield, model='tip3p', padding=1.0*nanometers)

    system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME,
                                     nonbondedCutoff=1.0*nanometers, constraints=HBonds)
    integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
    simulation = Simulation(modeller.topology, system, integrator)
    simulation.context.setPositions(modeller.positions)

    return simulation

def run_simulation(simulation, steps=10000):
    simulation.minimizeEnergy()
    simulation.context.setVelocitiesToTemperature(300*kelvin)
    simulation.reporters.append(StateDataReporter(stdout, 1000, step=True,
                                                  potentialEnergy=True, temperature=True))
    simulation.reporters.append(DCDReporter('trajectory.dcd', 1000))
    simulation.step(steps)

def compute_rdf(trajectory_file, topology_file, atom_pairs, r_range=(0.0, 1.0), bin_width=0.01):
    traj = md.load(trajectory_file, top=topology_file)
    rdf, r = md.compute_rdf(traj, pairs=atom_pairs, r_range=r_range, bin_width=bin_width)
    return rdf, r

def main():
    print("Step 1")
    pdb_id = "1A3N"
    download_pdb(pdb_id)
    print("Step 2")
    simulation = prepare_system(f"{pdb_id}.pdb")
    print("Step 3")
    run_simulation(simulation)

    print("Step 4")
    # Define atom pairs for RDF calculation (e.g., between oxygen atoms of water molecules)
    # This is a placeholder; you need to specify the actual atom indices or selection criteria.
    atom_pairs = [(i, j) for i in range(100) for j in range(100, 200)]  # Example pairs

    rdf, r = compute_rdf('trajectory.dcd', f"{pdb_id}.pdb", atom_pairs)

    plt.plot(r, rdf)
    plt.xlabel('Distance (nm)')
    plt.ylabel('g(r)')
    plt.title('Radial Distribution Function')
    plt.show()

if __name__ == "__main__":
    main()



Step 1
Step 2


ValueError: No template found for residue 575 (HEM).  This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#template

In [1]:
!ls

1A3N.pdb  exp_23.ipynb
