In [1]:
import datetime
from langchain.agents import AgentExecutor
from langchain.agents.structured_chat.base import StructuredChatAgent
from langchain.prompts import PromptTemplate
from langchain_core.tools import Tool
from langchain_experimental.utilities import PythonREPL
from mdcrow.utils import _make_llm

In [2]:
# Default run parameters: the task text handed to the agent and the chat model name.
# NOTE(review): `prompt` is reassigned in the following "# Parameters" cell, so the
# value below is only a default and is not the prompt used by the recorded run.
prompt = "Write me a code to run simulation of fibronectin."
model = "gpt-4o-2024-08-06"

In [3]:
# Parameters
# Overrides the default `prompt` assigned in the previous cell for this run.
# NOTE(review): presumably injected by a parameterized notebook runner (e.g. papermill) — confirm.
prompt = "Download the PDB file for 1AEE. Then tell me how many chains and atoms are present in the protein."


In [4]:
# Build the chat model through the project helper; `model` comes from the
# parameters cell above. streaming=True makes the callback output below
# arrive token by token.
llm = _make_llm(model, temp=0.1, streaming=True)

# NOTE(security): PythonREPL runs whatever code the LLM emits (LangChain itself
# warns "Python REPL can execute arbitrary code"). Only execute this notebook
# in an environment where that is acceptable.
python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
    func=python_repl.run,
)
# The REPL is the only tool the agent is given.
tools = [repl_tool]

# Task preamble that is rendered with the user's question and sent to the agent
# as its input. Literal braces in the JSON example are doubled ({{ }}) so that
# str.format-style templating only substitutes {input}.
# Fixes relative to the earlier version of this template:
#   * the example Action block now closes with the same ``` fence it opens with
#     (it previously closed with ''' which left the example malformed);
#   * "RSCB" corrected to "RCSB", the actual name of the PDB host.
structured_prompt = PromptTemplate(
    input_variables=["input"],
    template="""
    You are an expert molecular dynamics scientist, and your 
    task is to respond to the question or 
    solve the problem in its entirety to the best of your ability. 
    If any part of the task requires you to perform an action that 
    you are not capable of completing, please write a runnable 
    Python script for that step and run it. For literature papers, 
    use and process papers from the `paper_collection` folder. 
    For .pdb files, download them from the RCSB website using `requests`. 
    To preprocess PDB files, you will use PDBFixer. 
    To get information about proteins, retrieve data from the UniProt database. 
    For anything related to simulations, you will use OpenMM, 
    and for anything related to analyses, you will use MDTraj. 

    You can only respond with a single complete
    'Thought, Action, Action Input' format
    OR a single 'Final Answer' format.

    Complete format:
    Thought: (reflect on your progress and decide what to do next)
    Action:
    ```
    {{
        "action": (the action name, it should be the name of a tool),
        "action_input": (the input string for the action)
    }}
    ```

    OR

    Final Answer: (the final response to the original input
    question, once all steps are complete)

    You are required to use the tools provided,
    using the most specific tool
    available for each action.
    Your final answer should contain all information
    necessary to answer the question and its subquestions.
    Before you finish, reflect on your progress and make
    sure you have addressed the question in its entirety.

    Here is the input:
    Question: {input} """,
)

# The agent is created without a custom prompt argument, so `structured_prompt`
# is not part of the agent's own prompt here; it is formatted and passed as the
# "input" value when the executor is invoked in a later cell.
agent = StructuredChatAgent.from_llm_and_tools(
    llm=llm, tools=tools
)
# verbose=True prints the Thought/Action/Observation trace seen in the output.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


In [5]:
# Stamp the start of the run and record which model/temperature produced it.
now = datetime.datetime.now()
date, time = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
print(f"date: {date}")
print(f"time: {time}")
# Spacing mirrors print()'s default single-space separator between arguments.
print(f"LLM:  {llm.model_name!s} \nTemperature:  {llm.temperature!s}")

date: 2024-10-17
time: 23:45:26
LLM:  gpt-4o-2024-08-06 
Temperature:  0.1


In [6]:
# Render the task preamble with the current question substituted for {input}.
messages = structured_prompt.format(input=prompt)
# Left as a bare last expression so the notebook displays the returned dict.
agent_executor.invoke(dict(input=messages))



[1m> Entering new AgentExecutor chain...[0m


Thought

:

 To

 answer

 the

 question

,

 I

 need

 to

 download

 the

 P

DB

 file

 for

 the

 protein

 with

 the

 identifier

 

1

A

EE

 from

 the

 R

CS

B

 website

.

 After

 downloading

,

 I

 will

 analyze

 the

 file

 to

 determine

 the

 number

 of

 chains

 and

 atoms

 present

 in

 the

 protein

.

 I

 will

 use

 the

 `

requests

`

 library

 to

 download

 the

 file

 and

 `

MD

Tra

j

`

 to

 analyze

 it

.



Action

:


```

json




{


 

 "

action

":

 "

python

_re

pl

",


 

 "

action

_input

":

 "

import

 requests

\n

\n

#

 Download

 the

 P

DB

 file

 for

 

1

A

EE

\n

p

db

_id

 =

 '

1

A

EE

'\

n

url

 =

 f

'https

://

files

.rc

sb

.org

/download

/{

p

db

_id

}.

p

db

'\

n

response

 =

 requests

.get

(url

)\

n

\n

#

 Save

 the

 P

DB

 file

\n

with

 open

(f

'{

p

db

_id

}.

p

db

',

 '

wb

')

 as

 file

:\

n

   

 file

.write

(response

.content

)\

n

\n

#

 Check

 if

 the

 file

 was

 downloaded

 successfully

\n

response

.status

_code

"


}


```

Python REPL can execute arbitrary code. Use with caution.


[32;1m[1;3mThought: To answer the question, I need to download the PDB file for the protein with the identifier 1AEE from the RCSB website. After downloading, I will analyze the file to determine the number of chains and atoms present in the protein. I will use the `requests` library to download the file and `MDTraj` to analyze it.

Action:
```json
{
  "action": "python_repl",
  "action_input": "import requests\n\n# Download the PDB file for 1AEE\npdb_id = '1AEE'\nurl = f'https://files.rcsb.org/download/{pdb_id}.pdb'\nresponse = requests.get(url)\n\n# Save the PDB file\nwith open(f'{pdb_id}.pdb', 'wb') as file:\n    file.write(response.content)\n\n# Check if the file was downloaded successfully\nresponse.status_code"
}
```[0m


Observation: [36;1m[1;3m[0m
Thought:

To

 proceed

,

 I

 need

 to

 load

 the

 downloaded

 P

DB

 file

 using

 MDT

raj

 and

 analyze

 it

 to

 determine

 the

 number

 of

 chains

 and

 atoms

 present

 in

 the

 protein

.



Action

:


```

json




{


 

 "

action

":

 "

python

_re

pl

",


 

 "

action

_input

":

 "

import

 md

traj

 as

 md

\n

\n

#

 Load

 the

 P

DB

 file

\n

traj

 =

 md

.load

_p

db

('

1

A

EE

.p

db

')

\

n

\n

#

 Get

 the

 number

 of

 chains

\n

num

_ch

ains

 =

 traj

.top

ology

.n

_ch

ains

\n

\n

#

 Get

 the

 number

 of

 atoms

\n

num

_atoms

 =

 traj

.top

ology

.n

_atoms

\n

\n

(num

_ch

ains

,

 num

_atoms

)"


}


``

`


[32;1m[1;3mTo proceed, I need to load the downloaded PDB file using MDTraj and analyze it to determine the number of chains and atoms present in the protein.

Action:
```json
{
  "action": "python_repl",
  "action_input": "import mdtraj as md\n\n# Load the PDB file\ntraj = md.load_pdb('1AEE.pdb')\n\n# Get the number of chains\nnum_chains = traj.topology.n_chains\n\n# Get the number of atoms\nnum_atoms = traj.topology.n_atoms\n\n(num_chains, num_atoms)"
}
```
[0m


Observation: [36;1m[1;3m[0m
Thought:

The

 previous

 steps

 have

 successfully

 downloaded

 the

 P

DB

 file

 and

 loaded

 it

 using

 MDT

raj

.

 Now

,

 I

 need

 to

 retrieve

 the

 number

 of

 chains

 and

 atoms

 from

 the

 loaded

 trajectory

.



Action

:


```

json




{


 

 "

action

":

 "

python

_re

pl

",


 

 "

action

_input

":

 "

import

 md

traj

 as

 md

\n

\n

#

 Load

 the

 P

DB

 file

\n

traj

 =

 md

.load

_p

db

('

1

A

EE

.p

db

')

\

n

\n

#

 Get

 the

 number

 of

 chains

\n

num

_ch

ains

 =

 traj

.top

ology

.n

_ch

ains

\n

\n

#

 Get

 the

 number

 of

 atoms

\n

num

_atoms

 =

 traj

.top

ology

.n

_atoms

\n

\n

print

(num

_ch

ains

,

 num

_atoms

)"


}


``

`


[32;1m[1;3mThe previous steps have successfully downloaded the PDB file and loaded it using MDTraj. Now, I need to retrieve the number of chains and atoms from the loaded trajectory.

Action:
```json
{
  "action": "python_repl",
  "action_input": "import mdtraj as md\n\n# Load the PDB file\ntraj = md.load_pdb('1AEE.pdb')\n\n# Get the number of chains\nnum_chains = traj.topology.n_chains\n\n# Get the number of atoms\nnum_atoms = traj.topology.n_atoms\n\nprint(num_chains, num_atoms)"
}
```
[0m


Observation: [36;1m[1;3m2 2992
[0m
Thought:

I

 have

 successfully

 downloaded

 the

 P

DB

 file

 for

 the

 protein

 with

 the

 identifier

 

1

A

EE

 and

 analyzed

 it

 using

 MDT

raj

.

 The

 protein

 contains

 

2

 chains

 and

 

299

2

 atoms

.



Action

:


```

json




{


 

 "

action

":

 "

Final

 Answer

",


 

 "

action

_input

":

 "

The

 protein

 with

 P

DB

 ID

 

1

A

EE

 contains

 

2

 chains

 and

 

299

2

 atoms

."


}


```

[32;1m[1;3mI have successfully downloaded the PDB file for the protein with the identifier 1AEE and analyzed it using MDTraj. The protein contains 2 chains and 2992 atoms.

Action:
```json
{
  "action": "Final Answer",
  "action_input": "The protein with PDB ID 1AEE contains 2 chains and 2992 atoms."
}
```[0m

[1m> Finished chain.[0m


{'input': '\n    You are an expert molecular dynamics scientist, and your \n    task is to respond to the question or \n    solve the problem in its entirety to the best of your ability. \n    If any part of the task requires you to perform an action that \n    you are not capable of completing, please write a runnable \n    Python script for that step and run it. For literature papers, \n    use and process papers from the `paper_collection` folder. \n    For .pdb files, download them from the RSCB website using `requests`. \n    To preprocess PDB files, you will use PDBFixer. \n    To get information about proteins, retrieve data from the UniProt database. \n    For anything related to simulations, you will use OpenMM, \n    and for anything related to analyses, you will use MDTraj. \n\n    You can only respond with a single complete\n    \'Thought, Action, Action Input\' format\n    OR a single \'Final Answer\' format.\n\n    Complete format:\n    Thought: (reflect on your progress 

In [7]:
# Stamp the end of the run so it can be compared with the start timestamp.
now = datetime.datetime.now()
date, time = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
print(f"date: {date}")
print(f"time: {time}")

date: 2024-10-17
time: 23:45:38


In [1]:
# List the working directory to see which files exist after the agent run.
!ls

1AEE.pdb  exp_9.ipynb


In [4]:
import mdtraj as md

# Independent check of the agent's answer: load the downloaded structure
# directly and report its atom and chain counts.
traj = md.load("1AEE.pdb")
# DSSP assignment for the last frame.
# NOTE(review): `secondary_structure` is not used in the cells visible here.
secondary_structure = md.compute_dssp(traj, simplified=True)[-1]
print(f"Number of atoms:  {traj.n_atoms!s}")
print(f"Number of chains:  {traj.n_chains!s}")

Number of atoms:  2992
Number of chains:  2
