In [1]:
import ast
import json
import os
from typing import Any, Dict, List, Optional

import langchain
from langchain import LLMChain, PromptTemplate
from langchain.base_language import BaseLanguageModel
from langchain.tools import BaseTool
from openmm import (
    AndersenThermostat,
    BrownianIntegrator,
    LangevinIntegrator,
    LangevinMiddleIntegrator,
    MonteCarloBarostat,
    VerletIntegrator,
    app,
    Platform,
    OpenMMException,
)
from openmm.app import (
    PME,
    CutoffPeriodic,
    DCDReporter,
    ForceField,
    Modeller,
    NoCutoff,
    PDBFile,
    PDBReporter,
    PDBxFile,
    Simulation,
    StateDataReporter,
)
from openmm.unit import bar, femtoseconds, kelvin, nanometers, picosecond, picoseconds
from pydantic import BaseModel, Field, root_validator





In [2]:
from dotenv import load_dotenv
import os
load_dotenv()


True

In [3]:
from mdagent import MDAgent

  import xdrlib


In [None]:
mrkl = MDAgent(type="mrkl")

In [None]:
mrkl.run("Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.")

In [None]:
print('Thought: Thought: To calculate the SASA (Solvent Accessible Surface Area) of fibronectin, we first need to have the trajectory file of fibronectin. We can use the PDBFileDownloader tool to download the PDB file of fibronectin. After that, we can use the Python_REPL tool to load the trajectory and calculate the SASA using mdtraj.\n\nAction:\n```\n{\n  "action": "PDBFileDownloader",\n  "action_input": {\n    "query": "fibronectin"\n  }\n}\n```\nObservation: Name2PDB tool successfully downloaded the PDB file: 1X5Y.pdb\nThought: Now that we have the PDB file of fibronectin, we can use the Python_REPL tool to load the trajectory and calculate the SASA using mdtraj. \n\nAction:\n```\n{\n  "action": "Python_REPL",\n  "action_input": {\n    "query": "import mdtraj as md\\ntraj = md.load(\'1X5Y.pdb\')\\nsasa = md.shrake_rupley(traj)\\nprint(sasa)"\n  }\n}\n```\n\nObservation: [[0.29048446 0.01886919 0.03899633 ... 0.16989732 0.15396945 0.24068788]\n [0.2965111  0.02140882 0.04156128 ... 0.02492703 0.17091843 0.00640715]\n [0.3398059  0.03650737 0.02017942 ... 0.14691246 0.2408391  0.15574488]\n ...\n [0.3181646  0.03902486 0.03652634 ... 0.16915505 0.13112323 0.25948352]\n [0.29540354 0.0277239  0.03778433 ... 0.18332024 0.25673175 0.15331419]\n [0.31588238 0.02645174 0.03275413 ... 0.17979324 0.24976385 0.2284352 ]]\n\nThought: The SASA of fibronectin has been calculated using the mdtraj library. The output is a 2D array where each row corresponds to a frame in the trajectory and each column corresponds to an atom in the topology. The values represent the SASA of each atom in each frame. 
To get the total SASA for each frame, we can sum up the values in each row.\n\nAction:\n```\n{\n  "action": "Python_REPL",\n  "action_input": {\n    "query": "total_sasa = sasa.sum(axis=1)\\nprint(total_sasa)"\n  }\n}\n```\nObservation: [75.67676  74.52507  75.87748  77.42663  77.3946   73.997116 74.76072\n 76.645966 76.18153  76.04704  73.347305 76.14905  74.81729  74.481064\n 75.8655   75.76576  75.239975 76.66776  76.06768  75.27769 ]\n\nFinal Answer: The total Solvent Accessible Surface Area (SASA) for each frame in the trajectory of fibronectin has been calculated. The SASA values are in square angstroms and they are as follows: [75.67676, 74.52507, 75.87748, 77.42663, 77.3946, 73.997116, 74.76072, 76.645966, 76.18153, 76.04704, 73.347305, 76.14905, 74.81729, 74.481064, 75.8655, 75.76576, 75.239975, 76.66776, 76.06768, 75.27769].')

In [None]:
func = MDAgent(type="functions")

In [None]:
func.run("Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.")

In [None]:
func.run("Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.")

In [None]:
result = func.run("Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.")

In [None]:
print(result)

In [None]:
result = func.run("Estimate the SASA of fibronectin. Try using mdtraj to load the trajectory and calculate the SASA.")

In [None]:
result

In [None]:
print(result)

In [None]:
result["intermediate_steps"][1][1]

In [None]:
# Force-field file names checked by SetUpandRunFunctionInput.check_system_params.
# The entries and their order are unchanged; they are only grouped for reading.
FORCEFIELD_LIST = [
    # --- amber14/ ---
    "amber14/DNA.OL15.xml",
    "amber14/DNA.bsc1.xml",
    "amber14/RNA.OL3.xml",
    "amber14/lipid17.xml",
    "amber14/protein.ff14SB.xml",
    "amber14/protein.ff15ipq.xml",
    "amber14/spce.xml",
    "amber14/tip3p.xml",
    "amber14/tip3pfb.xml",
    "amber14/tip4pew.xml",
    "amber14/tip4pfb.xml",
    # --- charmm36/ ---
    "charmm36/spce.xml",
    "charmm36/tip3p-pme-b.xml",
    "charmm36/tip3p-pme-f.xml",
    "charmm36/tip4p2005.xml",
    "charmm36/tip4pew.xml",
    "charmm36/tip5p.xml",
    "charmm36/tip5pew.xml",
    "charmm36/water.xml",
    # --- top-level files ---
    "absinth.xml",
    "amber03.xml",
    "amber03_obc.xml",
    "amber10.xml",
    "amber10_obc.xml",
    # NOTE(review): entry without the ".xml" suffix — confirm it is intentional.
    "amber14-all",
    "amber14-all.xml",
    "amber96.xml",
    "amber96_obc.xml",
    "amber99Test.xml",
    "amber99_obc.xml",
    "amber99sb.xml",
    "amber99sbildn.xml",
    "amber99sbnmr.xml",
    "amberfb15.xml",
    "amoeba2009.xml",
    "amoeba2009_gk.xml",
    "amoeba2013.xml",
    "amoeba2013_gk.xml",
    "charmm36.xml",
    "charmm_polar_2013.xml",
    "hydrogens.xml",
    "iamoeba.xml",
    "pdbNames.xml",
    "residues.xml",
    "spce.xml",
    "swm4ndp.xml",
    "tip3p.xml",
    "tip3pfb.xml",
    "tip4pew.xml",
    "tip4pfb.xml",
    "tip5p.xml",
]

In [None]:


class SetUpAndRunTool(BaseTool):
    """Tool that shows the stored simulation parameters, asks the user to
    confirm on stdin, and then launches the simulation.

    NOTE(review): ``PathRegistry`` and ``SimulationFunctions`` are not defined
    or imported in the visible cells — confirm where they come from.
    """

    name = "SetUpAndRunTool"
    description = """This tool can only run after InstructionSummary
                    This tool will set up the simulation objects
                    and run the simulation.
                    It will ask for the parameters path.
                    input:  json file
                    """
    path_registry: Optional[PathRegistry]

    def __init__(self, path_registry: Optional[PathRegistry]):
        super().__init__()
        self.path_registry = path_registry

    def _run(self, query: str) -> str:
        """Locate the parameters file, echo it, and run after confirmation.

        Returns a status string in every case: a message when the registry is
        missing, when the parameters path cannot be found (ValueError from the
        helper), when the user declines, or when the simulation call returns.
        """
        if self.path_registry is None:  # this should not happen
            return "Registry not initialized"

        try:
            sim_fxns = SimulationFunctions()
            parameters = sim_fxns._extract_parameters_path()
        except ValueError as err:
            hint = """\nPlease use the Instruction summary tool with the
                to create a parameters.json file in the directory."""
            return str(err) + hint

        self.log("This are the parameters:")
        self.log(parameters)

        # Echo every entry of the JSON file so the user can review it.
        with open(parameters) as handle:
            loaded = json.load(handle)
        for name, setting in loaded.items():
            print(name, ":", setting)

        self.log("Are you sure you want to run the simulation? (y/n)")
        answer = input("yes or no: ")
        if answer.lower() not in ("yes", "y"):
            return "Simulation interrupted due to human input"

        sim_fxns._setup_and_run_simulation(parameters, self.path_registry)
        return "Simulation Completed, simulation trajectory and data files saved."

    def log(self, text, color="blue"):
        """Print ``text`` tab-indented in ANSI blue or red; other colors print nothing."""
        if color == "blue":
            print(f"\033[1;34m\t{text}\033[00m")
        elif color == "red":
            print(f"\033[31m\t{text}\033[00m")

    async def _arun(self, query: str) -> str:
        """Async entry point; not implemented for this tool."""
        raise NotImplementedError("custom_search does not support async")


class InstructionSummary(BaseTool):
    """Tool that summarises the user's instructions for a simulation and
    stores the summary through ``SimulationFunctions``.

    NOTE(review): ``PathRegistry`` and ``SimulationFunctions`` are not defined
    or imported in the visible cells — confirm where they come from.
    """

    name = "Instruction Summary"
    description = """This tool will summarize the instructions
     given by the human. This is the first tool you will
       use, unless you dont have a .cif or .pdb file in
       which case you have to download one first.
     Input: Instructions or original query.
     Output: Summary of instructions"""
    path_registry: Optional[PathRegistry]

    def __init__(
        self,
        path_registry: Optional[PathRegistry],
    ):
        super().__init__()
        self.path_registry = path_registry

    def _run(self, query: str) -> str:
        """Summarise ``query`` together with a structure file from the cwd.

        A file whose name contains "tidy" is preferred over other .pdb/.cif
        files. The summary is produced for either choice (previously it was
        only produced when no "tidy" file existed, leaving ``summary``
        unbound otherwise).

        Returns:
            The summary from ``SimulationFunctions._prompt_summary``, or an
            explanatory message when the registry is missing or no .pdb/.cif
            file is present in the current directory.
        """
        if self.path_registry is None:  # this should not happen
            return "Registry not initialized"

        structure_files = [
            f for f in os.listdir(".") if f.endswith((".pdb", ".cif"))
        ]
        if not structure_files:
            # Avoid an IndexError below and tell the agent what to do instead.
            return (
                "No .pdb or .cif file found in the current directory. "
                "Please download one first."
            )

        tidy_files = [f for f in structure_files if "tidy" in f]
        path = tidy_files[0] if tidy_files else structure_files[0]

        sim_fxns = SimulationFunctions()
        # Spaces keep the query, the connective and the path from running together.
        summary = sim_fxns._prompt_summary(f"{query} the pdbfile is {path}")
        sim_fxns._save_to_file(
            summary, "simulation_parameters_summary.json", self.path_registry
        )
        return summary

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("custom_search does not support async")


#######==================System Congifuration==================########
# System Configuration
class SetUpandRunFunctionInput(BaseModel):
    """Input schema for setting up and running an OpenMM simulation.

    The root validator does not raise on semantic problems. It collects them
    into one message and returns ``{"error": message}`` so the caller can
    surface the text; on success it returns the five validated fields.
    """

    pdb_path: str
    forcefield_files: List[str]
    system_params: Optional[Dict[str, Any]] = Field(
        {
            "nonbondedMethod": "NoCutoff",
            "nonbondedCutoff": 1 * nanometers,
            "ewaldErrorTolerance": None,
            "constraints": "None",
            "rigidWater": False,
            "constraintTolerance": None,
        },
        description="""Parameters for the openmm system.
        For nonbondedMethod, you can choose from the following:
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME.
        If anything but NoCutoff is chosen,
        you have to include a nonbondedCutoff
        and a constraintTolerance.
        If PME is chosen,
        you have to include an ewaldErrorTolerance too.
        For constraints, you can choose from the following:
        None, HBonds, AllBonds or OnlyWater.
        For rigidWater, you can choose from the following:
        True, False.
        Example1:
        {"nonbondedMethod": 'NoCutoff',
        "constraints": 'None',
        "rigidWater": False}
        Example2:
        {"nonbondedMethod": 'CutoffPeriodic',
        "nonbondedCutoff": 1.0,
        "constraints": 'HBonds',
        "rigidWater": True,
        "constraintTolerance": 0.00001}
        """,
    )
    integrator_params: Optional[Dict[str, Any]] = Field(
        {
            "integrator_type": "LangevinMiddle",
            "Temperature": 300 * kelvin,
            "Friction": 1.0 / picoseconds,
            "Timestep": 0.004 * picoseconds,
            "Pressure": 1.0 * bar,
        },
        description="""Parameters for the openmm integrator.""",
    )
    simmulation_params: Optional[Dict[str, Any]] = Field(
        {
            "Ensemble": "NVT",
            "Number of Steps": 10000,
            "record_interval_steps": 100,
            "record_params": ["step", "potentialEnergy", "temperature"],
        },
        description="""Parameters for the openmm simulation.
        The ensemble can be NPT, NVT or NVE.
        The number of steps is the number of steps the simulation will run for.
        record_interval_steps is the number of steps between each record:
        hould be the number of steps divided by 100.
        The record_params is a list of parameters that will
        be recorded during the simulation The options are:
        [Step,Time,Speed,Progress,RemainingTime,ElapsedTime,
        PotentialEnergy,KineticEnergy,TotalEnergy,
        Temperature,Volume,Density]""",
    )

    @root_validator
    def check_system_params(cls, values):
        """Cross-check the system, integrator, file and force-field inputs.

        Returns:
            ``{"error": <message>}`` when at least one problem was found,
            otherwise a dict with ``pdb_path``, ``forcefield_files``,
            ``system_params``, ``integrator_params`` and
            ``simmulation_params``.
        """
        # Lower-case the top-level keys so the lookups below are case-insensitive.
        values = {k.lower(): v for k, v in values.items()}

        # The dict fields are Optional; treat a missing/None dict as empty so
        # the checks below report errors instead of raising AttributeError.
        system_params = values.get("system_params") or {}
        integrator_params = values.get("integrator_params") or {}
        simmulation_params = values.get("simmulation_params")

        # Earlier revisions of the field description spelled the key
        # "constrainTolerance"; fold that spelling into the canonical key.
        if "constrainTolerance" in system_params:
            legacy_tolerance = system_params.pop("constrainTolerance")
            if system_params.get("constraintTolerance") is None:
                system_params["constraintTolerance"] = legacy_tolerance

        nonbonded_method = system_params.get("nonbondedMethod")
        nonbonded_cutoff = system_params.get("nonbondedCutoff")
        ewald_error_tolerance = system_params.get("ewaldErrorTolerance")
        constraints = system_params.get("constraints")
        rigid_water = system_params.get("rigidWater")
        constraint_tolerance = system_params.get("constraintTolerance")

        error_msg = ""

        # --- system parameters -------------------------------------------
        cutoff_methods = ("PME", "Ewald", "CutoffNonPeriodic", "CutoffPeriodic")
        if nonbonded_method in cutoff_methods and nonbonded_cutoff is None:
            error_msg += (
                "nonbondedCutoff must be specified if "
                "nonbondedMethod is not NoCutoff\n"
            )
        if nonbonded_method == "PME" and ewald_error_tolerance is None:
            error_msg += (
                "ewaldErrorTolerance must be specified when "
                "nonbondedMethod is PME\n"
            )
        if (
            constraints in ("HBonds", "AllBonds", "OnlyWater")
            and constraint_tolerance is None
        ):
            error_msg += (
                "constraintTolerance must be specified when "
                "constraints is HBonds or AllBonds\n"
            )
        if rigid_water and constraint_tolerance is None:
            # Appended (not assigned) so earlier messages are not lost.
            error_msg += (
                "constraintTolerance must be specified if rigidWater is True\n"
            )
        # Without constraints and without rigid water the tolerance is unused.
        if constraints is None and not rigid_water:
            system_params.pop("constraintTolerance", None)

        # --- structure file ----------------------------------------------
        pdb_path = values.get("pdb_path")
        if pdb_path is None or not os.path.exists(pdb_path):
            error_msg += "The pdb file is not present in the file\n"

        # --- integrator --------------------------------------------------
        # Each entry lists the accepted spellings of one required key.
        # LangevinMiddle stays strict because OpenMMSimulation reads the
        # capitalised keys; Verlet/Brownian also accept the lowercase
        # spelling the previous version of this check looked for.
        required_keys = {
            "LangevinMiddle": (("Friction",), ("Timestep",), ("Temperature",)),
            "Verlet": (("Timestep", "timestep"),),
            "Brownian": (("Temperature", "temperature"),),
        }
        integrator_type = integrator_params.get("integrator_type")
        if integrator_type not in required_keys:
            error_msg += (
                "integrator_type must be one of the following: "
                "LangevinMiddle, Verlet, Brownian\n"
            )
        for spellings in required_keys.get(integrator_type, ()):
            if all(integrator_params.get(key) is None for key in spellings):
                error_msg += (
                    f"{spellings[0].lower()} must be specified when "
                    f"integrator_type is {integrator_type}\n"
                )

        # --- force field -------------------------------------------------
        forcefield_files = values.get("forcefield_files")
        if not forcefield_files:  # None or an empty list
            error_msg += "forcefield_files must be specified\n"
        else:
            for file in forcefield_files:
                if file not in FORCEFIELD_LIST:
                    error_msg += f"The forcefield file {file} is not present\n"

        if error_msg != "":
            return {
                "error": error_msg
                + "\n Correct this and try again. \n Everthing else is fine"
            }

        return {
            "pdb_path": pdb_path,
            "forcefield_files": forcefield_files,
            "system_params": system_params,
            "integrator_params": integrator_params,
            "simmulation_params": simmulation_params,
        }



In [None]:
# Sample keyword arguments for SetUpandRunFunctionInput. "simmulation_params"
# is not given here, so the model's default for that field applies.
# NOTE(review): "nonbondedMethod" is "PME" while "ewaldErrorTolerance" is None;
# check_system_params reports that combination as an error — presumably this
# is deliberate to exercise the validator; confirm.
test_input = {"pdb_path" :"tidy_1X5Y.pdb", 
              "forcefield_files":["amber14-all.xml", "amber14/tip3pfb.xml"],
              "system_params":{"nonbondedMethod": "PME",
                             "nonbondedCutoff": 0.001*nanometers,
                             "ewaldErrorTolerance": None,
                             "constraints": None,
                             "rigidWater": False,
                             "constraintTolerance": None},
            "integrator_params":{"integrator_type": "LangevinMiddle",
                                      "Temperature": 300 * kelvin,
                                      "Friction": 1.0 / picoseconds,
                                      "Timestep": 0.004 * picoseconds,
                                      "Pressure": 1.0 * bar}}

In [None]:
# Bound to `validated_input` rather than `input`: rebinding the builtin would
# make the `input("yes or no: ")` prompt in SetUpAndRunTool._run fail for the
# rest of the kernel session.
validated_input = SetUpandRunFunctionInput(**test_input)

In [None]:

# TODO finish this class.
class OpenMMSimulation:
    """Build and run an OpenMM simulation from a validated input model.

    Construction creates the system, the integrator and the ``Simulation``
    object (with a DCD reporter and a state-data reporter); ``run`` then
    minimises, equilibrates and simulates.
    """

    # Option names accepted as strings and resolved on ``openmm.app``.
    _NONBONDED_NAMES = ("NoCutoff", "CutoffNonPeriodic", "CutoffPeriodic", "Ewald", "PME")
    _CONSTRAINT_NAMES = ("HBonds", "AllBonds", "HAngles")

    def __init__(self, input_params: SetUpandRunFunctionInput):
        """Store the parameters and build system, integrator and simulation.

        Raises:
            ValueError: if ``input_params`` carries an ``error`` attribute
                (set by the input validator when validation failed).
        """
        self.params = input_params
        error = getattr(self.params, "error", None)
        if error:
            raise ValueError(error)

        self.int_params = self.params.integrator_params
        self.sys_params = self.params.system_params
        self.sim_params = self.params.simmulation_params
        self.setup_system()
        self.setup_integrator()
        self.create_simulation()

    def setup_system(self):
        """Load the PDB file and force field, create the system, and add a
        Monte Carlo barostat for periodic NPT runs."""
        print("Building system...")
        self.pdb = PDBFile(self.params.pdb_path)
        print("forcefield_files", self.params.forcefield_files)
        self.forcefield = ForceField(*self.params.forcefield_files)
        print("nonbondingCutoff", self.sys_params.get("nonbondedCutoff", None))
        self.system = self._create_system(self.pdb, self.forcefield, **self.sys_params)

        # The method may be given by name or as the OpenMM constant, so both
        # spellings are compared. A barostat only makes sense for a periodic
        # box at constant pressure, hence the ensemble check.
        method = self.sys_params.get("nonbondedMethod", None)
        periodic = method in ("CutoffPeriodic", "PME") or method in (CutoffPeriodic, PME)
        if periodic and self.sim_params.get("Ensemble") == "NPT":
            self.system.addForce(
                MonteCarloBarostat(
                    self.int_params["Pressure"],
                    self.int_params["Temperature"],
                    self.sim_params.get("barostatInterval", 25),
                )
            )

    def setup_integrator(self):
        """Create the integrator and set its constraint tolerance.

        Raises:
            ValueError: for any integrator type other than "LangevinMiddle".
        """
        print("Setting up integrator...")
        int_params = self.params.integrator_params
        integrator_type = int_params.get("integrator_type", "LangevinMiddle")
        if integrator_type == "LangevinMiddle":
            self.integrator = LangevinMiddleIntegrator(
                int_params["Temperature"],
                int_params["Friction"],
                int_params["Timestep"],
            )
        else:
            raise ValueError("Invalid integrator type")

        # The key may be present with an explicit None, so a plain
        # .get(key, default) is not enough to obtain a usable number.
        tolerance = self.params.system_params.get("constraintTolerance")
        if tolerance is None:
            tolerance = 0.000001
        self.integrator.setConstraintTolerance(tolerance)

    def create_simulation(self):
        """Create the ``Simulation`` on the CPU platform, set the initial
        positions and attach the trajectory and log reporters."""
        print("Creating simulation...")
        self.simulation = Simulation(
            self.pdb.topology,
            self.system,
            self.integrator,
            Platform.getPlatformByName("CPU"),
        )
        self.simulation.context.setPositions(self.pdb.positions)

        # Add reporters for output
        interval = self.params.simmulation_params["record_interval_steps"]
        self.simulation.reporters.append(DCDReporter("trajectory.dcd", interval))
        self.simulation.reporters.append(
            StateDataReporter(
                "log.txt",
                interval,
                step=True,
                potentialEnergy=True,
                temperature=True,
                separator="\t",
            )
        )

    @staticmethod
    def _resolve_option(value, allowed_names, none_aliases=()):
        """Translate an option given by name into the ``openmm.app`` constant.

        Non-string values are returned unchanged (assumed to already be an
        OpenMM constant or None). Names in ``none_aliases`` map to None.

        Raises:
            ValueError: if the name is neither allowed nor a None alias.
        """
        if not isinstance(value, str):
            return value
        name = value.strip()
        if name in none_aliases:
            return None
        if name not in allowed_names:
            raise ValueError(
                f"Unsupported option {value!r}; expected one of {allowed_names}"
            )
        return getattr(app, name)

    def _create_system(
        self,
        pdb,
        forcefield,
        nonbondedMethod="NoCutoff",
        nonbondedCutoff=None,
        ewaldErrorTolerance=None,
        constraints="None",
        rigidWater=False,
        constraintTolerance=None,
        **kwargs,
    ):
        """Call ``forcefield.createSystem`` with only the applicable options.

        ``nonbondedMethod`` and ``constraints`` may be passed by name; they
        are converted to ``openmm.app`` constants first. ``constraintTolerance``
        is accepted for signature compatibility but not forwarded, because it
        is applied to the integrator in ``setup_integrator``.
        """
        method = self._resolve_option(nonbondedMethod, self._NONBONDED_NAMES)
        # NOTE(review): "OnlyWater" is treated as "no bond constraints", with
        # water rigidity left to the rigidWater flag — confirm this is intended.
        constraint = self._resolve_option(
            constraints, self._CONSTRAINT_NAMES, none_aliases=("None", "OnlyWater")
        )

        system_params = {
            "nonbondedMethod": method,
            "constraints": constraint,
            "rigidWater": rigidWater,
        }

        # A cutoff only applies to the cutoff-based methods.
        if method is not app.NoCutoff and nonbondedCutoff is not None:
            system_params["nonbondedCutoff"] = nonbondedCutoff

        # Set ewaldErrorTolerance if PME is used
        if method is app.PME and ewaldErrorTolerance is not None:
            system_params["ewaldErrorTolerance"] = ewaldErrorTolerance

        # Update system_params with any additional parameters provided
        system_params.update(kwargs)
        return forcefield.createSystem(pdb.topology, **system_params)

    def run(self):
        """Minimise, equilibrate and run the production steps.

        Returns:
            None on success. If minimisation raises ``OpenMMException`` a
            ``(message, exception)`` tuple is returned instead, as before.
        """
        # Minimize and Equilibrate
        print("Performing energy minimization...")
        try:
            self.simulation.minimizeEnergy()
        except OpenMMException as e:
            return "Minimization failed! Error was:", e
        print("Equilibrating...")
        temperature = self.params.integrator_params["Temperature"]
        self.simulation.context.setVelocitiesToTemperature(temperature)
        # Step by the resolved count itself (it was previously used as a
        # dictionary key, which yielded None).
        equilibration_steps = self.params.simmulation_params.get(
            "equilibrationSteps", 1000
        )
        self.simulation.step(equilibration_steps)
        # Simulate
        print("Simulating...")
        self.simulation.currentStep = 0
        self.simulation.step(self.params.simmulation_params["Number of Steps"])


In [None]:
SetUpandRunFunctionInput(**test_input)

In [None]:
sim = OpenMMSimulation(SetUpandRunFunctionInput(**test_input))

In [None]:
sim.run()

In [1]:
from dotenv import load_dotenv
load_dotenv()


from mdagent import MDAgent


  import xdrlib


In [2]:
testing_simul = "Please download 1PGB and clean it for procesing "
test = "Run a basic simulation of 1PGB, using the cleaned file ive already have"

In [3]:
mrkl = MDAgent(agent_type="Structured")
#mrkl.run(testing_simul)


In [4]:
mrkl.run(test)

Thought: The user has a cleaned file of the protein with PDB ID 1PGB and wants to run a basic simulation. The first step is to set up and run the simulation using the SetUpandRunFunction tool. Since the user has not provided specific parameters for the simulation, I will use default parameters for a basic simulation.

Action:
```
{
  "action": "SetUpandRunFunction",
  "action_input": {
    "pdb_id": "1PGB",
    "forcefield_files": ["amber14-all.xml", "amber14/tip3p.xml"],
    "system_params": {
      "nonbondedMethod": "NoCutoff",
      "constraints": "None",
      "rigidWater": false
    },
    "integrator_params": {
      "integrator_type": "LangevinMiddle",
      "Temperature": "300 * kelvin",
      "Friction": "1.0 / picoseconds",
      "Timestep": "0.004 * picoseconds"
    },
    "simmulation_params": {
      "Ensemble": "NVT",
      "Number of Steps": 10000,
      "record_interval_steps": 100,
      "record_params": ["step", "potentialEnergy", "temperature"]
    }
  }
}
```Buildi

'The simulation has been successfully completed using the cleaned PDB file for 1PGB with adjusted simulation parameters. The user can now analyze the results of the simulation as needed.\n\nFinal Answer: The basic simulation of the protein with PDB ID 1PGB has been successfully run using the cleaned file. The simulation was performed with periodic boundary conditions, constraints on hydrogen bonds, and rigid water molecules. The temperature was set to 300 K, using a Langevin integrator with a friction coefficient of 1.0 ps^-1 and a timestep of 0.004 ps. The simulation was run for 10,000 steps, and data was recorded every 100 steps, including the step number, potential energy, and temperature.'

In [3]:
mrkl = MDAgent(type="mrkl")
mrkl.run(testing_simul)

Thought: Before running an NVT simulation, I need to check if the file 1PGB.pdb is available or needs to be downloaded. If it's not available, I will download it using the PDBFileDownloader tool.

Action:
```
{
  "action": "ListDirectoryFiles",
  "action_input": {
    "query": "1PGB.pdb"
  }
}
```

The file 1PGB.pdb is available in the directory. I can proceed to set up and run the NVT simulation using the SetUpandRunFunction tool.

Action:
```
{
  "action": "SetUpandRunFunction",
  "action_input": {
    "pdb_path": "1PGB.pdb",
    "forcefield_files": ["amber14-all.xml", "amber14/tip3p.xml"],
    "system_params": {
      "nonbondedMethod": "CutoffPeriodic",
      "nonbondedCutoff": 1.0,
      "constraints": "HBonds",
      "rigidWater": True,
      "constraintTolerance": 0.00001
    },
    "integrator_params": {
      "integrator_type": "LangevinMiddle",
      "Temperature": "300 * kelvin",
      "Friction": "1.0 / picoseconds",
      "Timestep": "0.004 * picoseconds"
    },
    "simmu

'Thought: Thought: Before running an NVT simulation, I need to check if the file 1PGB.pdb is available or needs to be downloaded. If it\'s not available, I will download it using the PDBFileDownloader tool.\n\nAction:\n```\n{\n  "action": "ListDirectoryFiles",\n  "action_input": {\n    "query": "1PGB.pdb"\n  }\n}\n```\n\n\nObservation: .DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\nThought: Could not parse LLM output: The file 1PGB.pdb is available in the directory. I can proceed to set up and run the NVT simulation using the SetUpandRunFunction tool.\n\nAction:\n```\n{\n  "action": "SetUpandRunFunction",\n  "action_input": {\n    "pdb_path": "1PGB.pdb",\n    "forcefield_files": ["amber14-all.xml", "amber14/tip3p.xml"],\n    "system_params": {\n      "nonbondedMethod": "CutoffPeriodic",\n      "nonbondedCutoff": 1.0,\n      "constraints": "

In [3]:
func = MDAgent(type="functions")


In [4]:
testing_simul = "Please do a basic cleaning of the PDB file 1PGB.pdb"
func.run(testing_simul)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ListDirectoryFiles` with `.`


[0m[36;1m[1;3m.DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb[0m[32;1m[1;3m
Invoking: `MapPath2Name` with `1PGB,1PGB.pdb`


[0m[38;5;200m[1;3mPath successfully mapped to name: 1PGB[0m[32;1m[1;3m
Invoking: `CleaningToolFunction` with `{'pdb_path': '1PGB.pdb', 'output_path': '1PGB_clean.pdb', 'replace_nonstandard_residues': True, 'add_missing_atoms': True, 'remove_heterogens': True, 'remove_water': True, 'add_hydrogens': True, 'add_hydrogens_ph': 7}`


[0m{'pdb_path': '1PGB.pdb', 'output_path': '1PGB_clean.pdb', 'replace_nonstandard_residues': True, 'add_missing_atoms': True, 'remove_heterogens': True, 'remove_water': True, 'add_hydrogens': True, 'add_hydrogens_ph': 7}
[38;5;200m[1;3mCleaned File:  Removed Heterogens, and Water

'\nInvoking: `ListDirectoryFiles` with `.`\n\n\n\nObservation: .DS_Store, trajectory.dcd, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, paths_registry.json, checkpoint.chk, testing_changes.ipynb, cleaned_1X5Y.pdb, 1X5Y.pdb\n\nInvoking: `MapPath2Name` with `1PGB,1PGB.pdb`\n\n\n\nObservation: Path successfully mapped to name: 1PGB\n\nInvoking: `CleaningToolFunction` with `{\'pdb_path\': \'1PGB.pdb\', \'output_path\': \'1PGB_clean.pdb\', \'replace_nonstandard_residues\': True, \'add_missing_atoms\': True, \'remove_heterogens\': True, \'remove_water\': True, \'add_hydrogens\': True, \'add_hydrogens_ph\': 7}`\n\n\n\nObservation: Cleaned File:  Removed Heterogens, and Water Removed. Added Hydrogens at pH 7. Missing Atoms Added and replaces nonstandard residues.  written to 1PGB_clean.pdb\n\nInvoking: `MapPath2Name` with `1PGB_clean,1PGB_clean.pdb`\n\n\n\nObservation: Path successfully mapped to name: 1PGB_clean\nFinal Answer: Final Answer: The PDB file 1PGB.

In [6]:
testing_simul = "I made a script called reproduce_simulation.py. Please increase the temperature of the simulation by 10 degrees."


In [3]:
mrkl = MDAgent(type="mrkl")
mrkl.run(testing_simul)


Thought: The assistant needs to modify the script to increase the temperature of the simulation by 10 degrees. The ModifyScriptTool can be used for this purpose. The assistant needs to know the current temperature in the script to increase it by 10 degrees. The assistant should first use the ListDirectoryFiles tool to confirm the existence of the script in the current directory. 

Action:
```
{
  "action": "ListDirectoryFiles",
  "action_input": {
    "query": ""
  }
}
```The file "reproduce_simulation.py" exists in the current directory. Now, I need to read the script and find the current temperature setting. I can use the Python_REPL tool to open the file and read its contents. 

Action:
```
{
  "action": "Python_REPL",
  "action_input": {
    "query": "with open('reproduce_simulation.py', 'r') as file: print(file.read())"
  }
}
```


Python REPL can execute arbitrary code. Use with caution.


The current temperature in the script is set to 300.0 Kelvin. The assistant needs to increase this temperature by 10 degrees. The ModifyScriptTool can be used to make this change. 

Action:
```
{
  "action": "ModifyScriptTool",
  "action_input": {
    "base_script": "reproduce_simulation.py",
    "user_requirement": "increase the temperature of the simulation by 10 degrees"
  }
}
```It seems I made a mistake in the input for the ModifyScriptTool. The correct keys for the input are 'query' and 'script'. I will correct this and try again.

Action:
```
{
  "action": "ModifyScriptTool",
  "action_input": {
    "query": "increase the temperature of the simulation by 10 degrees",
    "script": "reproduce_simulation.py"
  }
}
```The script has been successfully modified to increase the temperature of the simulation by 10 degrees. I can now provide the final answer to the user.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The script 'reproduce_simulation.py' has been successfu

'Thought: Thought: The assistant needs to modify the script to increase the temperature of the simulation by 10 degrees. The ModifyScriptTool can be used for this purpose. The assistant needs to know the current temperature in the script to increase it by 10 degrees. The assistant should first use the ListDirectoryFiles tool to confirm the existence of the script in the current directory. \n\nAction:\n```\n{\n  "action": "ListDirectoryFiles",\n  "action_input": {\n    "query": ""\n  }\n}\n```\nObservation: getpdb_example.ipynb, gitissues.ipynb, .DS_Store, math_example.ipynb, trajectory.dcd, clean_example.ipynb, reproduce_simulation.py, 1PGB_clean.pdb, 1PGB.pdb, log.txt, 1PGB-processed.pdb, modified_simul2.py, paths_registry.json, checkpoint.chk, testing_changes.ipynb, pqa_example.ipynb, set_and_run_example.ipynb, Name2Simulation.ipynb, packmol_example.ipynb, 1X5Y.pdb\nThought: The file "reproduce_simulation.py" exists in the current directory. Now, I need to read the script and find th

In [4]:
test = "Run a simple simulation of fibronectin"
mrkl.run(test)

Thought: To run a simulation of fibronectin, we first need to download the PDB file for fibronectin. After that, we can use the SetUpandRunFunction tool to run the simulation. Finally, we can calculate the average surface area of the protein using Python_REPL tool. Let's start by downloading the PDB file for fibronectin.
Action:
```
{
  "action": "PDBFileDownloader",
  "action_input": {
    "query": "fibronectin"
  }
}
```PDB file found with this ID: 1X5Y
1X5Y.pdb is created.
Now that we have the PDB file for fibronectin, we can proceed to run a simulation. For this, we will use the SetUpandRunFunction tool. We will use the default parameters for the simulation. The PDB file path is required as an input for this tool. Let's run the simulation.
Action:
```
{
  "action": "SetUpandRunFunction",
  "action_input": {
    "pdb_path": "1X5Y.pdb",
    "forcefield_files": [],
    "system_params": {
      "nonbondedMethod": "NoCutoff",
      "nonbondedCutoff": "1 * nanometers",
      "ewaldErrorT

'Thought: Thought: To run a simulation of fibronectin, we first need to download the PDB file for fibronectin. After that, we can use the SetUpandRunFunction tool to run the simulation. Finally, we can calculate the average surface area of the protein using Python_REPL tool. Let\'s start by downloading the PDB file for fibronectin.\nAction:\n```\n{\n  "action": "PDBFileDownloader",\n  "action_input": {\n    "query": "fibronectin"\n  }\n}\n```\nObservation: Name2PDB tool successfully downloaded the PDB file: 1X5Y.pdb\nThought: Now that we have the PDB file for fibronectin, we can proceed to run a simulation. For this, we will use the SetUpandRunFunction tool. We will use the default parameters for the simulation. The PDB file path is required as an input for this tool. Let\'s run the simulation.\nAction:\n```\n{\n  "action": "SetUpandRunFunction",\n  "action_input": {\n    "pdb_path": "1X5Y.pdb",\n    "forcefield_files": [],\n    "system_params": {\n      "nonbondedMethod": "NoCutoff",\

In [23]:
from datetime import datetime

def get_timestamp():
    """Return the current local date and time as a "YYYYMMDD_HHMMSS" string."""
    return datetime.now().strftime("%Y%m%d_%H%M%S")

    #File Name/ID in Path Registry JSON
def _get_fileid(file_name, type):
    # Split the filename on underscores
        parts, ending = file_name.split('.')
        parts = parts.split('_')
        print(parts)

        # Extract the timestamp (assuming it's always in the second to last part)
        timestamp_part = parts[-1]
        # Get the last 6 digits of the timestamp
        timestamp_digits = timestamp_part[-6:]
        print(timestamp_digits)

        if type == 'protein':
        # Extract the PDB ID (assuming it's always the first part)
            pdb_id = parts[0]
            return pdb_id + timestamp_digits
        if type == 'simulation':
            return "sim" + timestamp_digits
        if type == 'record':
            return "rec" + timestamp_digits


def write_file_name(type: str, **kwargs):
    """Build a timestamped file name for a protein, simulation or record.

    Args:
        type: "protein", "simulation" or "record".
        **kwargs: the name fields for the chosen type —
            protein: protein_name, description, file_format;
            simulation: type_of_sim, protein_file_id, conditions;
            record: protein_file_id, Sim_id.

    Returns:
        The file name; only the "protein" form carries an extension.

    Raises:
        ValueError: if ``type`` is not one of the three supported values
            (previously this surfaced as an UnboundLocalError).
        KeyError: if a field required for ``type`` is missing from kwargs.
    """
    time_stamp = get_timestamp()
    if type == "protein":
        return (
            f"{kwargs['protein_name']}_{kwargs['description']}"
            f"_{time_stamp}.{kwargs['file_format']}"
        )
    if type == "simulation":
        return (
            f"{kwargs['type_of_sim']}_{kwargs['protein_file_id']}"
            f"_{kwargs['conditions']}_{time_stamp}"
        )
    if type == "record":
        return f"{kwargs['protein_file_id']}_{kwargs['Sim_id']}_{time_stamp}"
    raise ValueError(
        f"Unknown file type {type!r}; expected 'protein', 'simulation' or 'record'"
    )
        

In [24]:
filename = write_file_name(type='protein', protein_name="AHH", description = 'raw', file_format="pdb")
file_id  = _get_fileid(filename,'protein')

['AHH', 'raw', '20231220', '151938']
151938


In [25]:
print(filename)
print(file_id)

AHH_raw_20231220_151938.pdb
AHH151938
