In [1]:
# Development convenience: pick up edits to imported modules without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Molecular Systems, Items and Forms

In [17]:
import molsysmt as msm
import nglview as nv
from molsysmt import puw
from molsysmt.tools import molecular_systems as tools_molecular_systems
from molsysmt.tools import items as tools_items
from molsysmt.molecular_system import MolecularSystem
from molsysmt import demo_systems

In [4]:
# List the functions exposed by molsysmt.tools.molecular_systems.
help(tools_molecular_systems)

Help on module molsysmt.tools.molecular_systems in molsysmt.tools:

NAME
    molsysmt.tools.molecular_systems

FUNCTIONS
    is_a_single_molecular_system(items)
    
    where_bonds_in_molecular_system(items)
    
    where_box_in_molecular_system(items)
    
    where_coordinates_in_molecular_system(items)
    
    where_elements_in_molecular_system(items)
    
    where_ff_parameters_in_molecular_system(items)
    
    where_mm_parameters_in_molecular_system(items)
    
    where_simulation_in_molecular_system(items)
    
    where_thermo_state_in_molecular_system(items)
    
    where_velocities_in_molecular_system(items)

FILE
    /home/diego/Projects/MolSysMT/molsysmt/tools/molecular_systems.py




In [5]:
# List the functions exposed by molsysmt.tools.items.
help(tools_items)

Help on module molsysmt.tools.items in molsysmt.tools:

NAME
    molsysmt.tools.items

FUNCTIONS
    compatibles_for_a_single_molecular_system(items)
    
    has_box(items)
    
    has_coordinates(items)
    
    has_topology(items)
    
    has_trajectory(items)
    
    item_is_file(item)
    
    item_is_id(item)
    
    item_is_string(item)

FILE
    /home/diego/Projects/MolSysMT/molsysmt/tools/items.py




- Puede haber items con topology, coordinates o trajectory, o box.
Un sistema molecular puede venir descrito por un item o por varios items. Por ejemplo, un sistema con topología, coordenadas y box por separado. O un sistema con un solo item. O una topología con trayectoria.

## Tests

In [6]:
# Use the bundled '1tcd.mmtf' demo file as the item (a file path, per the outputs below).
item = demo_systems.files['1tcd.mmtf']

In [7]:
# Ask MolSysMT which form it detects for this item.
msm.get_form(item)

'file:mmtf'

In [8]:
# Check that a lone file item is treated as one single molecular system.
tools_molecular_systems.is_a_single_molecular_system(item)

True

In [9]:
# Find where the bonds are defined; the output below pairs the item with its form.
tools_molecular_systems.where_bonds_in_molecular_system(item)

('/home/diego/Projects/MolSysMT/molsysmt/demo_systems/1tcd.mmtf', 'file:mmtf')

In [10]:
# Wrap the single mmtf item in a MolecularSystem object.
molecular_system = MolecularSystem(item)

In [11]:
# Inspect the item/form pair stored for each kind of data (None where no item provides it).
molecular_system.__dict__

{'elements_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/1tcd.mmtf',
 'elements_form': 'file:mmtf',
 'bonds_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/1tcd.mmtf',
 'bonds_form': 'file:mmtf',
 'coordinates_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/1tcd.mmtf',
 'coordinates_form': 'file:mmtf',
 'velocities_item': None,
 'velocities_form': None,
 'box_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/1tcd.mmtf',
 'box_form': 'file:mmtf',
 'ff_parameters_item': None,
 'ff_parameters_form': None,
 'mm_parameters_item': None,
 'mm_parameters_form': None,
 'thermo_state_item': None,
 'thermo_state_form': None,
 'simulation_item': None,
 'simulation_form': None}

In [12]:
# Build one MolecularSystem from two AMBER demo files passed together in a list.
# Per the __dict__ output further below, the prmtop supplies elements, bonds and
# force field parameters, while the inpcrd supplies coordinates and box.
prmtop_file = demo_systems.files['pentalanine.prmtop']
inpcrd_file = demo_systems.files['pentalanine.inpcrd']
ms = MolecularSystem([prmtop_file, inpcrd_file])

In [13]:
# Show the default repr of the MolecularSystem object.
ms

<molsysmt.molecular_system.MolecularSystem at 0x7f9cb0155150>

In [14]:
# Inspect which of the two files was assigned to each kind of data.
ms.__dict__

{'elements_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/pentalanine.prmtop',
 'elements_form': 'file:prmtop',
 'bonds_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/pentalanine.prmtop',
 'bonds_form': 'file:prmtop',
 'coordinates_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/pentalanine.inpcrd',
 'coordinates_form': 'file:inpcrd',
 'velocities_item': None,
 'velocities_form': None,
 'box_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/pentalanine.inpcrd',
 'box_form': 'file:inpcrd',
 'ff_parameters_item': '/home/diego/Projects/MolSysMT/molsysmt/demo_systems/pentalanine.prmtop',
 'ff_parameters_form': 'file:prmtop',
 'mm_parameters_item': None,
 'mm_parameters_form': None,
 'thermo_state_item': None,
 'thermo_state_form': None,
 'simulation_item': None,
 'simulation_form': None}

In [15]:
# Build a MolecularSystem from a Gromacs structure file plus an XTC trajectory,
# both taken from nglview's bundled data files (`nv` comes from the imports cell
# at the top of the notebook, so all dependencies are declared in one place).
# Per the output below, the gro file supplies the elements, the xtc file supplies
# coordinates and box, and no item provides bonds.
ms = MolecularSystem([nv.datafiles.GRO, nv.datafiles.XTC])
ms.__dict__

{'elements_item': '/home/diego/Myopt/miniconda3/envs/MolSysMT_dev/lib/python3.7/site-packages/nglview/datafiles/md_1u19.gro',
 'elements_form': 'file:gro',
 'bonds_item': None,
 'bonds_form': None,
 'coordinates_item': '/home/diego/Myopt/miniconda3/envs/MolSysMT_dev/lib/python3.7/site-packages/nglview/datafiles/md_1u19.xtc',
 'coordinates_form': 'file:xtc',
 'velocities_item': None,
 'velocities_form': None,
 'box_item': '/home/diego/Myopt/miniconda3/envs/MolSysMT_dev/lib/python3.7/site-packages/nglview/datafiles/md_1u19.xtc',
 'box_form': 'file:xtc',
 'ff_parameters_item': None,
 'ff_parameters_form': None,
 'mm_parameters_item': None,
 'mm_parameters_form': None,
 'thermo_state_item': None,
 'thermo_state_form': None,
 'simulation_item': None,
 'simulation_form': None}

In [18]:
# Build a peptide item from 'AceAlaNME'; the dict passes the force field ('AMBER14')
# and implicit solvent ('OBC1') options.
molecular_system0 = msm.build_peptide(['AceAlaNME',{'forcefield':'AMBER14', 'implicit_solvent':'OBC1'}])
# Make a second item from the first one so two items are available for the checks below.
molecular_system1 = msm.copy(molecular_system0)
# Shift the copy by 0.5 nm along the first axis.
# NOTE(review): the return value is discarded, so this presumably translates in place — confirm.
msm.translate(molecular_system1, translation=[0.5, 0.0, 0.0]*puw.unit('nm'))

In [21]:
# Two items passed in a flat list are reported as a single molecular system (True below).
tools_molecular_systems.is_a_single_molecular_system([molecular_system0, molecular_system1])

True

Even if both items describe the same molecular system, each one must be wrapped in its own sublist for them to be considered as two molecular systems:

In [23]:
# With each item in its own sublist, they are reported as separate molecular systems (False below).
tools_molecular_systems.is_a_single_molecular_system([[molecular_system0], [molecular_system1]])

False

## Casos que quiero resolver

### 1 sistema molecular

- El sistema molecular tiene únicamente una topología
- El sistema molecular tiene únicamente una secuencia de frames o coordenadas
- En el caso de un item, está claro.
- Box tomo la que viene con las coordenadas o la última en la lista.
- Sólo si topologies <=2 and coordinates <=2.
- Podríamos tener más de 2 items si por ejemplo un tercero tiene sólo box.

    - 0 topologías, 0 coordenadas -> -
    - 0 topologías, 1 coordenadas -> Si
    - 0 topologías, 2 coordenadas -> No.
    
    - 1 topologías, 0 coordenadas -> Si
    - 1 topologías, 1 coordenadas -> Si
    - 1 topologías, 2 coordenadas -> Si la top va con coords, si. Si no, no.

    - 2 topologías, 0 coordenadas -> No
    - 2 topologías, 1 coordenadas -> Si las coords van con top, si. Si no, no.
    - 2 topologías, 2 coordenadas -> No

- si hay un item con box sin coordinates ni topology, esa box se pilla.

Varios sistemas moleculares en una sola lista sólo son posibles si todas las topologías llevan sus coordenadas. La box debe ser la misma; si no, se tomará la última.

### More than one molecular system

- Lista de items no definido como single molecular system.
- Lista de listas, donde cada sublista es un molecular system.
- Lista de items con alguna lista dentro, si es que un molecular system está definido por más de un item.

Ahora he sacado el has_topology o has_coordinates en msm.tools.items

Hay un método para completar? O para añadir información a un item de otro item... por ejemplo, un molsysmt.Topology sin bonds y un openmm.Topology igual con bonds... pasar los bonds al otro. O por ejemplo, una trayectoria sin box y le meto box. No.... esto tendría que poder hacerse con 'set'

## Forms

Molecular systems can take different forms. The same system can be encoded, for instance, as a pdb file, as a Python object of the mdtraj.Trajectory class, as a UniProt id code or as an amino acid sequence. Not all forms have the same level of detail: some forms carry more information and others less, but all of them are forms of the same molecular system. MolSysMT takes the concept of 'form' as the central concept of the multitool. Sometimes we have the system in form A, an mmtf file for example, and to be able to run a specific analysis with a given tool, form A needs to be converted to form B -an mdtraj.Topology-; then we probably need a third library to modify the system, but this time the system must be encoded in form C -a parmed.Structure-. And so on. Usually, you can find the way to convert these forms, as well as the way those analyses are invoked, in the documentation of those libraries. To save the time spent connecting those pieces, MolSysMT provides a framework where different tools, native or coming from other libraries, can be easily plugged together to build the pipeline that makes up the workflow you need.

At this moment these are the forms MolSysMT can handle.

## Files

The updated list of forms of type file can be printed out with the function `molsysmt.info_forms(form_type='file')`.

In [24]:
# Table of the forms of type 'file' known to MolSysMT.
msm.info_forms(form_type='file')

Form,Type,Info
file:crd,file,CHARMM card (CRD) file format with coordinates.
file:dcd,file,
file:fasta,file,
file:gro,file,Gromacs gro file format
file:h5,file,
file:inpcrd,file,AMBER ASCII restart/inpcrd file format
file:mdcrd,file,AMBER mdcrd file format
file:mmtf,file,
file:mol2,file,
file:pdb,file,Protein Data Bank file format


## Classes

MolSysMT works with Python classes coming from many other libraries, such as MDTraj, PyTraj, MDAnalysis, OpenMM and ParmEd among others, as well as with some native classes.

In [25]:
# Table of the forms of type 'class' known to MolSysMT.
msm.info_forms(form_type='class')

Form,Type,Info
MolecularMechanicsDict,class,
SimulationDict,class,
XYZ,class,
mdanalysis.Topology,class,
mdanalysis.Universe,class,
mdtraj.AmberRestartFile,class,
mdtraj.GroTrajectoryFile,class,
mdtraj.HDF5TrajectoryFile,class,
mdtraj.PDBTrajectoryFile,class,
mdtraj.Topology,class,


## Ids

There are several databases or encoding systems where molecular systems take the form of a string of characters. This is the case of the Protein Data Bank, the ChEMBL database or the UniProt codes. The following table summarizes the list of Ids recognized by MolSysMT.

In [26]:
# Table of the forms of type 'id' known to MolSysMT.
msm.info_forms(form_type='id')

Form,Type,Info
id:PDB,id,


Notice that form names here start with 'id:'. This prefix is used to distinguish them from other form types. 'file:pdb' is the form name corresponding to a file and 'id:PDB' is the id form.

## Sequences

Molecular systems can be determined by a sequence of elements. For instance, a peptide such as Met-enkephalin can be defined by means of its amino acid sequence. These are the forms of type 'string', which include the sequence forms, that MolSysMT can handle:

In [27]:
# Table of the forms of type 'string' known to MolSysMT.
msm.info_forms(form_type='string')

Form,Type,Info
string:aminoacids1,string,
string:aminoacids3,string,
string:pdb,string,Protein Data Bank file format


## Viewers

The last form of a molecular system we usually need is its graphical representation: the viewer. MolSysMT works with viewers as if they were one more form type. These are the viewers MolSysMT can work with:

In [28]:
# Table of the forms of type 'viewer' known to MolSysMT.
msm.info_forms(form_type='viewer')

Form,Type,Info
nglview.NGLWidget,viewer,NGLView visualization native object
