From d4e0b47bb5393ec6ff6251aced3b97643f948be6 Mon Sep 17 00:00:00 2001 From: Alexis Rohou Date: Thu, 14 May 2026 16:29:16 -0700 Subject: [PATCH 1/4] Add preflight commands for disulfides and altlocs. This was needed for agentic control, because otherwise popup windows requiring the human to click were getting in the way. --- isolde/src/atomic/building/build_utils.py | 29 ++++ isolde/src/atomic/util.py | 23 +++ isolde/src/cmd/cmd.py | 85 ++++++++++ isolde/src/isolde.py | 27 ++-- .../make_all_sensible_disulphides.py | 10 +- isolde/src/ui/main_win.py | 23 ++- isolde/src/validation/cmd.py | 147 ++++++++++++++++++ 7 files changed, 319 insertions(+), 25 deletions(-) diff --git a/isolde/src/atomic/building/build_utils.py b/isolde/src/atomic/building/build_utils.py index d8790b78..a7c7f678 100644 --- a/isolde/src/atomic/building/build_utils.py +++ b/isolde/src/atomic/building/build_utils.py @@ -353,6 +353,35 @@ def create_disulfide(cys1, cys2): from chimerax.atomic.struct_edit import add_bond add_bond(s1, s2) + +def create_all_sensible_disulfides(model, logger=None): + ''' + Create disulfide bonds for every pair of cysteines in ``model`` whose + SG atoms are close enough to be disulfide-bonded but currently lack an + SG-SG bond. Cysteines that cluster in groups of three or more are not + bonded automatically and are returned as the "ambiguous" set so the + caller can warn or display them as appropriate. + + Returns ``(possible_pairs_created, ambiguous_clusters)`` where + ``possible_pairs_created`` is the set of cysteine-residue pairs that + were actually bonded by this call, and ``ambiguous_clusters`` is the + set of cysteine-residue groups that were left untouched. 
+ ''' + _current, possible, ambiguous = current_and_possible_disulfides(model) + for cys_pair in possible: + create_disulfide(*cys_pair) + if logger is not None and possible: + logger.info( + 'Created disulfide bonds between the following residues:\n{}'.format( + '; '.join([ + '-'.join(['{}{}{}'.format(c.chain_id, c.number, c.insertion_code) + for c in p]) + for p in possible + ]) + ) + ) + return possible, ambiguous + _CYS_ALIGN_ATOMS=('CA', 'CB', 'SG') def break_disulfide(cys1, cys2): from chimerax.core.errors import UserError diff --git a/isolde/src/atomic/util.py b/isolde/src/atomic/util.py index f877c8c1..095db3ac 100644 --- a/isolde/src/atomic/util.py +++ b/isolde/src/atomic/util.py @@ -1,5 +1,28 @@ rings = ('PHE','TYR') #,'TYS','PTR') <- non-standard amino acids currently throw a RuntimeError (25/10/2021) + +def clear_altlocs(model, logger=None): + ''' + Drop all alternate conformations from ``model`` and reset the affected + atoms' occupancies to 1.0. Mirrors the action ISOLDE offers via the + auto-popup the first time a model with alt locs is selected. + + Returns the number of atoms that had alternate conformers before the + call. 
+ ''' + atoms_with_alt_locs = model.atoms[model.atoms.num_alt_locs > 0] + n = int(len(atoms_with_alt_locs)) + if n: + model.delete_alt_locs() + atoms_with_alt_locs.occupancies = 1 + if logger is not None: + logger.info( + 'Removed all altlocs in #{} ({} atom(s)) and reset associated ' + 'occupancies to 1.'.format(model.id_string, n) + ) + return n + + def correct_pseudosymmetric_sidechain_atoms(session, residues): ''' Protein sidechain atom names follow strict rules dictating the names of atoms diff --git a/isolde/src/cmd/cmd.py b/isolde/src/cmd/cmd.py index 482b648a..2a1bffae 100644 --- a/isolde/src/cmd/cmd.py +++ b/isolde/src/cmd/cmd.py @@ -374,6 +374,69 @@ def isolde_jump(session, direction="next"): rs = get_stepper(m) rs.incr_chain(direction) + +def isolde_add_disulfides_auto(session, model=None): + ''' + Create disulfide bonds for every pair of cysteines whose SG atoms are + close enough to be disulfide-bonded but currently lack an SG-SG bond + (the "possible" set from ``isolde preflight disulfides``). + + Cysteines that cluster in groups of three or more are *not* bonded + automatically — those need manual triage and are reported as a warning. + + Setting the per-model "checked" flag here also suppresses the auto-popup + that would otherwise fire on the next ``isolde select`` for the model. 
+ ''' + block_if_sim_running(session) + from ..validation.cmd import _resolve_model + m = _resolve_model(session, model) + log = session.logger + from ..atomic.building.build_utils import create_all_sensible_disulfides + possible, ambiguous = create_all_sensible_disulfides(m, logger=log) + if not possible: + log.info('ISOLDE: no new disulfide bonds to create.') + if ambiguous: + warn_str = ( + 'The following groups of cysteines are clustered too close to ' + 'automatically assign disulfide bonding and should be checked ' + 'manually:\n{}' + ).format('; '.join([ + ', '.join(['{}{}{}'.format(c.chain_id, c.number, c.insertion_code) + for c in residues]) + for residues in ambiguous + ])) + log.warning(warn_str) + m._isolde_disulfide_check_done = True + return { + 'model': m.atomspec, + 'created': int(len(possible)), + 'ambiguous': int(len(ambiguous)), + } + + +def isolde_clear_altlocs(session, model=None): + ''' + Drop all alternate conformations from ``model`` (or ISOLDE's currently + selected model) and reset the affected atoms' occupancies to 1.0. This + mirrors the action ISOLDE offers via the auto-popup the first time a + model with alt locs is selected. + ''' + block_if_sim_running(session) + from ..validation.cmd import _resolve_model + m = _resolve_model(session, model) + log = session.logger + from ..atomic.util import clear_altlocs + n = clear_altlocs(m, logger=log) + if n == 0: + log.info('ISOLDE: model {} has no alternate conformations to clear.' 
+ .format(m.atomspec)) + m._isolde_altloc_check_done = True + return { + 'model': m.atomspec, + 'atoms_cleared': n, + } + + def register_isolde(logger): from chimerax.core.commands import ( register, CmdDesc, @@ -516,6 +579,26 @@ def register_isolde_shorthand(): from .shorthand import register_isolde_shorthand_commands register('isolde shorthand', desc, register_isolde_shorthand_commands, logger=logger) + def register_isolde_add_disulfides_auto(): + from .argspec import IsoldeStructureArg + desc = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis=('Create disulfide bonds for every cysteine pair within ' + 'disulfide-bonding distance that is not already bonded'), + ) + register('isolde add disulfides auto', desc, + isolde_add_disulfides_auto, logger=logger) + + def register_isolde_clear_altlocs(): + from .argspec import IsoldeStructureArg + desc = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis=('Drop all alternate conformations from the model and ' + 'reset the affected atoms\' occupancies to 1.0'), + ) + register('isolde clear altlocs', desc, + isolde_clear_altlocs, logger=logger) + register_isolde_start() register_isolde_set() register_isolde_select() @@ -530,6 +613,8 @@ def register_isolde_shorthand(): register_isolde_jump() register_isolde_change_b() register_isolde_shorthand() + register_isolde_add_disulfides_auto() + register_isolde_clear_altlocs() from chimerax.isolde.remote_control import register_remote_commands register_remote_commands(logger) from chimerax.isolde.restraints.cmd import register_isolde_restrain diff --git a/isolde/src/isolde.py b/isolde/src/isolde.py index e64a3bc5..2a0a7e88 100644 --- a/isolde/src/isolde.py +++ b/isolde/src/isolde.py @@ -800,16 +800,23 @@ def _change_selected_model(self, *_, model=None, force=False): with session.triggers.block_trigger('remove models'), session.triggers.block_trigger('add models'): if not getattr(m, 'isolde_initialized', False): - atoms_with_alt_locs = 
m.atoms[m.atoms.num_alt_locs>0] - if len(atoms_with_alt_locs): - from .dialog import choice_warning - result = choice_warning(f'This model contains {len(atoms_with_alt_locs)} atoms with alternate ' - 'conformers. ISOLDE cannot currently see these, but they will be carried through to the ' - 'output model. In most cases it is best to remove them. Would you like to do so now?') - if result: - m.delete_alt_locs() - atoms_with_alt_locs.occupancies = 1 - self.session.logger.info(f'Removed all altlocs in #{m.id_string} and reset associated occupancies to 1.') + # Skip the alt-loc popup if "isolde preflight altlocs" or + # "isolde clear altlocs" has already been run for this model: + # they set this flag once the situation has been acknowledged + # (or resolved), which lets agent-driven setup handle the + # question through a chat round-trip instead of a blocking + # GUI dialog. + if not getattr(m, '_isolde_altloc_check_done', False): + atoms_with_alt_locs = m.atoms[m.atoms.num_alt_locs>0] + if len(atoms_with_alt_locs): + from .dialog import choice_warning + result = choice_warning(f'This model contains {len(atoms_with_alt_locs)} atoms with alternate ' + 'conformers. ISOLDE cannot currently see these, but they will be carried through to the ' + 'output model. In most cases it is best to remove them. 
Would you like to do so now?') + if result: + from .atomic.util import clear_altlocs + clear_altlocs(m, logger=self.session.logger) + m._isolde_altloc_check_done = True from .atomic.util import correct_pseudosymmetric_sidechain_atoms correct_pseudosymmetric_sidechain_atoms(session, m.residues) m.isolde_initialized = True diff --git a/isolde/src/menu/model_building/disulphides/make_all_sensible_disulphides.py b/isolde/src/menu/model_building/disulphides/make_all_sensible_disulphides.py index 15f48efe..3a6169a4 100644 --- a/isolde/src/menu/model_building/disulphides/make_all_sensible_disulphides.py +++ b/isolde/src/menu/model_building/disulphides/make_all_sensible_disulphides.py @@ -11,18 +11,12 @@ def run_script(session): from chimerax.core.commands import run from chimerax.core.errors import UserError - from chimerax.isolde.atomic.building.build_utils import current_and_possible_disulfides, create_disulfide + from chimerax.isolde.atomic.building.build_utils import create_all_sensible_disulfides run(session, 'isolde start', log=False) m = session.isolde.selected_model if m is None: raise UserError('Select a model in ISOLDE first!') - current, possible, ambiguous = current_and_possible_disulfides(m) - for cys_pair in possible: - create_disulfide(*cys_pair) - if len(possible): - session.logger.info('Created disulfide bonds between the following residues: \n{}'.format( - '; '.join(['-'.join(['{}{}{}'.format (c.chain_id, c.number, c.insertion_code) for c in p]) for p in possible]) - )) + _possible, ambiguous = create_all_sensible_disulfides(m, logger=session.logger) if len(ambiguous): warn_str = ('The following cysteine residues are clustered too close to ' 'automatically assign disulphide-bonded pairs. 
Please check manually.\n{}').format( diff --git a/isolde/src/ui/main_win.py b/isolde/src/ui/main_win.py index cb34930e..28a37c03 100644 --- a/isolde/src/ui/main_win.py +++ b/isolde/src/ui/main_win.py @@ -170,7 +170,17 @@ def _check_for_unspecified_disulfides(self, *_): from chimerax.core.triggerset import DEREGISTER if m is None: return DEREGISTER - from chimerax.isolde.atomic.building.build_utils import current_and_possible_disulfides + # The "isolde preflight disulfides" / "isolde add disulfides auto" + # commands set this flag once the user (or a driving agent) has + # acknowledged the situation, so that the popup never fires again + # for that model. This keeps the GUI flow unchanged for interactive + # users while letting agent-driven setup pre-resolve the question + # via the preflight commands. + if getattr(m, '_isolde_disulfide_check_done', False): + return DEREGISTER + from chimerax.isolde.atomic.building.build_utils import ( + current_and_possible_disulfides, create_all_sensible_disulfides + ) current, possible, ambiguous = current_and_possible_disulfides(m, cutoff_distance=2.3) from ..dialog import generic_warning, choice_warning if len(possible): @@ -178,12 +188,7 @@ def _check_for_unspecified_disulfides(self, *_): 'specified in the model metadata. 
Would you like to create them now?') result = choice_warning(warn_str, yesno=True) if result: - from chimerax.isolde.atomic.building.build_utils import create_disulfide - for cys_pair in possible: - create_disulfide(*cys_pair) - self.session.logger.info('ISOLDE: created disulfide bonds between the following residues: \n{}'.format( - '; '.join(['-'.join(['{}{}{}'.format (c.chain_id, c.number, c.insertion_code) for c in p]) for p in possible]) - )) + create_all_sensible_disulfides(m, logger=self.session.logger) if len(ambiguous): from chimerax.atomic import concise_residue_spec warn_base = ('The following groups of cysteines are clustered too close to automatically assign disulfide bonding and ' @@ -196,6 +201,10 @@ def residue_string(residues): '
'+'
'.join([f'{residue_string(residues)}' for residues in ambiguous]) ) self.session.logger.warning(log_str, is_html=True) + # Mark this model as checked so that subsequent ``isolde select`` + # calls don't re-ask. The flag is also set by the preflight + # commands described above. + m._isolde_disulfide_check_done = True return DEREGISTER diff --git a/isolde/src/validation/cmd.py b/isolde/src/validation/cmd.py index e2cdbf99..a4b27794 100644 --- a/isolde/src/validation/cmd.py +++ b/isolde/src/validation/cmd.py @@ -388,6 +388,137 @@ def isolde_preflight_parameters(session, model=None, forcefield=None, } +def isolde_preflight_disulfides(session, model=None): + ''' + Preflight check: does ``model`` (or ISOLDE's currently selected model) + contain pairs of cysteines whose SG atoms are close enough to be + disulfide-bonded but for which no SG-SG bond is currently present in the + model? + + This is the same geometric check that fires the "create disulfides?" GUI + popup the first time a model is selected in ISOLDE. Calling this command + does *not* create any bonds, but it stamps a per-model flag so that the + auto-popup will not subsequently fire for the same model — i.e. running + this preflight is treated as the user/agent's acknowledgement of the + situation. Pair this with ``isolde add disulfides auto`` if you want the + bonds created. + + Returns a dict with three lists (``current``, ``possible``, ``ambiguous``). + Each list entry is itself a list of residue summaries. 
+ ''' + m = _resolve_model(session, model) + log = session.logger + + from ..atomic.building.build_utils import current_and_possible_disulfides + current, possible, ambiguous = current_and_possible_disulfides( + m, cutoff_distance=2.3 + ) + + def _pair(rset): + return [_residue_summary(r) for r in sorted( + rset, key=lambda r: (r.chain_id, r.number, r.insertion_code) + )] + + current_info = [_pair(p) for p in current] + possible_info = [_pair(p) for p in possible] + ambiguous_info = [_pair(p) for p in ambiguous] + + def _label_set(rset): + return '-'.join(_residue_label(r) for r in sorted( + rset, key=lambda r: (r.chain_id, r.number, r.insertion_code) + )) + + summary = ( + 'ISOLDE disulfide check ({}): {} existing, {} possible new, ' + '{} ambiguous cluster(s).'.format( + m.atomspec, len(current), len(possible), len(ambiguous) + ) + ) + if not possible and not ambiguous: + log.info(summary) + else: + log.warning(summary) + if possible: + log.info(' Possible: ' + ', '.join( + _label_set(p) for p in possible + )) + log.info( + ' To create them, run "isolde add disulfides auto {}".' + .format(m.atomspec) + ) + if ambiguous: + log.info(' Ambiguous (3+ cysteines clustered, manual fix required): ' + + '; '.join(_label_set(p) for p in ambiguous)) + + # Calling the preflight counts as acknowledging the situation; this + # suppresses the GUI popup that would otherwise fire on the next frame + # after `isolde select`. + m._isolde_disulfide_check_done = True + + return { + 'model': m.atomspec, + 'current': current_info, + 'possible': possible_info, + 'ambiguous': ambiguous_info, + 'n_current': len(current), + 'n_possible': len(possible), + 'n_ambiguous': len(ambiguous), + 'recommend_create': len(possible) > 0, + } + + +def isolde_preflight_altlocs(session, model=None): + ''' + Preflight check: does ``model`` (or ISOLDE's currently selected model) + contain atoms with alternate conformations? 
ISOLDE cannot see alt locs + during a simulation, but they are carried through to the output, so in + most refinement workflows they should be removed before starting. + + This is the situation that triggers the "remove alt locs?" GUI popup + inside ``Isolde.selected_model`` the first time a model is selected. + Calling this preflight stamps a per-model flag so that the auto-popup + will not fire for the model. Pair with ``isolde clear altlocs`` to + actually drop them. + + Returns a dict with the per-residue list and counts. + ''' + m = _resolve_model(session, model) + log = session.logger + + atoms_with_altlocs = m.atoms[m.atoms.num_alt_locs > 0] + n_atoms = int(len(atoms_with_altlocs)) + affected_residues = atoms_with_altlocs.unique_residues + residues_info = [_residue_summary(r) for r in affected_residues] + + summary = ( + 'ISOLDE altloc check ({}): {} atom(s) with alternate conformers ' + 'across {} residue(s).'.format(m.atomspec, n_atoms, len(residues_info)) + ) + if n_atoms == 0: + log.info(summary) + else: + log.warning(summary) + log.info(' Affected residues: ' + ', '.join( + _residue_label(r) for r in affected_residues + )) + log.info( + ' To drop alt locs and reset occupancies, run ' + '"isolde clear altlocs {}".'.format(m.atomspec) + ) + + # See note in isolde_preflight_disulfides: the call itself acknowledges + # the situation and suppresses the auto-popup. 
+ m._isolde_altloc_check_done = True + + return { + 'model': m.atomspec, + 'atoms_with_altlocs': n_atoms, + 'residues': residues_info, + 'n_residues': len(residues_info), + 'recommend_clear': n_atoms > 0, + } + + def register_preflight_commands(logger): from chimerax.core.commands import ( register, CmdDesc, BoolArg, StringArg, @@ -411,3 +542,19 @@ def register_preflight_commands(logger): ) register('isolde preflight parameters', desc_p, isolde_preflight_parameters, logger=logger) + + desc_d = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis=('Preflight check: are there cysteine pairs likely to need ' + 'disulfide bonds? Suppresses the corresponding GUI popup.'), + ) + register('isolde preflight disulfides', desc_d, + isolde_preflight_disulfides, logger=logger) + + desc_a = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis=('Preflight check: does the model contain alternate ' + 'conformers? Suppresses the corresponding GUI popup.'), + ) + register('isolde preflight altlocs', desc_a, + isolde_preflight_altlocs, logger=logger) From b53ae388fba29e6e986decc4cedcae0f0d0e9c99 Mon Sep 17 00:00:00 2001 From: Alexis Rohou Date: Fri, 15 May 2026 14:35:25 -0700 Subject: [PATCH 2/4] Add read-only `isolde validate` command suite Adds `isolde validate {peptidebonds,rama,rotamers,clashes}` for agent / scripted access to the same scoring as the GUI Validate tab, plus a no-op `isolde validate` parent that lists the subcommands when called bare (same for `isolde preflight`). Each subcommand returns a structured dict (summary + items) and accepts shared `log` / `saveFile` / `limit` keywords; summary lines include a hint pointing the caller at how to see the full list. 
Refactors the GUI peptide-bond and clashes panels and the `rama` / `rota` `report=True` text dumps to share the new compute helpers (`_compute_rama_report`, `_compute_rotamer_report`, `classify_peptide_bonds`, `clash_atom_label`) so the new commands and the legacy GUI/CLI surfaces stay in lock-step. `RamaMgr.cis()` and `twisted()` now also read their cutoffs from `defaults.CIS_PEPTIDE_BOND_CUTOFF` / `defaults.TWISTED_PEPTIDE_BOND_DELTA` instead of hardcoded `radians(30)` / `radians(150)`. --- isolde/docs/source/commands/isolde.rst | 113 +++ isolde/src/cmd/cmd.py | 2 + isolde/src/molobject.py | 19 +- isolde/src/ui/validation_tab/clashes.py | 9 +- isolde/src/ui/validation_tab/peptide_bond.py | 34 +- isolde/src/validation/cmd.py | 947 ++++++++++++++++++- 6 files changed, 1061 insertions(+), 63 deletions(-) diff --git a/isolde/docs/source/commands/isolde.rst b/isolde/docs/source/commands/isolde.rst index 4a15de2a..4fcd824a 100644 --- a/isolde/docs/source/commands/isolde.rst +++ b/isolde/docs/source/commands/isolde.rst @@ -156,6 +156,119 @@ forcefield (e.g. ``amber14``); pass it explicitly to preflight against a different one. *ignoreExternalBonds* defaults to ``true`` to match the behaviour of the GUI panel. +.. _validate: + +isolde validate +=============== + +Read-only commands that run the same scoring/validators as the subpanels +of ISOLDE's GUI **Validate** tab, returning structured results suitable +for programmatic use (e.g. by an agent driving the MCP server) without +opening the GUI. They never modify the model and never start a +simulation. The unparametrised-residues panel is intentionally omitted +here - that check is covered by :ref:`preflight` (``isolde preflight +parameters``). + +Each subcommand returns a dictionary with summary counts plus a +``items`` list, and shares three output keywords: + +- *log* (boolean, default ``false``) - dump the full per-item table to + the ChimeraX HTML log wrapped in ``
<pre>...</pre>
``, matching the + pattern used by the ChimeraX ``clashes`` and ``hbonds`` commands. +- *saveFile* (path, default unset) - write the full table to disk. + Paths ending in ``.json`` get a structured JSON dump (the full + unclipped item list with the summary); any other extension gets a + plain UTF-8 text table. +- *limit* (integer, default unset / 200 for ``clashes``) - cap the + ``items`` list returned inline so a giant structure doesn't blow up + the agent's context window. The ``saveFile`` output ignores this and + always contains the full list; the returned dict carries + ``truncated``, ``returned_count`` and ``total_count`` when clipped. + +isolde validate peptidebonds +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Syntax: isolde validate peptidebonds [*model*] +[**saveFile** *path*] [**log** *true|FALSE*] [**limit** *integer*] + +Report cis and twisted peptide bonds in *model* (or ISOLDE's currently +selected model), using the same omega-dihedral classification that +ISOLDE's "Peptide Bond Validation" panel applies +(``CIS_PEPTIDE_BOND_CUTOFF`` and ``TWISTED_PEPTIDE_BOND_DELTA``, +defaulting to 30 degrees each). Cis-prolines are valid and are reported +separately from cis non-proline bonds. + +Returns a dictionary with summary counts (``n_residues``, +``n_cis_nonpro``, ``n_cis_pro``, ``n_twisted``, ``n_iffy``) and a +per-bond ``items`` list. Each item carries the chain, both residues, +the omega angle in degrees, the ``conformation`` (``cis`` or +``twisted``), and an ``is_proline`` flag for the C-terminal residue. + +isolde validate rama +~~~~~~~~~~~~~~~~~~~~ + +Syntax: isolde validate rama [*model*] +[**include** *outliers|allowed|all*] [**saveFile** *path*] +[**log** *true|FALSE*] [**limit** *integer*] + +Report Ramachandran scoring for protein residues in *model* (or +ISOLDE's currently selected model), using the same MolProbity contours +and bin cutoffs as ISOLDE's Ramachandran plot. 
*include* selects which +residues appear in the per-residue list: ``outliers`` (default), +``allowed`` (outliers + allowed) or ``all`` (favored too). Summary +counts always cover the full model regardless of *include*. + +Returns a dictionary with summary counts (``n_scorable``, +``n_favored``, ``n_allowed``, ``n_outlier``) and a per-residue +``items`` list giving the phi and psi angles in degrees, the +MolProbity ``score``, the ``classification`` (favored / allowed / +outlier) and the Ramachandran ``case`` (``general``, ``Gly``, +``trans-Pro``, etc.). + +This is a pure validation command - to toggle ISOLDE's live 3D +Ramachandran annotators see the existing ``rama`` command instead. + +isolde validate rotamers +~~~~~~~~~~~~~~~~~~~~~~~~ + +Syntax: isolde validate rotamers [*model*] +[**include** *outliers|nonfavored|all*] [**saveFile** *path*] +[**log** *true|FALSE*] [**limit** *integer*] + +Report rotamer scoring for sidechain-bearing residues in *model* (or +ISOLDE's currently selected model), using the same MolProbity contours +and P-value cutoffs as ISOLDE's "Rotamer Validation" panel. *include* +selects which residues appear in the per-residue list: ``nonfavored`` +(default; outliers + allowed), ``outliers`` or ``all``. Summary counts +always cover all rotameric residues. + +Returns a dictionary with summary counts (``n_rotameric``, +``n_favored``, ``n_allowed``, ``n_outlier``), the current +``cutoff_allowed`` and ``cutoff_outlier`` P-values, and a per-residue +``items`` list giving the P-value ``score`` and ``classification``. + +This is a pure validation command - to toggle ISOLDE's live 3D +rotamer annotators see the existing ``rota`` command instead. 
+ +isolde validate clashes +~~~~~~~~~~~~~~~~~~~~~~~ + +Syntax: isolde validate clashes [*model*] [**saveFile** *path*] +[**log** *true|FALSE*] [**limit** *integer*] + +Report steric clashes in *model* (or ISOLDE's currently selected +model), using ISOLDE's ``unique_clashes`` wrapper around the ChimeraX +``clashes`` machinery. Each clash carries both atoms, the van der +Waals overlap in Angstroms, and a ``severity`` of either ``strict`` +(overlap above ``STRICT_CUTOFF``, default 0.4 A) or ``severe`` +(overlap above ``SEVERE_CUTOFF``, default 0.6 A). + +Returns a dictionary with summary counts (``n_total``, ``n_severe``, +``n_strict``) and a per-clash ``items`` list sorted by descending +overlap. *limit* defaults to 200 for this command since the inline +list dwarfs the other validators on real-world structures; widen with +*limit* or capture everything with *saveFile*. + .. _sim: isolde sim diff --git a/isolde/src/cmd/cmd.py b/isolde/src/cmd/cmd.py index 2a1bffae..42749677 100644 --- a/isolde/src/cmd/cmd.py +++ b/isolde/src/cmd/cmd.py @@ -635,3 +635,5 @@ def register_isolde_clear_altlocs(): register_isolde_benchmark(logger) from chimerax.isolde.validation.cmd import register_preflight_commands register_preflight_commands(logger) + from chimerax.isolde.validation.cmd import register_validate_commands + register_validate_commands(logger) diff --git a/isolde/src/molobject.py b/isolde/src/molobject.py index 2f98fb77..0d9bbc6c 100644 --- a/isolde/src/molobject.py +++ b/isolde/src/molobject.py @@ -1065,27 +1065,32 @@ def cis(self, residues): of input residues whose peptide bonds are in the cis conformation. 
''' from chimerax.atomic import Residue + from ..constants import defaults residues = residues[residues.polymer_types==Residue.PT_AMINO] omegas = self._dihedral_mgr.get_dihedrals(residues, 'omega') - from math import radians import numpy - cis = omegas[numpy.abs(omegas.angles) < radians(30)] + cis = omegas[numpy.abs(omegas.angles) < defaults.CIS_PEPTIDE_BOND_CUTOFF] return cis.residues def twisted(self, residues): ''' - Returns a list of (:class:`chimerax.Residue`, angle) 2-tuples giving the - subset of input residues whose peptide bonds are twisted more than 30 - degrees from planar. + Returns a list of (:class:`chimerax.Residue`, angle) 2-tuples giving + the subset of input residues whose peptide bonds are twisted from + planar by more than ``defaults.CIS_PEPTIDE_BOND_CUTOFF`` but less + than ``pi - defaults.TWISTED_PEPTIDE_BOND_DELTA`` (default 30 - 150 + degrees in absolute value). ''' from chimerax.atomic import Residue + from ..constants import defaults + from math import pi residues = residues[residues.polymer_types==Residue.PT_AMINO] omegas = self._dihedral_mgr.get_dihedrals(residues, 'omega') - from math import radians import numpy angles = omegas.angles abs_angles = numpy.abs(angles) - twisted_mask = numpy.logical_and(abs_angles >= radians(30), abs_angles < radians(150)) + twisted_mask = numpy.logical_and( + abs_angles >= defaults.CIS_PEPTIDE_BOND_CUTOFF, + abs_angles < pi - defaults.TWISTED_PEPTIDE_BOND_DELTA) return [(t.residue, angle) for t, angle in zip(omegas[twisted_mask], numpy.degrees(angles[twisted_mask]))] diff --git a/isolde/src/ui/validation_tab/clashes.py b/isolde/src/ui/validation_tab/clashes.py index 6ec26df6..26a79fc3 100644 --- a/isolde/src/ui/validation_tab/clashes.py +++ b/isolde/src/ui/validation_tab/clashes.py @@ -82,18 +82,17 @@ def _populate_table(self, *_): t.setRowCount(0) if not len(atoms): return - from chimerax.isolde.validation.clashes import unique_clashes + from chimerax.isolde.validation.clashes import unique_clashes, 
clash_atom_label clashes = unique_clashes(self.session, atoms) t.setRowCount(len(clashes)) for i, clash in enumerate(clashes): catoms = clash.atoms a1, a2 = catoms - r1, r2 = catoms.residues data = ( - "{} {}{}: {}".format(r1.name, r1.chain_id, r1.number, a1.name), - "{} {}{}: {}".format(r2.name, r2.chain_id, r2.number, a2.name), - "{:0.2f}".format(clash.overlap) + clash_atom_label(a1), + clash_atom_label(a2), + "{:0.2f}".format(clash.overlap), ) for j, d in enumerate(data): item = QTableWidgetItem(d) diff --git a/isolde/src/ui/validation_tab/peptide_bond.py b/isolde/src/ui/validation_tab/peptide_bond.py index c1d712ee..50f5d078 100644 --- a/isolde/src/ui/validation_tab/peptide_bond.py +++ b/isolde/src/ui/validation_tab/peptide_bond.py @@ -37,42 +37,35 @@ def __init__(self, session, isolde, gui, collapse_area, sim_sensitive=True): def _populate_table(self, *_): - import numpy table = self.table table.setRowCount(0) m = self.isolde.selected_model if m is None or m.deleted: return from chimerax.isolde.session_extensions import get_proper_dihedral_mgr + from chimerax.isolde.validation.cmd import classify_peptide_bonds pdm = get_proper_dihedral_mgr(self.session) if self.isolde.simulation_running: residues = self.isolde.sim_manager.sim_construct.mobile_residues else: residues = m.residues - omegas = pdm.get_dihedrals(residues, 'omega') - abs_angles = numpy.abs(omegas.angles) - from math import pi - from chimerax.isolde.constants import defaults - cc = defaults.CIS_PEPTIDE_BOND_CUTOFF - tc = defaults.TWISTED_PEPTIDE_BOND_DELTA - cis_mask = abs_angles < cc - twisted_mask = numpy.logical_and(abs_angles >= cc, abs_angles < pi-tc) - iffy_mask = numpy.logical_or(cis_mask, twisted_mask) - iffy = omegas[iffy_mask] - angles = numpy.degrees(iffy.angles) - cis_mask = cis_mask[iffy_mask] + iffy = classify_peptide_bonds(pdm, residues) table.setRowCount(len(iffy)) cis_nonpro_color = QBrush(QColor(255, 100, 100), Qt.SolidPattern) cis_pro_color = QBrush(QColor(100,255,100), 
Qt.SolidPattern) twisted_color = QBrush(QColor(240, 200, 160), Qt.SolidPattern) - for i, (omega, angle, cis) in enumerate(zip(iffy, angles, cis_mask)): - res1, res2 = omega.atoms.unique_residues - if cis: + for i, it in enumerate(iffy): + res1 = it['res1'] + res2 = it['res2'] + angle = it['omega_deg'] + if it['is_cis']: conf_text = 'cis' + color = cis_pro_color if it['is_proline'] else cis_nonpro_color else: conf_text = 'twisted' + color = twisted_color data = ( res1.chain_id, f'{res1.name}:{res1.number}-{res2.name}:{res2.number}', @@ -81,16 +74,9 @@ def _populate_table(self, *_): for j, d in enumerate(data): item = QTableWidgetItem(d) item.setData(Qt.ItemDataRole.UserRole, res2) - if cis: - if res2.name == 'PRO': - color = cis_pro_color - else: - color = cis_nonpro_color - else: - color = twisted_color item.setBackground(color) table.setItem(i, j, item) - if cis: + if it['is_cis']: from chimerax.core.commands import run def cb(_, r=res2): run(self.session, f'isolde cisflip #{m.id_string}/{r.chain_id}:{r.number}') diff --git a/isolde/src/validation/cmd.py b/isolde/src/validation/cmd.py index a4b27794..8d5e9d06 100644 --- a/isolde/src/validation/cmd.py +++ b/isolde/src/validation/cmd.py @@ -20,16 +20,14 @@ def rota(session, structures=None, report=False): for structure in structures: sx.get_rota_annotator(structure) if report: - from chimerax.atomic import Residues, concatenate - residues = concatenate([m.residues for m in structures]) - mgr = sx.get_rotamer_mgr(session) - rotamers = mgr.get_rotamers(residues) report_str = 'NON-FAVOURED ROTAMERS: \n' - nf, scores = mgr.non_favored_rotamers(rotamers) - for r, score in zip(nf, scores): - report_str += '#{:<6} {}:\t{} {} (P={:.4f})\n'.format( - r.residue.structure.id_string, r.residue.chain_id, r.residue.name, - r.residue.number, score + for structure in structures: + data = _compute_rotamer_report(session, structure, + include='nonfavored') + for it in data['items']: + report_str += '#{:<6} {}:\t{} {} 
(P={:.4f})\n'.format( + structure.id_string, it['chain_id'], it['name'], + it['number'], it['score'] ) session.logger.info(report_str) @@ -60,27 +58,37 @@ def rama(session, structures=None, show_favored=True, report=False): ra = sx.get_rama_annotator(structure) ra.hide_favored = not show_favored if report: - from chimerax.atomic import Residues, concatenate - residues = concatenate([m.residues for m in structures]) - mgr = sx.get_ramachandran_mgr(session) + pep_data_by_structure = [ + (structure, _compute_peptide_bond_report(session, structure)) + for structure in structures + ] report_str = 'RAMACHANDRAN OUTLIERS: \n' - outliers = mgr.outliers(residues) - for outlier in outliers: - report_str +='#{:<6} {}:\t{} {}\n'.format( - outlier.structure.id_string, outlier.chain_id, outlier.name, outlier.number) + for structure in structures: + rd = _compute_rama_report(session, structure, include='outliers') + for it in rd['items']: + report_str +='#{:<6} {}:\t{} {}\n'.format( + structure.id_string, it['chain_id'], + it['name'], it['number']) report_str += '\nCIS PEPTIDE BONDS: \n' - cispeps = mgr.cis(residues) - for cis in cispeps: - report_str +='#{:<6} {}:\t{} {}\n'.format( - cis.structure.id_string, cis.chain_id, cis.name, cis.number - ) + for structure, pd in pep_data_by_structure: + for it in pd['items']: + if it['conformation'] != 'cis': + continue + r2 = it['res2'] + report_str +='#{:<6} {}:\t{} {}\n'.format( + structure.id_string, r2['chain_id'], + r2['name'], r2['number'] + ) report_str += '\nTWISTED PEPTIDE BONDS: \n' - twisteds = mgr.twisted(residues) - for twisted, angle in twisteds: - report_str += '#{:<6} {}:\t{} {} ({:.1f}°)\n'.format( - twisted.structure.id_string, twisted.chain_id, twisted.name, - twisted.number, angle - ) + for structure, pd in pep_data_by_structure: + for it in pd['items']: + if it['conformation'] != 'twisted': + continue + r2 = it['res2'] + report_str += '#{:<6} {}:\t{} {} ({:.1f}°)\n'.format( + structure.id_string, r2['chain_id'], + 
r2['name'], r2['number'], it['omega_deg'] + ) session.logger.info(report_str) def unrama(session, structures=None): @@ -154,6 +162,30 @@ def register_rama(logger): from chimerax.core.errors import UserError + +# Subcommand names exposed under ``isolde preflight``. Used both for +# registration order and to build the helpful error raised by the bare +# ``isolde preflight`` parent handler below. +_PREFLIGHT_SUBCOMMANDS = ('hydrogens', 'parameters', 'disulfides', 'altlocs') + + +def isolde_preflight(session, model=None): + ''' + Bare ``isolde preflight`` handler. Always raises ``UserError`` listing + the available subcommands. + + Registered as the parent of ``isolde preflight hydrogens`` / + ``parameters`` / ``disulfides`` / ``altlocs`` so that calls like + ``isolde preflight #1.2`` (model spec but no subcommand) get a useful + "expected one of these" message instead of ChimeraX's generic + ``Unknown command: isolde preflight #1.2``. + ''' + raise UserError( + "'isolde preflight' requires a subcommand. Available: " + + ', '.join(_PREFLIGHT_SUBCOMMANDS) + + ". Example: 'isolde preflight hydrogens #1'." + ) + from .unparameterised import ( H_TO_HEAVY_ATOM_THRESHOLD_RATIO, suspiciously_low_h, @@ -558,3 +590,864 @@ def register_preflight_commands(logger): ) register('isolde preflight altlocs', desc_a, isolde_preflight_altlocs, logger=logger) + + # Parent command: catches ``isolde preflight`` (no subcommand) and + # ``isolde preflight `` (model spec but no subcommand) and + # turns them into a helpful "expected one of: ..." error instead of + # ChimeraX's generic ``Unknown command``. The ``optional`` model arg + # exists so the parser cleanly consumes the spec - the handler always + # raises before doing any work. 
+ desc_top = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis='Run an ISOLDE preflight check (requires a subcommand: {}).'.format( + ', '.join(_PREFLIGHT_SUBCOMMANDS)), + ) + register('isolde preflight', desc_top, isolde_preflight, logger=logger) + + +# --------------------------------------------------------------------------- +# Read-only ``isolde validate`` commands. +# +# These commands run the same scoring/validators as ISOLDE's GUI Validate +# tab and return structured results. They never modify the model and never +# start a simulation - safe to call at any time once a model is selected. +# +# Each handler returns a ``dict`` with summary counts plus a per-item list +# (suitable for direct consumption by an agent / MCP caller). Output is +# also routed through three shared keywords: +# +# - ``log`` (bool, default False): dump the full per-item table to the +# ChimeraX HTML log wrapped in ``
<pre>...</pre>
`` (same pattern as the +# ChimeraX ``clashes`` / ``hbonds`` commands). +# - ``save_file`` (path, default None): write the full table to disk. +# ``.json`` paths get a structured JSON dump; anything else gets a +# plain-text aligned table written via ``chimerax.io.open_output``. +# - ``limit`` (int, default None): cap the per-item list in the returned +# dict so a giant structure doesn't blow up the agent's context. The +# file output ignores this limit and always contains the full list. +# --------------------------------------------------------------------------- + + +# Don't dump arbitrarily large tables into the HTML log; clip to this many +# rows when ``log=True`` so the Reply Log stays usable. +_LOG_TABLE_ROW_LIMIT = 500 + + +# Subcommand names exposed under ``isolde validate``. Used both for +# registration order and to build the helpful error raised by the bare +# ``isolde validate`` parent handler below. +_VALIDATE_SUBCOMMANDS = ('peptidebonds', 'rama', 'rotamers', 'clashes') + + +def isolde_validate(session, model=None): + ''' + Bare ``isolde validate`` handler. Always raises ``UserError`` listing + the available subcommands. + + Registered as the parent of ``isolde validate peptidebonds`` / + ``rama`` / ``rotamers`` / ``clashes`` so that calls like + ``isolde validate #1.2`` (model spec but no subcommand) get a useful + "expected one of these" message instead of ChimeraX's generic + ``Unknown command: isolde validate #1.2``. + ''' + raise UserError( + "'isolde validate' requires a subcommand. Available: " + + ', '.join(_VALIDATE_SUBCOMMANDS) + + ". Example: 'isolde validate clashes #1'." + ) + + +def _full_list_hint(cmd_name, model_spec, total_count): + ''' + Build a one-line hint pointing the caller at the ``log`` and + ``saveFile`` keywords. Returned as a leading-space-prefixed string so + callers can simply ``summary += _full_list_hint(...)`` without + worrying about extra whitespace, and as the empty string when there + is nothing to list. 
+ ''' + if total_count <= 0: + return '' + return ( + " Showing summary only; re-run as '{cmd} {spec} log true' to dump" + " the full table to the ChimeraX log (capped at {cap} rows), or" + " add 'saveFile ' to write the complete list to disk." + ).format(cmd=cmd_name, spec=model_spec, cap=_LOG_TABLE_ROW_LIMIT) + + +def _format_table(columns, rows): + ''' + Return (header_line, separator_line, [data_line, ...]) as monospace + aligned strings. Columns are left-justified to the widest cell. + ''' + str_rows = [[str(c) for c in row] for row in rows] + widths = [len(c) for c in columns] + for row in str_rows: + for i, cell in enumerate(row): + if len(cell) > widths[i]: + widths[i] = len(cell) + fmt = ' '.join('{{:<{}}}'.format(w) for w in widths) + header_line = fmt.format(*columns) + sep_line = fmt.format(*['-' * w for w in widths]) + data_lines = [fmt.format(*row) for row in str_rows] + return header_line, sep_line, data_lines + + +def _dump_table_to_log(logger, header, columns, rows): + ''' + Log ``header`` followed by a monospace-aligned ``columns`` / ``rows`` + table to the ChimeraX HTML log, matching the pattern used by the + ChimeraX ``clashes`` and ``hbonds`` commands. + ''' + header_line, sep_line, data_lines = _format_table(columns, rows) + truncated = False + if len(data_lines) > _LOG_TABLE_ROW_LIMIT: + data_lines = data_lines[:_LOG_TABLE_ROW_LIMIT] + truncated = True + body = '\n'.join([header_line, sep_line] + data_lines) + if truncated: + body += ('\n... (log output truncated at {} rows; use the saveFile ' + 'option to capture the full table).'.format(_LOG_TABLE_ROW_LIMIT)) + logger.info('
<pre>' + header + '\n' + body + '</pre>
', is_html=True) + + +def _write_results_file(path, *, summary, columns, rows, json_payload): + ''' + Write the full validation table to ``path``. ``.json`` paths get a + structured JSON dump (the full ``json_payload``); any other extension + gets a plain UTF-8 text table via ``chimerax.io.open_output`` (same + helper the ChimeraX ``clashes`` / ``hbonds`` commands use). + ''' + import json as _json + from chimerax.io import open_output + if str(path).lower().endswith('.json'): + with open_output(path, 'utf-8') as f: + _json.dump(json_payload, f, indent=2, default=str) + return + header_line, sep_line, data_lines = _format_table(columns, rows) + with open_output(path, 'utf-8') as f: + f.write(summary + '\n\n') + f.write(header_line + '\n') + f.write(sep_line + '\n') + for line in data_lines: + f.write(line + '\n') + + +def _maybe_limit(items, limit): + ''' + Truncate ``items`` to ``limit`` entries for the inline return value. + Returns ``(returned_items, truncated, total_count, returned_count)``. + ''' + total = len(items) + if limit is None or limit < 0 or total <= limit: + return items, False, total, total + return items[:limit], True, total, int(limit) + + +def classify_peptide_bonds(pdm, residues, *, + cis_cutoff=None, twisted_delta=None): + ''' + Classify the peptide bonds in ``residues`` as cis or twisted, using + ``pdm.get_dihedrals(residues, 'omega')``. 
Returns a list of dicts - + one per cis or twisted bond, in the order returned by + ``pdm.get_dihedrals`` - of the form:: + + {'omega': ProperDihedral, + 'res1': Residue, 'res2': Residue, + 'omega_deg': float, + 'is_cis': bool, 'is_twisted': bool, + 'is_proline': bool} + + where: + + - ``omega`` is the omega :class:`ProperDihedral` object; + - ``res1`` / ``res2`` are the N- and C-terminal residues of the bond; + - ``omega_deg`` is the signed omega angle in degrees; + - ``is_cis`` is ``|omega| < cis_cutoff``; + - ``is_twisted`` is ``cis_cutoff <= |omega| < pi - twisted_delta``; + - ``is_proline`` is true when ``res2.name == 'PRO'``. Cis-Pro bonds are + valid biology; callers typically split them out from cis non-Pro. + + Cutoffs default to ``isolde.constants.defaults.CIS_PEPTIDE_BOND_CUTOFF`` + and ``defaults.TWISTED_PEPTIDE_BOND_DELTA`` (both expressed in radians). + The classifier is shared by ``isolde validate peptidebonds`` and + ISOLDE's GUI "Peptide Bond Validation" panel. + ''' + import numpy + from math import pi + from ..constants import defaults + if cis_cutoff is None: + cis_cutoff = defaults.CIS_PEPTIDE_BOND_CUTOFF + if twisted_delta is None: + twisted_delta = defaults.TWISTED_PEPTIDE_BOND_DELTA + + omegas = pdm.get_dihedrals(residues, 'omega') + angles = omegas.angles + abs_angles = numpy.abs(angles) + cis_mask = abs_angles < cis_cutoff + twisted_mask = numpy.logical_and( + abs_angles >= cis_cutoff, abs_angles < pi - twisted_delta) + iffy_mask = numpy.logical_or(cis_mask, twisted_mask) + iffy_omegas = omegas[iffy_mask] + iffy_angles_deg = numpy.degrees(angles[iffy_mask]) + iffy_cis_mask = cis_mask[iffy_mask] + iffy_twisted_mask = twisted_mask[iffy_mask] + + out = [] + for omega, angle_deg, is_cis, is_twisted in zip( + iffy_omegas, iffy_angles_deg, iffy_cis_mask, iffy_twisted_mask): + res1, res2 = omega.atoms.unique_residues + out.append({ + 'omega': omega, + 'res1': res1, + 'res2': res2, + 'omega_deg': float(angle_deg), + 'is_cis': bool(is_cis), + 
'is_twisted': bool(is_twisted), + 'is_proline': res2.name == 'PRO', + }) + return out + + +def _compute_peptide_bond_report(session, structure): + ''' + Compute the structured cis / twisted peptide-bond report for a single + ``structure``. Pure compute - no logging, no file output, no inline + truncation. Returns a dict with the same shape as + ``isolde_validate_peptidebonds`` (minus pagination fields): + + {'model', 'n_residues', 'n_cis_nonpro', 'n_cis_pro', + 'n_twisted', 'n_iffy', 'items'} + + Each item carries ``chain_id``, ``residue_pair_label``, the two + residue summaries (``res1``, ``res2``), ``omega_deg``, + ``conformation`` (``'cis'`` or ``'twisted'``) and ``is_proline``. + Items are sorted with twisted bonds first, then by chain / number. + ''' + from chimerax.atomic import Residue + from ..session_extensions import get_proper_dihedral_mgr + + pdm = get_proper_dihedral_mgr(session) + aa_residues = structure.residues[ + structure.residues.polymer_types == Residue.PT_AMINO] + raw_items = classify_peptide_bonds(pdm, aa_residues) + + items = [] + n_cis_pro = 0 + n_cis_nonpro = 0 + n_twisted = 0 + for it in raw_items: + res1 = it['res1'] + res2 = it['res2'] + if it['is_cis']: + conformation = 'cis' + if it['is_proline']: + n_cis_pro += 1 + else: + n_cis_nonpro += 1 + else: + conformation = 'twisted' + n_twisted += 1 + items.append({ + 'chain_id': res2.chain_id, + 'residue_pair_label': '{}:{}-{}:{}'.format( + res1.name, res1.number, res2.name, res2.number), + 'res1': _residue_summary(res1), + 'res2': _residue_summary(res2), + 'omega_deg': float(it['omega_deg']), + 'conformation': conformation, + 'is_proline': bool(it['is_proline']), + }) + items.sort(key=lambda x: ( + x['conformation'] != 'twisted', + x['chain_id'], + x['res2']['number'], + )) + + return { + 'model': structure.atomspec, + 'n_residues': int(len(aa_residues)), + 'n_cis_nonpro': int(n_cis_nonpro), + 'n_cis_pro': int(n_cis_pro), + 'n_twisted': int(n_twisted), + 'n_iffy': int(len(items)), + 
'items': items, + } + + +def _compute_rama_report(session, structure, *, include='outliers'): + ''' + Compute the structured Ramachandran report for a single ``structure``. + Pure compute - no logging, no file output, no inline truncation. + + ``include`` selects which residues appear in ``items``: + ``'outliers'`` (default), ``'allowed'`` (outliers + allowed) or + ``'all'``. Summary counts always cover the full model. + ''' + import numpy + from chimerax.atomic import Residue + from ..session_extensions import get_ramachandran_mgr + + mgr = get_ramachandran_mgr(session) + RamaBin = mgr.RamaBin + RamaCase = mgr.RamaCase + case_names = { + int(RamaCase.NONE): 'n/a', + int(RamaCase.CISPRO): 'cis-Pro', + int(RamaCase.TRANSPRO): 'trans-Pro', + int(RamaCase.GLYCINE): 'Gly', + int(RamaCase.PREPRO): 'pre-Pro', + int(RamaCase.ILEVAL): 'Ile/Val', + int(RamaCase.GENERAL): 'general', + } + bin_names = { + int(RamaBin.FAVORED): 'favored', + int(RamaBin.ALLOWED): 'allowed', + int(RamaBin.OUTLIER): 'outlier', + int(RamaBin.NA): 'n/a', + } + + include_choice = (include or 'outliers').lower() + include_sets = { + 'outliers': {int(RamaBin.OUTLIER)}, + 'allowed': {int(RamaBin.OUTLIER), int(RamaBin.ALLOWED)}, + 'all': {int(RamaBin.OUTLIER), int(RamaBin.ALLOWED), + int(RamaBin.FAVORED)}, + } + if include_choice not in include_sets: + raise UserError( + "include must be one of 'outliers', 'allowed', 'all' " + "(got {!r}).".format(include)) + keep_bins = include_sets[include_choice] + + aa_residues = structure.residues[ + structure.residues.polymer_types == Residue.PT_AMINO] + ramas = mgr.get_ramas(aa_residues) + scores, cases = mgr.validate(ramas) + bins = mgr.bin_scores(scores, cases) + not_na = bins != int(RamaBin.NA) + valid_ramas = ramas[not_na] + scores = scores[not_na] + cases = cases[not_na] + bins = bins[not_na] + phipsis_deg = (numpy.degrees(valid_ramas.phipsis) + if len(valid_ramas) else numpy.zeros((0, 2))) + residues = valid_ramas.residues + + n_favored = int((bins == 
int(RamaBin.FAVORED)).sum()) + n_allowed = int((bins == int(RamaBin.ALLOWED)).sum()) + n_outlier = int((bins == int(RamaBin.OUTLIER)).sum()) + n_total_scored = int(len(valid_ramas)) + + severity_rank = {'outlier': 0, 'allowed': 1, 'favored': 2, 'n/a': 3} + items = [] + for i in range(n_total_scored): + bin_i = int(bins[i]) + if bin_i not in keep_bins: + continue + r = residues[i] + phi, psi = phipsis_deg[i] + items.append({ + 'chain_id': r.chain_id, + 'name': r.name, + 'number': int(r.number), + 'insertion_code': r.insertion_code, + 'spec': r.atomspec, + 'phi_deg': float(phi), + 'psi_deg': float(psi), + 'score': float(scores[i]), + 'classification': bin_names[bin_i], + 'case': case_names[int(cases[i])], + }) + items.sort(key=lambda it: ( + severity_rank.get(it['classification'], 9), + it['chain_id'], + it['number'], + )) + + return { + 'model': structure.atomspec, + 'include': include_choice, + 'n_scorable': n_total_scored, + 'n_favored': n_favored, + 'n_allowed': n_allowed, + 'n_outlier': n_outlier, + 'items': items, + } + + +def _compute_rotamer_report(session, structure, *, include='nonfavored'): + ''' + Compute the structured rotamer report for a single ``structure``. + Pure compute - no logging, no file output, no inline truncation. + + ``include`` selects which residues appear in ``items``: ``'outliers'``, + ``'nonfavored'`` (default; outliers + allowed) or ``'all'``. Summary + counts always cover the full set of rotameric residues. 
+ ''' + import numpy + from ..session_extensions import get_rotamer_mgr + + mgr = get_rotamer_mgr(session) + allowed_cutoff, outlier_cutoff = mgr.cutoffs + + include_choice = (include or 'nonfavored').lower() + include_sets = { + 'outliers': {'outlier'}, + 'nonfavored': {'outlier', 'allowed'}, + 'all': {'outlier', 'allowed', 'favored'}, + } + if include_choice not in include_sets: + raise UserError( + "include must be one of 'outliers', 'nonfavored', 'all' " + "(got {!r}).".format(include)) + keep = include_sets[include_choice] + + rotas = mgr.get_rotamers(structure.residues) + n_total = len(rotas) + if n_total: + scores = mgr.validate_rotamers(rotas) + residues = rotas.residues + else: + scores = numpy.zeros(0, dtype=float) + from chimerax.atomic import Residues + residues = Residues() + + favored_mask = scores >= allowed_cutoff + outlier_mask = scores < outlier_cutoff + allowed_mask = numpy.logical_and(~favored_mask, ~outlier_mask) + n_favored = int(favored_mask.sum()) + n_allowed = int(allowed_mask.sum()) + n_outlier = int(outlier_mask.sum()) + + severity_rank = {'outlier': 0, 'allowed': 1, 'favored': 2} + items = [] + for i in range(n_total): + s = float(scores[i]) + if s >= allowed_cutoff: + cls = 'favored' + elif s >= outlier_cutoff: + cls = 'allowed' + else: + cls = 'outlier' + if cls not in keep: + continue + r = residues[i] + items.append({ + 'chain_id': r.chain_id, + 'name': r.name, + 'number': int(r.number), + 'insertion_code': r.insertion_code, + 'spec': r.atomspec, + 'resname': r.name, + 'score': s, + 'classification': cls, + }) + items.sort(key=lambda it: ( + severity_rank[it['classification']], + it['chain_id'], + it['number'], + )) + + return { + 'model': structure.atomspec, + 'include': include_choice, + 'cutoff_allowed': float(allowed_cutoff), + 'cutoff_outlier': float(outlier_cutoff), + 'n_rotameric': int(n_total), + 'n_favored': n_favored, + 'n_allowed': n_allowed, + 'n_outlier': n_outlier, + 'items': items, + } + + +def 
isolde_validate_peptidebonds(session, model=None, + save_file=None, log=False, limit=None): + ''' + Report cis and twisted peptide bonds in ``model`` (or ISOLDE's currently + selected model), using the same omega-dihedral classification that + ISOLDE's "Peptide Bond Validation" panel applies. A bond is classified + ``cis`` when |omega| < CIS_PEPTIDE_BOND_CUTOFF (default 30 deg) and + ``twisted`` when |omega| is between that cutoff and + pi - TWISTED_PEPTIDE_BOND_DELTA. Cis-proline bonds are valid and are + reported separately from cis non-proline bonds. + + Read only - the model is never modified. + + Returns + ------- + dict + Summary counts (``n_residues``, ``n_cis_nonpro``, ``n_cis_pro``, + ``n_twisted``, ``n_iffy``) plus a per-bond ``items`` list. Each + item carries the chain, both residues, omega angle in degrees, + ``conformation`` (``cis`` / ``twisted``), and an ``is_proline`` + flag for the C-terminal residue. ``truncated`` indicates whether + ``limit`` clipped the list (``total_count`` is the unclipped size). 
+ ''' + m = _resolve_model(session, model) + logger = session.logger + + data = _compute_peptide_bond_report(session, m) + items = data['items'] + + columns = ['Chain', 'Residues', 'Omega (deg)', 'Conformation', 'Is Pro?'] + rows = [ + ( + it['chain_id'], + it['residue_pair_label'], + '{:.1f}'.format(it['omega_deg']), + it['conformation'], + 'yes' if it['is_proline'] else 'no', + ) for it in items + ] + + summary = ( + 'ISOLDE peptide bond check ({}): {} of {} amino-acid residues ' + 'are cis or twisted ({} cis non-Pro, {} cis Pro, {} twisted).'.format( + m.atomspec, data['n_iffy'], data['n_residues'], + data['n_cis_nonpro'], data['n_cis_pro'], data['n_twisted'], + ) + ) + summary += _full_list_hint( + 'isolde validate peptidebonds', m.atomspec, data['n_iffy']) + if data['n_cis_nonpro'] > 0 or data['n_twisted'] > 0: + logger.warning(summary) + else: + logger.info(summary) + + if log: + _dump_table_to_log(logger, summary, columns, rows) + + returned_items, truncated, total_count, returned_count = _maybe_limit(items, limit) + + result = dict(data) + result['items'] = returned_items + result['returned_count'] = int(returned_count) + result['total_count'] = int(total_count) + result['truncated'] = bool(truncated) + + if save_file is not None: + _write_results_file(save_file, + summary=summary, columns=columns, rows=rows, + json_payload=dict(data, items=items)) + logger.info('Wrote peptide bond report to {}'.format(save_file)) + + return result + + +def isolde_validate_rama(session, model=None, include='outliers', + save_file=None, log=False, limit=None): + ''' + Report Ramachandran scoring for protein residues in ``model`` (or + ISOLDE's currently selected model), using the same MolProbity contours + and bin cutoffs that ISOLDE's Ramachandran plot and validator use. + + ``include`` selects which residues appear in the per-residue list: + ``'outliers'`` (default), ``'allowed'`` (outliers + allowed) or + ``'all'`` (favored too). 
Summary counts always cover the full model. + + Read only - the model is never modified, and no live annotators are + created. To toggle the live 3D annotators see the existing ``rama`` + command. + + Returns + ------- + dict + Summary counts (``n_scorable``, ``n_favored``, ``n_allowed``, + ``n_outlier``) plus a per-residue ``items`` list with phi/psi in + degrees, the MolProbity ``score``, ``classification`` and the + Ramachandran ``case`` (e.g. ``general``, ``Gly``, ``trans-Pro``). + ''' + m = _resolve_model(session, model) + logger = session.logger + + data = _compute_rama_report(session, m, include=include) + items = data['items'] + + columns = ['Chain', 'Residue', 'Phi', 'Psi', 'Score', 'Class', 'Case'] + rows = [ + ( + it['chain_id'], + '{} {}'.format(it['name'], it['number']), + '{:.1f}'.format(it['phi_deg']), + '{:.1f}'.format(it['psi_deg']), + '{:.4f}'.format(it['score']), + it['classification'], + it['case'], + ) for it in items + ] + + summary = ( + 'ISOLDE Ramachandran check ({}): {} outliers, {} allowed, {} favored ' + '(of {} scorable residues).'.format( + m.atomspec, data['n_outlier'], data['n_allowed'], + data['n_favored'], data['n_scorable'], + ) + ) + summary += _full_list_hint( + 'isolde validate rama', m.atomspec, len(items)) + if data['n_outlier'] > 0: + logger.warning(summary) + else: + logger.info(summary) + + if log: + _dump_table_to_log(logger, summary, columns, rows) + + returned_items, truncated, total_count, returned_count = _maybe_limit(items, limit) + + result = dict(data) + result['items'] = returned_items + result['returned_count'] = int(returned_count) + result['total_count'] = int(total_count) + result['truncated'] = bool(truncated) + + if save_file is not None: + _write_results_file(save_file, + summary=summary, columns=columns, rows=rows, + json_payload=dict(data, items=items)) + logger.info('Wrote Ramachandran report to {}'.format(save_file)) + + return result + + +def isolde_validate_rotamers(session, model=None, 
include='nonfavored', + save_file=None, log=False, limit=None): + ''' + Report rotamer scoring for sidechain-bearing residues in ``model`` (or + ISOLDE's currently selected model), using the same MolProbity contours + that ISOLDE's "Rotamer Validation" panel applies. Each residue is + classified ``outlier`` / ``allowed`` / ``favored`` against the current + P-value cutoffs on the session ``RotaMgr``. + + ``include`` selects which residues appear in the per-residue list: + ``'nonfavored'`` (default; outliers + allowed), ``'outliers'`` or + ``'all'``. Summary counts always cover the full set of rotameric + residues. + + Read only - the model is never modified, and no live annotators are + created. To toggle the live 3D annotators see the existing ``rota`` + command. + + Returns + ------- + dict + Summary counts (``n_rotameric``, ``n_favored``, ``n_allowed``, + ``n_outlier``), cutoff values, and a per-residue ``items`` list + with the P-value ``score`` and ``classification``. + ''' + m = _resolve_model(session, model) + logger = session.logger + + data = _compute_rotamer_report(session, m, include=include) + items = data['items'] + + columns = ['Chain', 'Residue', 'Resname', 'P', 'Class'] + rows = [ + ( + it['chain_id'], + str(it['number']), + it['resname'], + '{:.4f}'.format(it['score']), + it['classification'], + ) for it in items + ] + + summary = ( + 'ISOLDE rotamer check ({}): {} outliers, {} allowed, {} favored ' + '(of {} rotameric residues; cutoffs allowed>={:.3f}, ' + 'outlier<{:.3f}).'.format( + m.atomspec, data['n_outlier'], data['n_allowed'], + data['n_favored'], data['n_rotameric'], + data['cutoff_allowed'], data['cutoff_outlier'], + ) + ) + summary += _full_list_hint( + 'isolde validate rotamers', m.atomspec, len(items)) + if data['n_outlier'] > 0: + logger.warning(summary) + else: + logger.info(summary) + + if log: + _dump_table_to_log(logger, summary, columns, rows) + + returned_items, truncated, total_count, returned_count = _maybe_limit(items, limit) 
+ + result = dict(data) + result['items'] = returned_items + result['returned_count'] = int(returned_count) + result['total_count'] = int(total_count) + result['truncated'] = bool(truncated) + + if save_file is not None: + _write_results_file(save_file, + summary=summary, columns=columns, rows=rows, + json_payload=dict(data, items=items)) + logger.info('Wrote rotamer report to {}'.format(save_file)) + + return result + + +def isolde_validate_clashes(session, model=None, + save_file=None, log=False, limit=200): + ''' + Report steric clashes in ``model`` (or ISOLDE's currently selected + model), using ISOLDE's ``unique_clashes`` wrapper around the ChimeraX + ``clashes`` machinery. Each clash carries the two atoms, the van der + Waals overlap in Angstroms, and a ``severity`` of either ``strict`` + (overlap >= STRICT_CUTOFF) or ``severe`` (overlap >= SEVERE_CUTOFF). + + Read only - the model is never modified. Returns a dict with summary + counts and a per-clash ``items`` list, defaulting to the worst 200 + clashes inline (use ``limit`` to widen, or ``save_file`` to capture + the full list). + + Returns + ------- + dict + Summary counts (``n_total``, ``n_severe``, ``n_strict``), cutoff + values, and a per-clash ``items`` list sorted by descending + overlap. 
+ ''' + m = _resolve_model(session, model) + logger = session.logger + + from .clashes import ( + unique_clashes, clash_atom_label, STRICT_CUTOFF, SEVERE_CUTOFF, + ) + clashes = unique_clashes(session, m.atoms) + n_total = int(len(clashes)) + + items = [] + n_severe = 0 + n_strict = 0 + for clash in clashes: + a1, a2 = clash.atoms + overlap = float(clash.overlap) + if overlap >= SEVERE_CUTOFF: + severity = 'severe' + n_severe += 1 + else: + severity = 'strict' + n_strict += 1 + items.append({ + 'atom1_spec': a1.atomspec, + 'atom1_label': clash_atom_label(a1), + 'atom2_spec': a2.atomspec, + 'atom2_label': clash_atom_label(a2), + 'overlap': overlap, + 'severity': severity, + }) + + columns = ['Atom 1', 'Atom 2', 'Overlap', 'Severity'] + rows = [ + ( + it['atom1_label'], + it['atom2_label'], + '{:.2f}'.format(it['overlap']), + it['severity'], + ) for it in items + ] + + summary = ( + 'ISOLDE clash check ({}): {} unique clashes ({} severe, {} strict; ' + 'cutoffs severe>={:.2f}, strict>={:.2f} A).'.format( + m.atomspec, n_total, n_severe, n_strict, + float(SEVERE_CUTOFF), float(STRICT_CUTOFF), + ) + ) + summary += _full_list_hint( + 'isolde validate clashes', m.atomspec, n_total) + if n_severe > 0: + logger.warning(summary) + else: + logger.info(summary) + + if log: + _dump_table_to_log(logger, summary, columns, rows) + + returned_items, truncated, total_count, returned_count = _maybe_limit(items, limit) + + result = { + 'model': m.atomspec, + 'severe_cutoff': float(SEVERE_CUTOFF), + 'strict_cutoff': float(STRICT_CUTOFF), + 'n_total': n_total, + 'n_severe': int(n_severe), + 'n_strict': int(n_strict), + 'items': returned_items, + 'returned_count': int(returned_count), + 'total_count': int(total_count), + 'truncated': bool(truncated), + } + + if save_file is not None: + _write_results_file(save_file, + summary=summary, columns=columns, rows=rows, + json_payload=dict(result, items=items)) + logger.info('Wrote clash report to {}'.format(save_file)) + + return result + + 
+def register_validate_commands(logger): + from chimerax.core.commands import ( + register, CmdDesc, BoolArg, IntArg, EnumOf, SaveFileNameArg, + ) + from ..cmd.argspec import IsoldeStructureArg + + common_kw = [ + ('save_file', SaveFileNameArg), + ('log', BoolArg), + ('limit', IntArg), + ] + + desc_pep = CmdDesc( + optional=[('model', IsoldeStructureArg)], + keyword=list(common_kw), + synopsis='Validation: report cis and twisted peptide bonds.', + ) + register('isolde validate peptidebonds', desc_pep, + isolde_validate_peptidebonds, logger=logger) + + desc_rama = CmdDesc( + optional=[('model', IsoldeStructureArg)], + keyword=list(common_kw) + [ + ('include', EnumOf(('outliers', 'allowed', 'all'))), + ], + synopsis='Validation: report Ramachandran scoring with phi/psi/score.', + ) + register('isolde validate rama', desc_rama, + isolde_validate_rama, logger=logger) + + desc_rota = CmdDesc( + optional=[('model', IsoldeStructureArg)], + keyword=list(common_kw) + [ + ('include', EnumOf(('outliers', 'nonfavored', 'all'))), + ], + synopsis='Validation: report rotamer scoring (favored/allowed/outlier).', + ) + register('isolde validate rotamers', desc_rota, + isolde_validate_rotamers, logger=logger) + + desc_clash = CmdDesc( + optional=[('model', IsoldeStructureArg)], + keyword=list(common_kw), + synopsis='Validation: report steric clashes.', + ) + register('isolde validate clashes', desc_clash, + isolde_validate_clashes, logger=logger) + + # Parent command: catches ``isolde validate`` (no subcommand) and + # ``isolde validate `` (model spec but no subcommand) and + # turns them into a helpful "expected one of: ..." error instead of + # ChimeraX's generic ``Unknown command``. The ``optional`` model arg + # exists so the parser cleanly consumes the spec - the handler always + # raises before doing any work. 
+ desc_top = CmdDesc( + optional=[('model', IsoldeStructureArg)], + synopsis='Run an ISOLDE validator (requires a subcommand: {}).'.format( + ', '.join(_VALIDATE_SUBCOMMANDS)), + ) + register('isolde validate', desc_top, isolde_validate, logger=logger) From 5098f1e449ce5b42be2722a48bf29e7429ad8835 Mon Sep 17 00:00:00 2001 From: Alexis Rohou Date: Thu, 21 May 2026 18:28:29 -0700 Subject: [PATCH 3/4] Commit change that had been missed earlier - needed for validate clash command --- isolde/src/validation/clashes.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/isolde/src/validation/clashes.py b/isolde/src/validation/clashes.py index f30f63f6..be9e31cc 100644 --- a/isolde/src/validation/clashes.py +++ b/isolde/src/validation/clashes.py @@ -4,6 +4,17 @@ SEVERE_CUTOFF=0.6 STRICT_CUTOFF=0.4 + +def clash_atom_label(atom): + ''' + Human-readable label for one side of a clash, e.g. ``'ALA A12: CB'``. + Shared by ISOLDE's GUI "Clashes" panel and the + ``isolde validate clashes`` command so the two stay in sync. + ''' + r = atom.residue + return '{} {}{}: {}'.format(r.name, r.chain_id, r.number, atom.name) + + def unique_clashes(session, atoms, severe_only = False): from chimerax.clashes import find_clashes if severe_only: From da296bf5ab29f105f264ed95dab1aace6d694150 Mon Sep 17 00:00:00 2001 From: Alexis Rohou Date: Thu, 21 May 2026 18:25:40 -0700 Subject: [PATCH 4/4] Detect thioether-bonded cysteines for any external carbon partner `cys_type()` previously only special-cased Cys SG bonded to an atom literally named `CH3` (the ACEcyc head cap used for cyclic-peptide thioethers). All other external carbon partners -- covalent ligand warhead carbons, post-translationally modified Cys, designed bioconjugates, etc. -- fell through to the metal-binding branch and were mis-parameterised as CYM, with the wrong SG charge and an unstable S--C bond during simulation. 
Match on `a.element.name == "C"` instead of the literal atom name so any external C--S bond picks the CYScyc / CCYScyc thioether template. These templates only depend on SG having one external bond; the partner's atom name does not affect the internal charges, so the broadened match is safe. N-terminal Cys with an external C--S bond (residue carries H1) now explicitly returns CYM, since no NCYScyc template ships in termods.xml. Disulfide, metal, and iron-sulfur paths are unchanged. --- isolde/src/openmm/openmm_interface.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/isolde/src/openmm/openmm_interface.py b/isolde/src/openmm/openmm_interface.py index 37ea2f02..07c8bc88 100644 --- a/isolde/src/openmm/openmm_interface.py +++ b/isolde/src/openmm/openmm_interface.py @@ -3130,11 +3130,25 @@ def cys_type(residue): if 'H1' in names: return 'NCYX' return 'CYX' - elif a.name == "CH3": + elif a.element.name == "C": + # SG bonded to any external carbon -- a thioether. + # Previously this branch only matched a partner atom + # literally named "CH3" (the ACEcyc head cap), which + # missed thioether bonds to any other external carbon + # (covalent-inhibitor warheads, post-translational + # modifications, designed bioconjugates, ...). The + # CYScyc / CCYScyc templates only depend on SG having + # one external bond; the partner atom's name does not + # affect the internal charges, so the broadened match + # is safe. if 'OXT' in names: return 'CCYScyc' - else: - return 'CYScyc' + if 'H1' in names: + # No NCYScyc template ships in termods.xml yet, so + # return CYM rather than silently mis-parameterise + # an N-terminal Cys with an S--C external bond. + return 'CYM' + return 'CYScyc' # Assume metal binding - will eventually need to do something better here return 'CYM' if a.name == 'HG':