Skip to content


Backends: add known hosts files backend
Browse files Browse the repository at this point in the history
Change-Id: Ic0f6b89c4c08e65dfebb6a61bab7b7831188b2ca
  • Loading branch information
volans- committed Feb 14, 2018
1 parent c0d7201 commit 1f7dcf9
Show file tree
Hide file tree
Showing 12 changed files with 539 additions and 2 deletions.
1 change: 0 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ with different backend modules and combine their results for a fine grained sele
selected, and can provide multiple execution strategies. The executed commands outputs are automatically grouped for an
easy-to-read result.

It can be used both via its command line interface (CLI) `cumin` and as a Python 3 only library.
Cumin was Python 2 only before the 3.0.0 release, due to ClusterShell not yet being Python 3 compatible.

Expand Down
1 change: 0 additions & 1 deletion TODO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ New Features
parameter. Needs a new local transport with ExecWorker to shell out in parallel.
* backends: generalize backends to allow to return other data too, not only the host certnames.
* backends: add a new backend to support conftool.
* backends: add a new backend to query the known hosts file format.
* puppetdb backend: add support for API v4.
* CLI: when ``-i/--interactive`` is used and no command or query is specified, drop into a REPL session allowing to
easily setup them.
Expand Down
255 changes: 255 additions & 0 deletions cumin/backends/knownhosts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
"""Known hosts backend."""
import ipaddress

import pyparsing as pp

from ClusterShell.NodeSet import NodeSet
from ClusterShell.NodeUtils import GroupResolver, GroupSource

from cumin.backends import BaseQueryAggregator, InvalidQueryError

def grammar():
    """Define the query grammar.

    Some query examples:

    * Simple selection: ``host1.domain``
    * ClusterShell syntax for hosts expansion: ``host10[10-42].domain,host2010.other-domain``
    * ClusterShell syntax for hosts globbing: ``host10[10-42]*``
    * A complex selection: ``host100[1-5]* or (host10[30-40].domain and (host10[10-42].domain and not host33.domain))``

    Backus-Naur form (BNF) of the grammar::

            <grammar> ::= <item> | <item> <boolean> <grammar>
               <item> ::= <hosts> | "(" <grammar> ")"
            <boolean> ::= "and not" | "and" | "xor" | "or"

    Given that the pyparsing library defines the grammar in a BNF-like style, for the details of the tokens not
    specified above check directly the source code.

    Returns:
        pyparsing.ParserElement: the grammar parser.

    """
    # Boolean operators. 'and not' is listed first so that it is tried before the shorter 'and' alternative.
    boolean = (pp.CaselessKeyword('and not').leaveWhitespace() | pp.CaselessKeyword('and') |
               pp.CaselessKeyword('xor') | pp.CaselessKeyword('or'))('bool')

    # Parentheses
    lpar = pp.Literal('(')('open_subgroup')
    rpar = pp.Literal(')')('close_subgroup')

    # Hosts selection: clustershell (,!&^[]) syntax is allowed: host10[10-42].domain
    # The negative lookahead prevents a boolean operator from being consumed as a host name.
    hosts = (~(boolean) + pp.Word(pp.alphanums + '-_.,!&^[]*?'))('hosts')

    # Final grammar, see the docstring for its BNF based on the tokens defined above
    # Groups are used to split the parsed results for an easy access
    full_grammar = pp.Forward()
    item = hosts | lpar + full_grammar + rpar
    full_grammar << pp.Group(item) + pp.ZeroOrMore(pp.Group(boolean + item))  # pylint: disable=expression-not-assigned

    return full_grammar

class KnownHostsLineError(InvalidQueryError):
    """Custom exception class for invalid lines in SSH known hosts files.

    Raised by the known hosts line parser when a line cannot be parsed, for example because it has too few fields
    or carries an unknown ``@`` marker.
    """

class KnownHostsSkippedLineError(InvalidQueryError):
    """Custom exception class for skipped lines in SSH known hosts files.

    Raised by the known hosts line parser for lines that are deliberately ignored: empty lines, comments, hashed
    entries and ``@revoked`` entries.
    """

class KnownHostsQuery(BaseQueryAggregator):
    """KnownHostsQuery query builder.

    The ``knownhosts`` backend allows to use Cumin taking advantage of existing SSH known hosts files that are not
    hashed. It allows to write arbitrarily complex queries with subgroups and boolean operators, but each item must
    be either the hostname itself, or using host expansion with the powerful
    :py:class:`ClusterShell.NodeSet.NodeSet` syntax.

    The typical use case for the ``knownhosts`` backend is when the known hosts file(s) are generated and kept updated
    by some external configuration manager or tool that is not yet supported as a backend for Cumin. It can also work
    as a fallback backend in case the primary backend is unavailable but the known hosts file(s) are still up to date.
    """

    grammar = grammar()
    """:py:class:`pyparsing.ParserElement`: load the grammar parser only once in a singleton-like way."""

    def __init__(self, config):
        """Known hosts query constructor, initialize the known hosts.

        Arguments:
            config: according to parent :py:meth:`cumin.backends.BaseQuery.__init__`. The ``knownhosts`` key, if
                present, is read when the known hosts files are loaded.

        """
        super().__init__(config)
        self.known_hosts = set()  # Hostnames collected from all the configured known hosts files
        self.resolver = None  # Built lazily on the first query, once the known hosts are loaded

    def _build(self, query_string):
        """Override parent method to lazy-loading the known hosts if needed.

        Arguments:
            query_string: according to parent :py:meth:`cumin.backends.BaseQuery._build`.

        """
        if not self.known_hosts:
            self._load_known_hosts()

        if self.resolver is None:
            source = GroupSource('all', allgroups='\n'.join(self.known_hosts))
            self.resolver = GroupResolver(default_source=source)

        super()._build(query_string)

    def _execute(self):
        """Override parent method to ensure to return only existing hosts.

        Returns:
            ClusterShell.NodeSet.NodeSet: according to parent :py:meth:`cumin.backends.BaseQuery._execute`,
            intersected with the hosts resolved through the known hosts resolver.

        """
        hosts = super()._execute()
        return hosts & NodeSet('*', resolver=self.resolver)

    def _parse_token(self, token):
        """Concrete implementation of parent abstract method.

        Arguments:
            token: according to parent :py:meth:`cumin.backends.BaseQueryAggregator._parse_token`.

        Raises:
            cumin.backends.InvalidQueryError: if the token is not a :py:class:`pyparsing.ParseResults` or has an
                unexpected structure.

        """
        if not isinstance(token, pp.ParseResults):  # pragma: no cover - this should never happen
            raise InvalidQueryError('Expecting ParseResults object, got {type}: {token}'.format(
                type=type(token), token=token))

        token_dict = token.asDict()
        self.logger.trace('Token is: %s | %s', token_dict, token)

        if 'hosts' in token_dict:
            element = self._get_stack_element()
            element['hosts'] = NodeSet.fromlist(token_dict['hosts'], resolver=self.resolver)
            if 'bool' in token_dict:
                element['bool'] = token_dict['bool']
            # NOTE(review): helper names below come from BaseQueryAggregator, not visible here - confirm.
            self.stack_pointer['children'].append(element)
        elif 'open_subgroup' in token_dict and 'close_subgroup' in token_dict:
            self._open_subgroup()
            if 'bool' in token_dict:
                self.stack_pointer['bool'] = token_dict['bool']
            for subtoken in token:
                if isinstance(subtoken, str):  # Grammar literals, boolean operators and parentheses
                    continue
                self._parse_token(subtoken)
            self._close_subgroup()
        else:  # pragma: no cover - this should never happen
            raise InvalidQueryError('Got unexpected token: {token}'.format(token=token))

    def _load_known_hosts(self):
        """Load all known hosts file listed in the configuration.

        The file names are read from the ``files`` key of the ``knownhosts`` configuration section. Invalid and
        skipped lines are logged and ignored, they do not interrupt the loading.
        """
        config = self.config.get('knownhosts', {})
        known_hosts_filenames = config.get('files', [])

        for filename in known_hosts_filenames:
            hosts = set()
            with open(filename, 'r') as known_hosts_file:
                for lineno, line in enumerate(known_hosts_file, 1):
                    try:
                        found, skipped = KnownHostsQuery.parse_known_hosts_line(line)
                    except KnownHostsLineError as e:
                        self.logger.warning("Discarded invalid line %d (%s) in known hosts file '%s': %s",
                                            lineno, e, filename, line)
                    except KnownHostsSkippedLineError as e:
                        self.logger.trace("Skipped %s line %d in known hosts file '%s': %s", e, lineno, filename, line)
                    else:
                        hosts.update(found)
                        if skipped:
                            self.logger.trace("Skipped patterns at line %d in known hosts file '%s': %s",
                                              lineno, filename, ', '.join(skipped))

            self.logger.debug("Loaded %d hosts from '%s'", len(hosts), filename)
            self.known_hosts.update(hosts)

    @staticmethod
    def parse_known_hosts_line(line):
        """Parse an SSH known hosts formatted line and extract the valid hostnames.

        See the ``SSH_KNOWN_HOSTS FILE FORMAT`` in ``man sshd`` for the details of the file format.

        Arguments:
            line (str): the line to parse.

        Raises:
            KnownHostsSkippedLineError: if the line is skipped.
            KnownHostsLineError: if unable to parse the line.

        Returns:
            tuple: a tuple with two sets, the hostnames found in the given line and the hostnames skipped.

        """
        line = line.strip()
        if not line:
            raise KnownHostsSkippedLineError('empty line')

        if line[0] == '#':
            raise KnownHostsSkippedLineError('comment')

        if line[0] == '|':
            raise KnownHostsSkippedLineError('hashed')

        fields = line.split()
        if len(fields) < 3:
            raise KnownHostsLineError('not enough fields')

        if line[0] == '@':
            # Lines with a marker have one additional leading field
            if len(fields) < 4:
                raise KnownHostsLineError('not enough fields')

            if fields[0] == '@cert-authority':
                line_hosts = fields[1]
            elif fields[0] == '@revoked':
                raise KnownHostsSkippedLineError('revoked')
            else:
                raise KnownHostsLineError('unknown marker')
        else:
            line_hosts = fields[0]

        return KnownHostsQuery.parse_line_hosts(line_hosts)

    @staticmethod
    def parse_line_hosts(line_hosts):
        """Parse a comma-separated hostnames from an SSH known hosts formatted line and extract the valid hostnames.

        Arguments:
            line_hosts (str): the hostnames to parse.

        Returns:
            tuple: a tuple with two sets, the hostnames found in the given line and the hostnames skipped.

        """
        hosts = set()
        skipped = set()
        for host in line_hosts.split(','):
            if host.startswith('!'):  # Drop the leading marker and keep the name that follows
                host = host[1:]

            if host.startswith('['):  # Bracketed form: keep only what is enclosed in the brackets
                host = host[1:].split(']')[0]

            # Checked after the stripping above so that a lone '!' or '[]' is ignored instead of failing
            if not host:
                continue

            if '*' in host or '?' in host:
                skipped.add(host)
                continue

            try:
                ipaddress.ip_address(host)
            except ValueError:
                hosts.add(host)  # Add hostnames, skip IP addresses

        return hosts, skipped

# NOTE(review): the assignment was missing from this copy of the file, only its docstring survived. 'K' is the
# prefix expected for the knownhosts backend - confirm against the backends registered in cumin.grammar.
GRAMMAR_PREFIX = 'K'
""":py:class:`str`: the prefix associated to this grammar, to register this backend into the general grammar.
Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`."""

query_class = KnownHostsQuery  # pylint: disable=invalid-name
"""Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`."""
7 changes: 7 additions & 0 deletions cumin/tests/fixtures/backends/grammars/knownhosts_invalid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Invalid grammars
not host1
host1 or not host2
host1 and (not host2)
Z:category = value
14 changes: 14 additions & 0 deletions cumin/tests/fixtures/backends/grammars/knownhosts_valid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Valid grammars
hostname and host_name.domain.tld
host1 or host2
host1 and host2
host1 and not host2
(host1 or host2) and host1
((host1[0-9] or host01) and host[01-10])
50 changes: 50 additions & 0 deletions cumin/tests/fixtures/backends/knownhosts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# This is a comment and should be ignored, like empty lines

# Hostname only
host1.domain ecdsa-sha2-nistp256 AAAA...=
# IPv4 only ecdsa-sha2-nistp256 AAAA...=
# IPv6 only
fe80::3 ecdsa-sha2-nistp256 AAAA...=
# Hostname and IPv4
host4.domain, ecdsa-sha2-nistp256 AAAA...=
# Hostname and IPv6
host5.domain,fe80::9 ecdsa-sha2-nistp256 AAAA...=
# IPv4 and IPv6,fe80::11 ecdsa-sha2-nistp256 AAAA...=
# Hostname, IPv4 and IPv6
host7.domain,,fe80::13 ecdsa-sha2-nistp256 AAAA...=
# CA marker
@cert-authority host8.domain ssh-rsa AAAA...=
# Revoked marker
@revoked host9.domain ssh-rsa AAAA...=
# Hashed line
|1|HaSh=|HaSh= ecdsa-sha2-nistp256 AAAA...=
# Not enough fields
host10.domain ssh-rsa
# Not enough fields with marker
@cert-authority host11.domain ssh-rsa
# Unknown marker
@marker host12.domain ssh-rsa AAAA...=
# Patterns only
*.domain ecdsa-sha2-nistp256 AAAA...=
host?.domain ecdsa-sha2-nistp256 AAAA...=
# Hostname and pattern
host13.domain,*.otherdomain ecdsa-sha2-nistp256 AAAA...=
*.otherdomain,host14.domain ecdsa-sha2-nistp256 AAAA...=
# IPv4 and pattern,*.otherdomain ecdsa-sha2-nistp256 AAAA...=
*.otherdomain, ecdsa-sha2-nistp256 AAAA...=
# IPv6 and pattern
fe80::3,*.otherdomain ecdsa-sha2-nistp256 AAAA...=
*.otherdomain,fe80::3 ecdsa-sha2-nistp256 AAAA...=
# Hostname, IPv4 and pattern
host4.domain,*.otherdomain, ecdsa-sha2-nistp256 AAAA...=
# Hostname, IPv6 and pattern
host5.domain,*.otherdomain,fe80::9 ecdsa-sha2-nistp256 AAAA...=
# IPv4, IPv6 and pattern,*.otherdomain,fe80::11 ecdsa-sha2-nistp256 AAAA...=
# Hostname, IPv4, IPv6 and pattern
host7.domain,,*.otherdomain,fe80::13 ecdsa-sha2-nistp256 AAAA...=

invalid line
9 changes: 9 additions & 0 deletions cumin/tests/fixtures/backends/knownhosts_man.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Comments allowed at start of line
closenet, 1024 37 159...93, ssh-rsa AAAA1234.....=
# A hashed hostname
|1|JfKTdBh7rNbXkVAQCRp4OQoPfmI=|USECr3SWf1JUPsms5AqfD5QfxkM= ssh-rsa AAAA1234.....=
# A revoked key
@revoked * ssh-rsa AAAAB5W...
# A CA key, accepted for any host in *.mydomain.com or *.mydomain.org
@cert-authority *.mydomain.org,*.mydomain.com ssh-rsa AAAAB5W...

0 comments on commit 1f7dcf9

Please sign in to comment.