From a01b1a60aae2c59169a80a71b63b5757d1eb27c5 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 09:07:14 -0500 Subject: [PATCH 1/9] WIP: Fixing unprintable paths --- tests/test_parser.py | 9 ----- yamlpath/parser.py | 92 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 30ac3e87..894f737b 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -72,15 +72,6 @@ def test_empty_str_path(parser): def test_happy_str_path_translations(parser, yaml_path, stringified): assert parser.str_path(yaml_path) == stringified -# This will be a KNOWN ISSUE for this release. The fix for this may require a -# deep rethink of the Parser class. The issue here is that escaped characters -# in YAML Paths work perfectly well, but they can't be printed back to the -# screen in their pre-parsed form. So, when a user submits a YAML Path of -# "some\\escaped\\key", all printed forms of the key will become -# "someescapedkey" even though the path WILL find the requested data. This is -# only a stringification (printing) anomoly and hense, it will be LOW PRIORITY, -# tracked as a KNOWN ISSUE, for now. -@pytest.mark.xfail @pytest.mark.parametrize("yaml_path,stringified", [ ('key\\with\\slashes', 'key\\with\\slashes'), ]) diff --git a/yamlpath/parser.py b/yamlpath/parser.py index 1b388750..011d51cf 100644 --- a/yamlpath/parser.py +++ b/yamlpath/parser.py @@ -16,7 +16,9 @@ class Parser: """Parse YAML Paths into iterable queue components.""" # Cache parsed YAML Path results across instances to avoid repeated parsing - _static_parsings = {} + _stripped_static_parsings = {} + _unstripped_static_parsings = {} + _combined_static_parsings = {} def __init__(self, logger, **kwargs): """Init this class. @@ -94,7 +96,8 @@ def str_path(self, yaml_path, **kwargs): if pathsep == '/': ppath = "/" - for (ptype, element_id) in parsed_path: + for (ptype, element_vers) in parsed_path: + element_id = element_vers[1] if ptype == PathSegmentTypes.KEY: if add_sep: ppath += pathsep @@ -133,11 +136,64 @@ def str_path(self, yaml_path, **kwargs): return ppath def parse_path(self, yaml_path): + """Parses a user's YAML Path into a queue form of its elements, each + identified by its type. See README.md for sample YAML Paths. Each + tuple in the deque indicates: + 1. The PathSegmentTypes of the element + 2. A tuple providing: + 1. The escape-stripped version of the element + 2. The non-stripped version of the same element + + Positional Parameters: + 1. yaml_path (any) The stringified YAML Path to parse + + Returns: (deque) an empty queue or a queue of tuples, each identifying + (type, (stripped_element, unstripped_element)). When yaml_path is + already a list it is blindly converted into a deque and returned. When + yaml_path is already a deque, it is blindly returned as-is. + + Raises: + YAMLPathException when yaml_path is invalid + """ + if yaml_path is None: + return deque() + elif isinstance(yaml_path, deque): + return yaml_path + elif isinstance(yaml_path, list): + return deque(yaml_path) + elif isinstance(yaml_path, dict): + raise YAMLPathException( + "YAML paths must be strings, queues, or lists", + yaml_path + ) + + if yaml_path in Parser._combined_static_parsings: + return Parser._combined_static_parsings[yaml_path] + + stripped_path = self._parse_path(yaml_path, True) + unstripped_path = self._parse_path(yaml_path, False) + combined_path = deque() + + for sref, uref in zip(stripped_path, unstripped_path): + styp = sref[0] + sele = sref[1] + uele = uref[1] + combined_path.append( + (styp, (sele, uele)) + ) + + Parser._combined_static_parsings[yaml_path] = combined_path + + return combined_path + + def _parse_path(self, yaml_path, strip_escapes=True): r"""Breaks apart a stringified YAML Path into component elements, each identified by its type. See README.md for sample YAML Paths. Positional Parameters: 1. yaml_path (any) The stringified YAML Path to parse + 2. strip_escapes (bool) True = Remove leading \ symbols, leaving only + the "escaped" symbol. False = Leave all leading \ symbols intact. Returns: (deque) an empty queue or a queue of tuples, each identifying (type, element) unless yaml_path is already a list or a deque. If @@ -149,7 +205,7 @@ def parse_path(self, yaml_path): YAMLPathException when yaml_path is invalid """ self.log.debug( - "Parser::parse_path: Evaluating {}...".format(yaml_path) + "Parser::_parse_path: Evaluating {}...".format(yaml_path) ) path_elements = deque() @@ -171,8 +227,12 @@ def parse_path(self, yaml_path): return path_elements # Don't parse a path that has already been seen - if yaml_path in Parser._static_parsings: - return Parser._static_parsings[yaml_path].copy() + if strip_escapes: + if yaml_path in Parser._stripped_static_parsings: + return Parser._stripped_static_parsings[yaml_path].copy() + else: + if yaml_path in Parser._unstripped_static_parsings: + return Parser._unstripped_static_parsings[yaml_path].copy() # Infer the path seperator pathsep = self._infer_pathsep(yaml_path) @@ -219,7 +279,8 @@ def parse_path(self, yaml_path): elif c == "\\": # Escape the next character escape_next = True - continue + if strip_escapes: + continue elif ( c == " " @@ -496,16 +557,21 @@ def parse_path(self, yaml_path): path_elements.append((element_type, element_id)) self.log.debug( - "Parser::parse_path: Parsed {} into:".format(yaml_path) + "Parser::_parse_path: Parsed {} into:".format(yaml_path) ) self.log.debug(path_elements) - # Cache the parsed results - Parser._static_parsings[yaml_path] = path_elements + # Cache the parsed results; note that the stringified YAML Path may + # differ from the user version but it has exactly the same parsed + # result, so cache it, too. str_path = self.str_path(path_elements) - if not str_path == yaml_path: - # The stringified YAML Path differs from the user version but has - # exactly the same parsed result, so cache it, too - Parser._static_parsings[str_path] = path_elements + if strip_escapes: + Parser._stripped_static_parsings[yaml_path] = path_elements + if not str_path == yaml_path: + Parser._stripped_static_parsings[str_path] = path_elements + else: + Parser._unstripped_static_parsings[yaml_path] = path_elements + if not str_path == yaml_path: + Parser._unstripped_static_parsings[str_path] = path_elements return path_elements.copy() From c78e9dcce1d9196e15e2c90cab121d808ccccb6e Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 11:23:17 -0500 Subject: [PATCH 2/9] WIP: Fixing unprintable YAML Paths --- tests/test_yamlpath.py | 2 +- yamlpath/parser.py | 33 ++++++++++++--------- yamlpath/yamlpath.py | 66 +++++++++++++++++------------------------- 3 files changed, 46 insertions(+), 55 deletions(-) diff --git a/tests/test_yamlpath.py b/tests/test_yamlpath.py index 52406547..ce79d0eb 100644 --- a/tests/test_yamlpath.py +++ b/tests/test_yamlpath.py @@ -525,7 +525,7 @@ def test_nonexistant_path_segment_types(yamlpath, yamldata): PathSegmentTypes = Enum('PathSegmentTypes', names) with pytest.raises(NotImplementedError): - for _ in yamlpath._get_elements_by_ref(yamldata, (PathSegmentTypes.DNF, False)): + for _ in yamlpath._get_elements_by_ref(yamldata, (PathSegmentTypes.DNF, ("", False, False))): pass @pytest.mark.parametrize("sep,val", [ diff --git a/yamlpath/parser.py b/yamlpath/parser.py index 011d51cf..7d6e6a48 100644 --- a/yamlpath/parser.py +++ b/yamlpath/parser.py @@ -87,6 +87,10 @@ def str_path(self, yaml_path, **kwargs): Raises: N/A """ + self.log.debug( + "Parser::str_path: Building stringified <{}>{}..." + .format(type(yaml_path), yaml_path) + ) parsed_path = self.parse_path(yaml_path) add_sep = False ppath = "" @@ -97,7 +101,7 @@ def str_path(self, yaml_path, **kwargs): ppath = "/" for (ptype, element_vers) in parsed_path: - element_id = element_vers[1] + element_id = element_vers[2] if ptype == PathSegmentTypes.KEY: if add_sep: ppath += pathsep @@ -141,8 +145,9 @@ def parse_path(self, yaml_path): tuple in the deque indicates: 1. The PathSegmentTypes of the element 2. A tuple providing: - 1. The escape-stripped version of the element - 2. The non-stripped version of the same element + 1. The original YAML Path, unparsed (for error reporting) + 2. The escape-stripped version of the element + 3. The non-stripped version of the same element Positional Parameters: 1. yaml_path (any) The stringified YAML Path to parse @@ -155,6 +160,8 @@ def parse_path(self, yaml_path): Raises: YAMLPathException when yaml_path is invalid """ + self.log.debug("Parser::parse_path: Parsing {}...".format(yaml_path)) + if yaml_path is None: return deque() elif isinstance(yaml_path, deque): @@ -168,7 +175,7 @@ def parse_path(self, yaml_path): ) if yaml_path in Parser._combined_static_parsings: - return Parser._combined_static_parsings[yaml_path] + return Parser._combined_static_parsings[yaml_path].copy() stripped_path = self._parse_path(yaml_path, True) unstripped_path = self._parse_path(yaml_path, False) @@ -179,12 +186,17 @@ def parse_path(self, yaml_path): sele = sref[1] uele = uref[1] combined_path.append( - (styp, (sele, uele)) + (styp, (yaml_path, sele, uele)) ) Parser._combined_static_parsings[yaml_path] = combined_path - return combined_path + self.log.debug( + "Parser::parse_path: Combined {} into:".format(yaml_path) + ) + self.log.debug(combined_path) + + return combined_path.copy() def _parse_path(self, yaml_path, strip_escapes=True): r"""Breaks apart a stringified YAML Path into component elements, each @@ -561,17 +573,10 @@ def _parse_path(self, yaml_path, strip_escapes=True): ) self.log.debug(path_elements) - # Cache the parsed results; note that the stringified YAML Path may - # differ from the user version but it has exactly the same parsed - # result, so cache it, too. - str_path = self.str_path(path_elements) + # Cache the parsed results. if strip_escapes: Parser._stripped_static_parsings[yaml_path] = path_elements - if not str_path == yaml_path: - Parser._stripped_static_parsings[str_path] = path_elements else: Parser._unstripped_static_parsings[yaml_path] = path_elements - if not str_path == yaml_path: - Parser._unstripped_static_parsings[str_path] = path_elements return path_elements.copy() diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index 58e49f40..44f309d7 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -165,7 +165,7 @@ def _get_nodes(self, data, yaml_path): Raises: N/A """ if data is None or yaml_path is None: - return None + return matches = 0 if yaml_path: @@ -193,7 +193,7 @@ def _get_nodes(self, data, yaml_path): yield epn if not matches: - return None + return if not matches: self.log.debug( @@ -336,7 +336,8 @@ def _get_elements_by_ref(self, data, ref): Positional Parameters: 1. data (ruamel.yaml data) The parsed YAML data to process - 2. ref (tuple(PathSegmentTypes,any)) A YAML Path segment + 2. ref (tuple(PathSegmentTypes,(str,any,any))) A YAML Path segment + reference Returns: (object) At least one YAML Node or None @@ -348,7 +349,7 @@ def _get_elements_by_ref(self, data, ref): return reftyp = ref[0] - refele = ref[1] + refele = ref[1][1] if reftyp == PathSegmentTypes.KEY: if isinstance(data, dict) and refele in data: yield data[refele] @@ -611,11 +612,14 @@ def _ensure_path(self, data, path, value=None): if path: (curtyp, curele) = curref = path.popleft() + original_path = curele[0] + stripped_ele = curele[1] + unstripped_ele = curele[2] self.log.debug( ("YAMLPath::_ensure_path: Seeking element <{}>{} in data of" + " type {}:" - ).format(curtyp, curele, type(data)) + ).format(curtyp, unstripped_ele, type(data)) ) self.log.debug(data) self.log.debug("") @@ -626,9 +630,9 @@ def _ensure_path(self, data, path, value=None): if node is not None: matched_nodes += 1 self.log.debug( - ("YAMLPath::_ensure_path: Found element {} in the" + ("YAMLPath::_ensure_path: Found element <{}>{} in the" + " data; recursing into it..." - ).format(curele) + ).format(curtyp, unstripped_ele) ) for epn in self._ensure_path(node, path.copy(), value): if epn is not None: @@ -640,9 +644,9 @@ def _ensure_path(self, data, path, value=None): ): # Add the missing element self.log.debug( - ("YAMLPath::_ensure_path: Element {} is unknown in the" - + " data!" - ).format(curele) + ("YAMLPath::_ensure_path: Element <{}>{} is unknown in" + + " the data!" + ).format(curtyp, unstripped_ele) ) if isinstance(data, list): self.log.debug( @@ -651,7 +655,7 @@ def _ensure_path(self, data, path, value=None): if curtyp is PathSegmentTypes.ANCHOR: new_val = self._default_for_child(path, value) new_ele = self._append_list_element( - data, new_val, curele + data, new_val, stripped_ele ) for node in self._ensure_path(new_ele, path, value): if node is not None: @@ -659,29 +663,23 @@ def _ensure_path(self, data, path, value=None): yield node elif ( curtyp is PathSegmentTypes.INDEX - and isinstance(curele, int) + and isinstance(stripped_ele, int) ): - for _ in range(len(data) - 1, curele): + for _ in range(len(data) - 1, stripped_ele): new_val = self._default_for_child(path, value) self._append_list_element(data, new_val) for node in self._ensure_path( - data[curele], path, value + data[stripped_ele], path, value ): if node is not None: matched_nodes += 1 yield node else: - restore_path = path.copy() - restore_path.appendleft(curref) - restore_path = self.parser.str_path(restore_path) - throw_element = deque() - throw_element.append(curref) - throw_element = self.parser.str_path(throw_element) raise YAMLPathException( "Cannot add {} subreference to lists" .format(str(curtyp)) - , restore_path - , throw_element + , original_path + , unstripped_ele ) elif isinstance(data, dict): self.log.debug( @@ -690,40 +688,28 @@ def _ensure_path(self, data, path, value=None): if curtyp is PathSegmentTypes.ANCHOR: raise NotImplementedError elif curtyp is PathSegmentTypes.KEY: - data[curele] = self._default_for_child(path, value) + data[stripped_ele] = self._default_for_child(path, value) for node in self._ensure_path( - data[curele], path, value + data[stripped_ele], path, value ): if node is not None: matched_nodes += 1 yield node else: - restore_path = path.copy() - restore_path.appendleft(curref) - restore_path = self.parser.str_path(restore_path) - throw_element = deque() - throw_element.append(curref) - throw_element = self.parser.str_path(throw_element) raise YAMLPathException( "Cannot add {} subreference to dictionaries".format( str(curtyp) ), - restore_path, - throw_element + original_path, + unstripped_ele ) else: - restore_path = path.copy() - restore_path.appendleft(curref) - restore_path = self.parser.str_path(restore_path) - throw_element = deque() - throw_element.append(curref) - throw_element = self.parser.str_path(throw_element) raise YAMLPathException( "Cannot add {} subreference to scalars".format( str(curtyp) ), - restore_path, - throw_element + original_path, + unstripped_ele ) else: From 34cb896dd78a17eb52e1f8c4e677710e7ddf9c88 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 13:04:30 -0500 Subject: [PATCH 3/9] Added tests for new caps --- tests/test_parser.py | 39 ++++++++++++++++++++++++++++++++++++++- tests/test_yamlpath.py | 18 +++++++++++++----- yamlpath/__init__.py | 1 + 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 894f737b..0a8cc142 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -6,7 +6,7 @@ from yamlpath.parser import Parser from yamlpath.exceptions import YAMLPathException from yamlpath.wrappers import ConsolePrinter -from yamlpath.enums import PathSeperators +from yamlpath.enums import PathSeperators, PathSegmentTypes @pytest.fixture def parser(): @@ -145,3 +145,40 @@ def test_pretyped_pathsep(pathsep, compare): def test_bad_pathsep(): with pytest.raises(YAMLPathException): _ = Parser(None, pathsep="no such seperator!") + +@pytest.mark.parametrize("yaml_path,strip_escapes", [ + ("some.hash.key", True), + ("some.hash.key", False), + ("/some/hash/key", True), + ("/some/hash/key", False), +]) +def test_repeat_parsings(parser, yaml_path, strip_escapes): + orig = parser._parse_path(yaml_path, strip_escapes) + comp = parser._parse_path(yaml_path, strip_escapes) + assert orig == comp + +def test_no_dict_parsings(parser): + with pytest.raises(YAMLPathException): + _ = parser._parse_path({}) + +def test_list_to_deque_parsing(parser): + parsed_list = [ + (PathSegmentTypes.KEY, 'aliases'), + (PathSegmentTypes.ANCHOR, 'secretIdentity') + ] + verify_queue = deque([ + (PathSegmentTypes.KEY, 'aliases'), + (PathSegmentTypes.ANCHOR, 'secretIdentity') + ]) + assert verify_queue == parser._parse_path(parsed_list) + +def test_deque_to_deque_parsings(parser): + verify_queue = deque([ + (PathSegmentTypes.KEY, 'aliases'), + (PathSegmentTypes.ANCHOR, 'secretIdentity') + ]) + assert verify_queue == parser._parse_path(verify_queue) + +def test_none_to_empty_deque_parsings(parser): + verify_queue = deque() + assert verify_queue == parser._parse_path(None) diff --git a/tests/test_yamlpath.py b/tests/test_yamlpath.py index ce79d0eb..e462500c 100644 --- a/tests/test_yamlpath.py +++ b/tests/test_yamlpath.py @@ -10,7 +10,7 @@ from ruamel.yaml.scalarfloat import ScalarFloat from ruamel.yaml.scalarint import ScalarInt -from yamlpath import YAMLPath +from yamlpath import YAMLPath, Parser from yamlpath.exceptions import YAMLPathException from yamlpath.enums import ( YAMLValueFormats, @@ -161,11 +161,14 @@ ] @pytest.fixture -def yamlpath(): - """Returns a YAMLPath with a quiet logger.""" +def quiet_logger(): args = SimpleNamespace(verbose=False, quiet=True, debug=False) - logger = ConsolePrinter(args) - return YAMLPath(logger) + return ConsolePrinter(args) + +@pytest.fixture +def yamlpath(quiet_logger): + """Returns a YAMLPath with a quiet logger.""" + return YAMLPath(quiet_logger) @pytest.fixture def yamldata(): @@ -582,3 +585,8 @@ def test_yamlpath_exception(): raise YAMLPathException("meh", "/some/path", "/some") except YAMLPathException as ex: _ = str(ex) + +def test_premade_parser(quiet_logger): + premade = Parser(quiet_logger) + preload = YAMLPath(quiet_logger, parser=premade) + assert preload.parser == premade diff --git a/yamlpath/__init__.py b/yamlpath/__init__.py index 9ba9a685..7697e6fd 100644 --- a/yamlpath/__init__.py +++ b/yamlpath/__init__.py @@ -2,3 +2,4 @@ import yamlpath.enums import yamlpath from yamlpath.yamlpath import YAMLPath +from yamlpath.parser import Parser From 4c13544007516b78f6193146841f18d2824f53f5 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 14:44:12 -0500 Subject: [PATCH 4/9] Fix: weird escape symbols were being dropped --- CHANGES | 10 ++++++++++ bin/yaml-get | 4 ++-- tests/test_parser.py | 26 ++++++++++++++++++++++++-- yamlpath/parser.py | 14 ++++++++++++-- yamlpath/yamlpath.py | 9 +++++---- 5 files changed, 53 insertions(+), 10 deletions(-) diff --git a/CHANGES b/CHANGES index e17d8d66..673d2983 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,13 @@ +1.2.1 +Bug Fixes: +* yaml-get version 1.0.2 now converts new-lines into "\n" character sequences + when writing output so that multi-line values remain one-result-per-line. +* Use of escape symbols for unusual characters (where demarcation would usually + be preferred) is now preserved. Thus, these two search phrases are now + identical: + array[.%" can't "] + array[.%\ can\'t\ ] + 1.2.0 Enhancements: * A new search operator, :, now enables capturing slices of Arrays (by 0-based diff --git a/bin/yaml-get b/bin/yaml-get index 16613708..e7361af3 100755 --- a/bin/yaml-get +++ b/bin/yaml-get @@ -31,7 +31,7 @@ from yamlpath.eyaml import EYAMLPath from yamlpath.wrappers import ConsolePrinter # Implied Constants -MY_VERSION = "1.0.1" +MY_VERSION = "1.0.2" def processcli(): """Process command-line arguments.""" @@ -169,7 +169,7 @@ def main(): if isinstance(node, list) or isinstance(node, dict): print(json.dumps(node)) else: - print("{}".format(node)) + print("{}".format(str(node).replace("\n", r"\n"))) if __name__ == "__main__": main() diff --git a/tests/test_parser.py b/tests/test_parser.py index 0a8cc142..8ee45d23 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -19,6 +19,7 @@ def test_empty_str_path(parser): assert parser.str_path("") == "" @pytest.mark.parametrize("yaml_path,stringified", [ + ("&topArrayAnchor[0]", "&topArrayAnchor[0]"), ("aliases[&anchor]", "aliases[&anchor]"), ("a l i a s e s [ & a n c h o r ]", "aliases[&anchor]"), ("aliases[2]", "aliases[2]"), @@ -27,6 +28,11 @@ def test_empty_str_path(parser): ("lookup::credentials.backend.database.password.hash", "lookup::credentials.backend.database.password.hash"), ("does::not[7].exist[4]", "does::not[7].exist[4]"), ("messy.messy.'dotted.sub.key'.child", r"messy.messy.dotted\.sub\.key.child"), +]) +def test_happy_str_path_translations_simple(parser, yaml_path, stringified): + assert parser.str_path(yaml_path) == stringified + +@pytest.mark.parametrize("yaml_path,stringified", [ ('some[search="Name Here"]', r"some[search=Name\ Here]"), ('some[search=="Name Here"]', r"some[search=Name\ Here]"), ('some[search^"Name "]', r"some[search^Name\ ]"), @@ -62,14 +68,30 @@ def test_empty_str_path(parser): ('some[!search < 42]', "some[search!<42]"), ('some[!search >= 5280]', "some[search!>=5280]"), ('some[!search <= 14000]', "some[search!<=14000]"), +]) +def test_happy_str_path_translations_simple_searches(parser, yaml_path, stringified): + assert parser.str_path(yaml_path) == stringified + +@pytest.mark.parametrize("yaml_path,stringified", [ (r'some[search =~ /^\d{5}$/]', r'some[search=~/^\d{5}$/]'), +]) +def test_happy_str_path_translations_regex_searches(parser, yaml_path, stringified): + assert parser.str_path(yaml_path) == stringified + +@pytest.mark.parametrize("yaml_path,stringified", [ ('"aliases[&some_name]"', r'aliases\[\&some_name\]'), - ('&topArrayAnchor[0]', '&topArrayAnchor[0]'), ('"&topArrayAnchor[0]"', r'\&topArrayAnchor\[0\]'), ('"&subHashAnchor.child1.attr_tst"', r'\&subHashAnchor\.child1\.attr_tst'), ("'&topArrayAnchor[!.=~/[Oo]riginal/]'", r"\&topArrayAnchor\[!\.=~/\[Oo\]riginal/\]"), ]) -def test_happy_str_path_translations(parser, yaml_path, stringified): +def test_happy_str_path_translations_bad_quotes(parser, yaml_path, stringified): + assert parser.str_path(yaml_path) == stringified + +@pytest.mark.parametrize("yaml_path,stringified", [ + ("aliases[.%' ']", r"aliases[.%\ ]"), + (r"aliases[.%\ ]", r"aliases[.%\ ]"), +]) +def test_happy_str_path_translations_weird_escapes(parser, yaml_path, stringified): assert parser.str_path(yaml_path) == stringified @pytest.mark.parametrize("yaml_path,stringified", [ diff --git a/yamlpath/parser.py b/yamlpath/parser.py index 7d6e6a48..a0ad4de8 100644 --- a/yamlpath/parser.py +++ b/yamlpath/parser.py @@ -125,7 +125,13 @@ def str_path(self, yaml_path, **kwargs): if method == PathSearchMethods.REGEX: safe_term = "/{}/".format(term.replace("/", r"\/")) else: - safe_term = str(term).replace(" ", r"\ ") + # Replace unescaped spaces with escaped spaces + safe_term = r"\ ".join( + list(map( + lambda ele: ele.replace(" ", r"\ ") + , term.split(r"\ ") + )) + ) ppath += ( "[" + str(attr) @@ -137,6 +143,10 @@ def str_path(self, yaml_path, **kwargs): add_sep = True + self.log.debug( + "Parser::str_path: Finished building <{}>{} from <{}>{}." + .format(type(ppath), ppath, type(yaml_path), yaml_path) + ) return ppath def parse_path(self, yaml_path): @@ -268,6 +278,7 @@ def _parse_path(self, yaml_path, strip_escapes=True): if escape_next: # Pass-through; capture this escaped character + escape_next = False pass elif capturing_regex: @@ -544,7 +555,6 @@ def _parse_path(self, yaml_path, strip_escapes=True): element_id += c seeking_anchor_mark = False - escape_next = False # Check for unterminated RegExes if capturing_regex: diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index 44f309d7..aab896a0 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -170,11 +170,12 @@ def _get_nodes(self, data, yaml_path): matches = 0 if yaml_path: (curtyp, curele) = curref = yaml_path.popleft() + unstripped_ele = curele[2] self.log.debug( ("YAMLPath::_get_nodes: Seeking element <{}>{} in data of" + " type {}:" - ).format(curtyp, curele, type(data)) + ).format(curtyp, unstripped_ele, type(data)) ) self.log.debug(data) self.log.debug("") @@ -184,9 +185,9 @@ def _get_nodes(self, data, yaml_path): if node is not None: matches += 1 self.log.debug( - ("YAMLPath::_get_nodes: Found element {} in the data;" - + " recursing into it..." - ).format(curele) + ("YAMLPath::_get_nodes: Found element <{}>{} in the" + + " data and recursing into it...") + .format(curtyp, unstripped_ele) ) for epn in self._get_nodes(node, yaml_path.copy()): if epn is not None: From a5273ec563541f69e3001742a9be3cf0990d5b75 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 14:47:17 -0500 Subject: [PATCH 5/9] E501 --- yamlpath/yamlpath.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index aab896a0..80ac40c5 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -689,7 +689,9 @@ def _ensure_path(self, data, path, value=None): if curtyp is PathSegmentTypes.ANCHOR: raise NotImplementedError elif curtyp is PathSegmentTypes.KEY: - data[stripped_ele] = self._default_for_child(path, value) + data[stripped_ele] = self._default_for_child( + path, value + ) for node in self._ensure_path( data[stripped_ele], path, value ): From e6a0fa6fceaeefdc4b07f1332402b1cf5149d5a5 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 15:34:49 -0500 Subject: [PATCH 6/9] Updated exception messages and related tests --- CHANGES | 15 ++++++++++++++- tests/test_yamlpath.py | 4 ++-- yamlpath/yamlpath.py | 32 ++++++++++++++++++-------------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/CHANGES b/CHANGES index 673d2983..2a9fe7ad 100644 --- a/CHANGES +++ b/CHANGES @@ -1,12 +1,25 @@ 1.2.1 +Enhancements: +* Some exception/error messages have been updated to print the entire original + -- albeit parsed -- YAML Path rather than just the present segment under + evaluation. + Bug Fixes: * yaml-get version 1.0.2 now converts new-lines into "\n" character sequences when writing output so that multi-line values remain one-result-per-line. * Use of escape symbols for unusual characters (where demarcation would usually - be preferred) is now preserved. Thus, these two search phrases are now + be more intuitive) is now preserved. Thus, these two search phrases are now identical: array[.%" can't "] array[.%\ can\'t\ ] +* The issue preventing some YAML Paths from being printable after parsing has + been fixed. Valid, parsed YAML Paths now correctly print into a re-parsable + form even with weird sequences and escapes. Note that superfluous whitespace + and other symbols are still removed or escaped when the YAML Path is printed, + so: + term [ key == "Superfluous spaces aren\'t kept." ] + correctly parses and prints as: + term[key=Superfluous\ spaces\ aren\'t\ kept.] 1.2.0 Enhancements: diff --git a/tests/test_yamlpath.py b/tests/test_yamlpath.py index e462500c..234e5325 100644 --- a/tests/test_yamlpath.py +++ b/tests/test_yamlpath.py @@ -552,12 +552,12 @@ def test_append_list_element_value_error(yamlpath): def test_get_elements_by_bad_ref(yamlpath, yamldata): with pytest.raises(YAMLPathException): - for _ in yamlpath._get_elements_by_ref(yamldata, (PathSegmentTypes.INDEX, "4F")): + for _ in yamlpath._get_elements_by_ref(yamldata, (PathSegmentTypes.INDEX, ("bad_index[4F]", "4F", "4F"))): pass def test_get_elements_by_none_refs(yamlpath, yamldata): tally = 0 - for _ in yamlpath._get_elements_by_ref(None, (PathSegmentTypes.INDEX, "4F")): + for _ in yamlpath._get_elements_by_ref(None, (PathSegmentTypes.INDEX, ("bad_index[4F]", "4F", "4F"))): tally += 1 for _ in yamlpath._get_elements_by_ref(yamldata, None): diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index 80ac40c5..b14c720e 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -350,10 +350,12 @@ def _get_elements_by_ref(self, data, ref): return reftyp = ref[0] - refele = ref[1][1] + refori = ref[1][0] + refesc = ref[1][1] + refune = ref[1][2] if reftyp == PathSegmentTypes.KEY: - if isinstance(data, dict) and refele in data: - yield data[refele] + if isinstance(data, dict) and refesc in data: + yield data[refesc] elif isinstance(data, list): # Pass-through search against possible Array-of-Hashes for rec in data: @@ -362,11 +364,11 @@ def _get_elements_by_ref(self, data, ref): yield node elif ( reftyp == PathSegmentTypes.INDEX - and isinstance(refele, str) - and ':' in refele + and isinstance(refesc, str) + and ':' in refesc ): # Array index or Hash key slice - refparts = refele.split(':', 1) + refparts = refesc.split(':', 1) min_match = refparts[0] max_match = refparts[1] if isinstance(data, list): @@ -375,8 +377,9 @@ def _get_elements_by_ref(self, data, ref): intmax = int(max_match) except ValueError: raise YAMLPathException( - "{} is not an integer array slice".format(str(refele)) - , str(ref) + "{} is not an integer array slice".format(str(refesc)) + , refori + , refune ) if intmin == intmax and len(data) > intmin: @@ -390,11 +393,12 @@ def _get_elements_by_ref(self, data, ref): yield val elif reftyp == PathSegmentTypes.INDEX: try: - intele = int(refele) + intele = int(refesc) except ValueError: raise YAMLPathException( - "{} is not an integer array index".format(str(refele)) - , str(ref) + "{} is not an integer array index".format(str(refesc)) + , refori + , refune ) if isinstance(data, list) and len(data) > intele: @@ -402,14 +406,14 @@ def _get_elements_by_ref(self, data, ref): elif reftyp == PathSegmentTypes.ANCHOR: if isinstance(data, list): for ele in data: - if hasattr(ele, "anchor") and refele == ele.anchor.value: + if hasattr(ele, "anchor") and refesc == ele.anchor.value: yield ele elif isinstance(data, dict): for _, val in data.items(): - if hasattr(val, "anchor") and refele == val.anchor.value: + if hasattr(val, "anchor") and refesc == val.anchor.value: yield val elif reftyp == PathSegmentTypes.SEARCH: - for match in self._search(data, refele): + for match in self._search(data, refesc): if match is not None: yield match else: From 8b5fa8b570a88bbe6c80e2f0ffdd4c82e06f93d0 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 15:38:58 -0500 Subject: [PATCH 7/9] Miscellaneous documentation updates --- CHANGES | 2 +- yamlpath/enums/pathsearchmethods.py | 5 ++++- yamlpath/enums/pathsegmenttypes.py | 5 ++++- yamlpath/enums/pathseperators.py | 5 ++++- yamlpath/enums/yamlvalueformats.py | 5 ++++- yamlpath/exceptions/eyamlcommand.py | 4 ++++ yamlpath/exceptions/yamlpath.py | 2 -- 7 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CHANGES b/CHANGES index 2a9fe7ad..c7b2cfa8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,7 +1,7 @@ 1.2.1 Enhancements: * Some exception/error messages have been updated to print the entire original - -- albeit parsed -- YAML Path rather than just the present segment under + -- albeit parsed -- YAML Path in addition to the present segment under evaluation. Bug Fixes: diff --git a/yamlpath/enums/pathsearchmethods.py b/yamlpath/enums/pathsearchmethods.py index 3f6e11ed..a92f0a33 100644 --- a/yamlpath/enums/pathsearchmethods.py +++ b/yamlpath/enums/pathsearchmethods.py @@ -1,4 +1,7 @@ -"""Implements the PathSearchMethods enumeration.""" +"""Implements the PathSearchMethods enumeration. + +Copyright 2019 William W. Kimball, Jr. MBA MSIS +""" from enum import Enum, auto diff --git a/yamlpath/enums/pathsegmenttypes.py b/yamlpath/enums/pathsegmenttypes.py index cbbe69ef..93d3591e 100644 --- a/yamlpath/enums/pathsegmenttypes.py +++ b/yamlpath/enums/pathsegmenttypes.py @@ -1,4 +1,7 @@ -"""Implements the PathSegmentTypes enumeration.""" +"""Implements the PathSegmentTypes enumeration. + +Copyright 2019 William W. Kimball, Jr. MBA MSIS +""" from enum import Enum, auto diff --git a/yamlpath/enums/pathseperators.py b/yamlpath/enums/pathseperators.py index baca8a49..c699a666 100644 --- a/yamlpath/enums/pathseperators.py +++ b/yamlpath/enums/pathseperators.py @@ -1,4 +1,7 @@ -"""Implements the PathSeperators enumeration.""" +"""Implements the PathSeperators enumeration. + +Copyright 2019 William W. Kimball, Jr. MBA MSIS +""" from enum import Enum, auto diff --git a/yamlpath/enums/yamlvalueformats.py b/yamlpath/enums/yamlvalueformats.py index e0babfdb..6838dfad 100644 --- a/yamlpath/enums/yamlvalueformats.py +++ b/yamlpath/enums/yamlvalueformats.py @@ -1,4 +1,7 @@ -"""Implements the YAMLValueFormats enumeration.""" +"""Implements the YAMLValueFormats enumeration. + +Copyright 2019 William W. Kimball, Jr. MBA MSIS +""" from enum import Enum, auto diff --git a/yamlpath/exceptions/eyamlcommand.py b/yamlpath/exceptions/eyamlcommand.py index 2c4189c8..66e876cf 100644 --- a/yamlpath/exceptions/eyamlcommand.py +++ b/yamlpath/exceptions/eyamlcommand.py @@ -1,2 +1,6 @@ +"""Represents an exception that occurs during an EYAML command execution. + +Copyright 2019 William W. Kimball, Jr. MBA MSIS +""" class EYAMLCommandException(Exception): pass diff --git a/yamlpath/exceptions/yamlpath.py b/yamlpath/exceptions/yamlpath.py index 20142660..34e54454 100644 --- a/yamlpath/exceptions/yamlpath.py +++ b/yamlpath/exceptions/yamlpath.py @@ -2,8 +2,6 @@ Copyright 2019 William W. Kimball, Jr. MBA MSIS """ - - class YAMLPathException(Exception): """Occurs when a YAML Path is illegal or fails to lead to a YAML node.""" From 5a7892f7459949ad6eaeb5fdc14b9c7b1b8747b3 Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 15:41:33 -0500 Subject: [PATCH 8/9] E302 --- yamlpath/exceptions/eyamlcommand.py | 2 ++ yamlpath/exceptions/yamlpath.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/yamlpath/exceptions/eyamlcommand.py b/yamlpath/exceptions/eyamlcommand.py index 66e876cf..991ee396 100644 --- a/yamlpath/exceptions/eyamlcommand.py +++ b/yamlpath/exceptions/eyamlcommand.py @@ -2,5 +2,7 @@ Copyright 2019 William W. Kimball, Jr. MBA MSIS """ + + class EYAMLCommandException(Exception): pass diff --git a/yamlpath/exceptions/yamlpath.py b/yamlpath/exceptions/yamlpath.py index 34e54454..20142660 100644 --- a/yamlpath/exceptions/yamlpath.py +++ b/yamlpath/exceptions/yamlpath.py @@ -2,6 +2,8 @@ Copyright 2019 William W. Kimball, Jr. MBA MSIS """ + + class YAMLPathException(Exception): """Occurs when a YAML Path is illegal or fails to lead to a YAML node.""" From 06ed54eb70777a189ffc1cf093cfb9f07dd216cb Mon Sep 17 00:00:00 2001 From: William Kimball <30981667+wwkimball@users.noreply.github.com> Date: Mon, 13 May 2019 15:47:39 -0500 Subject: [PATCH 9/9] Prepare 1.2.1 release --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 93d848b2..8a7564a7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="yamlpath", - version="1.2.0", + version="1.2.1", description="Generally-useful YAML and EYAML tools employing a human-friendly YAML Path", long_description=long_description, long_description_content_type="text/markdown",