Merge pull request #78 from valentingol/sets

✨ Allow (nested) set/tuple in ProcessType and set/tuple comprehension in ProcessDef
valentingol · Jan 8, 2024 · 9611e5b · 9611e5b
2 parents 221c220 + 238400c
commit 9611e5b
Show file tree

Hide file tree

Showing 7 changed files with 176 additions and 80 deletions.
diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
@@ -179,9 +179,8 @@ The default tags include:
   If/else statements and comprehension lists are also supported.
 * `@type:<my type>`: This tag checks if the key matches the specified type `<my type>`
   after each update, even if the tag is no longer present. It tries to convert
-  the type if it is not the good one. It supports basic types
-  (except for tuples and sets, which are not handled by YAML) as well as unions
-  (using "Union" or "|"), optional values, nested list, and nested dict.
+  the type if it is not the correct one. It supports basic types as well as unions
+  (using either "Union" or "|"), optional values, and nested list/set/tuple/dict.
   For instance: `my_param@type:List[Dict[str, int|float]]: [{"a": 0}]`.
 * `@select`: This tag selects the param/sub-config(s) to keep and deletes the other
   param/sub-configs in the same parent config. The tagged key is not deleted if
@@ -261,25 +260,29 @@ section of the documentation.
 
 ## Edge cases
 
-* **Please note that YAML does not support tuples and sets**, and therefore they
-  cannot be used in YAML files. If possible, consider using lists instead.
+* **YAML does not recognize "None" as a None object**, but interprets it as a
+  string. If you wish to set a None object, you can **use "null" or "Null" instead**.
 
-* Moreover, YAML does not recognize "None" as a None object, but interprets it as a
-  string. If you wish to set a None object, you can use "null" or "Null" instead.
+* **Please note that YAML does not natively support tuples and sets**, and therefore they
+  cannot be used directly in YAML files. However, you can use either cliconfig type conversion
+  (with `@type:<tuple/set>` followed by a list) or cliconfig definition
+  (with `@def` followed by a string) to define a set or a tuple. Example:
 
-* "@" is a special character used by the package to identify tags. You can't use it
-  in your parameters names (but you can use it in your values). It will raise an error
-  if you try to do so.
+```yaml
+# config.yaml
+my_tuple@type:tuple: [1, 2, 3]
+my_tuple2@def: "(1, 2, 3)"
+my_set@type:set: [1, 2, 3]
+my_set2@def: "{1, 2, 3}"
+```
 
-* "dict" and "process_list" are reserved names of attributes and should not be used
-  as sub-config or parameter names. It can raise an error if you try to access them
-  as config attributes (with dots).
+Note that with `@def` you can also create lists, sets and dicts by comprehension.
 
-In the context of this package, dictionaries are treated as sub-configurations,
-which means that modifying or adding keys directly in the additional configs may
-not be possible (because only the merge of default configuration allow adding new keys).
-If you need to have a dictionary object where you can modify the keys, consider
-using the `@dict` tag:
+* In the context of this package, **dictionaries** are treated as sub-configurations,
+which means that modifying or adding keys directly in additional configs may
+not be possible (because only default configurations allow adding new keys).
+If you need a dictionary object whose keys you can modify, **consider
+using the `@dict` tag**:
 
 For instance:
 
@@ -294,8 +297,18 @@ logging:
   styles@dict: {train_loss: red, val_acc: cyan}
 ```
 
-Like a sub-config, the dictionary can be accessed with the dot notation like this:
-`config.logging.styles.val_acc` and will return "cyan".
+This will not raise an error thanks to the `@dict` tag.
+
+Like a sub-config, the dictionary can be accessed with the dot notation:
+`config.logging.styles.val_acc` (which returns "cyan" here).
+
+* "@" is a special character used by the package to identify tags. You can't use it
+  in your parameter names if they are not intended to be tags (but you can use it
+  in your values). It will raise an error if you try to do so.
+
+* "dict" and "process_list" are reserved names of config attributes and should not be used
+  as sub-config or parameter names. If you try to do so, you will not be able to access
+  them via dots (`config.<something>`).
 
 ## Processing
 

diff --git a/cliconfig/dict_routines.py b/cliconfig/dict_routines.py
@@ -264,12 +264,14 @@ def clean_pre_flat(in_dict: Dict[str, Any], priority: str) -> Dict[str, Any]:
 
     Warns
     -----
-    - No flat key can contain a dict. Then, dicts like `{'a.b': {'c': 1}}`
-      are not supported.
-    - All the keys that contain dots (the flat keys) must be at the root.
-      Then, dicts like `{a: {'b.c': 1}}` are not supported.
-    - To summarize, the dict must contain only fully flat dicts
-      and/or fully nested dicts.
+    No flat key can contain a dict. Then, dicts like `{'a.b': {'c': 1}}`
+    are not supported.
+
+    All the keys that contain dots (the flat keys) must be at the root.
+    Then, dicts like `{a: {'b.c': 1}}` are not supported.
+
+    To summarize, the dict must contain only fully flat dicts
+    and/or fully nested dicts.
 
     Examples
     --------
@@ -332,12 +334,14 @@ def _del_key(
 
     Warns
     -----
-    - No flat key can contain a dict. Then, dicts like `{'a.b': {'c': 1}}`
-      are not supported.
-    - All the keys that contain dots (the flat keys) must be at the root.
-      Then, dicts like `{a: {'b.c': 1}}` are not supported.
-    - To summarize, the dict must contain only fully flat dicts
-      and fully nested dicts.
+    No flat key can contain a dict. Then, dicts like `{'a.b': {'c': 1}}`
+    are not supported.
+
+    All the keys that contain dots (the flat keys) must be at the root.
+    Then, dicts like `{a: {'b.c': 1}}` are not supported.
+
+    To summarize, the dict must contain only fully flat dicts
+    and fully nested dicts.
 
     Examples
     --------

diff --git a/cliconfig/processing/_ast_parser.py b/cliconfig/processing/_ast_parser.py
@@ -25,11 +25,14 @@ def _process_node(node: Any, flat_dict: dict) -> Any:
         ast.Name: _process_param_name,  # parameter name
         ast.Attribute: _process_subconfig,  # sub-config
         ast.IfExp: _process_ifexp,  # if/else
-        ast.List: _process_ltd,  # list
-        ast.Tuple: _process_ltd,  # tuple
-        ast.Dict: _process_ltd,  # dict
+        ast.List: _process_ltsd,  # list
+        ast.Tuple: _process_ltsd,  # tuple
+        ast.Set: _process_ltsd,  # set
+        ast.Dict: _process_ltsd,  # dict
         ast.Call: _process_call,  # function
-        ast.ListComp: _process_listcomp,  # comprehension list
+        ast.ListComp: _process_lsdcomp,  # comprehension list/set/dict
+        ast.SetComp: _process_lsdcomp,  # comprehension list/set/dict
+        ast.DictComp: _process_lsdcomp,  # comprehension list/set/dict
         ast.comprehension: _process_comprehension,  # comprehension
     }
     if isinstance(node, tuple(functions.keys())):
@@ -131,8 +134,8 @@ def _process_ifexp(node: Any, flat_dict: dict) -> Any:
     )
 
 
-def _process_ltd(node: Any, flat_dict: dict) -> Any:
-    """Process a list, a tuple or a dict node."""
+def _process_ltsd(node: Any, flat_dict: dict) -> Any:
+    """Process a list, a tuple, a set or a dict node."""
     if isinstance(node, ast.List):
         # List
         return [
@@ -143,6 +146,11 @@ def _process_ltd(node: Any, flat_dict: dict) -> Any:
         return tuple(
             _process_node(node=element, flat_dict=flat_dict) for element in node.elts
         )
+    if isinstance(node, ast.Set):
+        # Set
+        return {
+            _process_node(node=element, flat_dict=flat_dict) for element in node.elts
+        }
     # Dict
     return {
         _process_node(node=key, flat_dict=flat_dict): _process_node(
@@ -236,14 +244,22 @@ def _filter_allowed(list_names: List[str]) -> bool:
     return False
 
 
-def _process_listcomp(node: Any, flat_dict: dict) -> Any:
-    """Process comprehension list node."""
-    elt = node.elt
+def _process_lsdcomp(node: Any, flat_dict: dict) -> Any:
+    """Process comprehension list, set or dict node."""
     generator = node.generators[0]
-    result = []
+    if isinstance(node, (ast.ListComp, ast.SetComp)):
+        elt = node.elt
+        result = []
+        for variables in _process_node(generator, flat_dict=flat_dict):
+            result.append(_process_node(elt, flat_dict=variables))
+        return result if isinstance(node, ast.ListComp) else set(result)
+    # DictComp
+    dict_result: Dict = {}
     for variables in _process_node(generator, flat_dict=flat_dict):
-        result.append(_process_node(elt, flat_dict=variables))
-    return result
+        key = _process_node(node.key, flat_dict=variables)
+        value = _process_node(node.value, flat_dict=variables)
+        dict_result[key] = value
+    return dict_result
 
 
 def _process_comprehension(node: Any, flat_dict: dict) -> Any:

diff --git a/cliconfig/processing/_type_parser.py b/cliconfig/processing/_type_parser.py
@@ -1,14 +1,15 @@
 # Copyright (c) 2023 Valentin Goldite. All Rights Reserved.
 """Private module with type parser for processing module with type manipulation."""
+from functools import partial
 from pydoc import locate
-from typing import List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 
 def _parse_type(type_desc: str) -> Tuple:
     """Parse a type description.
 
-    Allow basic types (none, any, bool, int, float, str, list, dict), nested lists,
-    nested dicts, unions (with Union or the '|' symbol) and Optional.
+    Allow basic types (none, any, bool, int, float, str, list, set, tuple, dict),
+    nested lists/sets/dicts, unions (with Union or the '|' symbol) and Optional.
 
     Examples of representation:
     * "str" -> (str,)
@@ -41,16 +42,21 @@ def _parse_type(type_desc: str) -> Tuple:
         blocks = _split_brackets(type_desc, delimiter="|")
         types: Tuple = ()
         for block in blocks:
-            if block[:5] == "list[":
-                types += _parse_list(block)
-            elif block[:5] == "dict[":
-                types += _parse_dict(block)
-            elif block[:9] == "optional[":
-                types += _parse_optional(block)
-            elif block[:6] == "union[":
-                types += _parse_union(block)
+            if "[" in block:
+                kind = block[: block.index("[")]
+                parsing_funcs: Dict[str, Callable] = {
+                    "list": partial(_parse_set_list, kind="list"),
+                    "set": partial(_parse_set_list, kind="set"),
+                    "dict": _parse_dict,
+                    "tuple": _parse_tuple,
+                    "optional": _parse_optional,
+                    "union": _parse_union,
+                }
+                if kind not in parsing_funcs:
+                    raise ValueError(f"Unknown type: '{block}'")
+                types += parsing_funcs[kind](type_desc=block)
             else:  # Should be a base type
-                base_type = _parse_base_type(block)
+                base_type = _parse_base_type(type_desc=block)
                 if base_type is not None:
                     types += (base_type,)
                 else:
@@ -72,17 +78,17 @@ def _parse_base_type(type_desc: str) -> Optional[Type]:
     if type_desc == "any":
         # Match any type
         return object
-    if type_desc in ("bool", "int", "float", "str", "list", "dict"):
+    if type_desc in ("bool", "int", "float", "str", "list", "set", "tuple", "dict"):
         return locate(type_desc)  # type: ignore
     return None
 
 
-def _parse_list(type_desc: str) -> Tuple:
-    """Parse a "list" type description."""
-    sub_desc = type_desc[5:-1]
+def _parse_set_list(kind: str, type_desc: str) -> Tuple:
+    """Parse a "list" or a "set" type description."""
+    sub_desc = type_desc[5:-1] if kind == "list" else type_desc[4:-1]
     if len(_split_brackets(sub_desc, delimiter=",")) > 1:
-        raise ValueError(f"Invalid List type: '{type_desc}'")
-    return (("list",) + (_parse_type(sub_desc),),)
+        raise ValueError(f"Invalid {kind.capitalize()} type: '{type_desc}'")
+    return ((kind,) + (_parse_type(sub_desc),),)
 
 
 def _parse_dict(type_desc: str) -> Tuple:
@@ -96,6 +102,16 @@ def _parse_dict(type_desc: str) -> Tuple:
     return (("dict",) + (key_type,) + ((value_type),),)
 
 
+def _parse_tuple(type_desc: str) -> Tuple:
+    """Parse a "tuple" type description."""
+    sub_desc = type_desc[6:-1]
+    sub_blocks = _split_brackets(sub_desc, delimiter=",")
+    types: Tuple = ()
+    for sub_block in sub_blocks:
+        types += (_parse_type(sub_block),)
+    return (("tuple",) + types,)
+
+
 def _parse_optional(type_desc: str) -> Tuple:
     """Parse an "optional" type description."""
     sub_desc = type_desc[9:-1]
@@ -150,18 +166,26 @@ def _isinstance(obj: object, types: Union[Type, Tuple]) -> bool:
         return isinstance(obj, list) and all(
             _isinstance(elem, types[1]) for elem in obj
         )
+    if types[0] == "set" and len(types) == 2:
+        return isinstance(obj, set) and all(
+            _isinstance(elem, types[1]) for elem in obj
+        )
     if types[0] == "dict" and len(types) == 3:
         return (
             isinstance(obj, dict)
             and all(_isinstance(key, types[1]) for key in obj)
             and all(_isinstance(value, types[2]) for value in obj.values())
         )
+    if types[0] == "tuple" and len(types) >= 2:
+        return isinstance(obj, tuple) and all(
+            _isinstance(elem, types[i + 1]) for i, elem in enumerate(obj)
+        )
     if isinstance(types[0], (type, tuple)):
         return any(_isinstance(obj, sub_types) for sub_types in types)
     raise ValueError(f"Invalid type for _isinstance: '{types}'")
 
 
-def _convert_type(obj: object, types: Union[Type, Tuple]) -> object:
+def _convert_type(obj: Any, types: Union[Type, Tuple]) -> Any:
     """Try to convert an object to a type or a tuple of types.
 
     Intended to work with the outputs of _parse_type.
@@ -172,22 +196,29 @@ def _convert_type(obj: object, types: Union[Type, Tuple]) -> object:
         return obj
 
 
-def _convert_type_internal(obj: object, types: Union[Type, Tuple]) -> object:
+def _convert_type_internal(obj: Any, types: Union[Type, Tuple]) -> Any:
     """Try to convert an object to a type or a tuple of types.
 
     Intended to work with the outputs of _parse_type.
     """
     if isinstance(types, type):
         return types(obj)
-    if types[0] == "list" and len(types) == 2:
-        return [_convert_type_internal(elem, types[1]) for elem in obj]  # type: ignore
+    if types[0] in ("list", "set") and len(types) == 2:
+        type_to_use = locate(types[0])  # list or set
+        return type_to_use(
+            _convert_type_internal(elem, types[1]) for elem in obj
+        )  # type: ignore
     if types[0] == "dict" and len(types) == 3:
         return {
             _convert_type_internal(key, types[1]): _convert_type_internal(
                 value, types[2]
             )
-            for key, value in obj.items()  # type: ignore
+            for key, value in obj.items()
         }
+    if types[0] == "tuple" and len(types) >= 2:
+        return tuple(
+            _convert_type_internal(elem, types[i + 1]) for i, elem in enumerate(obj)
+        )
     if isinstance(types[0], (type, tuple)):
         if any(_isinstance(obj, sub_types) for sub_types in types):
             return obj

diff --git a/cliconfig/processing/builtin.py b/cliconfig/processing/builtin.py
@@ -406,9 +406,9 @@ class ProcessTyping(Processing):
     """Try to convert and force a type with `@type:<mytype>` tag.
 
     The type is forced forever.
-    Allow basic types (none, any, bool, int, float, str, list, dict), nested lists,
-    nested dicts, unions (with Union or the '|' symbol) and Optional.
-    The type description is lowercased and spaces are removed.
+    Allow basic types (none, any, bool, int, float, str, list, set, tuple, dict),
+    nested lists/sets/dicts, unions (with Union or the '|' symbol) and Optional.
+    The type description is automatically lowercased and spaces are removed.
 
     For instance: `@type:None|List[Dict[str, int|float]]` is valid and force
     the type to be None or a list containing dicts with str keys and int or float
@@ -823,10 +823,11 @@ class ProcessDict(Processing):
 
     Warns
     -----
-    - Processings are not applied in the dict keys. In particular,
-      the tags are not used and not removed.
-    - The tag `@dict` must be added at the key containing
-      the dict every time you want to modify the dict.
+    Processings are not applied in the dict keys. In particular,
+    the tags are not used and not removed.
+
+    The tag `@dict` must be added at the key containing
+    the dict every time you want to modify the dict.
     """
 
     class PseudoDict:

diff --git a/tests/unit/processing/test_ast_parser.py b/tests/unit/processing/test_ast_parser.py
@@ -27,16 +27,16 @@ def test_ast_parser() -> None:
     result = _process_node(node=tree.body, flat_dict=flat_dict)
     check.equal(result, (0.2, 0.2))
 
-    expr = "sum([1 for _ in range(2)])"
+    expr = "sum([1 for _ in range(2)]), {i for i in range(3)}, {i: 0 for i in range(3)}"
     tree = ast.parse(expr, mode="eval")
     result = _process_node(node=tree.body, flat_dict=flat_dict)
-    check.equal(result, 2)
+    check.equal(result, (2, {0, 1, 2}, {0: 0, 1: 0, 2: 0}))
 
     flat_dict = {"elems": [(1, 2), (3, 4), (5, 6)], "val": 2}
-    expr = "{'list': [i+2*j for i, j in elems if i > val]}, val"
+    expr = "{'list': [i+2*j for i, j in elems if i > val]}, {val}"
     tree = ast.parse(expr, mode="eval")
     result = _process_node(node=tree.body, flat_dict=flat_dict)
-    check.equal(result, ({"list": [11, 17]}, 2))
+    check.equal(result, ({"list": [11, 17]}, {2}))
 
     flat_dict = {"a": {"b": 1}}
     expr = "list(a.keys())"