Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser bugfixes #35

Merged
merged 9 commits into from
Dec 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .pre-commit-config.yaml

This file was deleted.

21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1 +1,22 @@
0.3.2 (2019-12-19)
***********************

Features
--------

- Introduced `towncrier` to track changes
- Added linter `flake8`
- Refactored code to match `pep8`
#32

Fix
---

- Fix WORD split by `-`

- Split by ` ` (empty space) as well

- Coverage score increase
#35


3 changes: 0 additions & 3 deletions changes/32.feature.md

This file was deleted.

16 changes: 11 additions & 5 deletions rita/engine/translate_spacy.py
Original file line number Diff line number Diff line change
def phrase_parse(value, config, op=None):
    """Parse a multi-word phrase into a sequence of spaCy ORTH patterns.

    TODO: Does not support operators

    The first splitter character found in ``value`` ("-" or " ") decides
    how the phrase is broken up.  For "-" the dash itself is kept as a
    token between the words ("a-b" => "a", "-", "b"); for " " only the
    words are yielded.  A value with no splitter is parsed whole.
    """
    splitter = next((s for s in ["-", " "]
                     if s in value), None)
    if splitter:
        buff = value.split(splitter)
        yield next(generic_parse("ORTH", buff[0], config=config, op=None))
        for b in buff[1:]:
            if splitter != " ":
                # Keep the dash as its own token; spaces produce no token.
                yield next(generic_parse("ORTH", splitter, config=config, op=None))
            yield next(generic_parse("ORTH", b, config=config, op=None))
    else:
        # BUGFIX: previously yielded the raw generator object instead of the
        # parsed pattern, inconsistent with every other yield above (and with
        # the pre-refactor behavior, which always yielded next(...)).
        yield next(generic_parse("ORTH", value, config=config, op=None))


PARSERS = {
Expand Down
7 changes: 2 additions & 5 deletions rita/macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def explode(v):
return chain(*new_lst)


def resolve_value(obj, config=None):
def resolve_value(obj, config):
context = []

logger.debug("Resolving value: {0}".format(obj))
Expand All @@ -39,10 +39,7 @@ def resolve_value(obj, config=None):
elif isinstance(obj, types.GeneratorType):
return ("either", list(obj), None)

if config:
return obj(config=config)
else:
return obj()
return obj(config=config)


def ANY(config, op=None):
Expand Down
12 changes: 10 additions & 2 deletions rita/preprocess.py
Original file line number Diff line number Diff line change
def handle_multi_word(rules, config):
    """Rewrite complex (multi-word) values into phrase patterns.

    WORD("Knee-length") => WORD("Knee"), WORD("-"), WORD("length")

    Each ("value", args, op) element whose argument contains a splitter
    character (see `is_complex`) becomes a ("phrase", args, op) element;
    all other elements pass through unchanged.  `config` is accepted for
    signature consistency with the other preprocessing steps.
    """
    for group_label, pattern in rules:
        # A comprehension replaces the per-iteration inner generator
        # function that was defined and immediately materialized before.
        rewritten = [
            ("phrase", args, op)
            if name == "value" and is_complex(args)
            else (name, args, op)
            for (name, args, op) in pattern
        ]
        yield (group_label, rewritten)


def is_complex(arg):
    """Return True if `arg` contains a splitter character ("-" or " ").

    Such values represent multi-word tokens that preprocessing must break
    up into a phrase pattern.
    """
    splitters = ["-", " "]
    # Generator form: no throwaway list inside any().
    return any(s in arg for s in splitters)

Expand Down
29 changes: 25 additions & 4 deletions tests/test_lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,14 @@ def test_tokenize_exec_macro():
def test_tokenize_two_exec_macros():
lex = RitaLexer()
lex.build()
tokens = list(lex.tokenize("""
!CONFIG("setting.1", "1")
!CONFIG("setting.2", "0")
"""))
tokens = list(
lex.tokenize(
"""
!CONFIG("setting.1", "1")
!CONFIG("setting.2", "0")
"""
)
)
assert len(tokens) == 14
assert tokens[0].type == "EXEC"
assert tokens[1].type == "KEYWORD"
Expand All @@ -89,3 +93,20 @@ def test_tokenize_two_exec_macros():
assert tokens[8].type == "KEYWORD"
assert tokens[10].type == "LITERAL"
assert tokens[12].type == "LITERAL"


def test_tokenize_list_w_one_item():
    """A one-item list assignment lexes to NAME, ASSIGN and LITERAL tokens."""
    lexer = RitaLexer()
    lexer.build()

    token_stream = lexer.tokenize(
        """
        members = { "first" }
        """
    )
    tokens = list(token_stream)

    assert tokens[0].type == "NAME"
    assert tokens[1].type == "ASSIGN"
    assert tokens[3].type == "LITERAL"
75 changes: 52 additions & 23 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ def test_parser_assign_literal_and_ignore_it(config):

results = p.parse(
"""
my_variable = "Test"
my_variable = "Test"

{WORD("something")} -> MARK("TEST")
"""
{WORD("something")} -> MARK("TEST")
"""
)
assert len(results) == 2

Expand All @@ -61,10 +61,10 @@ def test_parser_assign_literal_and_use_it(config):

results = p.parse(
"""
my_variable = "Test"
my_variable = "Test"

{WORD(my_variable)} -> MARK("TEST")
"""
{WORD(my_variable)} -> MARK("TEST")
"""
)
assert len(results) == 2

Expand All @@ -80,8 +80,8 @@ def test_parser_just_assign_macro(config):

results = p.parse(
"""
x = WORD("Test")
"""
x = WORD("Test")
"""
)
assert len(results) == 1

Expand All @@ -92,9 +92,9 @@ def test_parser_assign_two_variables(config):

results = p.parse(
"""
a = "A"
b = "B"
"""
a = "A"
b = "B"
"""
)
assert len(results) == 2

Expand All @@ -105,10 +105,10 @@ def test_parser_assign_macro_and_use_it(config):

results = p.parse(
"""
my_variable = WORD("Test")
my_variable = WORD("Test")

{my_variable} -> MARK("TEST")
"""
{my_variable} -> MARK("TEST")
"""
)
assert len(results) == 2

Expand All @@ -124,10 +124,10 @@ def test_parser_import_module(config):

results = p.parse(
"""
IMPORT("rita.modules.fuzzy") -> EXEC
IMPORT("rita.modules.fuzzy") -> EXEC

FUZZY("test") -> MARK("FUZZY_MATCH")
"""
FUZZY("test") -> MARK("FUZZY_MATCH")
"""
)

assert len(results) == 2
Expand All @@ -140,10 +140,10 @@ def test_parser_import_module_shortcut(config, caplog):

results = p.parse(
"""
!IMPORT("rita.modules.fuzzy")
!IMPORT("rita.modules.fuzzy")

FUZZY("test") -> MARK("FUZZY_MATCH")
"""
FUZZY("test") -> MARK("FUZZY_MATCH")
"""
)

assert len(results) == 2
Expand All @@ -155,10 +155,39 @@ def test_parser_config(config):

p.parse(
"""
!CONFIG("foo", "bar")
!CONFIG("testing", "1")
"""
!CONFIG("foo", "bar")
!CONFIG("testing", "1")
"""
)

assert config.foo == "bar"
assert config.testing


def test_parser_list_w_one_item(config):
    """A single-item list assignment plus one rule parses to two results."""
    parser = RitaParser(config)
    parser.build(debug=True)

    rules = """
    members = { "one" }

    IN_LIST(members) -> MARK("MEMBER")
    """
    results = parser.parse(rules)

    assert len(results) == 2

def test_parser_list_w_two_items(config):
    """A two-item list assignment plus one rule parses to two results."""
    parser = RitaParser(config)
    parser.build(debug=True)

    rules = """
    members = {"one", "two"}

    IN_LIST(members) -> MARK("MEMBER")
    """
    results = parser.parse(rules)

    assert len(results) == 2
64 changes: 64 additions & 0 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,33 @@ def compiler(self, rules):
pytest.importorskip("spacy", minversion="2.1")
return rita.compile_string(rules, use_engine="spacy")

def test_punct(self):
    """PUNCT compiles to a single IS_PUNCT spaCy pattern."""
    compiled = self.compiler('PUNCT->MARK("SOME_PUNCT")')
    print(compiled)
    assert len(compiled) == 1
    expected = {
        "pattern": [{"IS_PUNCT": True}],
        "label": "SOME_PUNCT",
    }
    assert compiled[0] == expected

def test_number(self):
    """NUM("42") compiles to a LOWER match on the literal digits."""
    compiled = self.compiler('NUM("42")->MARK("SOME_NUMBER")')
    print(compiled)
    assert len(compiled) == 1
    expected = {
        "pattern": [{"LOWER": "42"}],
        "label": "SOME_NUMBER",
    }
    assert compiled[0] == expected

def test_pos(self):
    """POS("VERB") compiles to a part-of-speech spaCy pattern."""
    compiled = self.compiler('POS("VERB")->MARK("SOME_POS")')
    print(compiled)
    assert len(compiled) == 1
    expected = {
        "pattern": [{"POS": "VERB"}],
        "label": "SOME_POS",
    }
    assert compiled[0] == expected

def test_single_word(self):
rules = self.compiler('WORD("Test")->MARK("SOME_LABEL")')
print(rules)
Expand Down Expand Up @@ -142,6 +169,31 @@ def test_double_branching_list(self):
"pattern": [{"LOWER": "test"}, {"LOWER": "-"}, {"LOWER": "5"}]
}

def test_word_with_spaces(self):
    """A WORD containing a space is split into two word tokens."""
    compiled = self.compiler('''
    WORD("test1 test2")->MARK("SPLIT_WORD")
    ''')
    print(compiled)
    # It should be split into two: WORD("test1"), WORD("test2")
    assert len(compiled) == 1
    expected = {
        "label": "SPLIT_WORD",
        "pattern": [{"LOWER": "test1"}, {"LOWER": "test2"}],
    }
    assert compiled[0] == expected

def test_word_with_dash(self):
    """A WORD containing a dash is split, keeping the dash as a token."""
    compiled = self.compiler('''
    WORD("test1-test2")->MARK("SPLIT_WORD")
    ''')
    print(compiled)
    # It should be split into two: WORD("test1"), WORD("test2")
    assert len(compiled) == 1
    expected = {
        "label": "SPLIT_WORD",
        "pattern": [{"LOWER": "test1"}, {"LOWER": "-"}, {"LOWER": "test2"}],
    }
    assert compiled[0] == expected



class TestStandalone(object):
@property
Expand All @@ -155,6 +207,18 @@ def flags(self):
def compiler(self, rules):
return rita.compile_string(rules, use_engine="standalone").patterns

def test_punct(self):
    """Standalone engine compiles PUNCT to a punctuation regex group."""
    compiled = self.compiler('PUNCT->MARK("SOME_PUNCT")')
    print(compiled)
    assert len(compiled) == 1
    expected = re.compile(r"(?P<SOME_PUNCT>[.,!;?:])", self.flags)
    assert compiled[0] == expected

def test_number(self):
    """Standalone engine compiles NUM("42") to a literal regex group."""
    compiled = self.compiler('NUM("42")->MARK("SOME_NUMBER")')
    print(compiled)
    assert len(compiled) == 1
    expected = re.compile(r"(?P<SOME_NUMBER>(42))", self.flags)
    assert compiled[0] == expected

def test_single_word(self):
rules = self.compiler('WORD("Test")->MARK("SOME_LABEL")')
print(rules)
Expand Down