Skip to content

Commit

Permalink
Merge pull request #103 from rolandmueller/feature/tag_word
Browse files Browse the repository at this point in the history
Add TAG_WORD macro to Tag module
  • Loading branch information
zaibacu committed Oct 2, 2020
2 parents d8182b4 + b7358e2 commit 1cae785
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 12 deletions.
3 changes: 3 additions & 0 deletions changes/103.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Additional macro for the `tag` module that allows tagging a specific word or a list of words

Implemented by: Roland M. Mueller (https://github.com/rolandmueller)
26 changes: 24 additions & 2 deletions docs/modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ vehicles={"car", "motorbike", "bicycle", "ship", "plane"}

## Tag

Is used or generating POS/TAG patterns based on a Regex
e.g. TAG("^NN|^JJ") for nouns or adjectives.
This module offers two new macros: `TAG` and `TAG_WORD`.


`TAG` is used for generating POS/TAG patterns based on a Regex
e.g. `TAG("^NN|^JJ")` for nouns or adjectives.

Works only with spaCy engine

Expand All @@ -55,6 +58,25 @@ Usage:
{WORD*, TAG("^NN|^JJ")}->MARK("TAGGED_MATCH")
```

`TAG_WORD` is for generating TAG patterns combined with a word or a list of words.

e.g. match "proposed" only when it is used as a verb in the sentence (and not as an adjective):

```
!IMPORT("rita.modules.tag")
TAG_WORD("^VB", "proposed")
```

or, e.g., match a list of words only when they are tagged as verbs:

```
!IMPORT("rita.modules.tag")
words = {"perceived", "proposed"}
{TAG_WORD("^VB", words)?}->MARK("LABEL")
```

## Orth

Ignores case-insensitive configuration and checks words as written
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rita-dsl"
version = "0.6.9"
version = "0.6.10"
description = "DSL for building language rules"
authors = [
"Šarūnas Navickas <zaibacu@gmail.com>"
Expand Down
2 changes: 1 addition & 1 deletion rita/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

logger = logging.getLogger(__name__)

__version__ = (0, 6, 9, os.getenv("VERSION_PATCH"))
__version__ = (0, 6, 10, os.getenv("VERSION_PATCH"))


def get_version():
Expand Down
19 changes: 16 additions & 3 deletions rita/engine/translate_spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,26 @@ def phrase_parse(value, config, op=None):
yield generic_parse("ORTH", value, config=config, op=None)


def tag_parse(values, config, op=None):
    """
    For generating POS/TAG patterns based on a Regex
    e.g. TAG("^NN|^JJ") for adjectives or nouns
    also deals with TAG_WORD for tag and word or tag and list

    :param values: dict with a required "tag" key (regex applied to the
        token's TAG) and, optionally, either "word" (a single string) or
        "list" (an iterable of strings). "word" wins if both are present.
    :param config: object exposing a truthy/falsy ``ignore_case`` attribute
    :param op: optional operator; stored under "OP" only when truthy
    :return: generator yielding exactly one pattern dict
    """
    # Local import: the module's import block is not visible from here.
    import re

    d = {"TAG": {"REGEX": values["tag"]}}

    # Case-insensitive rules compare against the lowercased token text,
    # case-sensitive ones against the text exactly as written.
    ignore_case = config.ignore_case
    attr = "LOWER" if ignore_case else "TEXT"

    def normalize(text):
        return text.lower() if ignore_case else text

    if "word" in values:
        d[attr] = normalize(values["word"])
    elif "list" in values:
        ordered = sorted(normalize(item) for item in values["list"])
        # Escape each word so regex metacharacters (".", "+", "(" ...) are
        # matched literally, the same way the single-word branch behaves.
        alternatives = "|".join(re.escape(item) for item in ordered)
        d[attr] = {"REGEX": r"^({0})$".format(alternatives)}

    if op:
        d["OP"] = op
    yield d
Expand Down
25 changes: 20 additions & 5 deletions rita/modules/tag.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
from rita.macros import resolve_value


def TAG(tag, config, op=None):
    """
    For generating POS/TAG patterns based on a Regex
    e.g. TAG("^NN|^JJ") for nouns or adjectives

    :param tag: regex to match against the token tag
    :param config: accepted as part of the macro signature; not used here
    :param op: optional operator, passed through unchanged
    :return: tuple ``("tag", {"tag": tag}, op)``
    """
    # The tag is wrapped in a dict so TAG and TAG_WORD hand the same
    # payload shape (always carrying a "tag" key) to the "tag" handler.
    values = {"tag": tag}
    return "tag", values, op


def TAG_WORD(tag, value, config, op=None):
    """
    For generating TAG patterns with a word or a list
    e.g. match only "proposed" when it is used as a verb (and not an adjective):
    TAG_WORD("^VB", "proposed")
    e.g. match a list of words only to verbs
    words = {"perceived", "proposed"}
    {TAG_WORD("^VB", words)?}->MARK("LABEL")

    :param tag: regex to match against the token tag
    :param value: a single word, or a list of words
    :param config: accepted as part of the macro signature; not used here
    :param op: optional operator, passed through unchanged
    :return: tuple ``("tag", values, op)`` where ``values`` holds "tag" plus
        either "list" (when ``value`` is a list) or "word" (anything else)
    """
    values = {"tag": tag}
    # isinstance (rather than an exact type comparison) so that list
    # subclasses are still routed to the "list" branch.
    if isinstance(value, list):
        values["list"] = value
    else:
        values["word"] = value
    return "tag", values, op
55 changes: 55 additions & 0 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,61 @@ def test_tag_module(self):
"pattern": [{"TAG": {"REGEX": "^NN|^JJ"}}]
}

def test_tag_word(self):
    """TAG_WORD with a single word compiles to one rule with LOWER and TAG keys."""
    source = """
!IMPORT("rita.modules.tag")
TAG_WORD("^VB", "proposed")->MARK("TEST_TAG")
"""
    expected = {
        "label": "TEST_TAG",
        "pattern": [{"LOWER": "proposed", "TAG": {"REGEX": "^VB"}}]
    }

    rules = self.compiler(source)

    print(rules)

    assert len(rules) == 1
    assert rules[0] == expected

def test_tag_list(self):
    """TAG_WORD with a word list compiles to one rule with a LOWER regex alternation."""
    source = """
!IMPORT("rita.modules.tag")
words = {"perceived", "proposed"}
{TAG_WORD("^VB", words)}->MARK("TEST_TAG")
"""
    expected = {
        "label": "TEST_TAG",
        "pattern": [{"LOWER": {"REGEX": "^(perceived|proposed)$"}, "TAG": {"REGEX": "^VB"}}]
    }

    rules = self.compiler(source)

    print(rules)

    assert len(rules) == 1
    assert rules[0] == expected

def test_tags_case_sensitive(self):
    """With ignore_case set to "F", both TAG_WORD forms are expected to emit TEXT keys."""
    source = """
!CONFIG("ignore_case", "F")
!IMPORT("rita.modules.tag")
words = {"perceived", "proposed"}
TAG_WORD("^VB", "proposed")->MARK("TEST_TAG")
{TAG_WORD("^VB", words)}->MARK("TEST_TAG")
"""
    single_word_rule = {
        "label": "TEST_TAG",
        "pattern": [{"TEXT": "proposed", "TAG": {"REGEX": "^VB"}}]
    }
    word_list_rule = {
        "label": "TEST_TAG",
        "pattern": [{"TEXT": {"REGEX": "^(perceived|proposed)$"}, "TAG": {"REGEX": "^VB"}}]
    }

    rules = self.compiler(source)

    print(rules)

    assert len(rules) == 2
    assert rules == [single_word_rule, word_list_rule]


class TestStandalone(object):
@property
Expand Down

0 comments on commit 1cae785

Please sign in to comment.