Skip to content

Commit

Permalink
[master] support some phrasal prepositions
Browse files Browse the repository at this point in the history
  • Loading branch information
vacancy committed Aug 27, 2018
1 parent 77e102e commit f122ca4
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
1 change: 1 addition & 0 deletions example/demo.py
Expand Up @@ -32,6 +32,7 @@ def main():
demo('A woman is playing the space craft at NASA.')
demo('A woman is playing with a space craft at NASA.')
demo('A woman next to a piano.')
demo('A woman in front of a piano.')
demo('A woman standing next to a piano.')
demo('The woman is a pianist.')
demo('A giraffe grazing a tree in the wildness with other wildlife.')
Expand Down
11 changes: 11 additions & 0 deletions sng_parser/_data/phrasal-preps.txt
@@ -0,0 +1,11 @@
in addition to
in front of
in reference to
in regard to
in spite of
on account of
on top of
on side of
on the side of
with regard to

31 changes: 24 additions & 7 deletions sng_parser/backends/spacy_parser.py
Expand Up @@ -113,6 +113,7 @@ def parse(self, sentence):

# Step 3: determine the relations.
relations = list()
fake_noun_marks = set()
for entity in doc.noun_chunks:
# Again, the subjects and the objects are represented by their position.
relation = None
Expand Down Expand Up @@ -154,6 +155,18 @@ def parse(self, sentence):
'relation': entity.root.head.text,
'lemma_relation': entity.root.head.lemma_
}
# E.g., A [woman] in front of a [piano].
elif (
entity.root.head.head.dep_ == 'pobj' and
database.is_phrasal_prep(doc[entity.root.head.head.head.i:entity.root.head.i + 1].lower_)
):
fake_noun_marks.add(entity.root.head.head.i)
relation = {
'subject': entity.root.head.head.head.head.i,
'object': entity.root.i,
'relation': doc[entity.root.head.head.head.i:entity.root.head.i + 1].text,
'lemma_relation': doc[entity.root.head.head.head.i:entity.root.head.i].lemma_
}
# E.g., A [piano] in the [room].
elif entity.root.head.head.pos_ == 'NOUN':
relation = {
Expand Down Expand Up @@ -197,13 +210,17 @@ def parse(self, sentence):
'lemma_relation': entity.root.head.lemma_
}

if relation is not None:
# Use a helper function to map the subj/obj represented by the position
# back to one of the entity nodes.
relation['subject'] = self.__locate_noun(entity_chunks, relation['subject'])
relation['object'] = self.__locate_noun(entity_chunks, relation['object'])
if relation['subject'] != None and relation['object'] != None:
relations.append(relation)
# Apply the `fake_noun_marks`.
entities = [e for e, ec in zip(entities, entity_chunks) if ec.root.i not in fake_noun_marks]
entity_chunks = [ec for ec in entity_chunks if ec.root.i not in fake_noun_marks]

if relation is not None:
# Use a helper function to map the subj/obj represented by the position
# back to one of the entity nodes.
relation['subject'] = self.__locate_noun(entity_chunks, relation['subject'])
relation['object'] = self.__locate_noun(entity_chunks, relation['object'])
if relation['subject'] != None and relation['object'] != None:
relations.append(relation)

return {'entities': entities, 'relations': relations}

Expand Down
4 changes: 4 additions & 0 deletions sng_parser/database.py
Expand Up @@ -30,6 +30,10 @@ def is_phrasal_verb(verb):
return verb in load_list('phrasal-verbs.txt')


def is_phrasal_prep(prep):
    """Return whether `prep` appears in the 'phrasal-preps.txt' word list.

    The membership test is an exact match against whatever `load_list`
    returns for that file; no normalization (case, whitespace) is applied
    here, so callers are expected to pass the phrase in the form stored in
    the list (presumably lower-cased, e.g. 'in front of' -- confirm against
    the data file and `load_list`).
    """
    phrasal_preps = load_list('phrasal-preps.txt')
    return prep in phrasal_preps


def is_scene_noun(noun):
head = noun.split(' ')[-1]
s = load_list('scene-nouns.txt')
Expand Down

0 comments on commit f122ca4

Please sign in to comment.