Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing Ori's comment at #35 #41

Merged
merged 9 commits into from
Sep 28, 2017
27 changes: 18 additions & 9 deletions examples/single_sentence.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
{"Predicates":
{"<P2>": {"Bare predicate": "to land in",
"Template": "<A1> to land in <A2>"},
"<P1>": {"Bare predicate": "was forced",
"Template": "<A1> was forced <P2>"}},
"Entities":
{"<A1>": "The Syrian plane",
"<A2>": "Moscow"},
"Sentence": "The Syrian plane was forced to land in Moscow ."}
{'Entities': {'A1': ('plane', (2,)),
'A2': ('Moscow', (8,)),
'A3': ('Syrian', (1,))},
'Predicates': {'P1': {'Arguments': ['A1', 'P2'],
'Bare predicate': ('was forced', (3, 4)),
'Head': {'Lemma': u'force', 'POS': 'VBN', 'Surface': ('forced', [4])},
'Template': '{A1} was forced {P2}'},
'P2': {'Arguments': ['A2', 'A1'],
'Bare predicate': ('to land in', (5, 6, 7)),
'Head': {'Lemma': 'land', 'POS': 'VB', 'Surface': ('land', [6])},
'Template': '{A1} to land in {A2}'},
'P3': {'Arguments': ('A3', 'A1'),
'Bare predicate': ('IMPLICIT', (-1,)),
'Head': {'Lemma': 'IMPLICIT', 'POS': 'IMPLICIT', 'Surface': 'IMPLICIT'},
'Template': '{A3} {A1}'}},
'Sentence': 'The Syrian plane was forced to land in Moscow .'
}

5 changes: 4 additions & 1 deletion src/baseline_automatic_pipeline_system/parse_okr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ def cluster_propositions(all_proposition_mentions, all_entity_mentions, entities
mentions_for_clustering = []
for mention_id, mention_info in all_proposition_mentions.iteritems():
head_lemma = mention_info["Head"]["Lemma"]
# if no lemma was extracted for the head (the lemma is empty), fall back to the head's surface form
if not head_lemma:
head_lemma = mention_info["Head"]["Surface"][0]
"""
get all relevant concepts - all concepts whose symbol's prefix is the sentence-id.
This is because the template uses the single-sentence symbols - Ai for entities and Pi for propositions.
Expand Down Expand Up @@ -401,7 +404,7 @@ def auto_pipeline_okr_info(sentences):
okr_info = generate_okr_info(sentences, all_entity_mentions, all_proposition_mentions, entities, propositions)

# using a deep copy because the OKR constructor changes the templates of the PropositionMentions in the propositions attribute
okr_v1 = okr.OKR(**copy.deepcopy(okr_info))
okr_v1 = okr.OKR(prepare_templates=False, **copy.deepcopy(okr_info))

# log eventual results
## did we cluster any mentions?
Expand Down
7 changes: 4 additions & 3 deletions src/common/okr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class OKR:
"""
A class for the OKR graph structure
"""
def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propositions):
def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propositions, prepare_templates=True):

self.name = name # XML file name
self.sentences = sentences # Dictionary of sentence ID (starts from 1) to tokenized sentence
Expand All @@ -32,8 +32,9 @@ def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propos

# Set template for predicate mentions and use it to create mention entailment graph
for p_id, prop in self.propositions.iteritems():
for m_id, prop_mention in prop.mentions.iteritems():
set_template(prop_mention, self.entities, self.propositions)
if prepare_templates:
for m_id, prop_mention in prop.mentions.iteritems():
set_template(prop_mention, self.entities, self.propositions)

# create mention entailment graph (if entailment_graph given)
if prop.entailment_graph:
Expand Down