vered1986 · kleinay · Sep 28, 2017 · Sep 18, 2017 · Sep 18, 2017 · Sep 18, 2017
diff --git a/examples/single_sentence.json b/examples/single_sentence.json
@@ -1,9 +1,18 @@
-{"Predicates":
- {"<P2>": {"Bare predicate": "to land in",
-           "Template": "<A1> to land in <A2>"},
-  "<P1>": {"Bare predicate": "was forced",
-           "Template": "<A1> was forced <P2>"}},
- "Entities":
- {"<A1>": "The Syrian plane",
-  "<A2>": "Moscow"},
- "Sentence": "The Syrian plane was forced to land in Moscow ."}
+{'Entities': {'A1': ('plane', (2,)),
+  'A2': ('Moscow', (8,)),
+  'A3': ('Syrian', (1,))},
+ 'Predicates': {'P1': {'Arguments': ['A1', 'P2'],
+   'Bare predicate': ('was forced', (3, 4)),
+   'Head': {'Lemma': u'force', 'POS': 'VBN', 'Surface': ('forced', [4])},
+   'Template': '{A1} was forced {P2}'},
+  'P2': {'Arguments': ['A2', 'A1'],
+   'Bare predicate': ('to land in', (5, 6, 7)),
+   'Head': {'Lemma': 'land', 'POS': 'VB', 'Surface': ('land', [6])},
+   'Template': '{A1} to land in {A2}'},
+  'P3': {'Arguments': ('A3', 'A1'),
+   'Bare predicate': ('IMPLICIT', (-1,)),
+   'Head': {'Lemma': 'IMPLICIT', 'POS': 'IMPLICIT', 'Surface': 'IMPLICIT'},
+   'Template': '{A3} {A1}'}},
+ 'Sentence': 'The Syrian plane was forced to land in Moscow .'
+}
+
diff --git a/src/baseline_automatic_pipeline_system/parse_okr_info.py b/src/baseline_automatic_pipeline_system/parse_okr_info.py
@@ -116,6 +116,9 @@ def cluster_propositions(all_proposition_mentions, all_entity_mentions, entities
     mentions_for_clustering = []
     for mention_id, mention_info in all_proposition_mentions.iteritems():
         head_lemma = mention_info["Head"]["Lemma"]
+        # if no lemma was extracted for the head (lemma is empty), fall back to the head's surface text
+        if not head_lemma:
+            head_lemma = mention_info["Head"]["Surface"][0]
         """
         get all relevant concepts - all concepts which their symbol's prefix is the sentence-id.
         This is because the template is using the single-sentence symbol - Ai for entities ans Pi for propositions.
@@ -401,7 +404,7 @@ def auto_pipeline_okr_info(sentences):
     okr_info = generate_okr_info(sentences, all_entity_mentions, all_proposition_mentions, entities, propositions)
 
     # using copy because OKR CTor changes the template of PropositionMentions of propositions attribute
-    okr_v1 = okr.OKR(**copy.deepcopy(okr_info))
+    okr_v1 = okr.OKR(prepare_templates=False, **copy.deepcopy(okr_info))
 
     # log eventual results
     ## did we cluster any mentions?

diff --git a/src/common/okr.py b/src/common/okr.py
@@ -15,7 +15,7 @@ class OKR:
     """
     A class for the OKR graph structure
     """
-    def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propositions):
+    def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propositions, prepare_templates=True):
 
         self.name = name  # XML file name
         self.sentences = sentences  # Dictionary of sentence ID (starts from 1) to tokenized sentence
@@ -32,8 +32,9 @@ def __init__(self, name, sentences, ignored_indices, tweet_ids, entities, propos
 
         # Set template for predicate mentions and use it to create mention entailment graph
         for p_id, prop in self.propositions.iteritems():
-            for m_id, prop_mention in prop.mentions.iteritems():
-                set_template(prop_mention, self.entities, self.propositions)
+            if prepare_templates:
+                for m_id, prop_mention in prop.mentions.iteritems():
+                    set_template(prop_mention, self.entities, self.propositions)
 
             # create mention entailment graph (if entailment_graph given)
             if prop.entailment_graph: