In [1]:
import os
import sys

In [2]:
# Expected launch directory: `ancast/demo`.
# In that case the parent directory becomes the project root and is put on
# `sys.path`, presumably so the `ancast` import in the next cell resolves from
# a source checkout. From any other working directory `sys.path` is untouched
# and `project_root` is simply the current working directory.
project_root = os.getcwd()
if os.path.basename(project_root) == "demo":
    project_root = os.path.dirname(project_root)
    # Guard against duplicate entries when this cell is re-run in a live kernel.
    if project_root not in sys.path:
        sys.path.append(project_root)

In [3]:
from ancast import evaluate, evaluate_doc, io_utils

# Ancast++
* Document-level evaluation only.
* See `ancast.ipynb` for sentence-level evaluation.
* The following two calls are equivalent:
```python
# (1)
evaluate(..., scope="doc", ...)

# (2) equivalent to (1)
evaluate_doc(...)
```

## 1. Comparing 2 UMR files

In [4]:
# Anchor the sample paths on `project_root` (computed in the setup cell) rather
# than on the current working directory, so they resolve to `<root>/samples`
# both when the notebook runs from `demo/` and when it runs from the root.
samples_dir = os.path.join(project_root, "samples")
umr_test_fpath = os.path.join(samples_dir, "umr_test.txt")
umr_gold_fpath = os.path.join(samples_dir, "umr_gold.txt")

In [6]:
# Load the test file using "\n\n\n" as the delimiter and show only the first item.
test_chunks = io_utils.load_txt(umr_test_fpath, delimiter="\n\n\n")
print(test_chunks[0])

# :: snt1	Lindsay left in order to eat lunch .

# sentence level graph:
(s1x0 / leave-11
    :ARG0 (s1x1 / person
        :name (s1x2 / name
            :op1 "Lindsay")
        :wiki "Lindsay_Rappaport")
    :purpose (s1x3 / lunch-01
        :ARG0 s1x1))

# alignment:
s1x1: 0-0
s1x0: 1-1
s1x3: 6-6

# document level annotation:
(s1s0 / sentence
    :modal ((author :full-affirmative s1x0))
    :temporal ((document-creation-time :before s1x0)))


In [8]:
# Load the gold file using "\n\n\n" as the delimiter and show only the first item.
gold_chunks = io_utils.load_txt(umr_gold_fpath, delimiter="\n\n\n")
print(gold_chunks[0])

# :: snt1	  Lindsay left in order to eat lunch .

# sentence level graph:
(s1l / leave-02
    :ARG0 (s1p / person
        :name (s1n / name :op1 "Lindsay"))
    :aspect performance
    :purpose (s1e / eat-01
        :ARG0 s1p
        :ARG1 (s1l2 / lunch)
        :aspect performance))

# alignment:
s1l: 2-2
s1p: 1-1
s1n: 0-0
s1e: 6-6
s1l2: 7-7

# document level annotation:
(s1s0 / sentence
    :temporal ((document-creation-time :before s1l)
            (s1l :after s1e))
    :modal ((root :modal author)
            (author :full-affirmative s1l)
            (author :full-negative s1e)))


In [11]:
# Variant (1): the generic `evaluate` entry point with scope="doc",
# comparing the test file against the gold file.
fscores = evaluate(pred_inputs=umr_test_fpath, gold_inputs=umr_gold_fpath,
                   data_format="umr", scope="doc")
print("F score:", fscores)

F score: {'sent': 0.4103448275862069, 'modal': 0.5, 'temporal': 0.5454545454545454, 'coref': 0.0, 'comp': 0.4524471867810081}


In [12]:
# Variant (2): `evaluate_doc` on the same pair of files; no `scope` argument is passed.
fscores = evaluate_doc(pred_inputs=umr_test_fpath, gold_inputs=umr_gold_fpath,
                       data_format="umr")
print("F score:", fscores)

F score: {'sent': 0.4103448275862069, 'modal': 0.5, 'temporal': 0.5454545454545454, 'coref': 0.0, 'comp': 0.4524471867810081}


## 2. Comparing 2 UMR strings

In [13]:
# Test-side UMR for sentence snt1, held as an in-memory string instead of a file.
# The literal contains the sentence header, the sentence-level graph, the
# alignment table and the document-level annotation; it is passed as
# `pred_inputs` to `evaluate_doc` further down.
test_umr="""# :: snt1	Lindsay left in order to eat lunch .

# sentence level graph:
(s1x0 / leave-11
    :ARG0 (s1x1 / person
        :name (s1x2 / name
            :op1 "Lindsay")
        :wiki "Lindsay_Rappaport")
    :purpose (s1x3 / lunch-01
        :ARG0 s1x1))

# alignment:
s1x1: 0-0
s1x0: 1-1
s1x3: 6-6

# document level annotation:
(s1s0 / sentence
    :modal ((author :full-affirmative s1x0))
    :temporal ((document-creation-time :before s1x0)))"""

In [14]:
# Gold-side UMR for sentence snt1, held as an in-memory string instead of a file.
# The literal contains the sentence header, the sentence-level graph, the
# alignment table and the document-level annotation; it is passed as
# `gold_inputs` to `evaluate_doc` further down.
gold_umr="""# :: snt1	  Lindsay left in order to eat lunch .

# sentence level graph:
(s1l / leave-02
    :ARG0 (s1p / person
        :name (s1n / name :op1 "Lindsay"))
    :aspect performance
    :purpose (s1e / eat-01
        :ARG0 s1p
        :ARG1 (s1l2 / lunch)
        :aspect performance))

# alignment:
s1l: 2-2
s1p: 1-1
s1n: 0-0
s1e: 6-6
s1l2: 7-7

# document level annotation:
(s1s0 / sentence
    :temporal ((document-creation-time :before s1l)
            (s1l :after s1e))
    :modal ((root :modal author)
            (author :full-affirmative s1l)
            (author :full-negative s1e)))"""

In [15]:
# Same `evaluate_doc` call as for files, but fed the raw UMR strings defined above.
fscores = evaluate_doc(pred_inputs=test_umr, gold_inputs=gold_umr,
                       data_format="umr")
print("F score:", fscores)

F score: {'sent': 0.4214285714285715, 'modal': 0.5, 'temporal': 0.6666666666666666, 'coref': 0.0, 'comp': 0.4900264450321118}
