In [1]:
import os
import sys

In [2]:
# When the notebook is launched from `ancast/demo`, the project root is the
# parent of the working directory; register it on sys.path so the
# `from ancast import ...` cell that follows can resolve the package.
project_root = os.getcwd()
if os.path.basename(project_root) == "demo":
    project_root = os.path.abspath(os.path.join(project_root, os.pardir))
    # Guard the append so re-running this cell stays idempotent and does not
    # stack duplicate entries on sys.path.
    if project_root not in sys.path:
        sys.path.append(project_root)

In [3]:
from ancast import evaluate, evaluate_snt, io_utils

# Ancast
* Sentence level evaluation only
* see `ancast++.ipynb` for doc-level evaluation
* the following two calls are the same
```python
# (1)
evaluate(..., scope="snt", ...)

# (2), same as
evaluate_snt(...)
```

## 1. Comparing 2 AMR files

### AMR Data

In [4]:
# Sample AMR files (prediction first, gold second), given relative to the
# notebook's working directory.
amr_test_fpath, amr_gold_fpath = (
    "../samples/amr_test.txt",
    "../samples/amr_gold.txt",
)

In [5]:
# Test (predicted) sample: load the file with a "\n\n" delimiter and show the
# entry at index 0.
amr_test_chunks = io_utils.load_txt(amr_test_fpath, delimiter="\n\n")
print(amr_test_chunks[0])

# ::id 0
# ::annotator bart-amr
# ::snt Resolutely support the thread starter! I compose a poem in reply:
(z0 / multi-sentence
    :snt1 (z1 / support-01
              :mode imperative
              :ARG0 (z2 / you)
              :ARG1 (z3 / person
                        :ARG0-of (z4 / start-01
                                     :ARG1 (z5 / thread)))
              :manner (z6 / resolute))
    :snt2 (z7 / compose-02
              :ARG0 (z8 / i)
              :ARG1 (z9 / poem)
              :ARG2-of (z10 / reply-01
                            :ARG0 z8)))


In [6]:
# Gold sample: load the file with a "\n\n" delimiter and show the entry at
# index 1 (note: the test sample above is taken from index 0).
amr_gold_chunks = io_utils.load_txt(amr_gold_fpath, delimiter="\n\n")
print(amr_gold_chunks[1])

# ::id bolt12_64556_5627.1 ::date 2012-12-04T17:55:20 ::annotator SDL-AMR-09 ::preferred
# ::snt Resolutely support the thread starter! I compose a poem in reply:
# ::save-date Sun Dec 8, 2013 ::file bolt12_64556_5627_1.txt
(m / multi-sentence
      :snt1 (s / support-01 :mode imperative
            :ARG0 (y / you)
            :ARG1 (p / person
                  :ARG0-of (s2 / start-01
                        :ARG1 (t / thread)))
            :manner (r / resolute))
      :snt2 (r2 / reply-01
            :ARG0 (i / i)
            :ARG2 (c / compose-02
                  :ARG0 i
                  :ARG1 (p2 / poem))))


### Ancast Snt-graph Eval
* ancast can handle multiple input test/gold pairs
* final fscores are hence output as a list of floats

In [7]:
# Generic entry point restricted to sentence-level graphs:
# scope="snt" selects `ancast` rather than `ancast++`.
amr_file_eval_kwargs = dict(
    pred_inputs=amr_test_fpath,
    gold_inputs=amr_gold_fpath,
    data_format="amr",
    scope="snt",
)
fscores = evaluate(**amr_file_eval_kwargs)

# Scores come back as a list; report the first entry.
print("F score:", fscores[0])

F score: 0.7052565114443566


In [8]:
# Same AMR file pair as the previous cell, scored through the sentence-level
# shortcut (no `scope` argument is passed here).
fscores = evaluate_snt(
    data_format="amr",
    pred_inputs=amr_test_fpath,
    gold_inputs=amr_gold_fpath,
)

# Scores come back as a list; report the first entry.
print("F score:", fscores[0])

F score: 0.7052565114443566


## 2. Comparing 2 AMR Strings
* sentence-level evaluation only

In [9]:
# Predicted AMR held as an in-memory string: `# ::` metadata lines followed by
# the graph. The text matches the test sample printed from `amr_test_fpath`
# earlier in this notebook.
pred_amr = """# ::id 0
# ::annotator bart-amr
# ::snt Resolutely support the thread starter! I compose a poem in reply:
(z0 / multi-sentence
    :snt1 (z1 / support-01
              :mode imperative
              :ARG0 (z2 / you)
              :ARG1 (z3 / person
                        :ARG0-of (z4 / start-01
                                     :ARG1 (z5 / thread)))
              :manner (z6 / resolute))
    :snt2 (z7 / compose-02
              :ARG0 (z8 / i)
              :ARG1 (z9 / poem)
              :ARG2-of (z10 / reply-01
                            :ARG0 z8)))"""

In [10]:
# Gold AMR held as an in-memory string: `# ::` metadata lines followed by the
# graph. The text matches the gold sample printed from `amr_gold_fpath`
# earlier in this notebook.
gold_amr = """# ::id bolt12_64556_5627.1 ::date 2012-12-04T17:55:20 ::annotator SDL-AMR-09 ::preferred
# ::snt Resolutely support the thread starter! I compose a poem in reply:
# ::save-date Sun Dec 8, 2013 ::file bolt12_64556_5627_1.txt
(m / multi-sentence
      :snt1 (s / support-01 :mode imperative
            :ARG0 (y / you)
            :ARG1 (p / person
                  :ARG0-of (s2 / start-01
                        :ARG1 (t / thread)))
            :manner (r / resolute))
      :snt2 (r2 / reply-01
            :ARG0 (i / i)
            :ARG2 (c / compose-02
                  :ARG0 i
                  :ARG1 (p2 / poem))))"""

In [11]:
# Score the two in-memory AMR strings (rather than file paths) at sentence level.
amr_string_eval_kwargs = {
    "pred_inputs": pred_amr,
    "gold_inputs": gold_amr,
    "data_format": "amr",
}
fscores = evaluate_snt(**amr_string_eval_kwargs)

# Scores come back as a list; report the first entry.
print("F score:", fscores[0])

F score: 0.9166666666666666


## 3. Comparing 2 UMR files

In [12]:
# Sample UMR files (prediction first, gold second), given relative to the
# notebook's working directory.
umr_test_fpath, umr_gold_fpath = (
    "../samples/umr_test.txt",
    "../samples/umr_gold.txt",
)

In [14]:
# Test (predicted) UMR: load the file with a "\n\n" delimiter and show the
# entry at index 1, which the output below shows is the sentence level graph.
umr_test_chunks = io_utils.load_txt(umr_test_fpath, delimiter="\n\n")
print(umr_test_chunks[1])

# sentence level graph:
(s1x0 / leave-11
    :ARG0 (s1x1 / person
        :name (s1x2 / name
            :op1 "Lindsay")
        :wiki "Lindsay_Rappaport")
    :purpose (s1x3 / lunch-01
        :ARG0 s1x1))


In [15]:
# Gold UMR: load the file with a "\n\n" delimiter and show the entry at
# index 1, which the output below shows is the sentence level graph.
umr_gold_chunks = io_utils.load_txt(umr_gold_fpath, delimiter="\n\n")
print(umr_gold_chunks[1])

# sentence level graph:
(s1l / leave-02
    :ARG0 (s1p / person
        :name (s1n / name :op1 "Lindsay"))
    :aspect performance
    :purpose (s1e / eat-01
        :ARG0 s1p
        :ARG1 (s1l2 / lunch)
        :aspect performance))


In [18]:
# Sentence-level scoring of the UMR file pair; data_format switches to "umr".
umr_file_eval_kwargs = dict(
    pred_inputs=umr_test_fpath,
    gold_inputs=umr_gold_fpath,
    data_format="umr",
)
fscores = evaluate_snt(**umr_file_eval_kwargs)

# Scores come back as a list; report the first entry.
print("F score:", fscores[0])

F score: 0.4103448275862069


## 4. Comparing 2 UMR Strings

In [20]:
# Predicted UMR for one sentence, held as an in-memory string. Blank lines
# separate its sections: sentence header, sentence level graph, alignment,
# and document level annotation.
test_umr="""# :: snt1	Lindsay left in order to eat lunch .

# sentence level graph:
(s1x0 / leave-11
    :ARG0 (s1x1 / person
        :name (s1x2 / name
            :op1 "Lindsay")
        :wiki "Lindsay_Rappaport")
    :purpose (s1x3 / lunch-01
        :ARG0 s1x1))

# alignment:
s1x1: 0-0
s1x0: 1-1
s1x3: 6-6

# document level annotation:
(s1s0 / sentence
    :modal ((author :full-affirmative s1x0))
    :temporal ((document-creation-time :before s1x0)))"""

In [21]:
# Gold UMR for the same sentence, held as an in-memory string. Blank lines
# separate its sections: sentence header, sentence level graph, alignment,
# and document level annotation.
gold_umr="""# :: snt1	  Lindsay left in order to eat lunch .

# sentence level graph:
(s1l / leave-02
    :ARG0 (s1p / person
        :name (s1n / name :op1 "Lindsay"))
    :aspect performance
    :purpose (s1e / eat-01
        :ARG0 s1p
        :ARG1 (s1l2 / lunch)
        :aspect performance))

# alignment:
s1l: 2-2
s1p: 1-1
s1n: 0-0
s1e: 6-6
s1l2: 7-7

# document level annotation:
(s1s0 / sentence
    :temporal ((document-creation-time :before s1l)
            (s1l :after s1e))
    :modal ((root :modal author)
            (author :full-affirmative s1l)
            (author :full-negative s1e)))"""

In [22]:
# Score the two in-memory UMR strings (rather than file paths) at sentence level.
umr_string_eval_kwargs = {
    "pred_inputs": test_umr,
    "gold_inputs": gold_umr,
    "data_format": "umr",
}
fscores = evaluate_snt(**umr_string_eval_kwargs)

# Scores come back as a list; report the first entry.
print("F score:", fscores[0])

F score: 0.4214285714285715
