In [1]:
import amrlib

In [2]:
# Load the sentence-to-graph model and parse two sample sentences into AMR.
stog = amrlib.load_stog_model()
graphs = stog.parse_sents([
    'This is a test of the system.',
    'This is a second sentence.',
])
# Print one parsed graph after another.
for parsed in graphs:
    print(parsed)

# ::snt This is a test of the system.
(t / test-01
      :ARG1 (s / system)
      :domain (t2 / this))
# ::snt This is a second sentence.
(s / sentence
      :ord (o / ordinal-entity
            :value 2)
      :domain (t / this))


In [3]:
# Load the graph-to-sentence model and turn the parsed graphs back into text.
gtos = amrlib.load_gtos_model()
# generate() is unpacked as a pair; only the first element (the sentences) is used.
sents, _ = gtos.generate(graphs)
for generated in sents:
    print(generated)

This is system testing.
This is the second sentence.


In [4]:
import spacy
# Set up amrlib's spaCy extension, then parse a two-sentence document through
# the `doc._.to_amr()` hook.
amrlib.setup_spacy_extension()
nlp = spacy.load('en_core_web_sm')
doc = nlp(
    'This is a test of the SpaCy extension. '
    'The test has multiple sentences.'
)
graphs = doc._.to_amr()
for parsed in graphs:
    print(parsed)

# ::snt This is a test of the SpaCy extension.
(t / test-01
      :ARG1 (e / extend-01
            :ARG1 (p / product
                  :name (n / name
                        :op1 "SpaCy")))
      :domain (t2 / this))
# ::snt The test has multiple sentences.
(h / have-03
      :ARG0 (t / test)
      :ARG1 (s / sentence
            :quant (m / multiple)))


In [5]:
import json
import amrlib
import spacy

In [6]:
# Set up the amrlib spaCy extension and load the small English pipeline.
# NOTE(review): both calls already appear in an earlier cell; presumably they are
# repeated so this dataset section can be run on its own — confirm the
# duplication is intended.
amrlib.setup_spacy_extension()
nlp = spacy.load('en_core_web_sm')

In [7]:
# Path to the input JSONL file (relative path, resolved against the current
# working directory when the file is opened).
file_path = "data/massive_amr.jsonl"

In [8]:
# Read the JSONL file: one JSON object per non-blank line.
with open(file_path, "r", encoding="utf-8") as file:
    # Skip blank / whitespace-only lines (e.g. a stray empty line at the end of
    # the file); json.loads raises JSONDecodeError on an empty document.
    data = [json.loads(line) for line in file if line.strip()]

In [9]:
# Pull the two fields used below out of every record:
#   "utt"     -> the sentence text
#   "raw_amr" -> the AMR graph string
sentences = [record["utt"] for record in data]
amr_graphs = [record["raw_amr"] for record in data]

In [10]:
# --- Test STOG: Convert Sentences to AMR ---
print("\n### STOG: Sentences to AMR ###")
# Only the first 5 sentences are parsed to keep the test quick.
stog_inputs = sentences[:5]
parsed_graphs = stog.parse_sents(stog_inputs)
# Show each input sentence next to the graph parsed from it.
for sent, graph in zip(stog_inputs, parsed_graphs):
    print(f"\nSentence: {sent}\nAMR Graph:\n{graph}")


### STOG: Sentences to AMR ###

Sentence: what are some updates about the stock market
AMR Graph:
# ::snt what are some updates about the stock market
(u / update-02
      :ARG1 (m / market
            :mod (s / stock))
      :ARG2 (a / amr-unknown)
      :quant (s2 / some))

Sentence: definition of velocity
AMR Graph:
# ::snt definition of velocity
(d / define-01
      :ARG1 (v / velocity))

Sentence: please look up exchange between us and mexico
AMR Graph:
# ::snt please look up exchange between us and mexico
(l / look-up-05
      :polite +
      :mode imperative
      :ARG0 (y / you)
      :ARG1 (e / exchange-01
            :ARG0 (w / we)
            :ARG2 (c / country
                  :name (n / name
                        :op1 "Mexico"))))

Sentence: can you describe to me what a pineapple looks like
AMR Graph:
# ::snt can you describe to me what a pineapple looks like
(p / possible-01
      :polarity (a / amr-unknown)
      :ARG1 (d / describe-01
            :ARG0 (y / you)
  

In [11]:
# --- Test GTOS: Convert AMR to Sentences ---
print("\n### GTOS: AMR to Sentences ###")
# Only the first 5 AMR graphs from the dataset are used.
gtos_inputs = amr_graphs[:5]
# generate() is unpacked as a pair; the second element is discarded.
reconstructed_sentences, _ = gtos.generate(gtos_inputs)
for amr, recon_sent in zip(gtos_inputs, reconstructed_sentences):
    print(f"\nAMR:\n{amr}\nReconstructed Sentence: {recon_sent}")


### GTOS: AMR to Sentences ###

AMR:
(u / update-02
      :ARG2 (a / amr-unknown)
      :topic (m / market-01
            :ARG1 (s / stock))
      :mod (s2 / some))
Reconstructed Sentence: What are some stock market updates?

AMR:
(d / define-01
      :ARG1 (v / velocity)
      :ARG2 (a / amr-unknown))
Reconstructed Sentence: What is the definition of velocity?

AMR:
(l / look-up-05 :mode imperative :polite +
      :ARG0 (y / you)
      :ARG1 (e / exchange-01
            :ARG1 (c / currency
                  :mod (c3 / country :name (n / name :op1 "us")))
            :ARG3 (c2 / currency
                  :mod (c4 / country :name (n2 / name :op1 "mexico")))))
Reconstructed Sentence: Please look up exchange rates between US and Mexican currency.

AMR:
(d / describe-01 :mode imperative :polite +
      :ARG0 (y / you)
      :ARG1 (t / thing
            :ARG1-of (l / look-02
                  :ARG0 (f / food-dish :name (n / name :op1 "pineapple")))))
Reconstructed Sentence: Please describ

In [12]:
# --- Test SpaCy + AMR ---
print("\n### SpaCy AMR Extension ###")
# Only the first 3 sentences are run through the spaCy pipeline.
spacy_inputs = sentences[:3]
for sent in spacy_inputs:
    doc = nlp(sent)
    # to_amr() is iterated below, printing one report per returned graph.
    doc_graphs = doc._.to_amr()
    for graph in doc_graphs:
        print(f"\nSentence: {sent}\nSpaCy AMR Graph:\n{graph}")


### SpaCy AMR Extension ###

Sentence: what are some updates about the stock market
SpaCy AMR Graph:
# ::snt what are some updates about the stock market
(u / update-02
      :ARG1 (m / market
            :mod (s / stock))
      :ARG2 (a / amr-unknown)
      :quant (s2 / some))

Sentence: definition of velocity
SpaCy AMR Graph:
# ::snt definition of velocity
(d / define-01
      :ARG1 (v / velocity))

Sentence: please look up exchange between us and mexico
SpaCy AMR Graph:
# ::snt please look up exchange between us and mexico
(l / look-up-05
      :polite +
      :mode imperative
      :ARG0 (y / you)
      :ARG1 (e / exchange-01
            :ARG0 (w / we)
            :ARG2 (c / country
                  :name (n / name
                        :op1 "Mexico"))))


# Let's try to translate the dataset to Irish

In [41]:
import json
from transformers import MarianMTModel, MarianTokenizer

In [42]:
# ----------------------------
# 1. Load Translation Model (English → Irish)
# ----------------------------

In [43]:
# Checkpoint name passed to both from_pretrained() calls below, so the
# tokenizer and the model always come from the same checkpoint.
model_name = "Helsinki-NLP/opus-mt-en-ga"  # English to Irish
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

In [44]:
# ----------------------------
# 2. Load JSONL Data
# ----------------------------

In [45]:
# Source dataset (read below) and destination for the translated copy
# (written at the end of the notebook).
input_file = "data/massive_amr.jsonl"
output_file = "data/massive_amr_irish.jsonl"

In [46]:
# Load the dataset: one JSON object per non-blank line.
with open(input_file, "r", encoding="utf-8") as f:
    # Skip blank / whitespace-only lines (e.g. a stray empty line at the end of
    # the file); json.loads raises JSONDecodeError on an empty document.
    data = [json.loads(line) for line in f if line.strip()]

In [47]:
# ----------------------------
# 3. Translate Function
# ----------------------------

In [48]:
def translate_text(text, tokenizer, model, max_length=128):
    """Translate a single string with the given MarianMT tokenizer/model pair.

    In this notebook the pair is the English -> Irish checkpoint.

    Args:
        text: Source string. ``None``, empty and whitespace-only values are
            returned unchanged without calling the tokenizer or the model.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        model: Model exposing ``generate(**inputs)``.
        max_length: Maximum number of input tokens; longer inputs are
            truncated by the tokenizer (``truncation=True``). Defaults to 128,
            the value that was previously hard-coded.

    Returns:
        The first decoded sequence, or ``text`` itself when there is nothing
        to translate.
    """
    # Guard against missing values as well as blank strings: calling
    # .strip() on None would raise AttributeError.
    if text is None or not text.strip():
        return text

    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )
    translated_tokens = model.generate(**inputs)
    # A single input string yields a batch of one; return its only element.
    decoded = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
    return decoded[0]

In [49]:
# ----------------------------
# 4. Translate Sentences
# ----------------------------

In [50]:
# Translate both text fields of every record, overwriting them in place.
# NOTE(review): because the records are mutated in place, re-running this cell
# feeds already-translated text back into the translator — reload `data` first.
# NOTE(review): presumably "annot_utt" carries annotation markup in addition to
# plain text; verify that it survives machine translation intact.
for entry in data:
    for field in ("utt", "annot_utt"):
        entry[field] = translate_text(entry[field], tokenizer, model)

In [51]:
# ----------------------------
# 5. Save Translated Data
# ----------------------------

In [52]:
# Write the records back out as JSONL: one JSON document per line.
# ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in data)

In [53]:
# Confirmation message.
# NOTE(review): the file name is hard-coded here and omits the "data/"
# directory present in `output_file`; keep the two in sync if the path changes.
print("✅ Translation complete! Saved as 'massive_amr_irish.jsonl'")

✅ Translation complete! Saved as 'massive_amr_irish.jsonl'
