In [1]:
from pathlib import Path
from a01.DepParser.parse_dataset import Dataset
import argparse

In [3]:
def conllu(source):
    """
    Reads sentences from a CoNLL-U formatted input source.

    Args: source: An iterable of text lines, such as a file pointer.

    Yields: One list per sentence. Each element of that list is the
    list of tab-separated columns of one token line. Lines starting
    with `#` are ignored, and so are token lines whose first column is
    not a plain integer (e.g. range tokens such as `1-2`).

    A sentence ends at a blank line or at the end of the input, so the
    last sentence is returned even when the source lacks a trailing
    blank line. Empty sentences (e.g. produced by consecutive blank
    lines) are never yielded.
    """
    buffer = []
    for line in source:
        line = line.rstrip()    # strip off the trailing newline
        if line.startswith("#"):
            continue    # comment / metadata line
        if not line:
            # A blank line terminates the current sentence; only emit
            # it when at least one token row has been collected.
            if buffer:
                yield buffer
                buffer = []
            continue
        columns = line.split("\t")
        if columns[0].isdigit():    # skip range tokens
            buffer.append(columns)
    # Flush the final sentence when the input does not end with a
    # blank line; otherwise it would be silently dropped.
    if buffer:
        yield buffer

In [17]:
def trees(source):
    """
    Reads dependency trees from an input source.

    Args: source: An iterable of lines, such as a file pointer; it is
    handed to `conllu` unchanged.

    Yields: 4-tuples `(words, tags, heads, relations)`, one per
    sentence. Every list starts with an entry for the pseudo-word at
    position 0 and then holds one entry per token row:
    `words` is `"<ROOT>"` followed by column 1 of each row,
    `tags` is `"<ROOT>"` followed by column 3 (part-of-speech tags),
    `heads` is `0` followed by column 6 converted to `int` (head
    indices), and `relations` is `"root"` followed by column 7
    (dependency relation labels).
    """
    for sentence in conllu(source):
        forms = [fields[1] for fields in sentence]
        pos_tags = [fields[3] for fields in sentence]
        head_indices = [int(fields[6]) for fields in sentence]
        labels = [fields[7] for fields in sentence]
        yield (
            ["<ROOT>", *forms],
            ["<ROOT>", *pos_tags],
            [0, *head_indices],
            ["root", *labels],
        )

In [21]:
path = "./a01/DepParser/en-ud-dev-projective.conllu"

# `trees` is a lazy generator, so it has to be consumed while the file is
# still open. The `with` block guarantees the handle is closed afterwards,
# and the explicit encoding avoids depending on the platform default.
with open(path, encoding="utf-8") as source:
    words = trees(source)    # NOTE: despite its name this iterates over whole trees
    print(next(words))
    second_tree = next(words)

# Last expression of the cell: display the second tree.
# `words` must not be advanced any further here because its file is closed.
second_tree

(['<ROOT>', 'From', 'the', 'AP', 'comes', 'this', 'story', ':'], ['<ROOT>', 'ADP', 'DET', 'PROPN', 'VERB', 'DET', 'NOUN', 'PUNCT'], [0, 3, 3, 4, 0, 6, 4, 4], ['root', 'case', 'det', 'nmod', 'root', 'det', 'nsubj', 'punct'])


(['<ROOT>',
  'President',
  'Bush',
  'on',
  'Tuesday',
  'nominated',
  'two',
  'individuals',
  'to',
  'replace',
  'retiring',
  'jurists',
  'on',
  'federal',
  'courts',
  'in',
  'the',
  'Washington',
  'area',
  '.'],
 ['<ROOT>',
  'PROPN',
  'PROPN',
  'ADP',
  'PROPN',
  'VERB',
  'NUM',
  'NOUN',
  'PART',
  'VERB',
  'VERB',
  'NOUN',
  'ADP',
  'ADJ',
  'NOUN',
  'ADP',
  'DET',
  'PROPN',
  'NOUN',
  'PUNCT'],
 [0, 2, 5, 4, 5, 0, 7, 5, 9, 5, 11, 9, 14, 14, 11, 18, 18, 18, 14, 5],
 ['root',
  'compound',
  'nsubj',
  'case',
  'nmod',
  'root',
  'nummod',
  'dobj',
  'mark',
  'advcl',
  'amod',
  'dobj',
  'case',
  'amod',
  'nmod',
  'case',
  'det',
  'compound',
  'nmod',
  'punct'])

In [None]:
# Integer codes for the three parser moves: shift, left-arc, right-arc.
SH, LA, RA = 0,1,2

def step_by_step(self,string) :
    """
    Parses a string and builds a dependency tree. In each step,
    the user needs to input the move to be made.

    Args:
        self: The parser object. It must provide
            `valid_moves(i, stack, pred_tree)`, returning a container of
            the currently allowed move codes, and
            `move(i, stack, pred_tree, m)`, returning the next
            `(i, stack, pred_tree)` configuration.
        string: The sentence to parse, as whitespace-separated words.

    Returns:
        None. The configuration after every step and the final
        predicted tree are printed.

    Only the first character of the answer is looked at (case
    insensitive): `s` = shift, `l` = left-arc, `r` = right-arc. An
    empty or unrecognized answer, or a move that is not currently
    valid, prints "Illegal move" and asks again. Exceptions such as
    `KeyboardInterrupt` or `EOFError` raised by `input`, and errors
    raised by `valid_moves`, are deliberately NOT swallowed, so the
    prompt loop can always be interrupted.
    """
    move_by_key = {'s': SH, 'l': LA, 'r': RA}
    w = ("<ROOT> " + string).split()
    i, stack, pred_tree = 0, [], [0]*len(w) # Input configuration
    while True:
        print("----------------")
        print("Buffer: ", w[i:])
        print("Stack: ", [w[s] for s in stack])
        print("Predicted tree: ", pred_tree)
        ms = input("Move: (Shift,Left,Right): ").lower()
        # Slicing (instead of indexing) maps an empty answer to "" and
        # therefore to the invalid move -1, without raising IndexError.
        m = move_by_key.get(ms[:1], -1)
        if m not in self.valid_moves(i, stack, pred_tree):
            print("Illegal move")
            continue
        i, stack, pred_tree = self.move(i, stack, pred_tree, m)
        if i == len(w) and stack == [0]:
            # Terminal configuration
            print("----------------")
            print("Final predicted tree: ", pred_tree)
            return

In [3]:
# Tokenize the example sentence, with the <ROOT> pseudo-word at position 0.
w = ("<ROOT> " + "John made the pie in the fridge").split()

# The stack holds word positions; it starts out empty.
stack = []

# Show the words currently on the stack.
[w[position] for position in stack]

[]

In [14]:
# Tokenize the example sentence, with the <ROOT> pseudo-word at position 0.
w = ("<ROOT> " + "John made the pie in the fridge").split()

# Initial configuration: buffer position 0, empty stack, all heads set to 0.
i = 0
stack = []
pred_tree = [0] * len(w)

# One value per output line.
print(i, stack, pred_tree, sep="\n")

0
[]
[0, 0, 0, 0, 0, 0, 0, 0]


In [15]:
# Three shifts in a row: push the next three buffer positions onto the
# stack and advance the buffer index past them.
stack.extend(range(i, i + 3))
i += 3

stack


[0, 1, 2]

In [16]:
# Show the words at the positions currently on the stack.
[w[position] for position in stack]

['<ROOT>', 'John', 'made']

In [18]:
# Record the top stack item as the head of the second-from-top item,
# then remove that second-from-top item from the stack.
pred_tree[stack[-2]] = stack[-1]
del stack[-2]

pred_tree


[0, 2, 0, 0, 0, 0, 0, 0]

In [19]:
# One shift: push the current buffer position and advance the buffer index.
stack += [i]
i = i + 1

In [20]:
# Words on the stack before the move.
print([w[position] for position in stack])

# Record the second-from-top stack item as the head of the top item,
# then remove the top item from the stack.
pred_tree[stack[-1]] = stack[-2]
stack.pop()

print(pred_tree)

['<ROOT>', 'made', 'the']
[0, 2, 0, 2, 0, 0, 0, 0]
