In [1]:
!pip install spaCy
import numpy as np
import re
import spacy
from spacy import displacy



In [2]:
# Example: run three sample sentences through spaCy and draw each dependency parse.

nlp = spacy.load("en_core_web_sm")
doc1, doc2, doc3 = (
    nlp(text)
    for text in (
        "do not cross road until the car pass",
        "cross the road after the car pass",
        "wait for car pass then crossing the road",
    )
)
# Render in the same order the documents were created.
for _doc in (doc1, doc2, doc3):
    displacy.render(_doc, style="dep", jupyter=True)

### Parse

In [104]:
# spaCy pipeline that remove_unused_tags calls to tokenise each sentence and
# read every token's lemma and part-of-speech tag.
nlp = spacy.load("en_core_web_sm")

def remove_unused_tags(sentence, keywords):
    """Reduce a sentence to the lemmas needed for phrase extraction.

    Hyphens and opening square brackets are replaced by spaces, then the text
    is run through the module-level ``nlp`` pipeline.  A token is kept when any
    of the following holds:

    * its lemma is "no" or "not", is in ``keywords``, or is numeric;
    * it is tagged VERB, NOUN or ADJ and its lemma is not "be";
    * its POS tag contains "CONJ" or "PUNCT" (kept so later steps can split
      on conjunctions and punctuation).

    Args:
        sentence: Raw instruction text.
        keywords: Collection of lemmas that must always be kept.

    Returns:
        ``(sent, tags)``: two parallel lists holding the lemma and the POS tag
        of every kept token, in sentence order.
    """
    cleaned = sentence.replace('-', ' ').replace('[', ' ')
    negations = ["no", "not"]
    content_pos = ["VERB", "NOUN", "ADJ"]

    def _keep(token):
        lemma, pos = token.lemma_, token.pos_
        if lemma in negations or lemma in keywords or lemma.isnumeric():
            return True
        if pos in content_pos and lemma != "be":
            return True
        return "CONJ" in pos or "PUNCT" in pos

    kept = [(token.lemma_, token.pos_) for token in nlp(cleaned) if _keep(token)]
    sent = [lemma for lemma, _ in kept]
    tags = [pos for _, pos in kept]
    return sent, tags

def get_split_set(sent, tags):
    """Collect the lemmas that act as phrase delimiters.

    Args:
        sent: Lemmas, parallel to ``tags``.
        tags: POS tags, parallel to ``sent``.

    Returns:
        The set of entries of ``sent`` whose tag is exactly "PUNCT" or
        contains the substring "CONJ".
    """
    return {
        sent[pos]
        for pos, tag in enumerate(tags)
        if tag == 'PUNCT' or 'CONJ' in tag
    }

def extract_keyword_verb_phrase(sent, tags, split_set, keywords):
    """Group filtered lemmas into keyword phrases and verb phrases.

    Phase 1 cuts ``sent`` into chunks.  A keyword always becomes its own
    one-word chunk.  Any other entry found in ``split_set`` closes the current
    chunk and is itself discarded.

    Phase 2 walks the chunks from last to first.  A chunk that contains a word
    tagged "VERB" or a keyword is joined, together with the verb-less chunks
    that follow it, into one space-separated phrase.  Verb-less chunks that
    come before the first such chunk are dropped.

    Args:
        sent: Lemmas, parallel to ``tags``.
        tags: POS tags, parallel to ``sent``.
        split_set: Lemmas that delimit chunks.
        keywords: Lemmas emitted as stand-alone phrases.

    Returns:
        List of phrase strings in sentence order.
    """
    total = len(sent)
    chunks, chunk_tags = [], []

    def _emit(lo, hi):
        # hi=None means "to the end", mirroring an open-ended slice.
        chunks.append(sent[lo:hi])
        chunk_tags.append(tags[lo:hi])

    # Phase 1: chunking.
    start = 0
    while start < total:
        if sent[start] in keywords:
            _emit(start, start + 1)
            start += 1
            continue
        # Advance to the next delimiter or keyword (or the end of the input).
        stop = start
        while stop < total and not (sent[stop] in split_set or sent[stop] in keywords):
            stop += 1
        if stop == total:
            _emit(start, None)
        else:
            _emit(start, stop)
            if sent[stop] in keywords:
                _emit(stop, stop + 1)
        start = stop + 1

    # Phase 2: merge backwards.
    phrases = []   # collected last-to-first, reversed before returning
    pending = []   # chunks seen since the last flush, kept in sentence order
    for idx in range(len(chunks) - 1, -1, -1):
        pending.insert(0, chunks[idx])
        has_anchor = any(
            chunk_tags[idx][pos] == "VERB" or word in keywords
            for pos, word in enumerate(chunks[idx])
        )
        if has_anchor:
            words = [word for chunk in pending for word in chunk]
            if words:
                phrases.append(' '.join(words))
            pending = []
    phrases.reverse()
    return phrases

def parse(sentence, keywords):
    """Turn one instruction sentence into its list of keyword/verb phrases.

    Runs the three stages in order: token filtering (``remove_unused_tags``),
    delimiter collection (``get_split_set``) and phrase grouping
    (``extract_keyword_verb_phrase``).  The result is printed, followed by a
    blank line, before it is returned.

    Args:
        sentence: Raw instruction text.
        keywords: Lemmas treated as stand-alone phrases.

    Returns:
        The list of phrase strings.
    """
    lemmas, pos_tags = remove_unused_tags(sentence, keywords)
    delimiters = get_split_set(lemmas, pos_tags)
    vps = extract_keyword_verb_phrase(lemmas, pos_tags, delimiters, keywords)
    print(vps, end="\n\n")
    return vps

# Lemmas the parser always keeps and emits as stand-alone phrases.
keywords = {'if', 'wait', 'before', 'after'}

# Try the parser on a few instructions. Each call prints its phrase list; the
# value of the final call is additionally shown as the cell's output.
parse("if there are no cars coming", keywords)
parse("if there are cars coming, proceed to [2]", keywords)
parse("do not cross on non-intersection", keywords)
parse("if there are cars coming, wait for the car passing and then cross the road", keywords)

['if', 'no car come']

['if', 'car come', 'proceed 2']

['not cross non intersection']

['if', 'car come', 'wait', 'car pass', 'cross road']



['if', 'car come', 'wait', 'car pass', 'cross road']

### English Grammar to Transition Rules

In [107]:
# transition = (starting state, target state, [input symbols], [output symbols])
# State labels. direct_trans looks up "[n]" in this list and uses the position it
# finds as the target state; index 0 holds a placeholder so "[1]" sits at index 1.
states = ['PLACEHOLDER', '[1]', '[2]', '[3]', '[4]']

def is_negation(vp_1, vp_2):
    """Tell whether the ``if`` conditions of two parsed steps negate each other.

    The condition of a step is the phrase that directly follows its first
    'if'.  Two conditions negate each other when exactly one of them contains
    a negation word ("no" or "not") and both contain the same remaining words
    (word order is ignored).

    A step with no 'if', or with 'if' as its last phrase, has no condition and
    therefore never negates anything.

    Args:
        vp_1: Phrase list of the first step.
        vp_2: Phrase list of the second step.

    Returns:
        True if one condition is the negation of the other, otherwise False.
    """
    negations = {"no", "not"}

    def _condition_words(verb_phrases):
        # Words of the phrase right after the first 'if'; empty when there is
        # no 'if' or nothing follows it.
        for pos, phrase in enumerate(verb_phrases):
            if phrase == 'if':
                following = verb_phrases[pos + 1:pos + 2]
                return set(following[0].split()) if following else set()
        return set()

    words_1 = _condition_words(vp_1)
    words_2 = _condition_words(vp_2)
    if not words_1 or not words_2:
        return False

    negated_1 = bool(words_1 & negations)
    negated_2 = bool(words_2 & negations)
    # Exactly one side may be negated; identical conditions are not negations.
    return negated_1 != negated_2 and (words_1 - negations) == (words_2 - negations)

def direct_trans(transition, verb_phrases, flag):
    """Redirect a transition to an explicitly referenced step ("proceed 2").

    Scans the phrases in order for the first whitespace-separated word that is
    entirely numeric.  When one is found, the transition's target becomes the
    position of "[<word>]" in the module-level ``states`` list and its output
    symbols are cleared.

    Words are compared as a whole, so "12" refers to state "[12]" rather than
    "[1]", and a digit embedded in a longer word does not trigger a jump.

    Args:
        transition: ``(start, target, inputs, outputs)`` tuple.
        verb_phrases: Phrase strings produced by ``parse``.
        flag: Whether an earlier rule already handled this step.

    Returns:
        ``(flag, transition)``: ``flag`` is True and ``transition`` is a new
        tuple when a numeric reference was found; otherwise both arguments are
        returned unchanged.

    Raises:
        ValueError: If the referenced label is not in ``states``.
    """
    for phrase in verb_phrases:
        # Phrases are normally strings; already-tokenised input is accepted too.
        words = phrase.split() if isinstance(phrase, str) else phrase
        for word in words:
            if word.isnumeric():
                target = states.index('[' + word + ']')
                return True, (transition[0], target, transition[2], [])
    return flag, transition

def self_trans(transition, verb_phrases, flag):
    """Add a self-loop for a 'wait' step.

    The phrase that follows the first 'wait' is taken as the condition being
    waited for.  While that condition does not hold ("no <condition>") the
    automaton stays in the current state; the condition itself is appended to
    the input symbols of ``transition``.

    Note that ``transition[2]`` is extended in place, so the caller's list is
    modified.

    Args:
        transition: ``(start, target, inputs, outputs)`` tuple.
        verb_phrases: Phrase strings produced by ``parse``.
        flag: Whether an earlier rule already handled this step.

    Returns:
        ``(flag, transition, self_tran)``.  ``self_tran`` is the self-loop
        tuple and ``flag`` is True when a 'wait' with a following phrase was
        found.  Otherwise ``flag`` and ``transition`` are returned unchanged
        and ``self_tran`` is None.
    """
    if 'wait' not in verb_phrases:
        return flag, transition, None

    idx = verb_phrases.index('wait')
    if idx + 1 >= len(verb_phrases):
        # 'wait' is the last phrase: there is no condition to loop on.
        return flag, transition, None

    condition = verb_phrases[idx + 1]
    # Build the loop from a copy of the inputs before the original list grows.
    self_tran = (transition[0], transition[0], transition[2] + ['no ' + condition], [])
    transition[2].append(condition)
    return True, transition, self_tran

def cond_trans(transition, verb_phrases, flag):
    """Handle an 'if <condition>' step.

    The phrase after the first 'if' is the condition; it is appended to the
    transition's input symbols.  The action is the phrase before 'if', or the
    last phrase when 'if' opens the step; it is appended to the output
    symbols.  If that would select the condition itself (a step consisting of
    just 'if' and a condition) no action is recorded.

    A second transition with the same start and target is built for the
    opposite condition: negation words ("no", "not") are removed from a
    negated condition, otherwise "no " is prefixed.  Negation is detected on
    whole words, so words such as "piano" or "snow" are not treated as negated.

    ``transition[2]`` and ``transition[3]`` are extended in place, so the
    caller's lists are modified.

    Args:
        transition: ``(start, target, inputs, outputs)`` tuple.
        verb_phrases: Phrase strings produced by ``parse``.
        flag: Whether an earlier rule already handled this step.

    Returns:
        ``(flag, transition, neg_tran)``.  ``flag`` is True and ``neg_tran``
        is the opposite-condition tuple when an 'if' with a following phrase
        was found.  Otherwise ``flag`` and ``transition`` are returned
        unchanged and ``neg_tran`` is None.
    """
    if 'if' not in verb_phrases:
        return flag, transition, None

    idx = verb_phrases.index('if')
    cond_idx = idx + 1
    if cond_idx >= len(verb_phrases):
        # 'if' is the last phrase: no condition to act on.
        return flag, transition, None

    condition = verb_phrases[cond_idx]
    # "<action> if <cond>" puts the action before 'if'; "if <cond>, ... <action>"
    # puts it at the end of the step.
    action_idx = idx - 1 if idx > 0 else len(verb_phrases) - 1

    transition[2].append(condition)
    if action_idx != cond_idx:
        transition[3].append(verb_phrases[action_idx])

    negations = ('no', 'not')
    words = condition.split()
    if any(word in negations for word in words):
        new_cond = ' '.join(word for word in words if word not in negations)
    else:
        new_cond = 'no ' + condition

    neg_tran = (transition[0], transition[1], [new_cond], [])
    return True, transition, neg_tran

def transition_rule(verb_phrases, cur_state):
    """Build the transitions that leave ``cur_state`` for one parsed step.

    Starts from a default transition ``cur_state -> cur_state + 1`` with empty
    input and output symbols and lets three rules refine it in order:
    ``cond_trans`` ('if'), ``self_trans`` ('wait') and ``direct_trans``
    (numeric step reference).  Extra transitions produced by the first two
    rules are listed before the main one.  If no rule applied, the first
    phrase becomes the output symbol; a step with no phrases at all yields the
    bare default transition.

    Args:
        verb_phrases: Phrase strings produced by ``parse``.
        cur_state: Index of the state this step belongs to.

    Returns:
        List of ``(start, target, inputs, outputs)`` tuples.
    """
    transitions = []
    transition = (cur_state, cur_state + 1, [], [])
    flag = False

    flag, transition, neg_tran = cond_trans(transition, verb_phrases, flag)
    if neg_tran is not None:
        transitions.append(neg_tran)

    flag, transition, self_tran = self_trans(transition, verb_phrases, flag)
    if self_tran is not None:
        transitions.append(self_tran)

    flag, transition = direct_trans(transition, verb_phrases, flag)

    # Plain step: emit its first phrase (nothing to emit when the parse is empty).
    if not flag and verb_phrases:
        transition[3].append(verb_phrases[0])

    transitions.append(transition)
    return transitions

# Sample instructions: a plain step, an 'if ... wait' step, a negated 'if'
# step, and a negated 'if' step that refers to another step by number.
s1, s2, s3, s4 = (
    "look both ways before crossing the road.",
    "If there are cars coming, wait for them to pass before crossing the road",
    "if there are no cars coming, cross the road.",
    "If there are no cars coming, proceed to [2].",
)

# Parse them in order; parse() prints each phrase list as it goes.
vp1, vp2, vp3, vp4 = (parse(text, keywords) for text in (s1, s2, s3, s4))

['look way', 'before', 'cross road']

['if', 'car come', 'wait', 'pass', 'before', 'cross road']

['if', 'no car come', 'cross road']

['if', 'no car come', 'proceed 2']



In [108]:
# Build the transitions for each parsed sample, using states 1-4 in turn, and
# display the four results together as a tuple.
transition_rule(vp1, 1), transition_rule(vp2, 2), transition_rule(vp3, 3), transition_rule(vp4, 4)

([(1, 2, [], ['look way'])],
 [(2, 3, ['no car come'], []),
  (2, 2, ['car come', 'no pass'], []),
  (2, 3, ['car come', 'pass'], ['cross road'])],
 [(3, 4, ['car come'], []), (3, 4, ['no car come'], ['cross road'])],
 [(4, 5, ['car come'], []), (4, 2, ['no car come'], [])])

### Build Automata

In [116]:
def text2automata(steps, keywords):
    """Convert a list of numbered instruction steps into an automaton description.

    Each step is a string of the form "<anchor> <sentence>".  The anchor
    becomes a state label, and the sentence is parsed and turned into
    transitions leaving that state.  States are numbered from 1 in step order;
    index 0 is a placeholder and a 'FINAL' label is appended at the end.

    NOTE: targets for numeric step references are resolved inside
    ``transition_rule`` / ``direct_trans``, which consult the module-level
    ``states`` list rather than the list built here.

    Args:
        steps: Iterable of step strings.
        keywords: Lemmas treated as stand-alone phrases by ``parse``.

    Returns:
        ``(states, transitions, in_symbols, out_symbols)``: the state labels,
        the list of ``(start, target, inputs, outputs)`` tuples, and the sets
        of all input and all output symbols used by those transitions.
    """
    states = ['PLACEHOLDER']
    transitions = []
    for position, step in enumerate(steps, start=1):
        anchor, sentence = step.split(maxsplit=1)
        states.append(anchor)
        transitions.extend(transition_rule(parse(sentence, keywords), position))
    states.append('FINAL')

    in_symbols = {symbol for tran in transitions for symbol in tran[2]}
    out_symbols = {symbol for tran in transitions for symbol in tran[3]}
    return states, transitions, in_symbols, out_symbols

# Three-step walkthrough; every entry is "<anchor> <sentence>".
steps = [
    "[1] Look both ways before crossing the road.",
    "[2] If there are no cars coming, proceed to cross the road.",
    "[3] If there are cars coming, wait for them to pass before crossing the road.",
]
# Rebinds `keywords` to a wider set that also includes 'until' and 'once'.
keywords = {'if', 'wait', 'until', 'before', 'after', 'once'}
automaton = text2automata(steps, keywords)

['look way', 'before', 'cross road']

['if', 'no car come', 'proceed cross road']

['if', 'car come', 'wait', 'pass', 'before', 'cross road']



In [117]:
automaton  # (states, transitions, input symbols, output symbols)

(['PLACEHOLDER', '[1]', '[2]', '[3]', 'FINAL'],
 [(1, 2, [], ['look way']),
  (2, 3, ['car come'], []),
  (2, 3, ['no car come'], ['proceed cross road']),
  (3, 4, ['no car come'], []),
  (3, 3, ['car come', 'no pass'], []),
  (3, 4, ['car come', 'pass'], ['cross road'])],
 {'car come', 'no car come', 'no pass', 'pass'},
 {'cross road', 'look way', 'proceed cross road'})

In [118]:
# Sub-steps of step 1, labelled "[1.1]" ... "[1.4]".
# The "[2]" reference in the last sub-step is resolved by direct_trans against
# the module-level `states` list, not against the anchors collected in this run.
steps = [
    "[1.1] Face the direction you want to cross the road in.",
    "[1.2] Look to the left.",
    "[1.3] Look to the right.",
    "[1.4] If there are no cars coming, proceed to [2].",
]
text2automata(steps, keywords)

['face direction want cross road']

['look left']

['look right']

['if', 'no car come', 'proceed 2']



(['PLACEHOLDER', '[1.1]', '[1.2]', '[1.3]', '[1.4]', 'FINAL'],
 [(1, 2, [], ['face direction want cross road']),
  (2, 3, [], ['look left']),
  (3, 4, [], ['look right']),
  (4, 5, ['car come'], []),
  (4, 2, ['no car come'], [])],
 {'car come', 'no car come'},
 {'face direction want cross road', 'look left', 'look right'})