# Part 1

In [1]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_md")

# Part 2

In [2]:
text = "The quick brown fox jumps over the lazy dog."

doc = nlp(text)

# http://127.0.0.1:5000
displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


# Part 3

In [32]:
text = "Apple is looking at buying U.K. startup for $1 billion."
doc = nlp(text)

print(f"{"TEXT":<12} | {"DEP":<10} | {"HEAD TEXT":<12} | {"HEAD POS":<8} | {"CHILDREN"}")
print("-" * 70)

for token in doc:
    children = [child.text for child in token.children]

    print(f"{token.text:<12} | {token.dep_:<10} | {token.head.text:<12} | {token.head.pos_:<8} | {children}")

TEXT         | DEP        | HEAD TEXT    | HEAD POS | CHILDREN
----------------------------------------------------------------------
Apple        | nsubj      | looking      | VERB     | []
is           | aux        | looking      | VERB     | []
looking      | ROOT       | looking      | VERB     | ['Apple', 'is', 'at', '.']
at           | prep       | looking      | VERB     | ['buying']
buying       | pcomp      | at           | ADP      | ['startup']
U.K.         | compound   | startup      | NOUN     | []
startup      | dobj       | buying       | VERB     | ['U.K.', 'for']
for          | prep       | startup      | NOUN     | ['billion']
$            | quantmod   | billion      | NUM      | []
1            | compound   | billion      | NUM      | []
billion      | pobj       | for          | ADP      | ['$', '1']
.            | punct      | looking      | VERB     | []


# Part 4

In [30]:
text = "The cat chased the mouse and the dog watched them."
doc = nlp(text)

for token in doc:
    if (token.pos_ == "VERB"):
        verb = token.text
        subject = ""
        obj = ""

        for child in token.children:
            if (child.dep_ == "nsubj"):
                subject = child.text
            if (child.dep_ == "dobj"):
                obj = child.text

        if (subject and obj):
            print(f"Found Triplet: ({subject}, {verb}, {obj})")

Found Triplet: (cat, chased, mouse)
Found Triplet: (dog, watched, them)


In [5]:
text = "The big, fluffy white cat is sleeping on the warm mat."
doc = nlp(text)

for token in doc:
    if (token.pos_ == "NOUN"):
        adjectives = []
        for child in token.children:
            if (child.dep_ == "amod"):
                adjectives.append(child.text)

        if (adjectives):
            print(f"Danh từ '{token.text}' được bổ nghĩa bởi các tính từ: {adjectives}")

Danh từ 'cat' được bổ nghĩa bởi các tính từ: ['big', 'fluffy', 'white']
Danh từ 'mat' được bổ nghĩa bởi các tính từ: ['warm']


# Part 5

In [33]:
def find_main_verb(doc):
    for token in doc:
        if (token.dep_ == "ROOT"):
            return token
    
    return None

print(f"ROOT: ", find_main_verb(doc))

ROOT:  looking


In [34]:
def find_noun_chunks(doc):
    chunks = []
    modifiers = {"det", "amod", "compound", "nummod"}

    for token in doc:
        if token.pos_ in ("NOUN", "PROPN"):
            left_mods = [child for child in token.children
                         if child.dep_ in modifiers and child.i < token.i]

            chunk_tokens = sorted(left_mods + [token], key=lambda t: t.i)

            chunk_text = " ".join(t.text for t in chunk_tokens)
            chunks.append(chunk_text)

    return chunks

print(find_noun_chunks(doc))

['Apple', 'U.K.', 'U.K. startup']


In [36]:
def get_path_to_root(target, doc):
    path = []
    for token in doc:
        if (token.text == target):
            path.append(token)
            tmp = token
        
            while (tmp != tmp.head):
                tmp = tmp.head
                path.append(tmp)
            
            return path

    return None
                
print(get_path_to_root("startup", doc))

[startup, buying, at, looking]
