In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Autonomous cars shift insurance liability toward manufacturers")
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
            chunk.root.head.text)

Autonomous cars cars nsubj shift
insurance liability liability dobj shift
manufacturers manufacturers pobj toward


In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

for token in doc:
    print(token, token.dep_)


Apple nsubj
is aux
looking ROOT
at prep
buying pcomp
U.K. dobj
startup dep
for prep
$ quantmod
1 compound
billion pobj


In [None]:
import spacy

# "my_custom_el_pipeline" is assumed to be a custom NLP pipeline that was trained and serialized to disk
nlp = spacy.load("en_core_web_sm")
doc = nlp("Looking for a room from the 1st to the 17th for 8 people")

# Document level
ents = [(e.text, e.label_, e.kb_id_) for e in doc.ents]
# print(ents)  # [('Ada Lovelace', 'PERSON', 'Q7259'), ('London', 'GPE', 'Q84')]
print(doc.ents)
# Token level
for d in doc:
    print([d.text, d.ent_type_, d.ent_kb_id_])
# ent_ada_0 = [doc[0].text, doc[0].ent_type_, doc[0].ent_kb_id_]
# ent_ada_1 = [doc[1].text, doc[1].ent_type_, doc[1].ent_kb_id_]
# ent_london_5 = [doc[5].text, doc[5].ent_type_, doc[5].ent_kb_id_]
# print(ent_ada_0)  # ['Ada', 'PERSON', 'Q7259']
# print(ent_ada_1)  # ['Lovelace', 'PERSON', 'Q7259']
# print(ent_london_5)  # ['London', 'GPE', 'Q84']

(2,)
['I', '', '']
["'d", '', '']
['like', '', '']
['an', '', '']
['en', '', '']
['suite', '', '']
['room', '', '']
['for', '', '']
['2', 'CARDINAL', '']
['adults', '', '']
['please', '', '']


In [11]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence. This is another sentence.")
assert doc.has_annotation("SENT_START")
for sent in doc.sents:
    print(sent.text)

This is a sentence.
This is another sentence.


In [12]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "I saw The Who perform. Who did you see?"
doc1 = nlp(text)
print(doc1[2].tag_, doc1[2].pos_)  # DT DET
print(doc1[3].tag_, doc1[3].pos_)  # WP PRON

# Add attribute ruler with exception for "The Who" as NNP/PROPN NNP/PROPN
ruler = nlp.get_pipe("attribute_ruler")
# Pattern to match "The Who"
patterns = [[{"LOWER": "the"}, {"TEXT": "Who"}]]
# The attributes to assign to the matched token
attrs = {"TAG": "NNP", "POS": "PROPN"}
# Add rules to the attribute ruler
ruler.add(patterns=patterns, attrs=attrs, index=0)  # "The" in "The Who"
ruler.add(patterns=patterns, attrs=attrs, index=1)  # "Who" in "The Who"

doc2 = nlp(text)
print(doc2[2].tag_, doc2[2].pos_)  # NNP PROPN
print(doc2[3].tag_, doc2[3].pos_)  # NNP PROPN
# The second "Who" remains unmodified
print(doc2[5].tag_, doc2[5].pos_)  # WP PRON

DT DET
WP PRON
NNP PROPN
NNP PROPN
. PUNCT


In [30]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("I want to book for today")

for token in doc:
    print(token.text, token.pos_, token.dep_, token.ent_type_)

I PRON nsubj 
want VERB ROOT 
to PART aux 
book VERB xcomp 
for ADP prep 
today NOUN pobj DATE


In [4]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("4 single rooms from the 23rd until the 29 May")
for token in doc:
    print(token.text, token.head.text, token.ent_iob_, token.ent_type_, token.dep_, token.left_edge, token.right_edge)
displacy.serve(doc, style="dep", auto_select_port=True)


4 rooms B CARDINAL nummod 4 4
single rooms O  amod single single
rooms rooms O  ROOT 4 May
from rooms O  prep from 23rd
the 23rd B DATE det the the
23rd from I DATE pobj the 23rd
until rooms O  prep until May
the May O  det the the
29 May O  nummod 29 29
May until O  pobj the May



Using the 'dep' visualizer
Serving on http://0.0.0.0:5001 ...

Shutting down server on port 5001.
