In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' model once; every later cell calls `nlp` to build Doc objects.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Parse a sample sentence. The `u` prefix is omitted: str is already Unicode in Python 3.
doc = nlp('Tesla is looking at buying U.S. startup for $6 million')

In [4]:
# One line per token: its text, part-of-speech label and dependency label.
for tok in doc:
    print(' '.join((tok.text, tok.pos_, tok.dep_)))

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [5]:
# Show the loaded pipeline as a list of (name, component) pairs.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0xc0de59af08>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0xc0e0a80888>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0xc0e0a80828>)]

In [6]:
# Component names only (the same components listed by nlp.pipeline).
nlp.pipe_names

['tagger', 'parser', 'ner']

In [19]:
# NOTE(review): "is't" looks like a typo for "isn't"; the recorded output shows it
# kept as a single token. Correcting it would presumably change tokenization and
# shift the doc2[4] index used by later cells - confirm before changing.
doc2 = nlp("Tesla is't looking into startups anymore")

# One line per token: text, part-of-speech label, dependency label.
for tok in doc2:
    print(' '.join((tok.text, tok.pos_, tok.dep_)))

Tesla PROPN compound
is't DET nsubj
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod


In [9]:
# Displaying a Doc in the notebook shows its original text.
doc2

Tesla is't looking into startups anymore

In [10]:
# Integer indexing a Doc returns the token at that position (here the first one).
doc2[0]

Tesla

In [11]:
# Confirm that nlp(...) returned a spaCy Doc object.
type(doc2)

spacy.tokens.doc.Doc

In [12]:
# Part-of-speech label (pos_) of the first token, as a string.
doc2[0].pos_

'PROPN'

In [13]:
# Dependency label (dep_) of the first token, as a string.
doc2[0].dep_

'compound'

In [14]:
# Surface form of token 4 followed by its lemma, one per line.
print(doc2[4].text, doc2[4].lemma_, sep='\n')

startups
startup


In [15]:
# Part-of-speech label of token 4, then its tag_ value paired with the
# human-readable description returned by spacy.explain.
print(doc2[4].pos_)
print(' / '.join((doc2[4].tag_, spacy.explain(doc2[4].tag_))))

NOUN
NNS / noun, plural


In [22]:
# Boolean token attributes of the first token: is_alpha, then is_stop.
print(doc2[0].is_alpha, doc2[0].is_stop, sep='\n')

True
False


In [23]:
# Longer sample text used to demonstrate slicing a Doc into a Span.
# The literal is assembled with implicit string concatenation instead of
# backslash continuations inside the quotes, and the misspelling "commmonly"
# is corrected. The word count is unchanged, so the token positions sliced in
# the next cell (16:30) still select the same quoted phrase.
doc3 = nlp(
    'Although commonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [24]:
# Slice tokens 16-29 of doc3; the printed result is the quoted
# "Life is what happens..." phrase, including both quotation-mark tokens.
life_quote = doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [26]:
# A slice of a Doc is a Span, not a new Doc.
type(life_quote)

spacy.tokens.span.Span

In [27]:
# Three short sentences for the sentence-segmentation examples that follow.
doc4 = nlp('This is the first sentence. This is another sentence. This is the last sentence.')

In [28]:
# doc4.sents yields one span per detected sentence; print each on its own line.
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [29]:
# Token 6 is flagged as the start of a sentence (the cell output is True).
doc4[6].is_sent_start

True

In [30]:
# Tokenize a quoted sentence and print the tokens on one line, separated by ' | ',
# to show how quotes, contractions and punctuation are split.
# Note: this rebinds the notebook-level name `doc` used by an earlier cell.
mystring = '"We\'re moving to L.A.!"'
doc = nlp(mystring)
for tok in doc:
    print(tok.text, end=' | ')

" | We | 're | moving | to | L.A. | ! | " | 

In [32]:
# The abbreviations for "Saint" and "United States" are each kept as a single token.
doc5 = nlp("Let's visit St. Louis in the U.S. next year.")

for token in doc5:
    print(token)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [33]:
# Number of tokens in doc5 (the final period counts as its own token).
len(doc5)

11

In [34]:
# Two near-identical sentences, used next to show that a Doc's tokens cannot be reassigned.
doc6 = nlp('My dinner was horrible.')
doc7 = nlp('Your dinner was delicious.')

In [35]:
# Try to change "My dinner was horrible" to "My dinner was delicious".
# A Doc does not support item assignment, so this raises TypeError. The error is
# the point of the demonstration, so catch it and print the message instead of
# letting an uncaught exception stop a Restart & Run All of the notebook.
try:
    doc6[3] = doc7[3]
except TypeError as err:
    print('TypeError:', err)

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [37]:
# Visualize the entity recognizer's output with displaCy, rendered inline in the notebook.
from spacy import displacy

doc8 = nlp('Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc8, jupyter=True, style='ent')