In [1]:
import spacy

In [2]:
# Load the "en_core_web_sm" pipeline package and bind the resulting object to `nlp`.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Calling the loaded pipeline `nlp` on a string processes that text;
# the result is kept in `doc` for the cells that follow.
doc = nlp("Tesla is looking at buying U.S. startup for $6 million")

In [4]:
# Print the text of every token in `doc`, one per line.
for tok in doc:
    print(tok.text)

Tesla
is
looking
at
buying
U.S.
startup
for
$
6
million


In [5]:
# `pos` is the part-of-speech attribute; without the trailing underscore
# it prints as an integer ID rather than a readable label.
for tok in doc:
    print(f"{tok.text} {tok.pos}")

Tesla 92
is 87
looking 100
at 85
buying 100
U.S. 96
startup 92
for 85
$ 99
6 93
million 93


In [6]:
# `pos_` (with the trailing underscore) is the readable string form of the part-of-speech tag.
for tok in doc:
    print(f"{tok.text} {tok.pos_}")

Tesla NOUN
is AUX
looking VERB
at ADP
buying VERB
U.S. PROPN
startup NOUN
for ADP
$ SYM
6 NUM
million NUM


In [7]:
# `dep_` is the syntactic dependency label of each token, printed next to its POS tag.
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla NOUN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [8]:
# Show the processing pipeline as a list of (name, component) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x18295790640>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x18295766080>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x182958382e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x182959e3580>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x182957989c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x18295838580>)]

In [9]:
# Show only the names of the pipeline components.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

### tokenization

Tokenization splits text into tokens, such as words and punctuation marks.

In [13]:
# A sentence containing a contraction, to see how "isn't" is tokenized.
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [14]:
# Text, part-of-speech tag and dependency label for every token in `doc2`.
for tok in doc2:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla NOUN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [12]:
# Same sentence with a run of extra spaces between "looking" and "into".
# A separate name is used so `doc2`, which other cells index into, is not overwritten.
doc2_spaced = nlp("Tesla isn't looking       into startups anymore.")

# The extra whitespace shows up as its own token (tagged SPACE).
for token in doc2_spaced:
    print(token.text, token.pos_, token.dep_)

Tesla NOUN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
       SPACE dep
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [16]:
# Index into the Doc to get a single token and read its part-of-speech tag.
doc2[0].pos_

'NOUN'

In [17]:
# Dependency label of the first token in `doc2`.
doc2[0].dep_

'nsubj'

### spans

In [18]:
# The sample text is built from adjacent string literals (joined into one
# string) instead of backslash line continuations inside a single literal.
# NOTE(review): "commmonly" is misspelled in the sample text; kept as-is so the input is unchanged.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [19]:
# Slice the Doc by token index; tokens 16-29 cover the quoted sentence, including both quotation marks.
life_quote = doc3[16:30]

In [20]:
# Printing the slice shows its text.
print(life_quote)

"Life is what happens to us while we are making other plans"


In [21]:
# A slice of a Doc is a Span object.
type(life_quote)

spacy.tokens.span.Span

In [22]:
# The object returned by calling `nlp` on text is a Doc.
type(doc3)

spacy.tokens.doc.Doc

In [23]:
# Three short sentences, used to demonstrate sentence segmentation.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence.")

In [24]:
# `doc4.sents` yields the sentences of the Doc one at a time.
for sent in doc4.sents:
    print(sent)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [25]:
# Token 6 is the first word of the second sentence.
doc4[6]

This

In [26]:
# `is_sent_start` reports whether this token begins a sentence.
doc4[6].is_sent_start

True

In [28]:
# Token 8 sits in the middle of the second sentence.
doc4[8]

another

In [27]:
# A token that does not begin a sentence reports False here.
doc4[8].is_sent_start

False