In [2]:
import spacy
from spacy import displacy
import nltk

In [6]:
nlp = spacy.load('en_core_web_sm')

doc = nlp("The quick brown fox jumped over the lazy dog's back.")

print(doc.text)
print(doc[4].text)
print(doc[4].pos_)
print(doc[4].tag_)

print('\n')

for token in doc:
    print(f'{token.text:<12} {token.pos_:<10} {token.tag_:<10} {spacy.explain(token.tag_)}')

The quick brown fox jumped over the lazy dog's back.
jumped
VERB
VBD


The          DET        DT         determiner
quick        ADJ        JJ         adjective (English), other noun-modifier (Chinese)
brown        ADJ        JJ         adjective (English), other noun-modifier (Chinese)
fox          NOUN       NN         noun, singular or mass
jumped       VERB       VBD        verb, past tense
over         ADP        IN         conjunction, subordinating or preposition
the          DET        DT         determiner
lazy         ADJ        JJ         adjective (English), other noun-modifier (Chinese)
dog          NOUN       NN         noun, singular or mass
's           PART       POS        possessive ending
back         NOUN       NN         noun, singular or mass
.            PUNCT      .          punctuation mark, sentence closer


In [8]:
doc=nlp(u"I read books on NLP.")
doc2=nlp(u"I read a book on NLP.")
word = doc[1]
word2 = doc2[1]
print(word.text)
print(word2.text)
print("\n")
token = word
token2 = word2
print(f"{token.text:<12} {token.pos_:<10} {token.tag_:<10} {spacy.explain(token.tag_)}")
print(f"{token2.text:<12} {token2.pos_:<10} {token2.tag_:<10} {spacy.explain(token2.tag_)}")

read
read


read         VERB       VBP        verb, non-3rd person singular present
read         VERB       VBD        verb, past tense


In [12]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")
pos_counts = doc.count_by(spacy.attrs.POS)
print(pos_counts)
print('\n')
print(doc.vocab[84].text)
print(doc.vocab[92].text)
print(doc.vocab[97].text)
print('\n')

for key,value in sorted(pos_counts.items()):
    print(f"{key}. {doc.vocab[key].text:} : {value}")

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}


ADJ
NOUN
PUNCT


84. ADJ : 3
85. ADP : 1
90. DET : 2
92. NOUN : 3
94. PART : 1
97. PUNCT : 1
100. VERB : 1


In [13]:
import spacy.attrs

tag_counts = doc.count_by(spacy.attrs.TAG)
for k,v in sorted(tag_counts.items()):
    print(f"{k}.{doc.vocab[k].text:} {v}")

74.POS 1
1292078113972184607.IN 1
10554686591937588953.JJ 3
12646065887601541794.. 1
15267657372422890137.DT 2
15308085513773655218.NN 3
17109001835818727656.VBD 1


In [14]:
options = {'distance':110,'compact':True,'color':'yellow','bg':'#09a3d5','font':'Times'}
displacy.render(doc,style='dep',jupyter=True,options=options)

In [16]:
from spacy.tokens import Span
from spacy.matcher import PhraseMatcher
from spacy import displacy

def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+" - "+ent.label_ + " - "+ str(spacy.explain(ent.label_)))
    else:
        print("No entities found")

doc = nlp(u"Our Company created a brand new vacuum cleaner."
          u"This new vacuum-cleaner is the best in show.")

show_ents(doc)

matcher = PhraseMatcher(nlp.vocab)
phrase_list = ['vacuum cleaner','vacuum-cleaner']

phrase_patterns = [nlp(text) for text in phrase_list]

matcher.add('newproduct',None,*phrase_patterns)
found_matches = matcher(doc)
print(found_matches)

PROD = doc.vocab.strings[u"PRODUCT"]
new_ents=[Span(doc,match[1],match[2],label=PROD) for match in found_matches]

doc.ents = list(doc.ents) + new_ents

show_ents(doc)

doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")

len([ent for ent in doc.ents if ent.label == 'MONEY'])


No entities found
[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]
vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


0