In [1]:
#Part of speech Basics

In [1]:
import spacy
from spacy import displacy

In [2]:
nlp=spacy.load("en_core_web_sm")

In [3]:
doc=nlp("The quick brown fox jumped over the lazy dog's back")

In [11]:
print(doc[4].text, doc[4].pos_, doc[4].tag_, spacy.explain(doc[4].tag_))

jumped VERB VBD verb, past tense


In [19]:
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}")

The        DET      DT     determiner
quick      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass


In [91]:
doc=nlp("I read books on NLP.")

In [93]:
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}")

I          PRON     PRP    pronoun, personal
read       VERB     VBP    verb, non-3rd person singular present
books      NOUN     NNS    noun, plural
on         ADP      IN     conjunction, subordinating or preposition
NLP        PROPN    NNP    noun, proper singular
.          PUNCT    .      punctuation mark, sentence closer


In [43]:
doc=nlp("I read book on NLP.")

In [97]:
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}")

I          PRON     PRP    pronoun, personal
read       VERB     VBP    verb, non-3rd person singular present
books      NOUN     NNS    noun, plural
on         ADP      IN     conjunction, subordinating or preposition
NLP        PROPN    NNP    noun, proper singular
.          PUNCT    .      punctuation mark, sentence closer


In [99]:
#Count the frequencies  of different coarse-grained POS text

In [119]:
POS_counts=doc.count_by(spacy.attrs.POS)

In [121]:
POS_count=doc.count_by(spacy.attrs.POS)

In [123]:
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1}

In [129]:
doc.vocab[100].text

'VERB'

In [131]:
for k,v  in sorted(POS_counts.items()):
    print(f"{k}.{doc.vocab[k].text:{5}}: {v}")

84.ADJ  : 3
85.ADP  : 1
90.DET  : 2
92.NOUN : 3
94.PART : 1
100.VERB : 1


In [133]:
#Count the different fine-grained tags:

In [137]:
TAG_counts=doc.count_by(spacy.attrs.TAG)

In [147]:
for k,v  in sorted(TAG_counts.items()):
    print(f"{k}.{doc.vocab[k].text:{5}}: {v}")

74.POS  : 1
1292078113972184607.IN   : 1
10554686591937588953.JJ   : 3
15267657372422890137.DT   : 2
15308085513773655218.NN   : 3
17109001835818727656.VBD  : 1


In [149]:
#Count the different dependencies.

In [155]:
DEP_counts=doc.count_by(spacy.attrs.DEP)

In [157]:
for k,v  in sorted(DEP_counts.items()):
    print(f"{k}.{doc.vocab[k].text:{5}}: {v}")

400.advmod: 1
402.amod : 3
415.det  : 2
429.nsubj: 1
439.pobj : 1
443.prep : 1
8110129090154140942.case : 1
8206900633647566924.ROOT : 1


In [7]:
displacy.render(doc, style="dep", jupyter=True, options={"distance":110})

In [11]:
displacy.render(doc,style="dep", jupyter=True, options={"distance":110})

In [23]:
displacy.render(doc,style="dep", jupyter=True,options={"distance":110,"compact":"True","color":"yellow","bg":"pink","font":"Times"})

In [31]:
displacy.serve(doc, style="dep", options={"distance":110},auto_select_port=True)




Using the 'dep' visualizer
Serving on http://0.0.0.0:5001 ...



127.0.0.1 - - [09/Oct/2024 16:38:20] "GET / HTTP/1.1" 200 9191
127.0.0.1 - - [09/Oct/2024 16:38:20] "GET /favicon.ico HTTP/1.1" 200 9191


Shutting down server on port 5001.


In [39]:
#Headling Large Text

In [72]:
doc=nlp("This is a sentence. This is another sentence, possibly longer sentence.")

In [43]:
spans=list(doc.sents)

In [49]:
displacy.serve(spans,style="dep", options={"distance":110},auto_select_port=True)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5001 ...



127.0.0.1 - - [09/Oct/2024 16:42:43] "GET / HTTP/1.1" 200 8956


Shutting down server on port 5001.


In [76]:
displacy.serve(doc,style="dep", options={"distance":110,"compact":"True","color":"white","bg":"black","font":"Times"},auto_select_port=True)



Using the 'dep' visualizer
Serving on http://0.0.0.0:5001 ...

Shutting down server on port 5001.
