In [None]:
import spacy
# Load the spaCy pipeline that is used below to parse the story text.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Read the whole story into memory first, then parse it once the file is
# closed; `doc` is the parsed document the following cells work from.
with open('peterrabbit.txt', encoding='utf-8') as story_file:
    x = story_file.read()
doc = nlp(x)

In [None]:
# Materialise the sentence spans so they can be indexed and counted.
sentences = [*doc.sents]

In [None]:
# Index 2 is the third sentence (list indices are zero-based).
third_sentence = sentences[2]

In [None]:
# Printing the span shows the sentence text.
print('Third Sentence:', third_sentence)

Third Sentence: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.




In [None]:
# Per-token report for the third sentence: coarse POS, fine-grained tag,
# and spaCy's plain-English explanation of that tag.
for token in third_sentence:
    # A whitespace token (this sentence contains a line break) would split its
    # row across two lines if printed raw, so show it in escaped form instead.
    display_text = repr(token.text) if token.is_space else token.text
    print(f"Token {display_text:<10} POS {token.pos_:<10} TAG {token.tag_:<10} DESCRIPTION {spacy.explain(token.tag_)}")

Token They       POS PRON       TAG PRP        DESCRIPTION pronoun, personal
Token lived      POS VERB       TAG VBD        DESCRIPTION verb, past tense
Token with       POS ADP        TAG IN         DESCRIPTION conjunction, subordinating or preposition
Token their      POS PRON       TAG PRP$       DESCRIPTION pronoun, possessive
Token Mother     POS PROPN      TAG NNP        DESCRIPTION noun, proper singular
Token in         POS ADP        TAG IN         DESCRIPTION conjunction, subordinating or preposition
Token a          POS DET        TAG DT         DESCRIPTION determiner
Token sand       POS NOUN       TAG NN         DESCRIPTION noun, singular or mass
Token -          POS PUNCT      TAG HYPH       DESCRIPTION punctuation mark, hyphen
Token bank       POS NOUN       TAG NN         DESCRIPTION noun, singular or mass
Token ,          POS PUNCT      TAG ,          DESCRIPTION punctuation mark, comma
Token underneath POS ADP        TAG IN         DESCRIPTION conjunction, subordinatin

In [None]:
import spacy
from collections import Counter

In [None]:
# Tally how often each coarse part-of-speech label occurs in the document.
pos_count = Counter(tok.pos_ for tok in doc)

In [None]:
# The heading ends with the newline escape ("\n", backslash) so a blank line
# separates it from the table; a forward slash would be printed literally.
print("Frequency of pos tags:\n")
# One row per POS label, left-aligned in a 10-character column.
for pos, freq in pos_count.items():
    print(f"{pos: <10}: {freq}")

Frequency of pos tags:/n
DET       : 90
PROPN     : 75
ADP       : 124
PUNCT     : 172
NUM       : 8
SPACE     : 99
ADV       : 65
SCONJ     : 20
NOUN      : 173
PRON      : 108
VERB      : 131
ADJ       : 54
CCONJ     : 61
AUX       : 50
PART      : 28


In [None]:
# Drop punctuation and whitespace tokens; everything else counts toward the total.
tokens = [tok for tok in doc if not (tok.is_punct or tok.is_space)]

In [None]:
# Common nouns and proper nouns both count as nouns here.
noun_tokens = [tok for tok in tokens if tok.pos_ in ("NOUN", "PROPN")]

In [None]:
# Share of nouns among the filtered tokens, as a percentage.
if tokens:
    noun_percentage = len(noun_tokens) / len(tokens) * 100
else:
    # No tokens at all: report 0 rather than dividing by zero.
    noun_percentage = 0

In [None]:
# Report the noun share rounded to two decimal places.
print(f"Percentage of nouns: {noun_percentage:.2f}%")

Percentage of nouns: 25.08%


In [None]:
# Re-derive the sentence list from `doc` (an earlier cell builds the same list).
sentences = [*doc.sents]

In [None]:
# Index 2 is the third sentence (list indices are zero-based).
third_sentence = sentences[2]

In [None]:
# Show the sentence whose dependency structure is examined next.
print('Third Sentence:', third_sentence)

Third Sentence: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.




In [None]:
# Dependency report for the third sentence: token text, dependency label,
# coarse POS, and the text of the token's syntactic head.
for token in third_sentence:
    # The line-break token inside this sentence would split its row across two
    # lines if printed raw, so whitespace tokens are shown in escaped form.
    display_text = repr(token.text) if token.is_space else token.text
    print(f'{display_text:12} {token.dep_:<10} {token.pos_:<10} {token.head.text:<10}')

They         nsubj      PRON       lived     
lived        ROOT       VERB       lived     
with         prep       ADP        lived     
their        poss       PRON       Mother    
Mother       pobj       PROPN      with      
in           prep       ADP        lived     
a            det        DET        bank      
sand         compound   NOUN       bank      
-            punct      PUNCT      bank      
bank         pobj       NOUN       in        
,            punct      PUNCT      bank      
underneath   prep       ADP        bank      
the          det        DET        root      
root         pobj       NOUN       underneath
of           prep       ADP        root      
a            det        DET        tree      

            dep        SPACE      a         
very         advmod     ADV        big       
big          amod       ADJ        tree      
fir          compound   NOUN       tree      
-            punct      PUNCT      tree      
tree         pobj       NOUN      

In [None]:
from spacy import displacy

In [None]:
# Draw the dependency parse of the third sentence in the notebook.
displacy.render(
    third_sentence,
    style='dep',
    jupyter=True,
)

In [None]:
# Pair every entity's surface text with its label, then show the first two.
entities = [(span.text, span.label_) for span in doc.ents]
print('First two named entities')
for ent_text, ent_label in entities[:2]:
    print(f'{ent_text} {ent_label}')

First two named entities
The Tale of Peter Rabbit WORK_OF_ART
Beatrix Potter PERSON


In [None]:
# Re-derive the sentence list from `doc` (an earlier cell builds the same list).
sentences = [*doc.sents]

In [None]:
# How many sentences spaCy's segmenter found in the document.
print("No. of sentences:", len(sentences))

No. of sentences: 57


In [None]:
# Preview the first three sentences, numbered from 1, with the surrounding
# whitespace trimmed from each.
for i, sent in enumerate(sentences[:3], 1):
    print(f"Sentence {i}: {sent.text.strip()}")

Sentence 1: The Tale of Peter Rabbit, by Beatrix Potter (1902).
Sentence 2: Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Sentence 3: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.


In [None]:
sentences = list(doc.sents)
# `Span.ents` gives the named entities that lie entirely inside the sentence,
# so a non-empty result means the sentence contains at least one entity.
sent_with_entities = [sent for sent in sentences if sent.ents]

In [None]:
# Compare the overall sentence count with the number that contain an entity.
print(f"Total sentences: {len(sentences)}")
print(f"Sentences containing named entities: {len(sent_with_entities)}")

Total sentences: 57
Sentences containing named entities: 38


In [None]:
# For each sentence that has entities, print the sentence followed by the
# texts of the entities found inside it.
for sent in sent_with_entities:
    # `Span.ents` returns the entities fully contained in this sentence.
    ents = [ent.text for ent in sent.ents]
    print(f"\nSentence: {sent.text.strip()}")
    print(f"Named Entities: {ents}")


Sentence: The Tale of Peter Rabbit, by Beatrix Potter (1902).
Named Entities: ['The Tale of Peter Rabbit', 'Beatrix Potter', '1902']

Sentence: Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Named Entities: ['four', 'Mopsy', 'Cotton-tail', 'Peter']

Sentence: 'Now my dears,' said old Mrs. Rabbit one morning, 'you may go into
the fields or down the lane, but don't go into Mr. McGregor's garden:
your Father had an accident there; he was put in a pie by Mrs.
McGregor.'

'Now run along, and don't get into mischief.
Named Entities: ['Rabbit', 'one morning', 'McGregor', 'McGregor']

Sentence: Then old Mrs. Rabbit took a basket and her umbrella, and went through
the wood to the baker's.
Named Entities: ['Rabbit']

Sentence: She bought a loaf of brown bread and five
currant buns.
Named Entities: ['five']

Sentence: Flopsy, Mopsy, and Cottontail, who were good little bunnies, went
down the lane to gather black

In [None]:
from spacy import displacy

In [None]:
# Sentence spans of the document, collected into a list for indexing.
list_of_sents = [*doc.sents]

In [None]:
# Take the opening sentence and echo its text before rendering it.
first_sentence = list_of_sents[0]
print("Sentence to visualisation", first_sentence.text)

Sentence to visualisation The Tale of Peter Rabbit, by Beatrix Potter (1902).




In [None]:
# Highlight the named entities of the first sentence in the notebook.
displacy.render(
    first_sentence,
    style='ent',
    jupyter=True,
)