# IMPORTING NECESSARY LIBRARIES

In [1]:
# run the next line only once if needed 
# This model includes word vectors and is useful for various natural language processing tasks.
!python -m spacy download en_core_web_lg 

# Importing the spaCy library
import spacy

# Loading the downloaded English language model for spaCy
nlp = spacy.load("en_core_web_lg")

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
     ---------------------------------------- 0.0/587.7 MB ? eta -:--:--
     -------------------------------------- 0.0/587.7 MB 330.3 kB/s eta 0:29:40
     -------------------------------------- 0.1/587.7 MB 656.4 kB/s eta 0:14:56
     ---------------------------------------- 0.3/587.7 MB 2.7 MB/s eta 0:03:38
     ---------------------------------------- 0.6/587.7 MB 3.5 MB/s eta 0:02:49
     ---------------------------------------- 0.8/587.7 MB 3.7 MB/s eta 0:02:38
     ---------------------------------------- 1.0/587.7 MB 4.2 MB/s eta 0:02:19
     ---------------------------------------- 1.2/587.7 MB 4.4 MB/s eta 0:02:13
     ---------------------------------------- 1.4/587.7 MB 4.3 MB/s eta 0:02:16
     ---------------------------------------- 1.8/587.7 MB 4.7 MB/s eta 0:02:05
     -------------------------

# TOKENIZATION ON SMALL TEXT

In [2]:
# Define a text string containing a sentence.
text = "My cousin kajal Agarwal likes fancy adventure games."

# Process the text using the spaCy language model loaded previously.
doc = nlp(text)

# Iterate through each token in the processed document and print them with a delimiter "|".
for token in doc:
    print(token, end=' | ')

My | cousin | kajal | Agarwal | likes | fancy | adventure | games | . | 

# THE ATTRIBUTES THE SPACY ADDS

In [3]:
# Import the pandas library with alias pd for data manipulation and analysis.
import pandas as pd

# Define a function display_nlp that takes a spaCy document (doc) and an optional flag (include_punct) 
# to decide whether to include punctuation tokens in the output DataFrame.
def display_nlp(doc, include_punct=False):
    """
    Generate a DataFrame for visualization of spaCy tokens.
    
    Parameters:
        doc (spacy.Doc): The processed spaCy document.
        include_punct (bool): Flag to include punctuation tokens. Default is False.

    Returns:
        pd.DataFrame: DataFrame containing token information.
    """
    # Initialize an empty list to store rows of token information.
    rows = []
    
    # Iterate through each token in the spaCy document.
    for i, t in enumerate(doc):
        # Check if the token is not punctuation or if include_punct flag is True.
        if not t.is_punct or include_punct:
            # Create a dictionary containing token information.
            row = {'token': i,  'text': t.text, 'lemma_': t.lemma_, 
                   'is_stop': t.is_stop, 'is_alpha': t.is_alpha,
                   'pos_': t.pos_, 'dep_': t.dep_, 
                   'ent_type_': t.ent_type_, 'ent_iob_': t.ent_iob_}
            # Append the dictionary to the rows list.
            rows.append(row)
    
    # Create a DataFrame from the list of token information rows.
    df = pd.DataFrame(rows).set_index('token')
    
    # Set the index name to None for cleaner display.
    df.index.name = None
    
    # Return the DataFrame.
    return df

# Call the display_nlp function with the spaCy document (doc) to generate a DataFrame
# for visualization of spaCy tokens.
display_nlp(doc)

Unnamed: 0,text,lemma_,is_stop,is_alpha,pos_,dep_,ent_type_,ent_iob_
0,My,my,True,True,PRON,poss,,O
1,cousin,cousin,False,True,NOUN,nsubj,,O
2,kajal,kajal,False,True,PROPN,compound,PERSON,B
3,Agarwal,Agarwal,False,True,PROPN,appos,PERSON,I
4,likes,like,False,True,VERB,ROOT,,O
5,fancy,fancy,False,True,ADJ,amod,,O
6,adventure,adventure,False,True,NOUN,compound,,O
7,games,game,False,True,NOUN,dobj,,O


# REMOVING STOPWORS

In [4]:
# Define a text string containing a sentence.
text = "Dear Vidya, we need to sit down and talk. Regards, Kajal"

# Process the text using the spaCy language model loaded previously.
doc = nlp(text)

# Create a list comprehension to filter out tokens that are neither stop words nor punctuation.
non_stop = [t for t in doc if not t.is_stop and not t.is_punct]

# Print the list of non-stopword and non-punctuation tokens.
print(non_stop)

[Dear, Vidya, need, sit, talk, Regards, Kajal]


# FINDING ALL NOUNS

In [5]:
# Define a text string containing a sentence.
text = "My cousin kajal Agarwal likes fancy adventure games."

# Process the text using the spaCy language model loaded previously.
doc = nlp(text)

# Create a list comprehension to filter out tokens that are nouns or proper nouns.
nouns = [t for t in doc if t.pos_ in ['NOUN', 'PROPN']]

# Print the list of tokens identified as nouns or proper nouns.
print(nouns)

[cousin, kajal, Agarwal, adventure, games]


# NAMED ENTITY RECOGNITION

In [6]:
# Iterate through each named entity in the processed document and print its text and label.
for ent in doc.ents:
    print(f"({ent.text}, {ent.label_})", end=" ")

(kajal Agarwal, PERSON) 

# TRYING HARDER ONE

In [7]:
# Define a text string containing a sentence.
text = "James O'Neill, chairman of World Cargo Inc, lives in San Francisco."

# Process the text using the spaCy language model loaded previously.
doc = nlp(text)

# Iterate through each named entity in the processed document and print its text and label.
for ent in doc.ents:
    print(f"({ent.text}, {ent.label_})", end=" ")

(James O'Neill, PERSON) (World Cargo Inc, ORG) (San Francisco, GPE) 

# VISUALIZING NERS

In [8]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the spaCy document (doc) with entity visualization style and display it in Jupyter Notebook.
displacy.render(doc, style='ent', jupyter=True)

# LET'S TRY A REAL DATASET


INSTALLING NECCESSARY LIBRARIES

In [9]:
!pip install html5lib
!pip install --upgrade pip setuptools
!pip install beautifulsoup4

Collecting html5lib
  Obtaining dependency information for html5lib from https://files.pythonhosted.org/packages/6c/dd/a834df6482147d48e225a49515aabc28974ad5a4ca3215c18a882565b028/html5lib-1.1-py2.py3-none-any.whl.metadata
  Downloading html5lib-1.1-py2.py3-none-any.whl.metadata (16 kB)
Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
   ---------------------------------------- 0.0/112.2 kB ? eta -:--:--
   --- ------------------------------------ 10.2/112.2 kB ? eta -:--:--
   ------------------------------------ --- 102.4/112.2 kB 1.2 MB/s eta 0:00:01
   ---------------------------------------- 112.2/112.2 kB 1.1 MB/s eta 0:00:00
Installing collected packages: html5lib
Successfully installed html5lib-1.1
Collecting pip
  Obtaining dependency information for pip from https://files.pythonhosted.org/packages/8a/6a/19e9fe04fca059ccf770861c7d5721ab4c2aebc539889e97c7977528a53b/pip-24.0-py3-none-any.whl.metadata
  Using cached pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Collecting set

ERROR: To modify pip, please run the following command:
C:\Users\DELL\anaconda3\python.exe -m pip install --upgrade pip setuptools




# LOADING DATASET - ARTICLE 1

In [10]:
# Import necessary libraries
from bs4 import BeautifulSoup
import requests
import re

# Define a function to extract text content from a given URL
def url_to_string(url):
    # Send a GET request to the URL
    res = requests.get(url)
    # Extract the HTML content
    html = res.text
    # Create a BeautifulSoup object to parse the HTML
    soup = BeautifulSoup(html, 'html5lib')
    # Remove unwanted elements like scripts, styles, and asides
    for script in soup(["script", "style", 'aside']):
        script.extract()
    # Extract the text content and join it into a single string
    return " ".join(re.split(r'[\n\t]+', soup.get_text()))

# Use the defined function to fetch content from a given URL (in this case, the New York Times trending page)
ny_bb = url_to_string('https://www.emarketer.com/content/digital-retailers-want-use-ai-hyperpersonal-2024')

# Process the extracted text using the spaCy language model loaded previously
article = nlp(ny_bb)

# Calculate the number of named entities in the processed text
len(article.ents)

117

# HAVE A LOOK AT THE NERS

In [11]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the spaCy document (article) with entity visualization style and display it in Jupyter Notebook.
displacy.render(article, style='ent', jupyter=True)

# MOST POPULAR NER TYPES

In [12]:
# Download the large English language model for spaCy if it hasn't been downloaded already.
!python -m spacy download en_core_web_lg 

# Import the spaCy library
import spacy

# Load the downloaded English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Import Counter from collections module to count occurrences of each element
from collections import Counter

# Extract the labels of named entities from the processed text and count their occurrences
labels = [x.label_ for x in article.ents]
counter = Counter(labels)

# Print the count of each entity label
print(counter)

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
     ---------------------------------------- 0.0/587.7 MB ? eta -:--:--
     ---------------------------------------- 0.0/587.7 MB ? eta -:--:--
     -------------------------------------- 0.0/587.7 MB 667.8 kB/s eta 0:14:40
     ---------------------------------------- 0.2/587.7 MB 2.0 MB/s eta 0:04:57
     ---------------------------------------- 0.4/587.7 MB 2.9 MB/s eta 0:03:26
     ---------------------------------------- 0.5/587.7 MB 2.6 MB/s eta 0:03:49
     ---------------------------------------- 0.7/587.7 MB 3.2 MB/s eta 0:03:07
     ---------------------------------------- 0.9/587.7 MB 3.5 MB/s eta 0:02:49
     ---------------------------------------- 1.0/587.7 MB 3.2 MB/s eta 0:03:01
     ---------------------------------------- 1.2/587.7 MB 3.6 MB/s eta 0:02:44
     --------------------------------

Counter({'ORG': 44, 'DATE': 25, 'CARDINAL': 16, 'PRODUCT': 9, 'GPE': 6, 'PERSON': 4, 'PERCENT': 4, 'FAC': 3, 'NORP': 2, 'ORDINAL': 2, 'MONEY': 1, 'WORK_OF_ART': 1})


# MOST POPULAR NER

In [13]:
# Extract the text of named entities from the processed text and count their occurrences
items = [x.text for x in article.ents]
counter_items = Counter(items)

# Get the five most common named entities and their counts
most_common_entities = counter_items.most_common(5)

# Print the result
print(most_common_entities)

[('AI', 9), ('Mar 26', 5), ('Mar 25', 5), ('2024', 3), ('MarketingEmail', 3)]


# PRINTING SENTENCES

In [14]:
# Import the spaCy library
import spacy

# Load the English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Extract sentences containing named entities from the processed text
sentences = [x for x in article.ents]

# Print the 21st sentence containing named entities
print(sentences[1:13])

[AI, 2024, ClientBecome, ClientGet, DemoPricingCalendarIndustriesProductsInsightsEventsPricingAboutIndustries, OverviewOur, five, Advertising & MarketingSocial, MarketingEmail, SalesSocial, AmericaWestern EuropeHealthValue, CareDigital]


# NER TAGS

In [15]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the 21st sentence containing named entities with entity visualization style
displacy.render(nlp(str(sentences[1:13])), jupyter=True, style='ent')

# TYPES OF WORDS IN THE SENTENCE

In [16]:
# Extract non-stopword and non-punctuation tokens from the 21st sentence containing named entities
tokens_info = [(x.orth_, x.pos_, x.lemma_) for x in [y for y in nlp(str(sentences[1:13])) if not y.is_stop and y.pos_ != 'PUNCT']]

# Print the tokens' orthographic form, part-of-speech, and lemma
print(tokens_info)

[('[', 'X', '['), ('AI', 'PROPN', 'AI'), ('2024', 'NUM', '2024'), ('ClientBecome', 'PROPN', 'ClientBecome'), ('ClientGet', 'PROPN', 'ClientGet'), ('DemoPricingCalendarIndustriesProductsInsightsEventsPricingAboutIndustries', 'NOUN', 'demopricingcalendarindustriesproductsinsightseventspricingaboutindustrie'), ('OverviewOur', 'PROPN', 'OverviewOur'), ('Advertising', 'PROPN', 'Advertising'), ('&', 'CCONJ', '&'), ('MarketingSocial', 'PROPN', 'MarketingSocial'), ('MarketingEmail', 'PROPN', 'MarketingEmail'), ('SalesSocial', 'PROPN', 'SalesSocial'), ('AmericaWestern', 'PROPN', 'AmericaWestern'), ('EuropeHealthValue', 'PROPN', 'EuropeHealthValue'), ('CareDigital', 'PROPN', 'CareDigital')]


# SENTENCE DEPENDENCY TREE

In [17]:
# Import the displacy module from spaCy for dependency visualization.
from spacy import displacy

# Render the dependency parse of the 21st sentence containing named entities and display it in Jupyter Notebook.
# Adjust the distance between tokens for better visualization.
displacy.render(nlp(str(sentences[1:5])), style='dep', jupyter=True, options={'distance': 150})

# LOADING DATASET - ARTICLE-2

In [19]:
#Import necessary libraries
from bs4 import BeautifulSoup
import requests
import re

# Define a function to extract text content from a given URL
def url_to_string(url):
    # Send a GET request to the URL
    res = requests.get(url)
    # Extract the HTML content
    html = res.text
    # Create a BeautifulSoup object to parse the HTML
    soup = BeautifulSoup(html, 'html5lib')
    # Remove unwanted elements like scripts, styles, and asides
    for script in soup(["script", "style", 'aside']):
        script.extract()
    # Extract the text content and join it into a single string
    return " ".join(re.split(r'[\n\t]+', soup.get_text()))

# Use the defined function to fetch content from a given URL (in this case, the New York Times trending page)
ny_bb = url_to_string('https://time.com/6960491/donald-trump-stock-truth-social-worth-billions/')

# Process the extracted text using the spaCy language model loaded previously
article = nlp(ny_bb)

# Calculate the number of named entities in the processed text
len(article.ents)

146

# HAVE A LOOK AT THE NERS

In [20]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the spaCy document (article) with entity visualization style and display it in Jupyter Notebook.
displacy.render(article, style='ent', jupyter=True)

# MOST POPULAR NER TYPES

In [21]:
# Import the spaCy library
import spacy

# Load the downloaded English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Import Counter from collections module to count occurrences of each element
from collections import Counter

# Extract the labels of named entities from the processed text and count their occurrences
labels = [x.label_ for x in article.ents]
counter = Counter(labels)

# Print the count of each entity label
print(counter)

Counter({'ORG': 53, 'PERSON': 40, 'DATE': 17, 'GPE': 9, 'CARDINAL': 9, 'MONEY': 5, 'PRODUCT': 4, 'WORK_OF_ART': 2, 'TIME': 2, 'NORP': 2, 'PERCENT': 2, 'ORDINAL': 1})


# MOST POPULAR NER

In [22]:
# Extract the text of named entities from the processed text and count their occurrences
items = [x.text for x in article.ents]
counter_items = Counter(items)

# Get the five most common named entities and their counts
most_common_entities = counter_items.most_common(5)

# Print the result
print(most_common_entities)

[('Trump', 19), ('TIME', 7), ('Truth Social', 7), ('Truth Social Worth Billions', 3), ('Yass', 3)]


# PRINTING SENTENCES

In [23]:
# Import the spaCy library
import spacy

# Load the English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Extract sentences containing named entities from the processed text
sentences = [x for x in article.ents]

# Print the 21st sentence containing named entities
print(sentences[1:13])

[Truth Social Worth Billions, Our Ideas NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture of Work, TIMECouponsPersonal Finance, TIME, TIME, StampedJoin UsNewslettersSubscribeGive a, GiftShop, TIME, Cover StoreCustomer CareUS & CanadaGlobal Help CenterReach, RoomContact, PermissionsMoreAbout UsPrivacy PolicyYour, UseModern Slavery]


# NER TAGS

In [24]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the 21st sentence containing named entities with entity visualization style
displacy.render(nlp(str(sentences[1:13])), jupyter=True, style='ent')

# TYPES OF WORDS IN THE SENTENCE 

In [25]:
# Extract non-stopword and non-punctuation tokens from the 21st sentence containing named entities
tokens_info = [(x.orth_, x.pos_, x.lemma_) for x in [y for y in nlp(str(sentences[1:13])) if not y.is_stop and y.pos_ != 'PUNCT']]

# Print the tokens' orthographic form, part-of-speech, and lemma
print(tokens_info)

[('Truth', 'PROPN', 'Truth'), ('Social', 'PROPN', 'Social'), ('Worth', 'PROPN', 'Worth'), ('Billions', 'PROPN', 'Billions'), ('Ideas', 'PROPN', 'Ideas'), ('NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture', 'PROPN', 'NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture'), ('Work', 'PROPN', 'Work'), ('TIMECouponsPersonal', 'PROPN', 'TIMECouponsPersonal'), ('Finance', 'PROPN', 'Finance'), ('TIME', 'PROPN', 'TIME'), ('TIME', 'PROPN', 'TIME'), ('StampedJoin', 'PROPN', 'StampedJoin'), ('UsNewslettersSubscribeGive', 'NOUN', 'usnewsletterssubscribegive'), ('GiftShop', 'PROPN', 'GiftShop'), ('TIME', 'PROPN', 'TIME'), ('Cover', 'PROPN', 'Cover'), ('StoreCustomer', 'PROPN', 'StoreCustomer'), ('CareUS', 'PROPN', 'CareUS'), ('&', 'CCONJ', '&'), ('CanadaGlobal', 'PROPN', 'CanadaGlobal'), ('Help', 'VERB', 'help'), ('CenterReach', 'PROPN', 'CenterReach'), ('RoomContact', 'PROPN', 'RoomContact'), ('PermissionsMoreAbout', 'PROPN', 'PermissionsMoreAbou

# SENTENCE DEPENDENCY TREE

In [26]:
# Import the displacy module from spaCy for dependency visualization.
from spacy import displacy

# Render the dependency parse of the 21st sentence containing named entities and display it in Jupyter Notebook.
# Adjust the distance between tokens for better visualization.
displacy.render(nlp(str(sentences[1:3])), style='dep', jupyter=True, options={'distance': 150})

# LOADING DATASET - ARTICLE-3

In [27]:
# Import necessary libraries
from bs4 import BeautifulSoup
import requests
import re

# Define a function to extract text content from a given URL
def url_to_string(url):
    # Send a GET request to the URL
    res = requests.get(url)
    # Extract the HTML content
    html = res.text
    # Create a BeautifulSoup object to parse the HTML
    soup = BeautifulSoup(html, 'html5lib')
    # Remove unwanted elements like scripts, styles, and asides
    for script in soup(["script", "style", 'aside']):
        script.extract()
    # Extract the text content and join it into a single string
    return " ".join(re.split(r'[\n\t]+', soup.get_text()))

# Use the defined function to fetch content from a given URL (in this case, the New York Times trending page)
ny_bb = url_to_string('https://time.com/6899782/thailand-prime-minister-srettha-thavisin-business-hub/')

# Process the extracted text using the spaCy language model loaded previously
article = nlp(ny_bb)

# Calculate the number of named entities in the processed text
len(article.ents)

362

# HAVE A LOOK AT THE NERS

In [28]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the spaCy document (article) with entity visualization style and display it in Jupyter Notebook.
displacy.render(article, style='ent', jupyter=True)

# MOST POPULAR NER TYPES

In [29]:
# Import the spaCy library
import spacy

# Load the downloaded English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Import Counter from collections module to count occurrences of each element
from collections import Counter

# Extract the labels of named entities from the processed text and count their occurrences
labels = [x.label_ for x in article.ents]
counter = Counter(labels)

# Print the count of each entity label
print(counter)

Counter({'PERSON': 74, 'ORG': 68, 'GPE': 65, 'DATE': 49, 'NORP': 36, 'CARDINAL': 24, 'LOC': 9, 'MONEY': 7, 'PRODUCT': 6, 'ORDINAL': 6, 'PERCENT': 6, 'TIME': 4, 'WORK_OF_ART': 3, 'EVENT': 2, 'QUANTITY': 2, 'FAC': 1})


# MOST POPULAR NER

In [30]:
# Extract the text of named entities from the processed text and count their occurrences
items = [x.text for x in article.ents]
counter_items = Counter(items)

# Get the five most common named entities and their counts
most_common_entities = counter_items.most_common(5)

# Print the result
print(most_common_entities)

[('Srettha', 36), ('Thailand', 29), ('Thai', 17), ('TIME', 9), ('Bangkok', 7)]


# PRINTING SENTENCES

In [31]:
# Import the spaCy library
import spacy

# Load the English language model for spaCy
nlp = spacy.load("en_core_web_lg")

# Extract sentences containing named entities from the processed text
sentences = [x for x in article.ents]

# Print the 21st sentence containing named entities
print(sentences[1:13])

[TIMETIME, Our Ideas NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture of Work, TIMECouponsPersonal Finance, TIME, TIME, StampedJoin UsNewslettersSubscribeGive a, GiftShop, TIME, Cover StoreCustomer CareUS & CanadaGlobal Help CenterReach, RoomContact, PermissionsMoreAbout UsPrivacy PolicyYour, UseModern Slavery]


# NER TAGS

In [32]:
# Import the displacy module from spaCy for entity visualization.
from spacy import displacy

# Render the 21st sentence containing named entities with entity visualization style
displacy.render(nlp(str(sentences[1:13])), jupyter=True, style='ent')

# TYPES OF WORDS IN THE SENTENCE

In [33]:
# Extract non-stopword and non-punctuation tokens from the 21st sentence containing named entities
tokens_info = [(x.orth_, x.pos_, x.lemma_) for x in [y for y in nlp(str(sentences[1:13])) if not y.is_stop and y.pos_ != 'PUNCT']]

# Print the tokens' orthographic form, part-of-speech, and lemma
print(tokens_info)

[('[', 'X', '['), ('TIMETIME', 'PROPN', 'TIMETIME'), ('Ideas', 'PROPN', 'Ideas'), ('NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture', 'PROPN', 'NewsletterSubscribeSubscribeSectionsHomeU.S.PoliticsWorldHealthClimateFuture'), ('Work', 'PROPN', 'Work'), ('TIMECouponsPersonal', 'PROPN', 'TIMECouponsPersonal'), ('Finance', 'PROPN', 'Finance'), ('TIME', 'PROPN', 'TIME'), ('TIME', 'PROPN', 'TIME'), ('StampedJoin', 'PROPN', 'StampedJoin'), ('UsNewslettersSubscribeGive', 'NOUN', 'usnewsletterssubscribegive'), ('GiftShop', 'PROPN', 'GiftShop'), ('TIME', 'PROPN', 'TIME'), ('Cover', 'PROPN', 'Cover'), ('StoreCustomer', 'PROPN', 'StoreCustomer'), ('CareUS', 'PROPN', 'CareUS'), ('&', 'CCONJ', '&'), ('CanadaGlobal', 'PROPN', 'CanadaGlobal'), ('Help', 'VERB', 'help'), ('CenterReach', 'PROPN', 'CenterReach'), ('RoomContact', 'PROPN', 'RoomContact'), ('PermissionsMoreAbout', 'PROPN', 'PermissionsMoreAbout'), ('UsPrivacy', 'PROPN', 'UsPrivacy'), ('PolicyYour', 'PROPN', 'Polic

# SENTENCE DEPENDENCY TREE

In [34]:
# Import the displacy module from spaCy for dependency visualization.
from spacy import displacy

# Render the dependency parse of the 21st sentence containing named entities and display it in Jupyter Notebook.
# Adjust the distance between tokens for better visualization.
displacy.render(nlp(str(sentences[1:3])), style='dep', jupyter=True, options={'distance': 200})