# ***Question No: 1 --> Write a Python program using NLTK to extract named entities from the sentence: "Apple Inc. is looking at buying U.K. startup for $1 billion."***

In [6]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

# Fetch the tokenizer, POS-tagger and NE-chunker data used below
# (the legacy package names as well as the '*_tab' / '*_eng' ones).
for resource in (
    'punkt',
    'maxent_ne_chunker',
    'words',
    'punkt_tab',
    'maxent_ne_chunker_tab',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

# Sentence to analyse
sentence = "Apple Inc. is looking at buying U.K. startup for $1 billion."

# Pipeline: split into tokens -> attach a POS tag to each token -> group
# tagged tokens into named-entity chunks
tokens = word_tokenize(sentence)
pos_tags = pos_tag(tokens)
named_entities = ne_chunk(pos_tags)

# `named_entities` is printed in the next cell


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [7]:
# Print the chunk tree: recognised entities appear as labelled subtrees
# (e.g. PERSON, ORGANIZATION); every other token stays a plain word/TAG leaf.
print(named_entities)

(S
  (PERSON Apple/NNP)
  (ORGANIZATION Inc./NNP)
  is/VBZ
  looking/VBG
  at/IN
  buying/VBG
  U.K./NNP
  startup/NN
  for/IN
  $/$
  1/CD
  billion/CD
  ./.)


# ***Question No: 2 --> Using NLTK, write a function that takes a list of sentences and returns a list of named entities found in each sentence.***

In [9]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

# Download every resource that word_tokenize, pos_tag and ne_chunk load, so
# this cell also runs on a fresh kernel/environment instead of silently relying
# on the downloads performed by the Question 1 cell.
# Both the legacy package names and the '*_tab' / '*_eng' names are fetched:
# recent NLTK releases look up the latter, older releases the former.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker')
nltk.download('maxent_ne_chunker_tab')
nltk.download('words')

def extract_named_entities(sentences):
    """Find the named entities in each sentence.

    Args:
        sentences: iterable of sentence strings.

    Returns:
        A list with one entry per input sentence, in input order. Each entry
        is a list of ``(entity_text, entity_label)`` tuples, where
        ``entity_text`` is the entity's tokens joined by single spaces and
        ``entity_label`` is the chunk label assigned by ``ne_chunk``
        (e.g. PERSON, ORGANIZATION, GPE). A sentence without entities yields
        an empty list.
    """

    def _entities_of(sentence):
        # tokenize -> POS-tag -> chunk into named entities
        tagged = pos_tag(word_tokenize(sentence))
        # Only the Tree children of the chunk result are entities; plain
        # (word, tag) tuples are tokens outside any entity.
        return [
            (" ".join(word for word, _tag in chunk.leaves()), chunk.label())
            for chunk in ne_chunk(tagged)
            if isinstance(chunk, nltk.Tree)
        ]

    return [_entities_of(sentence) for sentence in sentences]

# Example usage: three sample sentences processed in a single call
sentences = [
    "Apple Inc. is looking at buying U.K. startup for $1 billion.",
    "Elon Musk founded SpaceX in 2002 in California.",
    "Google was founded by Larry Page and Sergey Brin."
]

# One list of (entity_text, entity_label) tuples per sentence, in input order
result = extract_named_entities(sentences)

# The per-sentence results are printed in the next cell



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


In [10]:
# Report the entities found in each sentence; sentences are numbered from 1
for i, entities in enumerate(result):
    print(f"Sentence {i+1}: {entities}")

Sentence 1: [('Apple', 'PERSON'), ('Inc.', 'ORGANIZATION')]
Sentence 2: [('Elon', 'PERSON'), ('Musk', 'PERSON'), ('SpaceX', 'ORGANIZATION'), ('California', 'GPE')]
Sentence 3: [('Google', 'PERSON'), ('Larry Page', 'PERSON'), ('Sergey Brin', 'PERSON')]


# ***Question No: 3 --> Write a Python program that uses NLTK to extract and display all noun phrases from a given text.***

In [12]:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.tree import Tree

# Download every resource that word_tokenize and pos_tag load, so this cell
# also runs on a fresh kernel/environment instead of silently relying on the
# downloads performed by the Question 1 cell.
# Both the legacy package names and the '*_tab' / '*_eng' names are fetched:
# recent NLTK releases look up the latter, older releases the former.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

def extract_noun_phrases(text):
    """Extract noun phrases from ``text`` with a regular-expression chunker.

    The text is tokenized and POS-tagged, then chunked with a single rule:
    an optional determiner, any number of adjectives, and one or more
    noun-tagged tokens form an ``NP`` chunk.

    Args:
        text: the string to analyse.

    Returns:
        A list of noun-phrase strings in order of appearance, each made of the
        chunk's tokens joined by single spaces.
    """
    # <DT>? optional determiner, <JJ>* adjectives, <NN.*>+ any noun tag(s)
    np_grammar = "NP: {<DT>?<JJ>*<NN.*>+}"

    tagged_words = pos_tag(word_tokenize(text))
    parsed = nltk.RegexpParser(np_grammar).parse(tagged_words)

    # Chunks are the Tree children of the parse result; anything else is a
    # (word, tag) tuple that did not match the NP rule.
    return [
        " ".join(word for word, _tag in chunk.leaves())
        for chunk in parsed
        if isinstance(chunk, Tree) and chunk.label() == "NP"
    ]

# Example usage: two short sentences passed as one string
text = "The quick brown fox jumps over the lazy dog. Apple Inc. is a big company."

# List of noun-phrase strings, in the order they occur in the text
noun_phrases = extract_noun_phrases(text)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [13]:
# Show the extracted noun phrases as a plain Python list
print("Noun Phrases:", noun_phrases)


Noun Phrases: ['The quick brown fox', 'the lazy dog', 'Apple Inc.', 'a big company']
