# ***Question No: 1 --> Write a Python program using NLTK to perform part-of-speech tagging on the sentence: "The quick brown fox jumps over the lazy dog."***

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Download the tokenizer and tagger resources used by the calls below
for resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

# Sentence to analyse
sentence = "The quick brown fox jumps over the lazy dog."

# Split the sentence into word tokens, then label each token with its
# part of speech; the result is a list of (token, tag) tuples
tokens = word_tokenize(sentence)
tagged_tokens = pos_tag(tokens)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


In [None]:

# Output the result: the list of (token, POS tag) tuples built by pos_tag above
print(tagged_tokens)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


# ***Question No: 2 --> Using NLTK, write a function that takes a list of sentences and returns a list of part-of-speech tagged sentences.***

In [None]:
import nltk
from nltk.tokenize import word_tokenize

def pos_tag_sentences(sentences):
    """
    Tokenize and part-of-speech tag every sentence in ``sentences``.

    All NLTK resources needed for tokenizing and tagging are requested before
    any work is done, so the function does not rely on another notebook cell
    having downloaded them first.

    :param sentences: Iterable of sentences (strings)
    :return: List of lists, where each sublist contains tuples of (word, POS tag),
             in the same order as the input. An empty input returns ``[]``.
    """
    # Materialize once so generators are supported and emptiness can be checked.
    sentence_list = list(sentences)
    if not sentence_list:
        # Nothing to tag: skip the downloads and the tagger load entirely.
        return []

    # Ensure the necessary NLTK resources are available. The same four resources
    # as the notebook's first cell are requested: the tokenizer and tagger data
    # under both of the names that cell downloads.
    for resource in (
        'punkt',
        'averaged_perceptron_tagger',
        'punkt_tab',
        'averaged_perceptron_tagger_eng',
    ):
        nltk.download(resource)

    # Tokenize each sentence into words
    tokenized_sentences = [word_tokenize(sentence) for sentence in sentence_list]

    # Tag the whole batch in one call; pos_tag_sents returns one list of
    # (word, tag) tuples per tokenized sentence.
    return nltk.pos_tag_sents(tokenized_sentences)

In [None]:
# Two example sentences to run through pos_tag_sentences
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "NLTK is a powerful library for natural language processing."
]

# One list of (word, POS tag) tuples per input sentence, in input order
tagged = pos_tag_sentences(sentences)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
# Print each tagged sentence on its own line. The loop variable is not named
# `sentence` so that the notebook-level string variable of that name is not
# overwritten with a list of (word, tag) tuples.
for tagged_sentence in tagged:
    print(tagged_sentence)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]
[('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('powerful', 'JJ'), ('library', 'NN'), ('for', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.')]


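# ***Question No: 3 --> Explain how to map the Penn Treebank POS tags to the Universal POS tags using NLTK. Provide a code example that tags a sentence and maps the tags accordingly.***

**Explanation:** NLTK's default `pos_tag` labels tokens with fine-grained Penn Treebank tags (e.g. `VB`, `VBP`, `VBZ`, `NN`, `NNS`). The helper `nltk.tag.mapping.map_tag(source, target, tag)` converts a single tag from one tagset to another using the mapping tables in the `universal_tagset` resource. Calling it with source `'en-ptb'` and target `'universal'` collapses each Penn Treebank tag into one of the coarse Universal categories (`NOUN`, `VERB`, `ADJ`, `ADV`, `PRON`, `DET`, `ADP`, `NUM`, `CONJ`, `PRT`, `.`, `X`). The same result can be obtained in one step with `pos_tag(tokens, tagset='universal')`.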

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.tag.mapping import map_tag
# Download the universal_tagset resource needed for the tag mapping below
nltk.download('universal_tagset')

# Example sentence in which "refuse" and "permit" each appear twice
sentence = "They refuse to permit us to obtain the refuse permit."

# Split the sentence into word tokens
tokens = word_tokenize(sentence)

# Tag the tokens; with no tagset argument the tags are Penn Treebank tags
pos_tags = pos_tag(tokens)

# Convert each Penn Treebank tag to its Universal POS equivalent,
# keeping the token itself unchanged
universal_tags = [
    (token, map_tag('en-ptb', 'universal', ptb_tag))
    for token, ptb_tag in pos_tags
]

[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.


In [None]:

# Output the results: the Penn Treebank tagging first, then the same tokens
# with their tags mapped to the Universal tagset
print("Penn Treebank POS tags:")
print(pos_tags)
print("\nUniversal POS tags:")
print(universal_tags)

Penn Treebank POS tags:
[('They', 'PRP'), ('refuse', 'VBP'), ('to', 'TO'), ('permit', 'VB'), ('us', 'PRP'), ('to', 'TO'), ('obtain', 'VB'), ('the', 'DT'), ('refuse', 'NN'), ('permit', 'NN'), ('.', '.')]

Universal POS tags:
[('They', 'PRON'), ('refuse', 'VERB'), ('to', 'PRT'), ('permit', 'VERB'), ('us', 'PRON'), ('to', 'PRT'), ('obtain', 'VERB'), ('the', 'DET'), ('refuse', 'NOUN'), ('permit', 'NOUN'), ('.', '.')]
