# Assignment 5
Name: Vivek Mule
Roll: 381072
PRN: 22420145

Use WordNet to identify semantic relationships such as synonymy, antonymy, and hypernymy in
text data.

In [1]:
import nltk
from nltk.corpus import wordnet as wn
from nltk import word_tokenize, pos_tag
import pandas as pd

# Ensure required resources are available
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
from typing import Optional, Dict, Set

def map_pos(treebank_tag: str) -> Optional[str]:
    """Map NLTK POS tags to WordNet POS tags."""
    if treebank_tag.startswith('J'):
        return wn.ADJ
    if treebank_tag.startswith('V'):
        return wn.VERB
    if treebank_tag.startswith('N'):
        return wn.NOUN
    if treebank_tag.startswith('R'):
        return wn.ADV
    return None


def gather_relations(word: str, wn_pos: str) -> Dict[str, Set[str]]:
    synsets = wn.synsets(word, pos=wn_pos)
    if not synsets:
        return {"synonyms": set(), "antonyms": set(), "hypernyms": set(), "hyponyms": set()}

    primary = synsets[0]  # use most frequent sense to keep output concise
    synonyms = {lemma.name().replace('_', ' ') for lemma in primary.lemmas() if lemma.name().lower() != word.lower()}
    antonyms = {ant.name().replace('_', ' ') for lemma in primary.lemmas() for ant in lemma.antonyms()}
    hypernyms = {lemma.name().replace('_', ' ') for syn in primary.hypernyms() for lemma in syn.lemmas()}
    hyponyms = {lemma.name().replace('_', ' ') for syn in primary.hyponyms() for lemma in syn.lemmas()}
    return {
        "synonyms": synonyms,
        "antonyms": antonyms,
        "hypernyms": hypernyms,
        "hyponyms": hyponyms,
    }


def format_set(items: Set[str], limit: int = 5) -> str:
    return ', '.join(sorted(items)) if items else '-'

In [3]:
sample_text = (
    "WordNet helps move from surface words to meaning-level analysis. "
    "Dogs are loyal animals and often family pets. "
    "Happy and joyful align closely, while sad and happy are opposites."
)

# Step 1: tokenize and POS tag
tokens = word_tokenize(sample_text)
pos_tags = pos_tag(tokens)

records = []
for token, tag in pos_tags:
    wn_pos = map_pos(tag)
    if wn_pos is None or not token.isalpha():
        continue  # skip items without a supported POS or non-alphabetic tokens

    relations = gather_relations(token, wn_pos)
    records.append({
        "token": token,
        "pos": tag,
        "synonyms": format_set(relations["synonyms"]),
        "antonyms": format_set(relations["antonyms"]),
        "hypernyms": format_set(relations["hypernyms"]),
        "hyponyms": format_set(relations["hyponyms"]),
    })

relations_df = pd.DataFrame(records)
relations_df

Unnamed: 0,token,pos,synonyms,antonyms,hypernyms,hyponyms
0,WordNet,NNP,-,-,lexical database,-
1,helps,VBZ,"aid, assist, help",-,"back up, support","alleviate, assist, attend, attend to, avail, b..."
2,move,VB,"go, locomote, travel",stay in place,-,"accompany, advance, angle, arise, ascend, auto..."
3,surface,NN,-,-,"artefact, artifact","Klein bottle, Mobius strip, bed, board, face, ..."
4,words,NNS,-,-,"language, oral communication, speech, speech c...",-
5,analysis,NN,-,-,"investigating, investigation","anatomy, case study, chemical analysis, cost a..."
6,Dogs,NNS,"Canis familiaris, dog, domestic dog",-,"canid, canine, domestic animal, domesticated a...","Belgian griffon, Brussels griffon, Great Pyren..."
7,are,VBP,be,-,-,"abound, accept, account, account for, act, add..."
8,loyal,JJ,-,disloyal,-,-
9,animals,NNS,"animal, animate being, beast, brute, creature,...",-,"being, organism","acrodont, adult, biped, captive, chordate, con..."
