In [None]:
# --- Q2: Semantic Understanding & Language Modeling ---

import nltk
from nltk.corpus import wordnet
from collections import Counter

# Fetch the WordNet corpus and the Open Multilingual WordNet data used by
# the lookups below.
nltk.download('wordnet')
nltk.download('omw-1.4')

# 1. Synonyms, Antonyms, Hypernyms
# Visit every synset (sense) of `word` and pool three sets of lemma names:
# the synset's own lemmas, the antonyms of those lemmas, and the lemmas of
# the synset's hypernyms.
word = "happy"
synonyms, antonyms, hypernyms = set(), set(), set()

for syn in wordnet.synsets(word):
    for lemma in syn.lemmas():
        synonyms.add(lemma.name())
        # A lemma can list several antonyms; keep all of them rather than
        # only the first. An empty antonym list adds nothing.
        antonyms.update(ant.name() for ant in lemma.antonyms())
    for hyper in syn.hypernyms():
        hypernyms.update(lemma.name() for lemma in hyper.lemmas())

print("Synonyms:", synonyms)
print("Antonyms:", antonyms)
print("Hypernyms:", hypernyms)

# 2. Simple N-Gram Language Model (Bigram + Laplace)
text = "the cat sat on the mat"
tokens = text.split()
# Pair each token with its successor. zip stops at the shorter input, so a
# text with fewer than two tokens simply produces no bigrams.
bigrams = list(zip(tokens, tokens[1:]))
freq = Counter(bigrams)
vocab = set(tokens)
# Number of bigrams that start with each word. This is the correct
# denominator for P(w2 | w1): the last token of the text starts no bigram,
# so using its raw token count would make its distribution sum to less
# than 1. Precomputing also avoids rescanning `tokens` on every call.
context_counts = Counter(first for first, _ in bigrams)


def prob(w1, w2):
    """Return the add-one (Laplace) smoothed estimate of P(w2 | w1).

    Args:
        w1: The preceding (context) word.
        w2: The word whose conditional probability is wanted.

    Returns:
        (count(w1, w2) + 1) / (bigrams starting with w1 + vocabulary size).
        Unseen words and unseen pairs contribute a count of 0.

    Raises:
        ZeroDivisionError: Only when the text is empty, so that both the
            context count and the vocabulary size are 0.
    """
    return (freq[(w1, w2)] + 1) / (context_counts[w1] + len(vocab))


print("\nP('sat' | 'cat') =", prob('cat', 'sat'))

"""
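Possible Errors & Fixes:
1. LookupError: wordnet not found → nltk.download('wordnet')
2. ZeroDivisionError → occurs only if the text is empty (vocabulary size 0); an unseen word alone cannot trigger it, because smoothing adds the vocabulary size to the denominator
3. IndexError → not raised by this code; a text with fewer than two words just produces no bigrams, so ensure text has enough words for n-grams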
"""


In [None]:
# --- Q2: Semantic Understanding & Language Modeling ---

import nltk
from nltk.corpus import wordnet
from collections import defaultdict, Counter
import math

# Fetch the WordNet corpus and the Open Multilingual WordNet data used by
# the lookups below.
nltk.download('wordnet')
nltk.download('omw-1.4')

# 1. Synonyms, Antonyms, Hypernyms
# Visit every synset (sense) of `word` and pool three sets of lemma names:
# the synset's own lemmas, the antonyms of those lemmas, and the lemmas of
# the synset's hypernyms.
word = "good"
synonyms = set()
antonyms = set()
hypernyms = set()

for syn in wordnet.synsets(word):
    for lemma in syn.lemmas():
        synonyms.add(lemma.name())
        # A lemma can list several antonyms; keep all of them rather than
        # only the first. An empty antonym list adds nothing.
        antonyms.update(ant.name() for ant in lemma.antonyms())
    for hyper in syn.hypernyms():
        hypernyms.update(lemma.name() for lemma in hyper.lemmas())

print("Synonyms:", synonyms)
print("Antonyms:", antonyms)
print("Hypernyms:", hypernyms)

# 2. N-Gram Language Model with Laplace Smoothing
def generate_ngrams(tokens, n):
    """Return every contiguous run of ``n`` tokens as a tuple, in order.

    Args:
        tokens: Sliceable sequence of tokens (e.g. the list returned by
            ``str.split``).
        n: Number of tokens per n-gram; must be at least 1.

    Returns:
        A list of ``n``-tuples, one per window position. The list is empty
        when ``tokens`` holds fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # Without this guard, n == 0 would yield len(tokens) + 1 empty
        # tuples and negative n would yield arbitrary partial slices.
        raise ValueError(f"n must be a positive integer, got {n!r}")
    return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

text = "the cat sat on the mat"
tokens = text.split()
n = 2  # bigram; laplace_prob below looks up 2-tuples, so n must stay 2
ngrams = generate_ngrams(tokens, n)

# Frequency counts
counts = Counter(ngrams)
vocab_size = len(set(tokens))
# Number of bigrams that start with each word. This is the correct
# denominator for P(w2 | w1): the last token of the text starts no bigram,
# so using its raw token count would make its distribution sum to less
# than 1. Precomputing also avoids rescanning `tokens` on every call.
context_counts = Counter(gram[0] for gram in ngrams)


def laplace_prob(w1, w2):
    """Return the add-one (Laplace) smoothed estimate of P(w2 | w1).

    Args:
        w1: The preceding (context) word.
        w2: The word whose conditional probability is wanted.

    Returns:
        (count(w1, w2) + 1) / (bigrams starting with w1 + vocabulary size).
        Unseen words and unseen pairs contribute a count of 0.

    Raises:
        ZeroDivisionError: Only when the text yields no tokens, so that
            both the context count and the vocabulary size are 0.
    """
    return (counts[(w1, w2)] + 1) / (context_counts[w1] + vocab_size)


print("\nP('sat' | 'cat') =", laplace_prob('cat', 'sat'))

"""
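Possible Errors & Fixes:
1. LookupError: wordnet not found → Run nltk.download('wordnet')
2. Too few tokens for n-grams → No IndexError is raised (slicing never fails); the n-gram list is simply empty → Ensure text length >= n
3. ZeroDivisionError → Occurs only if the text yields no tokens (vocabulary size 0) → Check tokenization
4. Encoding errors in input text → Decode the input as UTF-8 when reading it; lowercasing is a separate normalization step that merges case variants and does not fix encoding problems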
"""
