In [None]:
# --- Q3: NER, Sentiment & Word Embeddings ---

import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.sentiment import SentimentIntensityAnalyzer
from gensim.models import Word2Vec

# Fetch the NLTK data used below (tokenizer, POS tagger, NE chunker, VADER).
# The first five ids are the legacy resource names; the last three are the
# renamed resources that NLTK 3.9+ looks up instead. Requesting both sets keeps
# the cell working on old and new NLTK releases: nltk.download() reports an
# unknown id and returns False rather than raising by default.
for resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
    'vader_lexicon',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
):
    nltk.download(resource)

# 1. Named Entity Recognition
# Pipeline: raw text -> tokens -> (token, POS tag) pairs -> chunk tree in which
# recognised entities appear as labelled subtrees.
text = "Elon Musk founded SpaceX in California."
tokens = word_tokenize(text)
tags = pos_tag(tokens)
ner = ne_chunk(tags)
print("NER Result:\n", ner)

# 2. Sentiment Analysis
# polarity_scores() returns a dict with 'neg', 'neu', 'pos' and 'compound' keys.
sia = SentimentIntensityAnalyzer()
print("\nSentiment:", sia.polarity_scores("The movie was good but too long."))

# 3. Word Embeddings (Word2Vec)
# min_count=1 keeps every word of this toy corpus in the vocabulary, so the
# similarity lookup below cannot hit a KeyError.
# seed + a single worker make the printed similarity as repeatable as gensim
# allows; full run-to-run determinism additionally needs a fixed PYTHONHASHSEED.
sentences = [["nlp", "is", "fun"], ["deep", "learning", "is", "powerful"]]
model = Word2Vec(sentences, vector_size=20, min_count=1, seed=42, workers=1)
print("\nSimilarity (nlp, learning):", model.wv.similarity("nlp", "learning"))

"""
Possible Errors & Fixes:
1. LookupError: missing NLTK data → run nltk.download() for required resources
2. ModuleNotFoundError: gensim → install via pip install gensim
3. KeyError in similarity → check word exists in model vocabulary
4. Visualization errors → skip plotting if not needed
"""


In [None]:
# --- Q3: NER, Sentiment Analysis, Word Embeddings ---

import nltk
import gensim
from gensim.models import Word2Vec
from nltk import pos_tag, ne_chunk, word_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Fetch the NLTK data used below (VADER lexicon, tokenizer, NE chunker, POS
# tagger). The first five ids are the legacy resource names; the last three are
# the renamed resources that NLTK 3.9+ looks up instead. Requesting both sets
# keeps the cell working on old and new NLTK releases: nltk.download() reports
# an unknown id and returns False rather than raising by default.
for resource in (
    'vader_lexicon',
    'punkt',
    'maxent_ne_chunker',
    'words',
    'averaged_perceptron_tagger',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
):
    nltk.download(resource)

text = "Elon Musk founded SpaceX in the United States."

# 1. Named Entity Recognition (NER)
# Pipeline: raw text -> tokens -> (token, POS tag) pairs -> chunk tree in which
# recognised entities appear as labelled subtrees.
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
ner_tree = ne_chunk(pos_tags)
print("\nNamed Entities:\n", ner_tree)

# 2. Sentiment Analysis
# polarity_scores() returns a dict with 'neg', 'neu', 'pos' and 'compound' keys.
sia = SentimentIntensityAnalyzer()
sentiment = sia.polarity_scores("The movie was good but a bit long.")
print("\nSentiment Scores:", sentiment)

# 3. Word2Vec Embeddings & Similarity
sentences = [["natural", "language", "processing"],
             ["machine", "learning", "models"],
             ["language", "models", "learn", "patterns"]]

# min_count=1 keeps every word of this toy corpus in the vocabulary.
# seed + a single worker make the similarity value and the plot as repeatable
# as gensim allows (multi-threaded training is order-dependent); full
# run-to-run determinism additionally needs a fixed PYTHONHASHSEED.
model = Word2Vec(sentences, vector_size=50, min_count=1, seed=42, workers=1)
print("\nSimilarity (language, models):", model.wv.similarity('language', 'models'))

# Visualization
# Project the 50-dimensional word vectors onto their first two principal
# components so each vocabulary word can be drawn as a 2-D point.
words = list(model.wv.index_to_key)
X = model.wv[words]
pca = PCA(n_components=2)
result = pca.fit_transform(X)

plt.scatter(result[:, 0], result[:, 1])
# Rows of `result` are in the same order as `words`, so pair them directly.
for word, (x_coord, y_coord) in zip(words, result):
    plt.annotate(word, xy=(x_coord, y_coord))
plt.title("Word2Vec Embedding Visualization")
plt.show()

"""
Possible Errors & Fixes:
1. LookupError: vader_lexicon, punkt, etc. → Run nltk.download('vader_lexicon') etc.
2. gensim.models errors → Ensure gensim installed (pip install gensim)
3. ValueError in PCA → Need >= 2 words in vocabulary
4. Matplotlib display issues → Use %matplotlib inline in Jupyter
5. Missing NER models → Ensure nltk.download('maxent_ne_chunker') + nltk.download('words')
"""
