# Lab 5: Topic Modeling (Enhanced)

## Introduction
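In this lab we extract topics from a small sample corpus (TF-IDF features factorized with NMF) and visualize each topic as a **word cloud**. Drawing each word at a size proportional to its weight makes it much easier to see what a topic is "about" than reading a list of weighted terms.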

In [None]:
%pip install scikit-learn wordcloud matplotlib

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Sample corpus: nine short documents, three each about finance, sports and
# software, so a three-topic model has an obvious structure to recover.
documents = [
    "Stocks are high", "Market is booming", "Invest in stocks",
    "Football match yesterday", "Player scored goal", "Championship game",
    "New software update", "Cybersecurity patch", "Install version 2.0"
]

# Number of highest-weighted terms drawn in each topic's word cloud.
N_TOP_WORDS = 10

# Model: TF-IDF features (English stop words removed) factorized by NMF into
# three topics. random_state is pinned so reruns produce the same topics.
tfidf_vect = TfidfVectorizer(stop_words='english')
doc_term_matrix = tfidf_vect.fit_transform(documents)
nmf = NMF(n_components=3, random_state=42)
nmf.fit(doc_term_matrix)

# Vocabulary terms, used below to translate a weight's position in a topic
# row back into the word it belongs to.
feature_names = tfidf_vect.get_feature_names_out()

for index, topic in enumerate(nmf.components_):
    print(f"TOPIC {index}")

    # Indices of the N_TOP_WORDS largest weights, heaviest first.
    # Reverse the ascending argsort and then truncate; note that a single
    # slice of the form `[:-N:-1]` would return only N - 1 indices.
    top_indices = topic.argsort()[::-1][:N_TOP_WORDS]

    # Create a dictionary of word->weight for this topic; WordCloud scales
    # each word's font size by its weight.
    topic_words = {feature_names[i]: topic[i] for i in top_indices}

    wc = WordCloud(width=400, height=200, background_color='white').generate_from_frequencies(topic_words)
    plt.figure(figsize=(6, 3))
    # Bilinear interpolation smooths the raster when matplotlib rescales the
    # 400x200 image to the figure size.
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.show()