First we install the required packages; the kernel then has to be restarted so that the freshly installed versions are the ones that get imported.

In [3]:
%%capture
import os
!pip install numpy --force-reinstall
!pip install topic-wizard==0.2.4 --force-reinstall
!pip install sklearn --force-reinstall 
os._exit(0) # kernel needs a restart

We load the packages needed for training a topic model and visualizing it with topicwizard.

In [None]:
import random
from random import sample

import topicwizard
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline

We load an example dataset from scikit-learn.

In [2]:
SAMPLE_SIZE = 500  # number of documents kept for the quick preview
SAMPLE_SEED = 42   # fixed seed so every run draws the same documents

# Load the 20 Newsgroups documents (subset="all").
corpus = fetch_20newsgroups(subset="all").data
# We sample the corpus so that the preprocessing happens faster
# and you can get the preview quicker :))
# Seeding the RNG first makes the subsample (and thus the topics) reproducible.
random.seed(SAMPLE_SEED)
corpus = sample(corpus, SAMPLE_SIZE)

We specify an NMF topic model with a regular count bag-of-words vectorizer and 20 topics.

In [3]:
# Setting up topic modelling pipeline
N_TOPICS = 20   # number of NMF components (topics)
NMF_SEED = 42   # fixed seed so the factorization is reproducible across runs

# Bag-of-words counts with English stop words removed; max_df=0.8 drops terms that
# appear in more than 80% of the documents, min_df=10 drops terms that appear in
# fewer than 10 documents.
vectorizer = CountVectorizer(max_df=0.8, min_df=10, stop_words="english")
# NMF topic model with 20 topics
nmf = NMF(n_components=N_TOPICS, random_state=NMF_SEED)
# The step names "bow" and "nmf" identify the two stages inside the pipeline.
pipeline = Pipeline([
    ("bow", vectorizer),
    ("nmf", nmf),
])

We fit the topic pipeline.

In [None]:
# Train the whole pipeline on the sampled documents:
# the vectorizer is fitted first, then the topic model on its output.
pipeline.fit(X=corpus)

Let's visualize our results with topicwizard.

In [None]:
# Launch the topicwizard app for the fitted pipeline and the documents it was trained on
topicwizard.visualize(
    corpus=corpus,
    pipeline=pipeline,
)