In [None]:
import pandas as pd
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
import pickle
from scipy.cluster import hierarchy as sch

In [None]:
# Read the preprocessed review data and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../../data/processed/clean_reviews.csv")
df.head(n=5)

In [None]:
# Pull the model inputs out of the frame as plain Python lists: the cleaned
# review text, and the review time parsed with pandas' datetime parser.
# `time` keeps this name because later cells reference it.
reviews = df["Cleaned Text"].to_list()
time = pd.to_datetime(df["Time"]).to_list()

In [None]:
# Prepare embeddings (one-off step, kept for reference; the cached result is loaded from the pickle below)
# sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
# embeddings = sentence_model.encode(reviews, show_progress_bar=True)

In [None]:
# Save embeddings to reviews_BERT_embedding.pickle (one-off step, kept for reference)
# with open('reviews_BERT_embedding.pickle', 'wb') as pkl:
#     pickle.dump(embeddings, pkl)

In [None]:
# Load the cached embeddings from disk instead of re-encoding the reviews.
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# only load a pickle that this project produced itself.
with open('reviews_BERT_embedding.pickle', mode='rb') as embedding_file:
    embeddings = pickle.load(embedding_file)

In [None]:
# Fit BERTopic on the reviews, passing the precomputed embeddings so the
# documents do not have to be re-encoded here.
#
# Reproducibility: BERTopic's default dimensionality-reduction step (UMAP) is
# stochastic. Without a fixed seed the discovered topics, their ids and their
# sizes can change on every "Restart & Run All", which makes every table and
# figure in the following cells unstable. Pin the seed on the default reducer
# before fitting; `set_params` is the scikit-learn estimator API, so no extra
# import is needed.
# NOTE(review): a fixed UMAP seed presumably disables UMAP's parallelism, so the
# fit may be somewhat slower; confirm this is acceptable for the dataset size.
RANDOM_STATE = 42

topic_model = BERTopic(language="english", min_topic_size=20)
topic_model.umap_model.set_params(random_state=RANDOM_STATE)
topics, probs = topic_model.fit_transform(reviews, embeddings)

In [None]:
# Show BERTopic's per-topic summary for the fitted model.
topic_model.get_topic_info()


In [None]:
# Render BERTopic's overview figure of the fitted topics
# (per the BERTopic docs: an inter-topic distance map).
topic_model.visualize_topics()

In [None]:
# Render BERTopic's bar-chart figure for the fitted topics
# (per the BERTopic docs: top terms of selected topics).
topic_model.visualize_barchart()

In [None]:
# Render BERTopic's heatmap figure for the fitted topics
# (per the BERTopic docs: the topic-to-topic similarity matrix).
topic_model.visualize_heatmap()

In [None]:
# Hierarchical topics
def linkage_function(x):
    """Cluster `x` with SciPy single linkage, using optimal leaf ordering."""
    return sch.linkage(x, 'single', optimal_ordering=True)


# Build the topic hierarchy with the custom linkage, then plot it.
hierarchical_topics = topic_model.hierarchical_topics(
    reviews,
    linkage_function=linkage_function,
)
topic_model.visualize_hierarchy(hierarchical_topics=hierarchical_topics)


In [None]:
# Compute the topic representation over time from the reviews and their
# timestamps, grouping the timestamps into 10 bins.
topics_over_time = topic_model.topics_over_time(
    docs=reviews,
    timestamps=time,
    nr_bins=10,
)

In [None]:
# Plot the 10 most frequent topics over time with normalized frequencies.
topic_model.visualize_topics_over_time(
    topics_over_time,
    top_n_topics=10,
    normalize_frequency=True,
)

In [None]:
# Same plot of the 10 most frequent topics over time, with frequency
# normalization turned off.
topic_model.visualize_topics_over_time(
    topics_over_time,
    top_n_topics=10,
    normalize_frequency=False,
)