In [None]:
# mount Google Drive
# The Drive is attached at /content/drive; the google.colab import means this
# cell is presumably only runnable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Base folder on the mounted Drive for this project's data.
# NOTE(review): this cell only defines the path (nothing is read here), and
# gdrive_path is not referenced by the other visible cells -- confirm whether
# the CSV load and the result files are meant to live under it.
gdrive_path = '/content/drive/MyDrive/mydata/'

**Import Data**

In [None]:
#read the reviews
import pandas as pd
# NOTE(review): relative path -- resolved against the current working
# directory, not gdrive_path; confirm where extracted_data.csv actually lives.
df = pd.read_csv("extracted_data.csv")

In [None]:
# Drop unnamed columns (presumably the index column left over from web scraping)
# The boolean mask marks columns whose name starts with "Unnamed"; negating it
# keeps every other column.
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
# Show the first five rows as the cell output.
df.head(5)

**Data Preparation**

In [None]:
#Prepare data for training
from ast import literal_eval

# Each 'Clean' value is parsed with literal_eval (presumably the string form of
# a token list -- confirm) and its items are joined with single spaces, giving
# one plain-text document per row.
data_words = [
    ' '.join(literal_eval(serialized_tokens))
    for serialized_tokens in df['Clean']
]

**BERTopic Model Training**

In [None]:
%%capture
!pip install bertopic

In [None]:
#Train BERTopic
from bertopic import BERTopic

# Build the model for English text with progress logging switched on.
topic_model = BERTopic(language="english", verbose=True)
# Fit the model on the prepared documents and keep both returned values:
# `topics` (presumably one topic id per document) and `probs` (presumably the
# matching probabilities) -- confirm against the BERTopic API.
# NOTE(review): no random seed is set anywhere in the visible cells, so results
# may differ between runs -- confirm whether reproducibility matters here.
topics, probs = topic_model.fit_transform(data_words)

**Save Results and Trained Model**

In [None]:
import pickle  # NOTE(review): not used in this cell; kept in case another cell relies on it

# Export the first 50 rows of the topic overview table to CSV, then persist the
# fitted model. Both targets are relative paths, so the files are written to the
# current working directory.
# NOTE(review): gdrive_path is not used here -- confirm whether the outputs
# should be stored on the mounted Drive instead.
topic_model.get_topic_info().head(50).to_csv('BERT_topics.csv')
topic_model.save('BERTopic_model')

**Visualize Results**

In [None]:
## Extracting Topics

# Fetch the topic overview table from the fitted model ...
freq = topic_model.get_topic_info()
# ... and show its first five rows as the cell output.
freq.head(5)

In [None]:
# Show what the model stores for topic id 0.
# NOTE(review): the inline remark assumes BERTopic numbers topics by descending
# size (with -1 reserved for outliers) -- confirm against the BERTopic docs.
topic_model.get_topic(0)  # Select the most frequent topic

In [None]:
# Peek at the first 10 entries of the model's topics_ attribute
# (presumably the per-document topic assignments -- confirm).
topic_model.topics_[:10]

In [None]:
#Visualize Topics
# Display BERTopic's topic visualization for the fitted model as the cell output.
topic_model.visualize_topics()

In [None]:
#Visualize Topic Hierarchy
# top_n_topics=50 limits the plot to 50 topics (presumably the most frequent
# ones -- confirm against the BERTopic docs).
topic_model.visualize_hierarchy(top_n_topics=50)

In [None]:
#Visualize Terms
# top_n_topics=10 limits the bar charts to 10 topics (presumably the most
# frequent ones -- confirm against the BERTopic docs).
topic_model.visualize_barchart(top_n_topics=10)