In [1]:
# Mount Google Drive at /content/drive so the project files stored there are reachable.
# The google.colab package is Colab-specific, so this cell is only meant to run there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Work from the project directory on the mounted Drive: later cells use relative
# "results/..." and "datasets" paths and import the local `evaluations` package.
%cd /content/drive/Othercomputers/My Computer/Google Drive Offline/PHD/Neural Topic Model/dpotm-tung/new-self-code

/content/drive/Othercomputers/My Computer/Google Drive Offline/PHD/Neural Topic Model/dpotm-tung/new-self-code


In [None]:
!pip install gensim==4.3.3

In [None]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm

In [None]:
# load_dotenv() runs before the key lookup below so that a key supplied through
# a .env file (python-dotenv's usual role) is already in the environment.
load_dotenv()

# Rubric sent as the system message with every request: it defines the 1-3
# coherence scale and asks for a JSON-only answer.
system_prompt = """
You are an expert in topic modeling and natural language processing.
Your task is to evaluate the coherence of topics represented by lists of words.
A coherent topic is one where the words are closely related in meaning and context, while an incoherent topic contains words that are unrelated or only loosely connected.
Given a list of words representing a topic, please provide a coherence score from 1 to 3, where:
+) 1 - Incoherent: The words do not relate well to each other.
+) 2 - Somewhat Coherent: The words have some relation but also include unrelated terms.
+) 3 - Coherent: The words are closely related and form a clear topic.
Please respond in the following JSON format:
{"coherence_score": <score>, "explanation": "<brief explanation of the score>"}

Note: Only provide the JSON response without any additional text.
"""

# Chat model name and API client used by evaluate_topic_coherence.
model = "gpt-4o"
llm = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
def _parse_coherence_reply(raw_reply, topic_idx):
    """Parse one model reply into a Python object, tolerating a Markdown code fence.

    Chat models sometimes wrap the requested JSON in a fence such as
    ```json ... ``` even when told to answer with JSON only; the fence is
    removed before parsing.

    Args:
        raw_reply: The message content returned by the chat completion.
        topic_idx: 0-based index of the topic line, used only in error messages.

    Returns:
        Whatever ``json.loads`` yields for the (unfenced) reply.

    Raises:
        TypeError: If ``raw_reply`` is None (no message content).
        json.JSONDecodeError: If the reply is not valid JSON; the message names
            the topic index and includes the raw reply.
    """
    if raw_reply is None:
        # json.loads(None) would also raise TypeError; keep the type, add context.
        raise TypeError(f"Topic {topic_idx}: the model returned no message content")

    payload = raw_reply.strip()
    if payload.startswith("```"):
        # Drop the opening fence line ("```" or "```json") ...
        first_line, newline, rest = payload.partition("\n")
        payload = rest if newline else first_line[3:]
        payload = payload.strip()
        # ... and the closing fence, if present.
        if payload.endswith("```"):
            payload = payload[:-3].strip()

    try:
        return json.loads(payload)
    except json.JSONDecodeError as exc:
        # Re-raise with the same exception type, but say which topic failed and why.
        raise json.JSONDecodeError(
            f"Topic {topic_idx}: reply is not valid JSON ({exc.msg}); raw reply: {raw_reply!r}",
            exc.doc,
            exc.pos,
        ) from exc


def evaluate_topic_coherence(topic_word_path):
    """Ask the LLM to rate the coherence of every topic in a top-words file.

    Each line of the file is one topic; its whitespace-separated words are
    joined with ", " and sent as the user message, with the module-level
    ``system_prompt`` as the system message, to ``llm`` using ``model`` at
    temperature 0.0. One request is made per line.

    Args:
        topic_word_path: Path to a text file with one topic per line.

    Returns:
        A list with one parsed JSON reply per line, in file order, so list
        indices line up with topic indices.

    Raises:
        TypeError: If a reply has no message content.
        json.JSONDecodeError: If a reply cannot be parsed as JSON.
    """
    with open(topic_word_path, 'r') as f:
        topics = f.readlines()

    results = []
    for topic_idx, topic in enumerate(tqdm(topics)):
        words = topic.strip().split()
        prompt_content = f"Evaluate the coherence of the following topic represented by these words: {', '.join(words)}."
        response = llm.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt_content}
            ],
            temperature=0.0
        )
        raw_reply = response.choices[0].message.content
        results.append(_parse_coherence_reply(raw_reply, topic_idx))
    return results

In [None]:
# Wikipedia-based C_V coherence (Palmetto-style scorer from LLM-ITL) for one
# ECRTM run: the topics reported as "DPO" and the ones under base_content/
# reported as "base".
import os
from evaluations.topic_coherence import TC_on_wikipedia_llm_itl

# os.path.join keeps the paths valid on both Windows and POSIX hosts (e.g. Colab);
# a backslash-separated literal would only resolve on Windows.
run_dir = os.path.join("results", "ECRTM", "20NG", "50", "120.0-500-2026-02-10_18-21-42")

for label, top_word_path in (
    ("DPO", os.path.join(run_dir, "top_words_15.txt")),
    ("base", os.path.join(run_dir, "base_content", "top_words_15.txt")),
):
    # Fail early with the offending path rather than somewhere inside the scorer.
    if not os.path.isfile(top_word_path):
        raise FileNotFoundError(top_word_path)

    tc_scores, tc_mean = TC_on_wikipedia_llm_itl(top_word_path, tc_metric="C_V")

    print(f"TC (C_V) mean {label}: {tc_mean:.6f}")

TC (C_V) mean DPO: 0.507311
TC (C_V) mean base: 0.487569


In [2]:
# Wikipedia-based C_V coherence (Palmetto-style scorer from LLM-ITL) for one
# ECRTM run: the topics reported as "DPO" and the ones under base_content/
# reported as "base".
import os
from evaluations.topic_coherence import TC_on_wikipedia_llm_itl

for label, top_word_path in (
    ("DPO", r"results/ECRTM/20NG/50/200.0-500-2026-02-11_21-58-45/top_words_15.txt"),
    ("base", r"results/ECRTM/20NG/50/200.0-500-2026-02-11_21-58-45/base_content/top_words_15.txt"),
):
    # Fail early with the offending path rather than somewhere inside the scorer.
    if not os.path.isfile(top_word_path):
        raise FileNotFoundError(top_word_path)

    tc_scores, tc_mean = TC_on_wikipedia_llm_itl(top_word_path, tc_metric="C_V")

    print(f"TC (C_V) mean {label}: {tc_mean:.6f}")

TC (C_V) mean DPO: 0.471001
TC (C_V) mean base: 0.455594


In [None]:
import os
from evaluations.topic_coherence import TC_on_wikipedia_llm_itl

# Wikipedia-based C_V coherence (Palmetto-style scorer from LLM-ITL) for the run
# reported as "raw ECRTM".
# os.path.join keeps the path valid on both Windows and POSIX hosts (e.g. Colab);
# a backslash-separated literal would only resolve on Windows.
top_word_path = os.path.join(
    "results", "ECRTM", "20NG", "50", "200.0-500-2026-02-12_00-03-05", "top_words_15.txt")

# Fail early with the offending path rather than somewhere inside the scorer.
if not os.path.isfile(top_word_path):
    raise FileNotFoundError(top_word_path)

tc_scores, tc_mean = TC_on_wikipedia_llm_itl(top_word_path, tc_metric="C_V")

print(f"TC (C_V) mean raw ECRTM: {tc_mean:.6f}")

TC (C_V) mean raw ECRTM: 0.459906


In [None]:
import os
from evaluations.topic_coherence import TC_on_wikipedia_llm_itl

# Wikipedia-based C_V coherence (Palmetto-style scorer from LLM-ITL) for one
# ECRTM run: the topics reported as "DPO" and the ones under base_content/
# reported as "base".
# os.path.join keeps the paths valid on both Windows and POSIX hosts (e.g. Colab);
# a backslash-separated literal would only resolve on Windows.
run_dir = os.path.join("results", "ECRTM", "20NG", "50", "200.0-500-2026-02-12_12-08-22")

for label, top_word_path in (
    ("DPO", os.path.join(run_dir, "top_words_15.txt")),
    ("base", os.path.join(run_dir, "base_content", "top_words_15.txt")),
):
    # Fail early with the offending path rather than somewhere inside the scorer.
    if not os.path.isfile(top_word_path):
        raise FileNotFoundError(top_word_path)

    tc_scores, tc_mean = TC_on_wikipedia_llm_itl(top_word_path, tc_metric="C_V")

    print(f"TC (C_V) mean {label}: {tc_mean:.6f}")

TC (C_V) mean DPO: 0.359184
TC (C_V) mean base: 0.346328


## ECRTM

In [None]:
import evaluations

# NPMI on the Wikipedia reference corpus for the ECRTM top-15 words; the result
# is unpacked as (per-topic scores, overall score).
ecrtm_top_words_file = "./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt"
npmi_result = evaluations.topic_coherence.TC_on_wikipedia(ecrtm_top_words_file, cv_type='NPMI')
NPMI_15_list, NPMI_15 = npmi_result
NPMI_15

-0.08157239999999998

In [None]:
# Ask the LLM judge to rate every ECRTM topic (one API request per line of the file).
results = evaluate_topic_coherence("./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt")

100%|██████████| 50/50 [01:25<00:00,  1.71s/it]


In [None]:
# Display the parsed LLM replies (score and explanation) for all topics.
results

[{'coherence_score': 2,
  'explanation': "The words are mostly related to ice hockey, with references to NHL teams and players. However, 'cubs' is unrelated to the topic, which affects the overall coherence."},
 {'coherence_score': 1,
  'explanation': 'The words are a mix of names, places, and unrelated terms, lacking a clear thematic connection.'},
 {'coherence_score': 1,
  'explanation': 'The words are largely unrelated, covering diverse areas such as sensory experiences, names, scientific terms, and general nouns without a clear unifying theme.'},
 {'coherence_score': 1,
  'explanation': 'The words are a mix of unrelated terms from different contexts such as locations, sports, gender, and abstract concepts, lacking a clear, cohesive theme.'},
 {'coherence_score': 2,
  'explanation': "The words are somewhat related to computer hardware and software, particularly focusing on video and graphics technology (e.g., vga, ati, vram, svga, monitor, card). However, there are some terms like '

In [None]:
# Group topic indices by the LLM coherence score (1, 2 or 3).
# Any other score value lands in none of the lists.
scores = [res['coherence_score'] for res in results]
list_1 = [idx for idx, score in enumerate(scores) if score == 1]
list_2 = [idx for idx, score in enumerate(scores) if score == 2]
list_3 = [idx for idx, score in enumerate(scores) if score == 3]

In [None]:
# Topic indices the LLM scored 1 ("Incoherent" in the rubric).
print(list_1)

[1, 2, 3, 7, 8, 10, 14, 15, 19, 20, 21, 26, 28, 30, 31, 36, 38, 40, 41, 42, 45, 48]


In [None]:
# Topic indices the LLM scored 2 ("Somewhat Coherent" in the rubric).
print(list_2)

[0, 4, 5, 6, 9, 11, 12, 13, 16, 17, 22, 23, 25, 29, 33, 35, 47]


In [None]:
# Topic indices the LLM scored 3 ("Coherent" in the rubric).
print(list_3)

[18, 24, 27, 32, 34, 37, 39, 43, 44, 46, 49]


In [None]:
# Load the raw top-word lines (trailing newlines kept) so single topics can be inspected.
ecrtm_top_words_file = "./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt"
with open(ecrtm_top_words_file, 'r') as fh:
    topics = list(fh)
topics[11]

'morality morals objective moral rushdie consent mutual subjective orientation morally islamic overwhelming atheists hudson assert\n'

In [None]:
# Inspect the raw top-word line of topic 4.
topics[4]

'diamond vga ati vram vesa video svga mouse windows centris monitor dos simms card com\n'

In [None]:
# Wikipedia NPMI of the topics the LLM scored 1.
print([NPMI_15_list[topic_idx] for topic_idx in list_1])

[-0.12384, -0.21978, -0.11696, -0.21027, -0.18032, -0.1097, -0.13884, -0.18763, -0.12687, -0.1661, -0.03517, -0.12166, -0.16447, -0.12732, -0.08304, -0.06575, -0.14678, -0.09677, -0.17056, -0.18547, -0.09211, -0.12636, -0.24619, -0.15971, -0.00604, -0.14589, -0.14259, -0.14032]


In [None]:
# Wikipedia NPMI of the topics the LLM scored 2.
print([NPMI_15_list[topic_idx] for topic_idx in list_2])

[-0.03225, -0.06573, -0.01097, -0.10894, -0.03895, -0.08882, -0.01416, 0.06798, 0.04761, -0.01807, -0.06535, 0.03987, -0.08636]


In [None]:
# Wikipedia NPMI of the topics the LLM scored 3.
print([NPMI_15_list[topic_idx] for topic_idx in list_3])

[0.01203, -0.07539, 0.12256, -0.01388, -0.01038, -0.04261, 0.06912, 0.05698, 0.0136]


In [None]:
import evaluations

# C_V on the Wikipedia reference corpus for the ECRTM top-15 words; the result
# is unpacked as (per-topic scores, overall score).
ecrtm_top_words_file = "./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt"
cv_result = evaluations.topic_coherence.TC_on_wikipedia(ecrtm_top_words_file, cv_type='C_V')
TC_15_list, TC_15 = cv_result
TC_15

0.43756600000000007

In [None]:
# Wikipedia C_V per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([TC_15_list[topic_idx] for topic_idx in bucket])

[0.45052, 0.54373, 0.36812, 0.50761, 0.45803, 0.46973, 0.43048, 0.45919, 0.44736, 0.51607, 0.46536, 0.43278, 0.36027, 0.48891, 0.56632, 0.44712, 0.54525, 0.45428, 0.14327, 0.49841, 0.44796, 0.41158]
[0.48488, 0.52851, 0.59044, 0.56336, 0.401, 0.33268, 0.61459, 0.42872, 0.28714, 0.3954, 0.32311, 0.45228, 0.24387, 0.30749, 0.4396, 0.41887, 0.50719]
[0.46293, 0.39047, 0.4194, 0.46072, 0.48066, 0.29384, 0.39843, 0.42547, 0.45834, 0.3169, 0.53966]


In [None]:
# Wikipedia NPMI per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([NPMI_15_list[topic_idx] for topic_idx in bucket])

[-0.12384, -0.21978, -0.11696, -0.18032, -0.1097, -0.13884, -0.12687, -0.1661, -0.12166, -0.16447, -0.12732, -0.14678, -0.09677, -0.17056, -0.18547, -0.12636, -0.24619, -0.15971, -0.00604, -0.14589, -0.14259, -0.14032]
[-0.03225, -0.06573, -0.21027, -0.01097, -0.10894, -0.03895, -0.18763, -0.08882, -0.01416, -0.03517, 0.06798, -0.08304, -0.06575, -0.01807, -0.06535, -0.09211, -0.08636]
[0.01203, -0.07539, 0.04761, 0.12256, 0.03987, -0.01388, -0.01038, -0.04261, 0.06912, 0.05698, 0.0136]


In [None]:
import evaluations

# Wikipedia C_V for the 200.0-600 ECRTM run; unpacked as (per-topic scores, overall score).
run_top_words_file = "./results/ECRTM/20NG/50/200.0-600-2026-01-09_15-13-17/top_words_15.txt"
cv_result = evaluations.topic_coherence.TC_on_wikipedia(run_top_words_file, cv_type='C_V')
TC_15_list, TC_15 = cv_result
print("C_v:", TC_15)


C_v: 0.4410064000000001


In [None]:
import evaluations

# Wikipedia NPMI for the 200.0-600 ECRTM run; unpacked as (per-topic scores, overall score).
run_top_words_file = "./results/ECRTM/20NG/50/200.0-600-2026-01-09_15-13-17/top_words_15.txt"
npmi_result = evaluations.topic_coherence.TC_on_wikipedia(run_top_words_file, cv_type='NPMI')
NPMI_15_list, NPMI_15 = npmi_result
print("NPMI:", NPMI_15)

NPMI: -0.0834394


In [None]:
import evaluations

# Score the ECRTM top-15 words with every Wikipedia-based metric; each call is
# unpacked as (per-topic scores, overall score).
top_word_path = "./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt"
wiki_tc = evaluations.topic_coherence.TC_on_wikipedia

CV_15_list, CV_15 = wiki_tc(top_word_path, cv_type='C_V')
NPMI_15_list, NPMI_15 = wiki_tc(top_word_path, cv_type='NPMI')
Umass_15_list, Umass_15 = wiki_tc(top_word_path, cv_type='UMass')
UCI_15_list, UCI_15 = wiki_tc(top_word_path, cv_type='UCI')
CA_15_list, CA_15 = wiki_tc(top_word_path, cv_type='C_A')
CP_15_list, CP_15 = wiki_tc(top_word_path, cv_type='C_P')

# Report the overall scores, one metric per line.
for report_label, overall_score in (
    ("ECRTM UCI", UCI_15),
    ("ECRTM NPMI", NPMI_15),
    ("ECRTM C_V", CV_15),
    ("ECRTM C_A", CA_15),
    ("ECRTM C_P", CP_15),
    ("ECRTM UMass", Umass_15),
):
    print(report_label, overall_score)

ECRTM UCI -2.7442625999999994
ECRTM NPMI -0.08157239999999998
ECRTM C_V 0.43756600000000007
ECRTM C_A 0.1609267999999999
ECRTM C_P -0.242523
ECRTM UMass -4.8879470000000005


In [None]:
import evaluations
import datasethandler
import os

# Load 20NG once; `dataset` is reused by later cells.
DATA_DIR = 'datasets'
dataset = datasethandler.BasicDatasetHandler(
    os.path.join(DATA_DIR, "20NG"),
    device="cuda",
    read_labels=True,
    as_tensor=True,
    contextual_embed=True,
)

# One stripped line per topic.
top_word_path = "./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt"
with open(top_word_path, 'r') as f:
    top_words_15 = [line.strip() for line in f]

# Coherence against the training texts; cv_type is one of
# {'u_mass', 'c_v', 'c_uci', 'c_npmi'}. Each call is unpacked as
# (per-topic scores, overall score).
train_tc = evaluations.compute_topic_coherence
TC_list, TC = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_v')
umass_list, umass = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='u_mass')
cuci_list, cuci = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_uci')
npmi_list, npmi = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_npmi')

print("ECRTM topic coherence on training set:")
print(f"TC: {TC:.5f}, UMass: {umass:.5f}, CUCI: {cuci:.5f}, NPMI: {npmi:.5f}")

===>train_size:  11314
===>test_size:  7532
===>vocab_size:  5000
===>average length: 110.543
ECRTM topic coherence on training set:
TC: 0.58065, UMass: -9.35300, CUCI: -6.76414, NPMI: -0.18610


In [None]:
# Rate every ECRTM topic with the LLM judge, then group topic indices by score.
# Any score other than 1, 2 or 3 lands in none of the lists.
results = evaluate_topic_coherence("./results/ECRTM/20NG/2026-01-05_17-08-38/top_words_15.txt")
scores = [res['coherence_score'] for res in results]
list_1 = [idx for idx, score in enumerate(scores) if score == 1]
list_2 = [idx for idx, score in enumerate(scores) if score == 2]
list_3 = [idx for idx, score in enumerate(scores) if score == 3]
for bucket in (list_1, list_2, list_3):
    print(bucket)

100%|██████████| 50/50 [01:37<00:00,  1.96s/it]

[1, 2, 3, 7, 8, 10, 14, 15, 19, 20, 21, 26, 28, 30, 31, 36, 38, 40, 41, 42, 45, 48]
[0, 4, 5, 6, 9, 11, 12, 13, 16, 17, 22, 23, 25, 29, 33, 35, 47]
[18, 24, 27, 32, 34, 37, 39, 43, 44, 46, 49]





In [None]:
# Training-set NPMI per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([npmi_list[topic_idx] for topic_idx in bucket])

[-0.314883242985852, -0.418129856432301, -0.4052075533132331, -0.3288955862170613, -0.2799646619102836, -0.4078719820105057, -0.3578599510063627, -0.06573546220572413, -0.38357657786852656, -0.3720264476590977, -0.34875685997403544, -0.36233280427195336, -0.3826947401102229, -0.2856076218860008, -0.3716021159456863, -0.3706821996334611, -0.363714807763434, -0.41860473055779246, -0.3128781587491622, -0.38607515229838063, -0.40054378937639407, -0.364365144939854]
[0.12071652753834333, 0.09349892820454775, -0.3903805503677742, -0.13352057743850962, -0.049882556865570825, -0.2600350194534592, -0.3411474281558217, -0.15630883540290047, -0.16132327278688913, -0.31602294019739424, -0.07286434124012928, 0.10251928959900618, -0.2656378848041017, -0.04656836539698138, 0.008443720268298356, -0.3522985348119565, -0.06869968644844598]
[0.011611197124843296, 0.06612318962662164, 0.1398499978259104, 0.20301920516883476, 0.028572969947945913, -0.10136883105871605, 0.0575129163842342, 0.047060637644709

In [None]:
# Training-set C_V per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([TC_list[topic_idx] for topic_idx in bucket])

[0.47899156797338427, 0.6269065826700564, 0.5398120447417375, 0.44800197293620136, 0.4164083707356513, 0.6724754464849103, 0.46634532112162486, 0.5510938671500278, 0.642381553125938, 0.557438892209067, 0.5245644665136902, 0.5599151605708268, 0.566477061470417, 0.3160694836194234, 0.621653521047994, 0.5800493265381071, 0.5833097977946754, 0.6244649546832608, 0.3885842945157755, 0.5982425803078074, 0.6024556884100207, 0.4454903973780769]
[0.7712574110196154, 0.6744790102243586, 0.5625145760083735, 0.40193979923354395, 0.4903549148734773, 0.36104315302823875, 0.35287099579817227, 0.36454489043523053, 0.4084715614063325, 0.390578291515311, 0.6012327274080399, 0.7640992524388307, 0.38289459842243756, 0.5517547539521893, 0.6444155942655729, 0.4302568739281883, 0.42911117218872197]
[0.9041095247253707, 0.8520338823585905, 0.8426640966528087, 0.9368149572639053, 0.7788092659433159, 0.5626869275448371, 0.7009143463374621, 0.7381436919462676, 0.7253091150430503, 0.7155266996147741, 0.88272705868

## ETM

In [None]:
import evaluations

# Score the ETM top-15 words with every Wikipedia-based metric; each call is
# unpacked as (per-topic scores, overall score).
top_word_path = "./results/ETM/20NG/50/100.0-500-2026-01-09_17-34-28/top_words_15.txt"
wiki_tc = evaluations.topic_coherence.TC_on_wikipedia

CV_15_list, CV_15 = wiki_tc(top_word_path, cv_type='C_V')
NPMI_15_list, NPMI_15 = wiki_tc(top_word_path, cv_type='NPMI')
Umass_15_list, Umass_15 = wiki_tc(top_word_path, cv_type='UMass')
UCI_15_list, UCI_15 = wiki_tc(top_word_path, cv_type='UCI')
CA_15_list, CA_15 = wiki_tc(top_word_path, cv_type='C_A')
CP_15_list, CP_15 = wiki_tc(top_word_path, cv_type='C_P')

# Report the overall scores, one metric per line.
for report_label, overall_score in (
    ("ETM UCI", UCI_15),
    ("ETM NPMI", NPMI_15),
    ("ETM C_V", CV_15),
    ("ETM C_A", CA_15),
    ("ETM C_P", CP_15),
    ("ETM UMass", Umass_15),
):
    print(report_label, overall_score)

ETM UCI -1.3592522000000002
ETM NPMI -0.027569999999999997
ETM C_V 0.3694534000000001
ETM C_A 0.1481174
ETM C_P -0.035828799999999994
ETM UMass -3.7633514000000012


In [None]:
# One stripped line per topic. `dataset` and `evaluations` must already be in
# scope when this cell runs.
top_word_path = "./results/ETM/20NG/50/100.0-500-2026-01-09_17-34-28/top_words_15.txt"
with open(top_word_path, 'r') as f:
    top_words_15 = [line.strip() for line in f]

# Coherence against the training texts; cv_type is one of
# {'u_mass', 'c_v', 'c_uci', 'c_npmi'}. Each call is unpacked as
# (per-topic scores, overall score).
train_tc = evaluations.compute_topic_coherence
TC_list, TC = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_v')
umass_list, umass = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='u_mass')
cuci_list, cuci = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_uci')
npmi_list, npmi = train_tc(dataset.train_texts, dataset.vocab, top_words_15, cv_type='c_npmi')

print("ETM topic coherence on training set:")
print(f"TC: {TC:.5f}, UMass: {umass:.5f}, CUCI: {cuci:.5f}, NPMI: {npmi:.5f}")

ETM topic coherence on training set:
TC: 0.46805, UMass: -2.63573, CUCI: -0.65852, NPMI: 0.01195


In [None]:
# Rate every ETM topic with the LLM judge, then group topic indices by score.
# Any score other than 1, 2 or 3 lands in none of the lists.
results = evaluate_topic_coherence("./results/ETM/20NG/50/100.0-500-2026-01-09_17-34-28/top_words_15.txt")
scores = [res['coherence_score'] for res in results]
list_1 = [idx for idx, score in enumerate(scores) if score == 1]
list_2 = [idx for idx, score in enumerate(scores) if score == 2]
list_3 = [idx for idx, score in enumerate(scores) if score == 3]
for bucket in (list_1, list_2, list_3):
    print(bucket)

100%|██████████| 50/50 [01:36<00:00,  1.92s/it]

[1, 7, 9, 10, 12, 13, 18, 26, 30, 32, 35, 37, 39, 42, 44, 45, 46]
[0, 2, 3, 4, 5, 6, 14, 16, 19, 20, 21, 22, 24, 25, 27, 29, 31, 33, 34, 36, 38, 40, 43, 47, 48, 49]
[8, 11, 15, 17, 23, 28, 41]





In [None]:
# Training-set NPMI per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([npmi_list[topic_idx] for topic_idx in bucket])

[0.022752942143465973, 0.05103809947006055, -0.0735899626386231, -0.04688608939979156, 0.030753112823297, -0.002163138148739781, -0.05058947610663966, 0.018024606683217294, 0.024314052149482127, -0.07392558536174301, -0.08965888183656724, -0.015103198118742675, 0.05395440596712257, -0.011753930535969136, 0.01920023614699978, 0.01943120577467426, 0.0033937791189831055]
[-0.1556962480047321, -0.1222128084974802, 0.04393409660785356, -0.0051749040656704735, -0.10824775569271106, 0.026264323039413385, 0.030196622010528223, 0.054955347238254404, 0.10706614119064015, 0.01687944685491855, 0.04758970228597919, -0.007783216313191518, -0.10348795433321026, -0.04054549541728268, 0.01878801241123961, -0.024637567880142566, -0.010231863990324512, 0.008512181015250602, 0.007987928834397374, 0.019548702509874987, 0.048043873241190665, 0.13394360383192644, -0.06242273636022296, 0.012443087434899068, -0.026959954632488153, 0.0015346460787857138]
[0.13488254580480266, 0.10290826670391456, 0.033253811559

In [None]:
# Wikipedia NPMI per topic, one printed list per LLM score bucket (1, 2, 3).
for bucket in (list_1, list_2, list_3):
    print([NPMI_15_list[topic_idx] for topic_idx in bucket])

[0.00769, -0.12087, -0.05619, -0.07904, 0.04818, 0.00076, -0.11952, -0.08739, 0.02978, -0.10461, -0.11706, -0.08732, 0.02194, 0.01571, -0.04421, 0.02511, -0.07249]
[-0.05304, -0.08176, -0.04284, -0.07767, -0.04064, 0.00857, -0.07213, 0.01273, -0.03475, 0.00033, 0.04456, 0.00112, -0.10052, 0.02783, 0.00841, -0.15271, -0.05506, -0.02991, -0.05067, 0.09322, -0.09819, -0.02, -0.08095, -0.03318, -0.04843, -0.05161]
[0.045, 0.04121, 0.02283, -0.03879, 0.14829, 0.03421, 0.03557]
