In [1]:
import ast
import heapq
import os
import re
import sys
import time

from tqdm import tqdm

sys.path.append("../LDP-RM")
from ldp_rm import LDP_RM
from data_rm import Data
from svsm_rm import SVSM
from metrics import Metrics

In [6]:
def analyze_cluster_txt(file_path):
    """Scan a cluster text file and derive the size parameters used for ``Data``.

    Two line formats are recognised:

    * pair format, e.g. ``(1, 2) # (3, 4)`` -- ``#``-separated Python tuples;
    * plain format, e.g. ``1 2 3 4 5`` -- whitespace-separated integer ids.

    Args:
        file_path: Path of the UTF-8 encoded cluster file.

    Returns:
        A ``(limit, domain_size, user_total)`` tuple where ``limit`` and
        ``user_total`` are both the number of lines in the file (blank lines
        included) and ``domain_size`` is ``max(id) + 1`` (ids are assumed to
        start at 0), or ``0`` when no id could be parsed.

    Raises:
        ValueError: If a plain-format line contains a non-integer token.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    limit = len(lines)
    domain_ids = set()

    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if "(" in line:  # pair format, e.g. (1, 2) # (3, 4)
            for item in line.split("#"):
                try:
                    pair = ast.literal_eval(item.strip())
                    domain_ids.update(pair)
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    # Best effort: skip chunks that are not a valid tuple
                    # literal (or not iterable) instead of aborting the scan.
                    # Unlike a bare ``except``, KeyboardInterrupt/SystemExit
                    # still propagate.
                    continue
        else:  # plain format, e.g. 1 2 3 4 5
            domain_ids.update(map(int, line.split()))

    # Ids are assumed to start at 0; an empty/unparseable file has no domain.
    domain_size = max(domain_ids) + 1 if domain_ids else 0

    return limit, domain_size, limit  # user_total = limit

def extract_sequence(line):
    """Rebuild the id sequence encoded by the ``(a, b)`` pairs on one line.

    The result is the first element of the first pair followed by the second
    element of every pair, e.g. ``"(1, 2) # (2, 3)"`` -> ``[1, 2, 3]``.

    Args:
        line: One line of a cluster file.

    Returns:
        The list of integer ids, or an empty list when the line contains no
        ``(int, int)`` pair (blank line or a different format).
    """
    pairs = re.findall(r"\((\d+),\s*(\d+)\)", line)
    if not pairs:
        # Without this guard ``pairs[0]`` raises IndexError on blank lines.
        return []
    sequence = [int(pairs[0][0])]
    sequence.extend(int(second) for _, second in pairs)
    return sequence

In [7]:
# Folder holding the per-cluster files; only names like cluster_<number>.txt
# are picked up for evaluation.
cluster_folder = "../dataset/cluster_movie/"
cluster_files = [
    f for f in os.listdir(cluster_folder) if re.match(r"cluster_\d+\.txt$", f)
]

# ===== Tunable parameters =====
PRESET_TOP_K = 64  # upper bound for the per-cluster top_k
TOP_KS = 1600  # upper bound for the per-cluster top_ks
EPSILON = 4.0  # base epsilon; scaled per cluster by the evaluation loop
SUBMAT = 4  # forwarded to LDP_RM as `submat`
# ==============================

# Running statistics filled in by the evaluation loop.
max_ncr = 0
max_f1 = 0
min_var = 1
ncr_sum = 0
f1_sum = 0
var_sum = 0
ct_sum = 0
total = 0

# Min-heap of (confidence, rule) entries shared across all clusters.
# `heapq` is imported in the notebook's import cell.
top_rules_heap = []

In [12]:
# Evaluate LDP-RM on every cluster file and accumulate NCR / F1 / VAR / time.
#
# All running state is (re)initialised here so that re-running this cell does
# not double-count results left over from a previous execution.
ROUNDS_PER_CLUSTER = 10  # independent LDP-RM runs per cluster
FINAL_TOP_RULES = 32  # number of globally best rules kept in final_conf_dict

max_ncr = 0
max_f1 = 0
ncr_sum = 0
f1_sum = 0
var_sum = 0
ct_sum = 0
# `total` counts evaluated rounds (not files): the *_sum accumulators grow
# once per round, so the averages computed later must divide by the same count.
total = 0
max_file_ncr = ''
max_file_f1 = ''
top_rules_heap = []
best_rule_confidence = {}  # rule -> highest confidence seen in any round
final_conf_dict = {}

for filename in sorted(cluster_files):
    print(filename)
    cluster_path = os.path.join(cluster_folder, filename)
    cluster_id = filename.replace(".txt", "")

    # Derive the Data parameters for this cluster.
    limit, domain_size, user_total = analyze_cluster_txt(cluster_path)

    # Skip clusters with too few users.
    if user_total < 10:
        continue

    print(limit, domain_size, user_total)
    # Collect the movie ids that occur in this cluster file.
    with open(cluster_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    cluster_size = len(lines)
    sequences = [
        extract_sequence(line) for line in tqdm(lines, desc="Extracting sequences...")
    ]
    movie_ids = sorted(set(movie_id for seq in sequences for movie_id in seq))
    # Skip clusters with too few distinct movies.
    if len(movie_ids) < 15:
        continue

    # Epsilon grows for smaller clusters and is capped at 6.0.
    epsilon = min(6.0, EPSILON * (400000 / cluster_size) ** 0.5)
    print(epsilon)
    # top_k: at most half of the distinct movies in the cluster.
    top_k = min(PRESET_TOP_K, int(len(movie_ids) * 0.5))
    # top_ks: never more than the number of pairs that top_k items can form.
    top_ks = min(TOP_KS, int(top_k * (top_k - 1) / 2))
    # top_kc: integer division keeps this an int (``/`` produced e.g. 10.5).
    top_kc = min(32, top_ks // 2)

    # Build Data, Metrics, LDP_RM for this cluster.
    # NOTE(review): domain_size is fixed at 5020 here (the same value the
    # final-result cell uses for the full dataset); the per-cluster value
    # returned by analyze_cluster_txt is only printed -- confirm intended.
    data = Data(
        dataname=cluster_id,
        limit=limit,
        domain_size=5020,
        user_total=user_total,
    )  # Movie dataset
    metrics = Metrics(data, top_k=top_k, top_ks=top_ks, top_kc=top_kc)
    ldp_rm = LDP_RM(
        data, epsilon=epsilon, top_k=top_k, top_ks=top_ks, top_kc=top_kc, submat=SUBMAT
    )

    for t in range(ROUNDS_PER_CLUSTER):
        t1 = time.time()
        result_fre_dict_svd, result_conf_dict, hitrate_rm = ldp_rm.find_itemset_svd(
            task="RM",
            method="AMN",
            singnum=0.5,
            use_group=True,
            group_num=5,
            test="test_constant",
        )

        # Remember the best confidence per rule. Pushing every (confidence,
        # rule) entry into one heap let the same rule occupy several of the
        # top slots and later collapse to its lowest confidence in dict().
        for rule, confidence in result_conf_dict.items():
            if confidence > best_rule_confidence.get(rule, float("-inf")):
                best_rule_confidence[rule] = confidence

        t2 = time.time()
        # NOTE: int() truncates, so rounds shorter than one second report 0.
        consume_time = int(t2 - t1)
        print("Final mining topks relations:", result_conf_dict)
        ncr = metrics.NCR(result_conf_dict)
        print("ldp_rm NCR", ncr)
        f1 = metrics.F1(result_conf_dict)
        print("ldp_rm F1", f1)
        var = metrics.VARt(result_conf_dict)
        print("ldp_rm VAR", var)
        ct = consume_time
        print("time:", ct)

        ncr_sum += ncr
        f1_sum += f1
        var_sum += var
        ct_sum += ct
        total += 1

        # On ties the most recent file wins, as before.
        if ncr >= max_ncr:
            max_ncr = ncr
            max_file_ncr = filename
        if f1 >= max_f1:
            max_f1 = f1
            max_file_f1 = filename

# Global top rules over all clusters and rounds, highest confidence first.
top_rules_heap = heapq.nlargest(
    FINAL_TOP_RULES,
    ((confidence, rule) for rule, confidence in best_rule_confidence.items()),
)
final_conf_dict = {rule: confidence for confidence, rule in top_rules_heap}
heapq.heapify(top_rules_heap)  # keep the min-heap invariant for later use
        

cluster_0.txt
3274 5001 3274


Extracting sequences...: 100%|██████████| 3274/3274 [00:00<00:00, 219338.91it/s]


6.0
----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1348,): 2542, (554,): 849, (2616,): 794, (551,): 724, (542,): 629, (1344,): 555, (2220,): 527, (164,): 523, (593,): 478, (159,): 457, (519,): 453, (169,): 452, (1585,): 430, (1920,): 419, (484,): 410, (2271,): 395, (197,): 393, (1294,): 389, (369,): 385, (49,): 373, (1347,): 373, (1368,): 363, (173,): 358, (1828,): 346, (341,): 333, (2574,): 319, (3017,): 315, (1209,): 304, (465,): 287, (2019,): 279, (1925,): 272, (2324,): 271, (187,): 264, (1352,): 264, (1574,): 262, (1973,): 254, (1619,): 251, (889,): 238, (1210,): 235, (3091,): 235, (1253,): 226, (1075,): 222, (1331,): 221, (6,): 219, (1795,): 219, (3535,): 218, (3092,): 214, (1343,): 209, (1257,): 206, (4968,): 203, (1227,): 202, (2388,): 197, (1378,): 192, (2680,): 189, (939,): 188, (1182,): 188, (296,): 188, (2000,): 186, (1192,): 185, (2325,): 183, (146,): 182, (899,): 180, (3886,): 177, (2968,): 175}

Extracting sequences...: 100%|██████████| 545/545 [00:00<00:00, 231225.54it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1893,): 545, (318,): 51, (846,): 51, (296,): 43, (2775,): 41, (1901,): 40, (355,): 39, (2489,): 36, (526,): 36, (1320,): 35, (590,): 34, (111,): 30, (1174,): 29, (1315,): 29, (1472,): 29, (2431,): 28, (1187,): 28, (3180,): 27, (2876,): 27, (479,): 26, (2636,): 24, (2757,): 24, (260,): 24, (2339,): 23, (2639,): 23, (2372,): 23, (1176,): 23, (2589,): 22, (2709,): 22, (70,): 22, (2346,): 21, (1813,): 21, (3189,): 21, (2743,): 21, (501,): 21, (1172,): 21, (1664,): 20, (1323,): 20, (2630,): 19, (1360,): 19, (1947,): 19, (51,): 19, (2774,): 19, (176,): 19, (3270,): 19, (1923,): 18, (4901,): 18, (1236,): 18, (1198,): 18, (2579,): 18, (1888,): 18, (2056,): 17, (2247,): 17, (1353,): 17, (2679,): 17, (2296,): 17, (3610,): 17, (2859,): 16, (827,): 16, (2312,): 16, (3616,): 16, (1830,): 16, (3390,): 16, (2328,): 16}
Top 64 estimate singleton reported after step3(represent a

Extracting sequences...: 100%|██████████| 26244/26244 [00:00<00:00, 233531.52it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(2775,): 9378, (1377,): 2523, (1082,): 2046, (2627,): 1815, (2623,): 1678, (2914,): 1660, (2600,): 1655, (590,): 1631, (2679,): 1615, (2876,): 1538, (1187,): 1468, (318,): 1427, (526,): 1417, (296,): 1350, (355,): 1327, (1836,): 1322, (260,): 1321, (846,): 1288, (2489,): 1227, (1664,): 1206, (111,): 1142, (1947,): 1092, (1474,): 1039, (4901,): 1037, (1172,): 1034, (479,): 1013, (1176,): 1000, (1567,): 991, (2093,): 991, (3705,): 981, (1174,): 935, (2687,): 911, (4215,): 910, (605,): 907, (1931,): 889, (2517,): 868, (2714,): 867, (2708,): 847, (1245,): 845, (3030,): 844, (2629,): 835, (51,): 830, (1448,): 830, (1206,): 804, (2546,): 794, (1107,): 747, (1535,): 732, (2616,): 728, (1198,): 716, (17,): 714, (2314,): 713, (1061,): 701, (2807,): 692, (1880,): 682, (2490,): 682, (1175,): 677, (894,): 677, (1185,): 676, (898,): 670, (3905,): 664, (1233,): 659, (1599,): 6

Extracting sequences...: 100%|██████████| 1811/1811 [00:00<00:00, 234643.66it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(3332,): 1737, (3017,): 411, (2239,): 267, (3886,): 245, (1920,): 240, (344,): 226, (1619,): 224, (1925,): 215, (1574,): 209, (2271,): 209, (4794,): 206, (2019,): 202, (106,): 199, (1665,): 197, (1225,): 193, (71,): 184, (2325,): 182, (1710,): 181, (1294,): 173, (1182,): 173, (146,): 168, (889,): 164, (1620,): 164, (2574,): 162, (2968,): 161, (1795,): 160, (773,): 153, (1065,): 152, (2687,): 150, (1344,): 146, (159,): 145, (1916,): 145, (593,): 142, (1548,): 141, (1828,): 136, (1128,): 135, (1251,): 131, (341,): 131, (2220,): 129, (1347,): 128, (1250,): 127, (1686,): 126, (3535,): 122, (3936,): 122, (1276,): 119, (32,): 118, (49,): 117, (2247,): 113, (1192,): 113, (1073,): 110, (3092,): 110, (1378,): 109, (1257,): 107, (3931,): 106, (1947,): 106, (277,): 101, (590,): 100, (2000,): 100, (939,): 100, (3362,): 99, (1187,): 98, (542,): 96, (1348,): 94, (1260,): 91}
T

Extracting sequences...: 100%|██████████| 406/406 [00:00<00:00, 226568.31it/s]

6.0
----------------------LDP-RM Result----------------------





***********SVIM Result**********
True top singleton of all users: {(3276,): 385, (318,): 58, (846,): 48, (523,): 41, (355,): 41, (260,): 40, (590,): 39, (2775,): 38, (526,): 37, (1187,): 35, (1198,): 34, (1176,): 33, (1172,): 33, (479,): 31, (296,): 29, (2679,): 29, (2546,): 23, (898,): 23, (3467,): 22, (1664,): 22, (1947,): 22, (1245,): 21, (3014,): 21, (2289,): 21, (3049,): 21, (4532,): 19, (1175,): 19, (89,): 19, (2419,): 18, (586,): 18, (905,): 18, (3464,): 17, (1174,): 17, (111,): 16, (2712,): 16, (51,): 15, (940,): 15, (1081,): 15, (1880,): 15, (3911,): 15, (1567,): 15, (755,): 15, (1722,): 15, (3335,): 15, (1887,): 15, (605,): 14, (3338,): 14, (1206,): 14, (2316,): 14, (2490,): 14, (1878,): 13, (2060,): 13, (61,): 13, (2277,): 13, (361,): 13, (2624,): 13, (3016,): 13, (3162,): 12, (1107,): 12, (1215,): 12, (136,): 12, (2680,): 12, (1350,): 12, (3311,): 12}
Top 64 estimate singleton reported after step3(represent all users):
1269:0   1479:0   1809:0   4240:0   1794:0   3037:0   3

Extracting sequences...: 100%|██████████| 999/999 [00:00<00:00, 240658.76it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1840,): 516, (1833,): 198, (86,): 186, (289,): 94, (1187,): 94, (296,): 93, (605,): 90, (318,): 90, (1947,): 87, (33,): 86, (2775,): 83, (590,): 79, (2489,): 78, (18,): 77, (355,): 77, (260,): 75, (846,): 74, (526,): 73, (1842,): 71, (2,): 60, (1172,): 55, (479,): 55, (1174,): 53, (1664,): 52, (586,): 52, (30,): 52, (26,): 51, (1828,): 49, (37,): 48, (51,): 48, (1837,): 48, (2876,): 48, (111,): 47, (1245,): 47, (59,): 45, (1925,): 44, (2345,): 44, (3858,): 44, (2314,): 44, (1836,): 43, (4786,): 42, (2546,): 40, (151,): 39, (4901,): 37, (2679,): 37, (1074,): 36, (770,): 36, (2311,): 36, (112,): 36, (1183,): 35, (540,): 35, (1176,): 34, (35,): 33, (53,): 33, (2914,): 33, (1175,): 32, (898,): 32, (1732,): 32, (768,): 32, (2687,): 31, (1887,): 31, (1078,): 31, (1531,): 31, (4881,): 31}
Top 64 estimate singleton reported after step3(represent all users):
1780:1674   

Extracting sequences...: 100%|██████████| 2056/2056 [00:00<00:00, 241832.05it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(795,): 1962, (770,): 490, (260,): 417, (727,): 371, (643,): 370, (2,): 322, (724,): 321, (33,): 276, (63,): 262, (1055,): 261, (605,): 260, (792,): 259, (142,): 247, (37,): 231, (375,): 228, (96,): 222, (341,): 211, (26,): 209, (6,): 206, (187,): 197, (18,): 197, (169,): 196, (776,): 195, (4,): 195, (2616,): 183, (1187,): 182, (820,): 180, (778,): 173, (699,): 171, (3017,): 170, (542,): 168, (7,): 167, (8,): 163, (2220,): 159, (493,): 156, (59,): 153, (752,): 151, (551,): 151, (1329,): 149, (624,): 148, (774,): 146, (484,): 145, (2807,): 141, (2019,): 141, (2271,): 141, (159,): 138, (164,): 132, (1348,): 132, (105,): 130, (1217,): 129, (315,): 127, (1828,): 126, (1352,): 124, (1448,): 120, (648,): 120, (773,): 119, (1619,): 119, (1331,): 119, (593,): 118, (1548,): 117, (519,): 115, (1710,): 113, (2680,): 111, (775,): 111}
Top 64 estimate singleton reported after

Extracting sequences...: 100%|██████████| 670/670 [00:00<00:00, 225211.07it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1722,): 668, (318,): 88, (1633,): 49, (846,): 45, (260,): 43, (2489,): 40, (3094,): 40, (1813,): 39, (1537,): 39, (3467,): 39, (51,): 39, (355,): 38, (2001,): 38, (296,): 38, (2876,): 37, (111,): 35, (1176,): 35, (2798,): 33, (1187,): 32, (1174,): 32, (89,): 32, (1406,): 31, (590,): 30, (4901,): 30, (2720,): 30, (2722,): 28, (2725,): 28, (3656,): 28, (2908,): 27, (1357,): 27, (3217,): 27, (3214,): 26, (2775,): 26, (1542,): 26, (2709,): 26, (1811,): 26, (479,): 26, (4532,): 26, (3911,): 26, (1198,): 26, (3029,): 26, (1947,): 26, (1472,): 26, (267,): 25, (526,): 25, (108,): 25, (3779,): 25, (3364,): 25, (2922,): 25, (2013,): 24, (2149,): 24, (2247,): 24, (3536,): 24, (3366,): 24, (3270,): 24, (586,): 24, (3609,): 24, (1191,): 24, (824,): 24, (3932,): 24, (1569,): 24, (3738,): 24, (3023,): 23, (4230,): 23}
Top 64 estimate singleton reported after step3(represent al

Extracting sequences...: 100%|██████████| 38/38 [00:00<00:00, 199979.36it/s]

6.0
----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1899,): 35, (1187,): 9, (590,): 9, (260,): 7, (111,): 7, (1947,): 7, (846,): 6, (1176,): 6, (3017,): 5, (1172,): 5, (1664,): 5, (318,): 5, (1078,): 4, (1174,): 4, (888,): 4, (1893,): 4, (1896,): 4, (1898,): 4, (1359,): 4, (2680,): 4, (1198,): 4, (2775,): 4, (526,): 4, (355,): 4, (894,): 4, (898,): 4, (296,): 3, (1175,): 3, (1894,): 3, (1895,): 3, (1897,): 3, (1900,): 3, (2687,): 3, (112,): 3, (1190,): 3, (1913,): 3, (51,): 3, (605,): 3, (479,): 3, (1202,): 3, (2988,): 3, (1316,): 2, (4881,): 2, (1880,): 2, (2679,): 2, (1795,): 2, (1233,): 2, (2489,): 2, (1245,): 2, (1191,): 2, (1011,): 2, (3335,): 2, (1917,): 2, (2026,): 2, (4876,): 2, (3366,): 2, (2893,): 2, (2000,): 2, (2004,): 2, (2784,): 2, (1320,): 2, (2633,): 2, (2206,): 2, (2534,): 2}
Top 64 estimate singleton reported after step3(represent all users):
1839:0   1838:0   1652:0   1681:0   1682:0   1683




True top singleton of all users: {(1899,): 35, (1187,): 9, (590,): 9, (260,): 7, (111,): 7, (1947,): 7, (846,): 6, (1176,): 6, (3017,): 5, (1172,): 5, (1664,): 5, (318,): 5, (1078,): 4, (1174,): 4, (888,): 4, (1893,): 4, (1896,): 4, (1898,): 4, (1359,): 4, (2680,): 4, (1198,): 4, (2775,): 4, (526,): 4, (355,): 4, (894,): 4, (898,): 4, (296,): 3, (1175,): 3, (1894,): 3, (1895,): 3, (1897,): 3, (1900,): 3, (2687,): 3, (112,): 3, (1190,): 3, (1913,): 3, (51,): 3, (605,): 3, (479,): 3, (1202,): 3, (2988,): 3, (1316,): 2, (4881,): 2, (1880,): 2, (2679,): 2, (1795,): 2, (1233,): 2, (2489,): 2, (1245,): 2, (1191,): 2, (1011,): 2, (3335,): 2, (1917,): 2, (2026,): 2, (4876,): 2, (3366,): 2, (2893,): 2, (2000,): 2, (2004,): 2, (2784,): 2, (1320,): 2, (2633,): 2, (2206,): 2, (2534,): 2}
Top 64 estimate singleton reported after step3(represent all users):
1842:0   1841:0   1673:0   1693:0   1674:0   1675:0   1676:0   1692:0   1691:0   1650:0   1678:0   1679:0   1680:0   1681:0   1682:0   1683:0   

Extracting sequences...: 100%|██████████| 12966/12966 [00:00<00:00, 235899.58it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(231,): 10572, (589,): 8537, (343,): 8491, (151,): 8033, (379,): 7854, (296,): 7837, (587,): 7765, (154,): 7541, (585,): 7042, (166,): 7026, (348,): 6411, (316,): 6370, (592,): 6193, (318,): 5187, (456,): 4983, (328,): 4939, (590,): 4475, (292,): 3988, (162,): 3522, (433,): 3068, (355,): 3035, (11,): 2341, (1116,): 2029, (338,): 2003, (479,): 1620, (186,): 1489, (209,): 1219, (253,): 1158, (111,): 1155, (2,): 1090, (288,): 617, (35,): 612, (260,): 545, (409,): 509, (1187,): 477, (300,): 467, (2775,): 443, (1174,): 441, (48,): 439, (1176,): 424, (20,): 413, (2489,): 409, (526,): 403, (846,): 390, (1175,): 389, (363,): 381, (586,): 357, (51,): 339, (453,): 326, (2876,): 321, (1947,): 309, (1245,): 306, (4901,): 296, (22,): 293, (366,): 292, (1172,): 280, (1664,): 270, (40,): 261, (1061,): 257, (112,): 234, (376,): 220, (33,): 212, (2546,): 209, (225,): 206}
Top 64 

Extracting sequences...: 100%|██████████| 3548/3548 [00:00<00:00, 199728.76it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(2574,): 2416, (3934,): 854, (1920,): 491, (593,): 484, (159,): 479, (1619,): 439, (2019,): 432, (519,): 425, (1294,): 423, (1352,): 422, (3886,): 400, (2616,): 399, (2271,): 396, (344,): 377, (3943,): 376, (2420,): 364, (1973,): 356, (2000,): 345, (2220,): 329, (49,): 328, (939,): 321, (3535,): 320, (4794,): 313, (1128,): 313, (1225,): 309, (2968,): 308, (2239,): 308, (1348,): 305, (551,): 304, (3017,): 300, (3092,): 294, (542,): 294, (1574,): 290, (1344,): 283, (341,): 276, (889,): 264, (1182,): 257, (533,): 245, (169,): 239, (1585,): 237, (1710,): 228, (1378,): 226, (1368,): 222, (2447,): 222, (773,): 220, (4215,): 220, (2113,): 204, (23,): 199, (106,): 191, (164,): 191, (260,): 191, (1187,): 189, (1925,): 185, (1448,): 183, (318,): 182, (369,): 180, (3807,): 174, (470,): 174, (1251,): 172, (3332,): 170, (4901,): 167, (187,): 164, (1075,): 163, (1620,): 162}
T

Extracting sequences...: 100%|██████████| 23/23 [00:00<00:00, 161050.07it/s]

6.0
----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(3874,): 12, (2145,): 8, (1187,): 6, (111,): 4, (1942,): 4, (3893,): 4, (1174,): 4, (846,): 4, (3872,): 4, (1172,): 3, (3886,): 3, (3868,): 3, (2775,): 3, (4135,): 3, (2876,): 3, (1658,): 3, (526,): 3, (2914,): 2, (1947,): 2, (479,): 2, (3889,): 2, (1531,): 2, (3836,): 2, (3861,): 2, (1245,): 2, (3877,): 2, (1078,): 2, (4215,): 2, (260,): 2, (1234,): 2, (894,): 2, (296,): 2, (3920,): 2, (3858,): 2, (3324,): 2, (1483,): 2, (2489,): 2, (3245,): 2, (3894,): 2, (979,): 2, (884,): 2, (1366,): 2, (899,): 2, (1198,): 2, (906,): 2, (3821,): 2, (890,): 2, (1880,): 2, (3664,): 1, (3807,): 1, (1842,): 1, (2841,): 1, (4217,): 1, (1567,): 1, (3845,): 1, (1865,): 1, (3837,): 1, (951,): 1, (2866,): 1, (1206,): 1, (3545,): 1, (1082,): 1, (710,): 1, (154,): 1}
Top 64 estimate singleton reported after step3(represent all users):
1741:0   1714:0   1689:0   1688:0   1687:0   168




-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
-100.0
0
final hitrate 0.0
The number of top_ks relations: 0
***********Task3 Result**********
0
Final mining topks relations: {}
ldp_rm NCR 0.0
ldp_rm F1 0.0
ldp_rm VAR 0.5766
time: 0
----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(3874,): 12, (2145,): 8, (1187,): 6, (111,): 4, (1942,): 4, (3893,): 4, (1174,): 4, (846,): 4, (3872,): 4, (1172,): 3, (3886,): 3, (3868,): 3, (2775,): 3, (4135,): 3, (2876,): 3, (1658,): 3, (526,): 3, (2914,): 2, (1947,): 2, (479,): 2, (3889,): 2, (1531,): 2, (3836,): 2, (3861,): 2, (1245,): 2, (3877,): 2, (1078,): 2, (4215,): 2, (260,): 2, (1234,): 2, (894,): 2, (296,): 2, (3920,): 2, (3858,): 2, (3324,): 2, (1483,): 2, (2489,): 2, (3245,): 2, (3894,): 2, (979,): 2, (884,): 2, (1366,): 2, (899,): 2, (1198,): 2, (906,): 2, (3821,): 2, (890

Extracting sequences...: 100%|██████████| 2888/2888 [00:00<00:00, 61012.97it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1128,): 2716, (735,): 718, (1294,): 402, (3886,): 371, (4794,): 364, (2968,): 360, (1619,): 335, (3535,): 315, (2000,): 308, (2239,): 301, (2574,): 293, (318,): 284, (1225,): 283, (2775,): 281, (889,): 276, (49,): 275, (159,): 273, (1574,): 272, (344,): 269, (1187,): 263, (593,): 261, (2271,): 260, (526,): 248, (260,): 244, (3943,): 237, (590,): 233, (1200,): 231, (1920,): 231, (1710,): 222, (2,): 221, (1176,): 215, (1344,): 215, (1182,): 210, (2420,): 210, (296,): 208, (1925,): 206, (519,): 205, (3017,): 196, (51,): 196, (711,): 195, (1174,): 195, (355,): 193, (2687,): 189, (846,): 188, (2616,): 182, (1973,): 174, (1378,): 173, (773,): 170, (1665,): 167, (1585,): 162, (1175,): 160, (2220,): 160, (2314,): 159, (1352,): 159, (1348,): 158, (1567,): 157, (1505,): 155, (542,): 152, (1172,): 151, (1664,): 151, (1116,): 150, (1620,): 150, (1368,): 150, (939,): 150}
To

Extracting sequences...: 100%|██████████| 166108/166108 [00:00<00:00, 191783.82it/s]


6.0
----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(296,): 21478, (318,): 19199, (151,): 17635, (589,): 17500, (587,): 16939, (590,): 16471, (379,): 15431, (355,): 14600, (585,): 14260, (343,): 14211, (260,): 14126, (154,): 13998, (166,): 12994, (348,): 12699, (456,): 12433, (592,): 12210, (2,): 12065, (316,): 11508, (231,): 10567, (770,): 10220, (1187,): 10121, (479,): 10053, (328,): 9978, (111,): 9519, (2775,): 9273, (33,): 8921, (846,): 8603, (526,): 8455, (643,): 8413, (605,): 8213, (727,): 7912, (162,): 7724, (292,): 7714, (1174,): 7312, (2489,): 6840, (1176,): 6742, (433,): 6520, (724,): 6494, (2876,): 6362, (142,): 6314, (11,): 6243, (96,): 6220, (63,): 6064, (1947,): 5979, (4901,): 5765, (51,): 5724, (37,): 5659, (26,): 5576, (4215,): 5558, (1172,): 5521, (18,): 5483, (4,): 5378, (1664,): 5347, (2600,): 5185, (7,): 5085, (1245,): 5060, (338,): 4971, (6,): 4958, (586,): 4928, (1055,): 4917, (493,): 471

Extracting sequences...: 100%|██████████| 861/861 [00:00<00:00, 196704.38it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1871,): 854, (846,): 149, (1198,): 76, (318,): 76, (1267,): 72, (2218,): 70, (1583,): 70, (1003,): 69, (2775,): 68, (1172,): 66, (590,): 63, (1206,): 63, (2079,): 62, (260,): 60, (898,): 58, (1187,): 55, (1876,): 55, (526,): 53, (1947,): 52, (1932,): 52, (1219,): 49, (2064,): 49, (1074,): 48, (910,): 48, (740,): 46, (1065,): 46, (1165,): 46, (1877,): 45, (355,): 45, (307,): 45, (605,): 44, (1164,): 44, (884,): 44, (2976,): 43, (2287,): 43, (3311,): 43, (1085,): 42, (1350,): 42, (789,): 42, (909,): 42, (3338,): 42, (2861,): 41, (2289,): 41, (2270,): 40, (1084,): 39, (3233,): 39, (2028,): 39, (1058,): 38, (1176,): 38, (2345,): 38, (1174,): 38, (1884,): 37, (296,): 37, (381,): 37, (940,): 36, (3911,): 36, (2013,): 36, (1184,): 36, (1986,): 35, (1872,): 34, (1626,): 34, (3279,): 34, (112,): 33, (1211,): 33}
Top 64 estimate singleton reported after step3(represent al

Extracting sequences...: 100%|██████████| 291/291 [00:00<00:00, 219957.19it/s]

6.0
----------------------LDP-RM Result----------------------





***********SVIM Result**********
True top singleton of all users: {(2334,): 124, (3862,): 87, (1917,): 61, (1187,): 58, (260,): 40, (3807,): 38, (2775,): 36, (1176,): 36, (846,): 35, (355,): 35, (3857,): 35, (1175,): 34, (526,): 32, (590,): 30, (1664,): 29, (111,): 29, (1245,): 29, (1174,): 26, (3803,): 25, (3821,): 24, (479,): 22, (1078,): 21, (1198,): 21, (318,): 21, (898,): 21, (3861,): 20, (296,): 20, (1567,): 19, (2546,): 19, (2489,): 18, (3878,): 17, (3773,): 16, (2679,): 16, (1880,): 16, (1947,): 16, (3886,): 15, (3826,): 15, (2490,): 15, (1172,): 15, (2988,): 14, (2600,): 13, (906,): 13, (2627,): 13, (586,): 13, (1887,): 13, (3887,): 13, (1916,): 12, (2616,): 12, (112,): 12, (2721,): 12, (2708,): 11, (2623,): 11, (3329,): 11, (1206,): 11, (2314,): 11, (605,): 11, (1942,): 11, (1191,): 10, (1190,): 10, (1240,): 10, (888,): 10, (3490,): 10, (3889,): 10, (3395,): 10}
Top 64 estimate singleton reported after step3(represent all users):
2079:1518   2496:0   3426:0   232:0   1452:0  

Extracting sequences...: 100%|██████████| 11104/11104 [00:00<00:00, 192533.84it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(4881,): 3108, (1257,): 2068, (2798,): 2013, (1184,): 1644, (318,): 1343, (2775,): 1210, (846,): 1088, (2876,): 1044, (526,): 1007, (2489,): 900, (2774,): 893, (296,): 830, (260,): 825, (4901,): 817, (590,): 809, (355,): 763, (1172,): 755, (1187,): 687, (1176,): 675, (4135,): 672, (51,): 638, (593,): 603, (898,): 597, (899,): 585, (2679,): 556, (1174,): 553, (4794,): 552, (1198,): 521, (4215,): 514, (2247,): 511, (773,): 472, (1947,): 472, (3017,): 457, (111,): 452, (1175,): 450, (2687,): 447, (479,): 421, (49,): 415, (2807,): 412, (1828,): 404, (4903,): 404, (542,): 401, (1664,): 393, (740,): 384, (1245,): 381, (1567,): 370, (1128,): 364, (159,): 361, (344,): 355, (2680,): 355, (656,): 354, (2629,): 353, (2220,): 352, (889,): 351, (1235,): 350, (3490,): 350, (2616,): 345, (2242,): 341, (2546,): 337, (1182,): 334, (605,): 333, (1620,): 330, (2968,): 325, (896,): 

Extracting sequences...: 100%|██████████| 21714/21714 [00:00<00:00, 198383.13it/s]

6.0





----------------------LDP-RM Result----------------------
***********SVIM Result**********
True top singleton of all users: {(1795,): 2502, (582,): 1671, (1828,): 615, (1665,): 453, (146,): 443, (1925,): 427, (2687,): 424, (3017,): 422, (1920,): 419, (2271,): 395, (773,): 395, (32,): 395, (1344,): 394, (2325,): 390, (1548,): 376, (277,): 355, (344,): 353, (3886,): 345, (159,): 342, (1294,): 335, (1257,): 334, (4794,): 333, (45,): 325, (1347,): 320, (2968,): 312, (2019,): 311, (354,): 300, (1574,): 295, (1619,): 290, (2239,): 288, (656,): 285, (593,): 285, (1710,): 282, (1260,): 280, (1836,): 277, (1343,): 274, (1192,): 269, (1686,): 264, (1636,): 264, (542,): 264, (1620,): 261, (3535,): 254, (145,): 251, (775,): 246, (3171,): 243, (106,): 242, (2574,): 241, (256,): 240, (2247,): 236, (1073,): 233, (1837,): 228, (1348,): 227, (551,): 224, (2220,): 218, (49,): 216, (2000,): 215, (2616,): 213, (896,): 212, (3332,): 212, (715,): 205, (1973,): 203, (4215,): 203, (1069,): 202, (2388,): 201}


In [13]:
# Score the globally collected rules against the full movie dataset.
data = Data(dataname="movie_new2", limit=400000, domain_size=5020, user_total=400000)
metrics = Metrics(data, top_k=64, top_ks=1600, top_kc=32)

print("===============")
print("# FINAL RESULT:")
# Show at most 64 rules from each side, then their overlap.
print("Rules in final_conf_dict:", [*final_conf_dict.keys()][:64])
print("Rules in true_rules_dict:", [*metrics.true_rules_dict.keys()][:64])
print("Intersected rules:", set(final_conf_dict) & set(metrics.true_rules_dict))

# Aggregates gathered by the evaluation loop.
print("average NCR:", round(ncr_sum / total, 4))
print("average F1:", round(f1_sum / total, 4))
print("average consume time:", round(ct_sum / total, 4))
print("max NCR:", max_ncr)
print("max F1: ", max_f1)

# Metrics of the merged rule set itself.
print("Final mining topks relations:", final_conf_dict)
ncr = metrics.NCR(final_conf_dict)
print("ldp_rm NCR", ncr)
f1 = metrics.F1(final_conf_dict)
print("ldp_rm F1", f1)

# FINAL RESULT:
Rules in final_conf_dict: [(4738, 3260), (1363, 1190), (1341, 161), (237, 1567), (701, 45), (3237, 4155), (1566, 3373), (1165, 1294), (542, 2296), (637, 1161), (4902, 3773), (2036, 920), (1377, 4902), (262, 1161), (920, 1837), (2069, 2616), (2271, 4465), (3189, 752), (605, 2186), (3903, 1378), (4012, 2212), (2212, 245), (3957, 3434), (260, 4992), (2616, 2263), (1238, 1161), (2618, 1795), (1176, 3373), (3899, 369), (2832, 113), (2969, 1999), (752, 3178), (1065, 113), (63, 3434), (63, 292), (3362, 3436), (1074, 2832), (484, 341), (49, 1070), (1347, 1795), (1006, 1837), (1006, 4902), (735, 538), (2271, 1348), (2907, 3037), (159, 346), (4662, 1962), (288, 3434), (288, 1910), (161, 231), (542, 1615), (3063, 1664), (3063, 846), (159, 574), (778, 792), (2629, 1836), (1587, 1377), (1837, 2535), (3826, 20), (3826, 3178), (3826, 1191), (4222, 20), (4222, 292), (4773, 102)]
Rules in true_rules_dict: [(5009, 5010), (5011, 5012), (5013, 5014), (5015, 5016), (5017, 5018), (5019, 5020

In [17]:
# Quick look at the first five rules on each side.
print("Rules in final_conf_dict:", [*final_conf_dict.keys()][:5])
print("Rules in true_rules_dict:", [*metrics.true_rules_dict.keys()][:5])

Rules in final_conf_dict: [(4738, 3260), (1363, 1190), (1341, 161), (237, 1567), (701, 45)]
Rules in true_rules_dict: [(5009, 5010), (5011, 5012), (5013, 5014), (5015, 5016), (5017, 5018)]


In [11]:
# Keep the 32 highest-confidence rules (sorted by confidence, descending)
# and score that subset.
final_conf_dict_top32 = {
    rule: confidence
    for rule, confidence in sorted(
        final_conf_dict.items(), key=lambda item: item[1], reverse=True
    )[:32]
}
ncr = metrics.NCR(final_conf_dict_top32)
print("ldp_rm NCR", ncr)
f1 = metrics.F1(final_conf_dict_top32)
print("ldp_rm F1", f1)

ldp_rm NCR 0.0
ldp_rm F1 0.0


In [14]:
# Report the best per-round NCR and F1 seen by the evaluation loop, together
# with the cluster file that produced each of them.
print("max NCR:", max_ncr)
print("file: ", max_file_ncr)
print("max F1: ", max_f1)
print("file: ", max_file_f1)


max NCR: 0.7576
file:  cluster_36.txt
max F1:  0.7188
file:  cluster_36.txt
