-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_sketchsize_MSE.py
76 lines (67 loc) · 2.65 KB
/
test_sketchsize_MSE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import math
import pandas as pd
from plotting_tools import draw_sketchsize_MSE
from RACE import count_race_l2
from mLDP_KDE import mldp_kde_l2kernel_kde
from evaluation import MSE
from kde_tools import l2kernel_kde
from parameters import dataset_parameters
# --- Dataset selection -------------------------------------------------------
datasets = ['CodRNA', 'CovType', 'RCV1', 'Yelp', 'SYN']
# Index into `datasets` -- 0: CodRNA, 1: CovType, 2: RCV1, 3: Yelp, 4: SYN
selected_flag = 0
# int(log10(nearest_flag)) is used further down as an index into `r_set`.
nearest_flag = 100
# Privacy budgets evaluated for mLDP-KDE.
epsilon = [1, 5, 20]

# --- Per-dataset parameters --------------------------------------------------
params = dataset_parameters[datasets[selected_flag]]
m, r_set, omega = params['m'], params['r_set'], params['omega']
seed_l2lsh, seed_grr_rehash = params['seed_l2lsh'], params['seed_grr_rehash']

# (L, R) sketch configurations: one list per epsilon for mLDP-KDE, plus one
# list for the RACE baseline.
L_R_set_e_1 = params['L_R_set_for_testSketchSize_1']
L_R_set_e_5 = params['L_R_set_for_testSketchSize_5']
L_R_set_e_20 = params['L_R_set_for_testSketchSize_20']
L_R_set = [L_R_set_e_1, L_R_set_e_5, L_R_set_e_20]
L_R_set_race = params['L_R_set_for_testSketchSize_race']

# --- Data loading ------------------------------------------------------------
# Both files are header-less, comma-separated tables; keep only the raw
# value arrays.
const_file = f"small_datasets/{datasets[selected_flag]}_const.csv"
query_file = f"small_datasets/{datasets[selected_flag]}_query.csv"
const_data = pd.read_csv(const_file, sep=',', lineterminator='\n', header=None).values
query_data = pd.read_csv(query_file, sep=',', lineterminator='\n', header=None).values
# Number of rows in the construction data.
N = const_data.shape[0]

# --- Exact KDE values (reference for every MSE below) -------------------------
acc_kde_vals = l2kernel_kde(query_data, const_data, N, omega)
# --- RACE ---------------------------------------------------------------------
# For every (L, R) configuration in `L_R_set_race`, run count_race_l2 once per
# seed in `seed_l2lsh` and record the MSE against the exact KDE values,
# averaged over those seeds.
#   race_mse[i] -- mean MSE of configuration i
#   race_LR[i]  -- L * R of configuration i (used as the sketch size)
race_mse = []
race_LR = []
for comb in L_R_set_race:
    L = comb[0]
    R = comb[1]
    # Named accumulator: the original used `sum`, which shadowed the builtin
    # for the remainder of the module.
    mse_total = 0
    for temp_seed_l2lsh in seed_l2lsh:
        # count_race_l2 returns three values; only the first (the KDE
        # estimates) is needed here.
        race_kde_value, _, _ = count_race_l2(query_data, const_data, m, omega, N, temp_seed_l2lsh, L, R)
        mse_total += MSE(acc_kde_vals, race_kde_value)
    race_mse.append(mse_total / len(seed_l2lsh))
    race_LR.append(L * R)
# --- mLDP-KDE -----------------------------------------------------------------
# For every privacy budget in `epsilon` and every (L, R) configuration
# belonging to that budget, run mldp_kde_l2kernel_kde once per
# (l2lsh seed, grr/rehash seed) pair and record the MSE against the exact KDE
# values, averaged over those runs.
#   mldp_kde_mse_e_<eps>[i] -- mean MSE of configuration i at epsilon = <eps>
#   mldp_kde_LR_e_<eps>[i]  -- L * R of configuration i (used as sketch size)
mldp_kde_mse_e_1 = []
mldp_kde_LR_e_1 = []
mldp_kde_mse_e_5 = []
mldp_kde_LR_e_5 = []
mldp_kde_mse_e_20 = []
mldp_kde_LR_e_20 = []

# Explicit epsilon -> (configurations, MSE list, sketch-size list) table.
# Replaces the string-built globals() lookups so a misspelt or missing name
# is visible here instead of surfacing as a KeyError deep inside the loop.
mldp_kde_runs = {
    1: (L_R_set_e_1, mldp_kde_mse_e_1, mldp_kde_LR_e_1),
    5: (L_R_set_e_5, mldp_kde_mse_e_5, mldp_kde_LR_e_5),
    20: (L_R_set_e_20, mldp_kde_mse_e_20, mldp_kde_LR_e_20),
}

# Loop-invariant: the r_set entry selected by nearest_flag.
# NOTE(review): presumably the distance threshold r passed to the mechanism --
# confirm against mldp_kde_l2kernel_kde's signature.
r_selected = r_set[int(math.log10(nearest_flag))]

# Materialise the seed pairs once. zip() stops at the shorter list, so the
# mean below divides by the number of runs actually performed rather than by
# len(seed_l2lsh), which would be wrong if the two seed lists differ in length.
seed_pairs = list(zip(seed_l2lsh, seed_grr_rehash))

for e in epsilon:
    lr_configs, mse_results, lr_results = mldp_kde_runs[e]
    for comb in lr_configs:
        L = comb[0]
        R = comb[1]
        # Named accumulator instead of `sum`, which shadowed the builtin.
        mse_total = 0
        for temp_seed_l2lsh, temp_seed_grr_rehash in seed_pairs:
            # Four values are returned; only the first (the KDE estimates)
            # is needed here.
            mldp_kde_val, _, _, _ = mldp_kde_l2kernel_kde(
                query_data, e, const_data, L, R, m, omega, N,
                r_selected, temp_seed_l2lsh, temp_seed_grr_rehash,
            )
            mse_total = mse_total + MSE(acc_kde_vals, mldp_kde_val)
        mse_results.append(mse_total / len(seed_pairs))
        lr_results.append(L * R)
# Pass the sketch sizes (L * R) and the matching mean MSEs of mLDP-KDE at
# epsilon = 1, 5, 20 and of RACE to the plotting helper, titled with the
# selected dataset's name.
draw_sketchsize_MSE(
    mldp_kde_LR_e_1,
    mldp_kde_LR_e_5,
    mldp_kde_LR_e_20,
    race_LR,
    mldp_kde_mse_e_1,
    mldp_kde_mse_e_5,
    mldp_kde_mse_e_20,
    race_mse,
    title=datasets[selected_flag],
)