@@ -11,24 +11,24 @@ latex_table_best <- function(data) {
11
11
ungroup() %> %
12
12
group_by(dataset , threshold ) %> %
13
13
mutate(
14
- dataset = str_remove(dataset , " -sample-*" ),
15
- k = if_else(algorithm == " two-round-lsh " ,
16
- str_c(k , " [k2=" , k2 , " ]" ),
17
- as.character(k )
18
- ),
14
+ # dataset = str_remove(dataset, "-sample-*"),
15
+ # k = if_else(algorithm == "TwoLevelLSH ",
16
+ # str_c(k, " [k2=", k2, "]"),
17
+ # as.character(k)
18
+ # ),
19
19
total_time_num = drop_units(total_time ),
20
20
total_time = total_time %> %
21
- set_units(" s " ) %> %
21
+ set_units(" min " ) %> %
22
22
drop_units() %> %
23
- scales :: number(accuracy = 0.01 ),
23
+ scales :: number(accuracy = 0.1 ),
24
24
recall = scales :: number(recall , accuracy = 0.01 ),
25
25
total_time = cell_spec(total_time ,
26
- # background = spec_color(total_time_num, direction = -1),
27
- # color = "white",
28
- # underline = total_time == min(total_time),
29
26
underline = id %in% best_runs ,
30
27
format = " latex"
31
- )
28
+ ),
29
+ # more compact names
30
+ # algorithm = str_remove(algorithm, "LSH"),
31
+ # dataset = if_else(dataset == "Livejournal", "LJ", as.character(dataset))
32
32
) %> %
33
33
ungroup() %> %
34
34
select(dataset , threshold , algorithm , total_time , recall , k , sketch_bits ) %> %
@@ -40,14 +40,14 @@ latex_table_best <- function(data) {
40
40
) %> %
41
41
kbl(
42
42
format = " latex" ,
43
- align = " ll rrll rrll " ,
43
+ align = c( " l " , " l " , " r " , " r " , " l " , " l " , " r " , " r " , " l " , " l " ) ,
44
44
escape = F ,
45
45
booktabs = T ,
46
46
linesep = c(" " , " " , " " , " \\ addlinespace" ),
47
47
col.names = c(
48
48
" dataset" , " algorithm" ,
49
- " total time (s) " , " recall" , " k" , " b" ,
50
- " total time (s) " , " recall" , " k" , " b"
49
+ " time" , " recall" , " k" , " b" ,
50
+ " time" , " recall" , " k" , " b"
51
51
)
52
52
) %> %
53
53
add_header_above(c(" " = 2 , " 0.5" = 4 , " 0.7" = 4 ))
@@ -63,6 +63,8 @@ latex_table_info <- function(data) {
63
63
filter(threshold %in% c(0.5 , 0.7 )) %> %
64
64
select(dataset , threshold , n , dim , output_size ) %> %
65
65
mutate(
66
+ # Count in the output size the self pairs, which are not reported by the implementations
67
+ output_size = output_size + n ,
66
68
sample = as.integer(str_match(dataset , " sample-(\\ d+)" )[, 2 ]),
67
69
sample = if_else(is.na(sample ), " Full dataset" , str_c(" Sample of " , sample )),
68
70
dataset = case_when(
@@ -71,7 +73,6 @@ latex_table_info <- function(data) {
71
73
str_detect(dataset , " [Gg]love" ) ~ " Glove" ,
72
74
str_detect(dataset , " Orkut" ) ~ " Orkut"
73
75
),
74
- # selectivity = scales::percent(selectivity, accuracy = 0.00001),
75
76
avg_neighbors = scales :: number(output_size / n , accuracy = 0.01 , big.mark = " \\\\ ," )
76
77
) %> %
77
78
select(- output_size ) %> %
@@ -94,56 +95,56 @@ latex_table_info <- function(data) {
94
95
linesep = " "
95
96
) %> %
96
97
# kable_styling() %>%
97
- add_header_above(c(" " = 1 , " " = 1 , " " = 1 , " average neighbors" = 2 )) %> %
98
- pack_rows(" Full dataset" , 1 , 4 ) %> %
99
- pack_rows(" Sample of 200000 vectors" , 5 , 8 )
98
+ add_header_above(c(" " = 1 , " " = 1 , " " = 1 , " average neighbors" = 2 ))
100
99
}
101
100
102
101
table_data_info() %> %
103
102
latex_table_info() %> %
104
103
write_file(" tex/info.tex" )
105
104
106
105
107
- latex_normalized_profile <- function (data ) {
108
- data %> %
109
- select(- ends_with(" input" ), - sketch , - verify , - deduplicate ) %> %
110
- pivot_longer(ends_with(" ppf" ), names_to = " component" , values_to = " ppf" ) %> %
111
- mutate(
112
- component = str_remove(component , " _ppf" ),
113
- component = if_else(component == " dedup" , " deduplicate" , component ),
114
- component = factor (component ,
115
- levels = c(" sketch" , " verify" , " deduplicate" ),
116
- ordered = T
117
- ),
118
- ppf = scales :: number(ppf , big.mark = " \\\\ ," , scale = 0.001 , accuracy = 1 ),
119
- algorithm = factor (algorithm , ordered = T , levels = c(
120
- " LocalLSH" ,
121
- " OneLevelLSH" ,
122
- " TwoLevelLSH"
123
- ))
124
- ) %> %
125
- ungroup() %> %
126
- select(- id , - threshold ) %> %
127
- pivot_wider(names_from = c(algorithm , component ), values_from = ppf ) %> %
128
- select(dataset ,
129
- LocalLSH_sketch , LocalLSH_verify , LocalLSH_deduplicate ,
130
- OneLevelLSH_sketch , OneLevelLSH_verify , OneLevelLSH_deduplicate ,
131
- TwoLevelLSH_sketch , TwoLevelLSH_verify , TwoLevelLSH_deduplicate
132
- ) %> %
133
- kbl(format = " latex" , booktabs = T , escape = F ,
134
- col.names = c(
135
- " dataset" ,
136
- " sketching" , " verify" , " dedup." ,
137
- " sketching" , " verify" , " dedup." ,
138
- " sketching" , " verify" , " dedup."
139
- )
140
- ) %> %
141
- add_header_above(c(" " = 1 , " \\\\ local" = 3 , " \\\\ onelevel" = 3 , " \\\\ twolevel" = 3 ), escape = F )
142
- }
143
-
144
- table_normalized_profile() %> %
145
- latex_normalized_profile() %> %
146
- write_file(" tex/profiling.tex" )
106
+ # latex_normalized_profile <- function(data) {
107
+ # data %>%
108
+ # select(-ends_with("input"), -sketch, -verify, -deduplicate) %>%
109
+ # pivot_longer(ends_with("ppf"), names_to = "component", values_to = "ppf") %>%
110
+ # mutate(
111
+ # component = str_remove(component, "_ppf"),
112
+ # component = if_else(component == "dedup", "deduplicate", component),
113
+ # component = factor(component,
114
+ # levels = c("sketch", "verify", "deduplicate"),
115
+ # ordered = T
116
+ # ),
117
+ # ppf = scales::number(ppf, big.mark = "\\\\,", scale = 0.001, accuracy = 1),
118
+ # algorithm = factor(algorithm, ordered = T, levels = c(
119
+ # "LocalLSH",
120
+ # "OneLevelLSH",
121
+ # "TwoLevelLSH"
122
+ # ))
123
+ # ) %>%
124
+ # ungroup() %>%
125
+ # select(-id, -threshold) %>%
126
+ # pivot_wider(names_from = c(algorithm, component), values_from = ppf) %>%
127
+ # select(
128
+ # dataset,
129
+ # LocalLSH_sketch, LocalLSH_verify, LocalLSH_deduplicate,
130
+ # OneLevelLSH_sketch, OneLevelLSH_verify, OneLevelLSH_deduplicate,
131
+ # TwoLevelLSH_sketch, TwoLevelLSH_verify, TwoLevelLSH_deduplicate
132
+ # ) %>%
133
+ # kbl(
134
+ # format = "latex", booktabs = T, escape = F,
135
+ # col.names = c(
136
+ # "dataset",
137
+ # "sketching", "verify", "dedup.",
138
+ # "sketching", "verify", "dedup.",
139
+ # "sketching", "verify", "dedup."
140
+ # )
141
+ # ) %>%
142
+ # add_header_above(c(" " = 1, "\\\\local" = 3, "\\\\onelevel" = 3, "\\\\twolevel" = 3), escape = F)
143
+ # }
144
+
145
+ # table_normalized_profile() %>%
146
+ # latex_normalized_profile() %>%
147
+ # write_file("tex/profiling.tex")
147
148
148
149
latex_bench <- function () {
149
150
tbldata <- table_bench() %> %
@@ -160,13 +161,14 @@ latex_bench <- function() {
160
161
max_verify = max(verify ) %> % scales :: number(accuracy = 0.1 )
161
162
) %> %
162
163
ungroup()
163
-
164
+
164
165
165
166
tbldata %> %
166
167
select(dataset , classification , mean_sketch , median_sketch , max_sketch , mean_dedup , median_dedup , max_dedup , mean_verify , median_verify , max_verify ) %> %
167
- kbl(format = " latex" , escape = F , booktabs = TRUE ,
168
+ kbl(
169
+ format = " latex" , escape = F , booktabs = TRUE ,
168
170
col.names = c(
169
- " data type" , " pair type" ,
171
+ " data type" , " pair type" ,
170
172
" mean" , " median" , " max" ,
171
173
" mean" , " median" , " max" ,
172
174
" mean" , " median" , " max"
@@ -175,17 +177,20 @@ latex_bench <- function() {
175
177
add_header_above(c(" " = 1 , " " = 1 , " sketch" = 3 , " deduplication" = 3 , " similarity" = 3 ))
176
178
}
177
179
178
- latex_bench() %> % write_file(" tex/bench.tex" )
180
+ # latex_bench() %>% write_file("tex/bench.tex")
179
181
180
182
181
183
latex_motivation <- function (data ) {
182
184
data %> %
183
185
filter(
186
+ ! dry_run ,
184
187
algorithm == " OneLevelLSH" ,
185
- sketch_bits == 0 ,
188
+ sketch_bits == 0 ,
186
189
required_recall == 0.8 ,
187
- threshold == 0.5
190
+ threshold == 0.7
188
191
) %> %
192
+ drop_na(Load , total_time ) %> %
193
+ mutate(total_time = set_units(total_time , " min" )) %> %
189
194
select(dataset , k , total_time , Load ) %> %
190
195
arrange(k ) %> %
191
196
group_by(dataset ) %> %
@@ -194,18 +199,22 @@ latex_motivation <- function(data) {
194
199
total_time == min(total_time ) ~ " practical" ,
195
200
Load == min(Load ) ~ " theoretical"
196
201
),
197
- total_time = set_units(total_time , " s" ) %> % drop_units() %> % scales :: number(accuracy = 1 )
202
+ total_time = drop_units(total_time ) %> % scales :: number(accuracy = 0.1 ),
203
+ Load = scales :: number(Load , big.mark = " \\\\ ," )
198
204
) %> %
199
- drop_na() %> %
200
- arrange(dataset , Load ) %> %
201
205
select(dataset , kind , total_time , Load ) %> %
202
- kbl(format = " latex" , booktabs = T , escape = F ,
206
+ drop_na(kind ) %> %
207
+ arrange(dataset , Load ) %> %
208
+ kbl(
209
+ format = " latex" , booktabs = T , escape = F ,
203
210
linesep = " " ,
204
211
col.names = c(
205
- " " , " " , " time (s )" , " load"
212
+ " " , " " , " time (min )" , " load"
206
213
)
207
214
) %> %
208
215
collapse_rows(columns = 1 , latex_hline = " major" )
209
216
}
210
217
211
- table_search_best() %> % latex_motivation() %> % write_file(" tex/motivation.tex" )
218
+ best <- table_search_best()
219
+ latex_motivation(best ) %> %
220
+ write_file(" tex/motivation.tex" )
0 commit comments