@@ -31,6 +31,53 @@ recode_algorithms <- function(data) {
   )
 }
 
+table_sketch_quality <- function() {
+  db <- DBI::dbConnect(RSQLite::SQLite(), "danny-results.sqlite")
+  # The load is the maximum load among all the workers in a given experiment
+  all <- tbl(db, "result_recent") %>%
+    collect() %>%
+    mutate(
+      timed_out = is.na(output_size) # && !is.na(total_time_ms)
+    ) %>%
+    filter(hosts == "10.1.1.1:2001__10.1.1.2:2001__10.1.1.3:2001__10.1.1.4:2001__10.1.1.5:2001") %>%
+    filter(profile_frequency == 0) %>%
+    filter(!str_detect(path, "sample-200000.bin")) %>%
+    filter(required_recall == 0.8) %>%
+    filter(threshold %in% c(0.5, 0.7)) %>%
+    filter(!no_verify, !no_dedup) %>%
+    # filter(algorithm != "two-level-lsh" | (repetition_batch >= 1000)) %>%
+    mutate(dataset = basename(path)) %>%
+    inner_join(baseinfo) %>%
+    mutate(
+      dry_run = as.logical(dry_run),
+      total_time = set_units(total_time_ms, "ms"),
+      dataset = case_when(
+        str_detect(dataset, "sift") ~ "SIFT",
+        str_detect(dataset, "Livejournal") ~ "Livejournal",
+        str_detect(dataset, "[Gg]love") ~ "Glove",
+        str_detect(dataset, "Orkut") ~ "Orkut"
+      )
+    ) %>%
+    order_datasets() %>%
+    recode_algorithms() %>%
+    filter(algorithm == "Cartesian") %>%
+    select(-total_time_ms)
+
+  # Runs without sketching (sketch_bits == 0) report the exact output size,
+  # which serves as the baseline for each dataset/threshold combination
+  baseline <- filter(all, sketch_bits == 0) %>%
+    select(dataset, threshold, base_output_size = output_size)
+
+  # Pairs that sketching discards, relative to the unsketched baseline
+  all <- inner_join(all, baseline) %>%
+    mutate(
+      lost_pairs = base_output_size - output_size,
+      lost_fraction = lost_pairs / output_size
+    )
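+  # For illustration (hypothetical numbers): base_output_size = 1000 and
+  # output_size = 990 yield lost_pairs = 10 and lost_fraction = 10 / 990,
+  # roughly 0.0101; note the fraction is taken over the sketched output size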
+
+  DBI::dbDisconnect(db)
+  all
+}
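+
+# A minimal usage sketch, not part of the original scripts: it assumes the
+# packages this file already uses (DBI, dplyr, stringr, units) are loaded and
+# that `baseinfo` is available in the session; the variable names below are
+# illustrative only.
+#
+#   sketches <- table_sketch_quality()
+#   sketches %>%
+#     group_by(dataset, threshold, sketch_bits) %>%
+#     summarise(max_lost_fraction = max(lost_fraction))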
+
+
 table_search_best <- function() {
   db <- DBI::dbConnect(RSQLite::SQLite(), "danny-results.sqlite")
   # The load is the maximum load among all the workers in a given experiment