-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplot_subproblem_size.R
112 lines (102 loc) · 3.47 KB
/
plot_subproblem_size.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
source("tables.R")
source("plots.R")
# Number of records in each dataset, plus the number of unordered pairs
# of distinct records, n * (n - 1) / 2, stored as `max_pairs`.
sizes <- mutate(
  tribble(
    ~dataset,      ~data_size,
    "SIFT",        1000000,
    "Livejournal", 3201203,
    "Orkut",       2783196,
    "Glove",       1193514
  ),
  max_pairs = data_size * (data_size - 1) / 2
)
# Candidate-pair counts used by the plots below.
#
# Starting from `table_candidates()`, keep the runs without sketches and with
# `k2` equal to 0 or 6, convert `total_time` to plain seconds, and within each
# (dataset, algorithm, threshold) group keep only the rows whose `total_time`
# equals the group minimum. `CandidatePairs` is then reduced by
# `SelfPairsDiscarded` and stored as a double.
plotdata <- table_candidates() %>%
  # we focus just on experiments with no sketches, to see the effect of k
  filter(sketch_bits == 0) %>%
  # filter((k == 0) || between(k, 4, 16)) %>%
  filter(k2 %in% c(0, 6)) %>%
  mutate(
    total_time = set_units(total_time, "s") %>% drop_units()
  ) %>%
  group_by(dataset, algorithm, threshold) %>%
  filter(
    total_time == min(total_time)
  ) %>%
  # Drop the grouping once the per-group minimum has been applied, so that
  # later verbs on `plotdata` do not silently operate group-wise.
  ungroup() %>%
  mutate(CandidatePairs = as.double(CandidatePairs - SelfPairsDiscarded))
# Build a column of three box plots of `CandidatePairs` for one threshold.
#
# Arguments:
#   data   data frame with the columns `threshold`, `step`, `algorithm`,
#          `dataset`, `worker` and `CandidatePairs`.
#   t      threshold to plot; rows are selected with `threshold == t`.
#   ytitle accepted for backward compatibility; currently ignored.
#   yticks accepted for backward compatibility; currently ignored.
#   title  if TRUE, the top panel is titled "Threshold <t>".
#
# Returns the three panels combined with `/` (top to bottom: rows with
# `step == 0`, per-worker sums, per algorithm/dataset sums).
plot_threshold <- function(data, t, ytitle = TRUE, yticks = TRUE, title = TRUE) {
  # Select the requested threshold once so that all three panels are built
  # from the same rows (the aggregate panel must not mix thresholds).
  at_threshold <- filter(data, threshold == t)

  first_iter <- filter(at_threshold, step == 0)

  worker_total <- at_threshold %>%
    group_by(algorithm, dataset, worker) %>%
    summarise(CandidatePairs = sum(CandidatePairs)) %>%
    ungroup()

  # NOTE(review): the panel built from this table is labelled "Average
  # candidates per worker", but the value is a sum over every row of each
  # algorithm/dataset pair -- confirm which of the two is intended.
  totals <- at_threshold %>%
    group_by(algorithm, dataset) %>%
    summarise(CandidatePairs = sum(CandidatePairs)) %>%
    ungroup()

  # Shared layout: one box plus jittered points per algorithm, faceted by
  # dataset, with the x axis text and title removed.
  draw <- function(d) {
    ggplot(
      d,
      aes(algorithm, CandidatePairs,
        color = algorithm
      )
    ) +
      geom_boxplot(outlier.size = 0.2) +
      geom_point(position = "jitter", size = 0.1) +
      scale_y_continuous() +
      scale_color_algorithm() +
      facet_wrap(vars(dataset), ncol = 4) +
      # "none" replaces the deprecated `FALSE` for suppressing a legend.
      guides(shape = "none", linetype = "none", size = "none") +
      theme_paper() +
      theme(
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(hjust = 0)
      )
  }

  p_first_iter <- draw(first_iter) + labs(
    y = "Candidates per worker, first iteration"
  )
  p_candidates <- draw(worker_total) + labs(
    y = "Total candidates per worker"
  )
  p_averages <- draw(totals) + labs(
    y = "Average candidates per worker"
  )

  if (title) {
    p_first_iter <- p_first_iter + labs(title = str_c("Threshold ", t))
  }

  # Hiding of the y axis title / tick labels (`ytitle = FALSE`,
  # `yticks = FALSE`) is disabled: both arguments are accepted but have no
  # effect, so every panel keeps its y axis title and tick labels.

  p_first_iter / p_candidates / p_averages
}
# Assemble the figure: four `plot_threshold()` columns side by side
# (thresholds 0.5 and 0.7, each for Glove/SIFT and Livejournal/Orkut), with
# the collected legend in a shorter row underneath.
subproblem_size_plot <- (
  plot_threshold(filter(plotdata, dataset %in% c("Glove", "SIFT")), 0.5) |
    plot_threshold(
      filter(plotdata, dataset %in% c("Livejournal", "Orkut")), 0.5,
      title = FALSE, ytitle = FALSE
    ) |
    plot_threshold(
      filter(plotdata, dataset %in% c("Glove", "SIFT")), 0.7,
      ytitle = FALSE
    ) |
    plot_threshold(
      filter(plotdata, dataset %in% c("Livejournal", "Orkut")), 0.7,
      title = FALSE, ytitle = FALSE
    )
) /
  guide_area() +
  plot_layout(
    guides = "collect",
    heights = c(5, 1)
  )

# Evaluate at top level so the figure is still displayed when the script is
# run interactively or through Rscript.
subproblem_size_plot

# Pass the plot explicitly instead of relying on whatever `last_plot()`
# happens to hold at this point.
ggsave(
  "imgs/subproblem_size.png",
  plot = subproblem_size_plot,
  width = 8,
  height = 6
)