Skip to content

Commit 980902e

Browse files
committed
Update figures
1 parent 34c661f commit 980902e

File tree

5 files changed

+285
-92
lines changed

5 files changed

+285
-92
lines changed

.gitignore

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
danny-results.sqlite
1+
*.sqlite
22
/env
33
.vscode/
44
.smt

analysis/plot_k_dep.R

+4-2
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,8 @@ plot_counters <- function(data, t, ylabels = FALSE) {
3232
p_candidates <- ggplot(data, aes(
3333
x = k,
3434
y = fraction_candidates,
35-
color = algorithm
35+
color = algorithm,
36+
shape = algorithm
3637
)) +
3738
geom_line(size = 0.4, alpha = 0.5, position = position_dodge(width = 0.5)) +
3839
geom_point(size = 0.5, position = position_dodge(width = 0.5)) +
@@ -75,7 +76,8 @@ plot_counters <- function(data, t, ylabels = FALSE) {
7576
p_load <- ggplot(data, aes(
7677
x = k,
7778
y = Load,
78-
color = algorithm
79+
color = algorithm,
80+
shape = algorithm
7981
)) +
8082
geom_line(size = 0.4, alpha = 0.5, position = position_dodge(width = 0.5)) +
8183
geom_point(size = 0.5, position = position_dodge(width = 0.5)) +

analysis/plot_scalability.R

+83-45
Original file line number | Diff line number | Diff line change
@@ -3,68 +3,106 @@ source("tables.R")
33
source("plots.R")
44

55
plotdata <- table_scalability() %>%
6-
filter(threshold == 0.5) %>%
7-
filter(sketch_bits == 256, k2 %in% c(0, 6)) %>%
6+
filter(dataset == "Glove") %>%
87
group_by(algorithm, dataset, workers, k, k2, sketch_bits) %>%
98
summarise(total_time = mean(total_time)) %>%
109
mutate(total_time = set_units(total_time, "s") %>% drop_units()) %>%
1110
ungroup() %>%
12-
select(algorithm, dataset, workers, k, k2, sketch_bits, total_time)
13-
14-
fast_params <- plotdata %>%
15-
group_by(algorithm, dataset, workers) %>%
16-
slice_min(total_time) %>%
17-
ungroup() %>%
18-
select(algorithm, dataset, workers, k, k2, sketch_bits)
19-
20-
best_40_params <- plotdata %>%
21-
filter(workers == 40) %>%
11+
select(algorithm, dataset, workers, k, k2, sketch_bits, total_time) %>%
2212
group_by(algorithm, dataset, workers) %>%
23-
slice_min(total_time) %>%
24-
ungroup() %>%
25-
select(algorithm, dataset, k, k2, sketch_bits)
26-
27-
with_best_40 <- semi_join(plotdata, best_40_params) %>%
28-
mutate(conf = "best40")
13+
slice_min(total_time) %>%
14+
ungroup()
2915

30-
fast_runs <- semi_join(plotdata, fast_params) %>%
31-
mutate(conf = "bestWorker")
32-
33-
plotdata <- bind_rows(
34-
with_best_40,
35-
fast_runs
36-
) %>%
37-
select(-k, -k2, -sketch_bits) %>%
38-
pivot_wider(names_from=conf, values_from=total_time)
16+
labels <- plotdata %>%
17+
arrange(algorithm, k) %>%
18+
group_by(algorithm) %>%
19+
filter((k > lag(k)) | (workers == 8)) %>%
20+
ungroup() %>%
21+
mutate(
22+
y = case_when(
23+
(algorithm == "LocalLSH") & workers == 8 ~ total_time - 100,
24+
(algorithm == "LocalLSH") & workers == 40 ~ total_time + 30,
25+
(algorithm == "OneLevelLSH") & workers == 8 ~ total_time + 1500,
26+
(algorithm == "TwoLevelLSH") & workers == 8 ~ total_time - 450,
27+
(algorithm == "OneLevelLSH") & workers == 16 ~ total_time + 800,
28+
(algorithm == "TwoLevelLSH") & workers == 16 ~ total_time - 350,
29+
(algorithm == "OneLevelLSH") & workers == 40 ~ total_time - 130,
30+
(algorithm == "TwoLevelLSH") & workers == 40 ~ total_time + 200,
31+
# (algorithm == "LocalLSH") & workers == 40 ~ total_time + 30,
32+
T ~ total_time
33+
)
34+
)
3935

40-
ggplot(
36+
p <- ggplot(
4137
plotdata,
4238
aes(
4339
x = workers,
44-
ymin = bestWorker,
45-
ymax = best40,
46-
color = algorithm
40+
y = total_time,
41+
color = algorithm,
42+
shape = algorithm
4743
)
4844
) +
49-
geom_linerange(
50-
position=position_dodge(2),
51-
size = 1
45+
geom_line() +
46+
geom_point() +
47+
geom_text(
48+
aes(label=scales::number(total_time, suffix=" s")),
49+
data=function(d) {filter(d, workers == 8)},
50+
vjust=0.5,
51+
hjust=1,
52+
nudge_x=-1,
53+
size=2.5,
54+
show.legend=F,
55+
color = "black"
56+
) +
57+
geom_text(
58+
aes(label=scales::number(total_time, suffix=" s")),
59+
data=function(d) {filter(d, workers == 72, !((dataset == "Glove") & (algorithm=="OneLevelLSH")))},
60+
vjust=0.5,
61+
hjust=0,
62+
nudge_x=1,
63+
size=2.5,
64+
show.legend=F,
65+
color = "black"
5266
) +
53-
geom_point(
54-
mapping = aes(y = bestWorker),
55-
data=~filter(.x, bestWorker == best40),
56-
shape=18,
57-
position = position_dodge(2),
58-
size = 3
67+
geom_text(
68+
aes(label=scales::number(total_time, suffix=" s")),
69+
data=function(d) {filter(d, workers == 40, !((dataset == "Glove") & (algorithm=="OneLevelLSH")))},
70+
vjust=0.5,
71+
hjust=1,
72+
nudge_y=-0.05,
73+
nudge_x=-1,
74+
size=2.5,
75+
show.legend=F,
76+
color = "black"
5977
) +
60-
facet_wrap(vars(dataset), scales = "free", ncol = 4) +
61-
scale_shape_manual(values = c(2, 6)) +
78+
geom_label(
79+
aes(label=k, y=y),
80+
data = labels,
81+
size=2,
82+
show.legend=F
83+
) +
84+
# facet_wrap(vars(dataset), scales = "free", ncol = 2) +
6285
scale_color_algorithm() +
63-
scale_x_continuous(breaks = c(1:5 * 8)) +
86+
scale_x_continuous(
87+
breaks = c(1:9 * 8),
88+
expand=expansion(mult=c(0.15,0.15))
89+
) +
90+
scale_y_log10() +
91+
# scale_y_continuous(
92+
# breaks = c(272, 1412, 3351)
93+
# ) +
6494
labs(
6595
x = "number of workers",
6696
y = "total time (s)"
6797
) +
68-
theme_paper()
98+
theme_paper() +
99+
theme(
100+
panel.grid.minor = element_blank(),
101+
panel.grid.major = element_blank(),
102+
legend.position = c(0.8, 0.75),
103+
legend.title = element_blank(),
104+
panel.border = element_blank(),
105+
axis.line = element_line(size = 0.3, colour = "black")
106+
)
69107

70-
ggsave("imgs/scalability.png", width = 8, height = 3)
108+
ggsave("imgs/scalability.png", width = 4, height = 2.2)

analysis/plot_system.R

+73-26
Original file line number | Diff line number | Diff line change
@@ -2,62 +2,109 @@ source("tables.R")
22
source("plots.R")
33

44
plotdata <- table_sysmonitor() %>%
5-
filter(id %in% c(2719, 2720, 2721)) %>%
5+
ungroup() %>%
6+
filter(hostname == "desktop2") %>%
67
mutate(time = time / 60) # put time in minutes
78

8-
tot_net <- 1024 * 1024
9+
tot_net <- plotdata %>% summarise(max(net_tx)) %>% pull()
910

10-
ggplot(plotdata, aes(x=time)) +
11+
endtimes <- plotdata %>%
12+
group_by(algorithm) %>%
13+
summarise(end_local = max(time))
14+
15+
plotdata %>%
16+
group_by(algorithm) %>%
17+
mutate(net_frac = net_tx / max(net_tx)) %>%
18+
filter(net_frac > 0.1) %>%
19+
summarise(start_comm = min(time), end_comm=max(time), comm_span = 60*(end_comm - start_comm)) %>%
20+
inner_join(endtimes) %>%
21+
mutate(span_local = 60*(end_local - end_comm), span_init = 60*start_comm)
22+
23+
annotations <- tribble(
24+
~algorithm, ~from, ~to, ~text, ~y, ~ytext,
25+
"TwoLevelLSH", 0.5, 1.4, "58 s", 0.25, 0.12,
26+
"TwoLevelLSH", 1.5, 6.5, "307 s", 0.15, 0.25,
27+
"TwoLevelLSH", 0, 0.45, "29 s", 0.75, 0.55,
28+
"OneLevelLSH", 0.12, 1.2, "68 s", 0.25, 0.12,
29+
"OneLevelLSH", 1.22, 6.1, " 293 s", 0.75, 0.55,
30+
"LocalLSH", 4, 4, "Initialization: 2 s", NA, 0.75,
31+
"LocalLSH", 4, 4, "Communication: 16 s", NA, 0.5,
32+
"LocalLSH", 4, 4, "Local computation: 44 s", NA, 0.25,
33+
)
34+
35+
percent_idx <- function(p) {
36+
as.integer(quantile(1:n(), p))
37+
}
38+
39+
ggplot(plotdata, aes(x=time, group=hostname)) +
1140
# CPU
12-
geom_line(aes(y=cpu_user), color="darkorange") +
13-
geom_line(aes(y=mem_used / mem_total), color="darkgreen") +
1441
geom_line(aes(y=net_tx / tot_net), color="blue") +
1542
geom_line(aes(y=net_rx / tot_net), color="steelblue1") +
43+
geom_line(aes(y=cpu_user), color="darkorange") +
44+
geom_ribbon(aes(
45+
ymax=(mem_used / mem_total) + 0.02,
46+
ymin=(mem_used / mem_total) - 0.02
47+
),
48+
fill="white"
49+
) +
50+
geom_line(aes(y=mem_used / mem_total), color="darkgreen") +
1651
geom_label(
17-
aes(y=cpu_user),
52+
aes(y=cpu_user, hjust=0),
1853
label = "CPU",
1954
color = "darkorange",
2055
fill="white",
56+
alpha=0.0,
2157
label.size=NA,
22-
size=3,
23-
data=function (d) { group_by(d, id) %>% sample_n(1) }
58+
size=2.5,
59+
data=function (d) { group_by(d, id) %>% slice(percent_idx(.99)) }
2460
) +
2561
# Memory
2662
geom_label(
27-
aes(y=mem_used / mem_total),
63+
aes(y=mem_used / mem_total, hjust=0),
2864
label = "Memory",
2965
color = "darkgreen",
3066
fill="white",
67+
alpha=0.0,
3168
label.size=NA,
32-
size=3,
33-
data=function (d) { group_by(d, id) %>% sample_n(1) }
69+
size=2.5,
70+
data=function (d) { group_by(d, id) %>% slice(percent_idx(0.99)) }
3471
) +
3572
# Network
3673
geom_label(
37-
aes(y=net_tx / tot_net),
38-
label = "Net (tx)",
74+
aes(y=net_tx / tot_net, hjust=0),
75+
label = "Network",
3976
color = "blue",
4077
fill="white",
78+
alpha=0.0,
4179
label.size=NA,
42-
size=3,
43-
data=function (d) { group_by(d, id) %>% sample_n(1) }
80+
size=2.5,
81+
data=function (d) { group_by(d, id) %>% slice(percent_idx(0.99)) }
4482
) +
45-
# Network
46-
geom_label(
47-
aes(y=net_rx / tot_net),
48-
label = "Net (rx)",
49-
color = "steelblue1",
50-
fill="white",
51-
label.size=NA,
52-
size=3,
53-
data=function (d) { group_by(d, id) %>% sample_n(1) }
83+
geom_segment(
84+
aes(y=y, yend=y, x=from, xend=to),
85+
data=annotations,
86+
inherit.aes=F,
87+
linetype="dotted",
88+
size=.4
89+
) +
90+
geom_text(
91+
aes(y=ytext, x=(from+to)/2, label=text),
92+
data=annotations,
93+
inherit.aes=F,
94+
size=2.5
5495
) +
55-
facet_wrap(vars(id), ncol=1) +
96+
facet_wrap(vars(algorithm), ncol=1) +
5697
scale_y_continuous(labels=scales::percent_format(), expand=expansion(mult=0.1)) +
98+
scale_x_continuous(expand=expansion(add=c(0.3, 1))) +
5799
labs(
58100
x = "time (minutes)",
59101
y = "usage"
60102
) +
61-
theme_paper()
103+
theme_paper() +
104+
theme(
105+
panel.border = element_rect(size=0.5),
106+
panel.grid = element_blank()
107+
)
62108

109+
ggsave("imgs/system.png", width = 4, height = 3)
63110

0 commit comments

Comments
 (0)