/
plot-postHocTest.R
146 lines (127 loc) · 4.65 KB
/
plot-postHocTest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#' `postHocTest` plot
#'
#' Visualize the result of post-hoc test using ggplot2
#'
#' @param pht a [`postHocTest-class`] object
#' @param feature character, to plot the post-toc test result of this feature
#' @param step_increase numeric vector with the increase in fraction of total
#' height for every additional comparison to minimize overlap, default `0.12`.
#' @name plot_postHocTest
#' @return a `ggplot` object
#' @importFrom dplyr filter
#' @importFrom ggplot2 ggplot aes labs geom_errorbar geom_point geom_boxplot
#' @export
#' @examples
#' data(enterotypes_arumugam)
#' ps <- phyloseq::subset_samples(
#' enterotypes_arumugam,
#' Enterotype %in% c("Enterotype 3", "Enterotype 2", "Enterotype 1")
#' ) %>%
#' phyloseq::subset_taxa(Phylum == "Bacteroidetes")
#' pht <- run_posthoc_test(ps, group = "Enterotype")
#' plot_postHocTest(pht, feature = "p__Bacteroidetes|g__Alistipes")
plot_postHocTest <- function(pht,
feature,
step_increase = 0.12) {
abd_long <- pht@abundance %>%
tidyr::pivot_longer(-.data$group, names_to = "feat")
if (!is.null(feature)) {
abd_long <- filter(abd_long, .data$feat %in% feature)
}
annotation <- get_sig_annotation(pht, step_increase = step_increase)
p_box <- suppressWarnings(
ggplot(abd_long, aes(x = .data$group, y = .data$value)) +
geom_boxplot() +
ggsignif::geom_signif(
data = annotation[annotation$feature %in% feature, ],
aes(
xmin = .data$xmin, xmax = .data$xmax,
annotations = .data$annotation,
y_position = .data$y_position),
manual = TRUE, textsize = 3, vjust = 0.2
) +
labs(x = NULL, y = "Abundance")
)
test_res <- as.data.frame(pht@result[[feature]])
p_test <- ggplot(test_res, aes(x = .data$comparisons)) +
geom_errorbar(
aes(ymin = .data$ci_lower, ymax = .data$ci_upper),
width = 0.2
) +
geom_point(aes(y = .data$diff_mean)) +
labs(x = NULL, y = "95% confidence intervals")
patchwork::wrap_plots(p_box + p_test)
}
#' get the annotation used for setting the location of the the significance bars
#'
#' @param pht a [postHocTest-class] object.
#' @param step_increase numeric vector with the increase in fraction of total
#' height for every additional comparison to minimize overlap, default `0.12`.
#' @noRd
get_sig_annotation <- function(pht, step_increase = 0.12) {
abd <- pht@abundance
group <- abd$group
abd <- dplyr::mutate(abd, group = NULL)
pht_res <- pht@result
sig_annotation <- purrr::map2_df(
abd, as.list(pht_res),
~ get_sig_annotation_single(.x, .y,
group = group,
step_increase = step_increase
)
)
sig_annotation$feature <- rep(names(abd), each = nrow(pht_res[[1]]))
sig_annotation
}
#' get the annotation data frame of a single feature
#'
#' @param abd numeric vector, abundance of a given feature
#' @param pht_df `data.frame` or `DFrame`, post hoc test result of a given
#' feature
#' @param group character vector the same length with `abd`, the group of the
#' samples
#' @param step_increase numeric vector with the increase in fraction of total
#' height for every additional comparison to minimize overlap, default `0.12`.
#' @return a data frame with four variables, `start`, `end`, `y_position`,
#' and `annotation`.
#' @noRd
get_sig_annotation_single <- function(abd,
pht_df,
group,
step_increase = 0.12) {
if (inherits(pht_df, "DFrame")) {
pht_df <- as.data.frame(pht_df)
}
y_max <- split(abd, group) %>%
purrr::map(range) %>%
purrr::map_dbl(2)
y_range <- max(abd) - min(abd)
comps <- strsplit(pht_df$comparisons, "-", fixed = TRUE)
start <- purrr::map_chr(comps, 1)
end <- purrr::map_chr(comps, 2)
y_max <- purrr::map2_dbl(start, end, ~ max(y_max[.x], y_max[.y]))
y_pos <- purrr::map_dbl(
seq_len(3),
~ y_max[.x] + y_range * step_increase * (.x - 1)
)
annotate_df <- data.frame(
xmin = start,
xmax = end,
y_position = y_pos,
annotation = pvalue2siglevel(pht_df$pvalue),
stringsAsFactors = FALSE
)
annotate_df
}
#' covert p value to significance level
#'
#' <= 0.001 "\\*\\*\\*", <= 0.01 "\\*\\*", <=0.05 "\\*", > 0.05 "NS."
#' @noRd
#'
pvalue2siglevel <- function(p) {
p[p <= 0.001] <- "***"
p[p <= 0.01 & p > 0.001] <- "**"
p[p > 0.01 & p <= 0.05] <- "*"
p[p > 0.05] <- "NS."
p
}