-
Notifications
You must be signed in to change notification settings - Fork 0
/
consistency_check.R
132 lines (124 loc) · 5.07 KB
/
consistency_check.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#' Consistency Check Function
#'
#' This function performs a consistency check to ensure that the number of tests
#' is at least as large as the number of cases for given pairs of columns in a
#' dataset. The outcome of each check is reported with \code{message()}, and a
#' \code{\link[=ggplot2]{ggplot2}} object visualizing the results is returned.
#'
#' A row is counted as inconsistent for a pair when both values are present
#' and the case value is strictly greater than the test value. Rows where
#' either value is \code{NA} are never counted as inconsistent. The reported
#' percentage uses the total number of rows in \code{data} as its denominator.
#'
#' @param data A data frame containing the test and case data.
#' @param tests A character vector specifying the column names for the test
#'   data.
#'
#' @param cases A character vector specifying the column names for the case
#'   data. The length of 'tests' and 'cases' must be the same, and each element
#'   in 'tests' corresponds to an element in 'cases'.
#'
#' @return A \code{\link[=ggplot2]{ggplot2::ggplot()}} object showing
#'   the consistency between the number of tests and cases. The x-axis
#'   represents the tests, and the y-axis represents the cases. Each facet
#'   represents one tests/cases pair (a disease), and the diagonal line shows
#'   where the number of tests equals the number of cases.
#'
#' @details An error is raised when 'tests' and 'cases' differ in length, or
#'   when any of the named columns is not present in \code{data}.
#'
#' @examples
#' # check the consistency between malaria tests and cases
#'
#' # get path
#' path <- system.file(
#'   "extdata",
#'   "fake_epi_df_togo.rds",
#'   package = "epiCleanr")
#'
#' # get example data
#' fake_epi_df_togo <- import(path)
#'
#' consistency_check(fake_epi_df_togo,
#'                   tests = c("malaria_tests","cholera_tests"),
#'                   cases = c("malaria_cases", "cholera_cases"))
#'
#'
#' @importFrom ggplot2 ggplot geom_point geom_abline theme_bw facet_wrap labs
#'   theme_minimal theme scale_x_continuous scale_y_continuous element_text margin
#' @importFrom glue glue
#' @importFrom crayon green red
#'
#' @export
consistency_check <- function(data, tests, cases) {
  # 'tests' and 'cases' are matched position by position, so they must align
  if (length(tests) != length(cases)) {
    stop("The length of 'tests' and 'cases' must be the same.", call. = FALSE)
  }

  # Fail early with a clear message instead of an obscure downstream error
  missing_cols <- setdiff(c(tests, cases), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in 'data': ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  total_rows <- nrow(data)

  # One long-format piece per tests/cases pair; bound together after the loop
  # rather than growing a data frame on every iteration
  pair_frames <- vector("list", length(tests))

  for (i in seq_along(tests)) {
    test_column <- tests[i]
    case_column <- cases[i]
    test_values <- data[[test_column]]
    case_values <- data[[case_column]]

    # Rows where both values are present and there are more cases than tests.
    # The is.na() guards make the mask FALSE (never NA) for incomplete rows.
    is_inconsistent <- !is.na(case_values) & !is.na(test_values) &
      case_values > test_values
    inconsistent_count <- sum(is_inconsistent)
    inconsistent_prop <- inconsistent_count / total_rows * 100

    disease_name <- paste(test_column, case_column, sep = " vs ")

    # rep() keeps the label the same length as the value columns, so input
    # with zero rows yields an empty piece instead of a data.frame() error
    pair_frames[[i]] <- data.frame(
      disease = rep(disease_name, length(case_values)),
      cases = case_values,
      tests = test_values
    )

    # Report the outcome for this pair
    if (inconsistent_count == 0) {
      message(
        crayon::green(
          glue::glue(
            "Consistency test passed for {disease_name}: ",
            "There are more tests than there are cases!")
        )
      )
    } else {
      message(
        crayon::red(
          glue::glue(
            "Consistency test failed for {disease_name}: ",
            "There are {scales::comma(inconsistent_count)} ",
            "({round(inconsistent_prop, 2)}%) ",
            "rows where cases are greater than tests.")
        )
      )
    }
  }

  # The typed empty frame fixes the column layout even when no pairs are given
  empty_results <- data.frame(disease = character(),
                              cases = numeric(),
                              tests = numeric())
  results <- do.call(rbind, c(list(empty_results), pair_frames))

  # Create the plot: tests on x, cases on y, one facet per pair. Points above
  # the 1:1 reference line are rows with more cases than tests.
  ggplot2::ggplot(results, ggplot2::aes(y = cases, x = tests)) +
    ggplot2::geom_point(shape = 16, size = 4, show.legend = FALSE, alpha = .5,
                        color = "#1e81b0", na.rm = TRUE) +
    ggplot2::geom_abline(intercept = 0, linewidth = 1, alpha = .7, slope = 1,
                         color = "darkred") +
    ggplot2::facet_wrap(~ disease, scales = "free") +
    ggplot2::labs(
      y = "Cases",
      x = "Tests",
      title = paste("<span style = 'font-size:10pt'><b style='color",
                    ":#526A83'>Consistency Check</b>: Comparing",
                    "the number of tests and cases</span>")) +
    # theme_minimal() is a complete theme; the theme() call below tweaks it
    ggplot2::theme_minimal() +
    ggplot2::theme(
      legend.position = "top",
      plot.title = ggtext::element_markdown(),
      plot.caption = ggplot2::element_text(size = 8),
      panel.grid.minor = ggplot2::element_blank(),
      panel.grid.major = ggplot2::element_line(
        color = "grey90", linetype = 3),
      panel.background = ggplot2::element_rect(color = "grey10",
                                               linewidth = 0.6),
      axis.title.y = ggplot2::element_text(margin = ggplot2::margin(r = 10)),
      axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 12)),
      axis.text.x = ggplot2::element_text(size = 8, angle = 45, hjust = 1,
                                          margin = ggplot2::margin(t = 2, b = 2))
    ) +
    ggplot2::scale_x_continuous(labels = scales::comma_format(big.mark = ","),
                                limits = c(0, NA), expand = c(0, 0)) +
    ggplot2::scale_y_continuous(labels = scales::comma_format(big.mark = ","),
                                limits = c(0, NA), expand = c(0, 0))
}