Merge pull request #89 from ukhsa-collaboration/DEVELOPMENT_MASTER

Development master
ukhsa-collaboration · Jan 24, 2024 · f419c6d · f419c6d
2 parents dd838e9 + 49a6332
commit f419c6d
Show file tree

Hide file tree

Showing 28 changed files with 1,237 additions and 620 deletions.
diff --git a/.gitignore b/.gitignore
@@ -33,4 +33,3 @@ vignettes/*.R
 inst/doc
 data-raw
 README.Rmd
-data-raw/LoadTestdata.R
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: PHEindicatormethods
 Type: Package
-Version: 2.0.1
+Version: 2.0.2
 Title: Common Public Health Statistics and their Confidence Intervals 
 Description: Functions to calculate commonly used public health statistics and 
     their confidence intervals using methods approved for use in the production  
@@ -21,12 +21,13 @@ Description: Functions to calculate commonly used public health statistics and
     Silcocks PBS et al (2001) <doi:10.1136/jech.55.1.38>.
     Low and Low (2004) <doi:10.1093/pubmed/fdh175>.
 Authors@R: c(person("Anderson",   "Georgina",  email = "georgina.anderson@dhsc.gov.uk",  role = c("aut", "cre")),
-             person("Fox",        "Sebastian", email = "sebastian.fox@dhsc.gov.uk",     role = c("ctb")),
+             person("Fox",        "Sebastian",                                           role = c("ctb")),
              person("Francis",    "Matthew",                                             role = c("ctb")),
              person("Fryers",     "Paul",      email = "paul.fryers@dhsc.gov.uk",        role = c("ctb")),
              person("Clegg",      "Emma",                                                role = c("ctb")),
              person("Westermann", "Annabel",   email = "annabel.westermann@dhsc.gov.uk", role = c("ctb")),
-             person("Woolner",    "Joshua",                                              role = c("ctb"))
+             person("Woolner",    "Joshua",                                              role = c("ctb")),
+             person("Fellows",    "Charlotte", email = "charlotte.fellows@dhsc.gov.uk",  role = c("ctb"))
              )
 BugReports: https://github.com/ukhsa-collaboration/PHEindicatormethods/issues
 Depends: R (>= 3.1.0)
@@ -46,8 +47,9 @@ Suggests:
     knitr,
     readxl,
     rmarkdown,
-    testthat,
+    testthat (>= 3.0.0),
     withr
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 VignetteBuilder: knitr
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,6 +18,7 @@ importFrom(purrr,map)
 importFrom(purrr,map_chr)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
+importFrom(rlang,.env)
 importFrom(rlang,quo_name)
 importFrom(rlang,quo_text)
 importFrom(rlang,sym)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+## PHEindicatormethods v2.0.2
+* Amended phe_quantile function so it will not produce quantiles when the number of small areas within a group is less than the number of quantiles requested. A warning will be generated when quantiles cannot be produced for this reason.
+* removed the highergeog argument from phe_quantile function, previously soft-deprecated in v1.2.0.
+* `phe_sii` amended to allow data to be transformed prior to calculation of the SII, and to allow the intercept value to be output.
+
 ## PHEindicatormethods v2.0.1
 * `calculate_ISRate` and `calculate_ISRatio` updated so observed events can be passed as total without age breakdowns
 * amended GitHub referencing as code repository now owned and hosted by UKHSA-collaboration not PublicHealthEngland

diff --git a/R/Proportions.R b/R/Proportions.R
@@ -153,7 +153,7 @@ phe_proportion <- function(data, x, n, type="full", confidence=0.95, multiplier=
                  uppercl = wilson_upper(({{ x }}),({{ n }}),confidence) * multiplier,
                  confidence = paste(confidence*100,"%",sep=""),
                  method = "Wilson") |>
-          relocate(.data$statistic, .after = confidence)
+          relocate("statistic", .after = "confidence")
         }
 
 

diff --git a/R/Quantiles.R b/R/Quantiles.R
@@ -1,48 +1,54 @@
-# -------------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
 #' Assign Quantiles using phe_quantile
 #'
 #' Assigns data to quantiles based on numeric data rankings.
 #'
-#' @param data a data frame containing the quantitative data to be assigned to quantiles.
-#'             If pre-grouped, separate sets of quantiles will be assigned for each grouping set;
-#'             unquoted string; no default
-#' @param values field name from data containing the numeric values to rank data by and assign quantiles from;
-#'             unquoted string; no default
-#' @param highergeog deprecated - functionality replaced by pre-grouping the input data frame
-#' @param nquantiles the number of quantiles to separate each grouping set into; numeric; default=10L
-#' @param invert whether the quantiles should be directly (FALSE) or inversely (TRUE) related to the numerical value order;
-#'               logical (to apply same value to all grouping sets) OR unquoted string referencing field name from data
-#'               that stores logical values for each grouping set; default = TRUE (ie highest values assigned to quantile 1)
-#' @param inverttype whether the invert argument has been specified as a logical value or a field name from data;
-#'                   quoted string "field" or "logical"; default = "logical"
-#' @param type defines whether to include metadata columns in output to reference the arguments passed;
-#'             can be "standard" or "full"; quoted string; default = "full"
+#' @param data a data frame containing the quantitative data to be assigned to
+#'   quantiles. If pre-grouped, separate sets of quantiles will be assigned for
+#'   each grouping set; unquoted string; no default
+#' @param values field name from data containing the numeric values to rank data
+#'   by and assign quantiles from; unquoted string; no default
+#' @param nquantiles the number of quantiles to separate each grouping set into;
+#'   numeric; default=10L
+#' @param invert whether the quantiles should be directly (FALSE) or inversely
+#'   (TRUE) related to the numerical value order; logical (to apply same value
+#'   to all grouping sets) OR unquoted string referencing field name from data
+#'   that stores logical values for each grouping set; default = TRUE (ie
+#'   highest values assigned to quantile 1)
+#' @param inverttype whether the invert argument has been specified as a logical
+#'   value or a field name from data; quoted string "field" or "logical";
+#'   default = "logical"
+#' @param type defines whether to include metadata columns in output to
+#'   reference the arguments passed; can be "standard" or "full"; quoted string;
+#'   default = "full"
 #'
 #' @import dplyr
 #' @importFrom rlang sym quo_name
 #' @export
 #'
-#' @return When type = "full", returns the original data.frame with quantile (quantile value),
-#'         nquantiles (number of quantiles requested), groupvars (grouping sets quantiles assigned within)
-#'         and invert (indicating direction of quantile assignment) fields appended.
-#'
+#' @return When type = "full", returns the original data.frame with quantile
+#'   (quantile value), nquantiles (number of quantiles requested), groupvars
+#'   (grouping sets quantiles assigned within) and invert (indicating direction
+#'   of quantile assignment) fields appended.
 #'
-#' @section Notes: See [PHE Technical Guide - Assigning Deprivation Quintiles](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods) for methodology.
-#'          In particular, note that this function strictly applies the algorithm defined but some manual
-#'          review, and potentially adjustment, is advised in some cases where multiple small areas with equal rank
-#'          fall across a natural quantile boundary.
+#' @section Notes: See [OHID Technical Guide - Assigning Deprivation Categories](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods)
+#'   for methodology. In particular, note that this function strictly applies
+#'   the algorithm defined but some manual review, and potentially adjustment,
+#'   is advised in some cases where multiple small areas with equal rank fall
+#'   across a natural quantile boundary.
 #'
 #' @examples
-#'
-#' df <- data.frame(region = as.character(rep(c("Region1","Region2","Region3","Region4"), each=250)),
-#'                    smallarea = as.character(paste0("Area",seq_along(1:1000))),
-#'                    vals = as.numeric(sample(200, 1000, replace = TRUE)),
-#'                    stringsAsFactors=FALSE)
+#' df <- data.frame(
+#'   region    = as.character(rep(c("Region1","Region2","Region3","Region4"),
+#'                                each=250)),
+#'   smallarea = as.character(paste0("Area",seq_along(1:1000))),
+#'   vals      = as.numeric(sample(200, 1000, replace = TRUE)),
+#'   stringsAsFactors = FALSE)
 #'
 #' # assign small areas to deciles across whole data frame
 #' phe_quantile(df, vals)
 #'
-#' # assign small area to deciles within regions by pre-grouping the input data frame
+#' # assign small areas to deciles within regions by pre-grouping the data frame
 #' library(dplyr)
 #' df_grp <- df %>% group_by(region)
 #' phe_quantile(df_grp, vals)
@@ -53,36 +59,34 @@
 #' @family PHEindicatormethods package functions
 # -------------------------------------------------------------------------------------------------
 
-
 # create phe_quantile function using PHE method
-phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L,
-                         invert=TRUE, inverttype = "logical", type = "full") {
+phe_quantile <- function(data,
+                         values,
+                         nquantiles = 10L,
+                         invert = TRUE,
+                         inverttype = "logical",
+                         type = "full") {
 
 
     # check required arguments present
-    if (missing(data)|missing(values)) {
-        stop("function phe_quantile requires at least 2 arguments: data and values")
+    if (missing(data) | missing(values)) {
+        stop(paste0("function phe_quantile requires at least 2 arguments: ",
+                    "data and values"))
     }
 
-    # give useful error if deprecated highergeog argument used
-    if (!missing(highergeog)) {
-      stop("highergeog argument is deprecated - pregroup input dataframe to replace this functionality")
-    }
-
-
     # check invert is valid and append to data
-    if (!(inverttype %in% c("logical","field"))) {
+    if (!(inverttype %in% c("logical", "field"))) {
       stop("valid values for inverttype are logical and field")
 
     } else if (inverttype == "logical") {
-      if (!(invert %in% c(TRUE,FALSE))) {
+      if (!(invert %in% c(TRUE, FALSE))) {
         stop("invert expressed as a logical must equal TRUE or FALSE")
       }
-      data <- mutate(data,invert_calc = invert)
+      data <- mutate(data, invert_calc = invert)
 
     } else if (inverttype == "field") {
       if (deparse(substitute(invert)) %in% colnames(data)) {
-          data <- mutate(data,invert_calc = {{ invert }})
+          data <- mutate(data, invert_calc = {{ invert }})
       } else stop("invert is not a field name from data")
     }
 
@@ -93,34 +97,54 @@ phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L,
     }
 
     #check all invert values are identical within groups
-    if (!n_groups(data) == nrow(unique(select(data,"invert_calc")))) {
-        stop("invert field values must take the same logical value for each data grouping set")
+    if (!n_groups(data) == nrow(unique(select(data, "invert_calc")))) {
+        stop(paste0("invert field values must take the same logical value for ",
+                    "each data grouping set"))
     }
 
 
-    # assign quantiles
+    # assign quantiles - unless number of small areas within a  group is less
+    # than number of quantiles requested. Ignore areas with no value present
     phe_quantile <- data %>%
-        mutate(naflag = if_else(is.na({{ values }}),0,1)) %>%
-        add_count(name = "na_flag", wt = .data$naflag) %>%
-        mutate(adj_value = if_else(.data$invert_calc == TRUE, max({{ values }}, na.rm=TRUE)-{{ values }},{{ values }}),
-               rank      = rank(.data$adj_value, ties.method="min", na.last = "keep"),
-               quantile  = floor((nquantiles+1)-ceiling(((.data$na_flag+1)-rank)/(.data$na_flag/nquantiles))),
-               quantile  = if_else(.data$quantile == 0, 1, .data$quantile)) %>%
-        select(!c("naflag", "na_flag", "adj_value", "rank")) %>%
+        mutate(num_small_areas = sum(!is.na({{ values }})),
+               rank = case_when(
+                        .data$invert_calc == TRUE ~ rank(- {{ values }},
+                                                         ties.method = "min",
+                                                         na.last = "keep"),
+                        .default = rank({{ values }},
+                                   ties.method = "min",
+                                   na.last = "keep")
+                      ),
+               quantile  = if_else(.data$num_small_areas < nquantiles,
+                                     NA_real_,
+                                     floor((nquantiles + 1) - ceiling(((.data$num_small_areas + 1) - rank) /
+                                                                      (.data$num_small_areas / nquantiles)
+                                                                      )
+                                           )
+                                   ),
+               quantile  = if_else(.data$quantile == 0, 1, .data$quantile)
+    ) %>%
+        select(!c("num_small_areas", "rank")) %>%
         mutate(nquantiles= nquantiles,
-               groupvars = paste0(group_vars(data),collapse = ", "),
+               groupvars = paste0(group_vars(data), collapse = ", "),
                qinverted = if_else(.data$invert_calc == TRUE,
                                    "lowest quantile represents highest values",
                                    "lowest quantile represents lowest values"))
 
+    # warn if any groups had too few snall areas with values to assign quantiles
+    if (nrow(filter(phe_quantile, all(is.na(.data$quantile)))) > 0
+        ) {
+    warning(paste0("One or more groups had too few small areas with values to ",
+                   "allow quantiles to be assigned"))
+    }
 
     # remove columns if not required based on value of type argument
     if (type == "standard") {
         phe_quantile <- phe_quantile %>%
-                            select(!c("nquantiles", "groupvars", "qinverted", "invert_calc"))
+          select(!c("nquantiles", "groupvars", "qinverted", "invert_calc"))
     } else {
         phe_quantile <- phe_quantile %>%
-                            select(!c("invert_calc"))
+          select(!c("invert_calc"))
     }