Skip to content

Commit

Permalink
Merge pull request #89 from ukhsa-collaboration/DEVELOPMENT_MASTER
Browse files Browse the repository at this point in the history
Development master
  • Loading branch information
PHEgeorginaanderson committed Jan 24, 2024
2 parents dd838e9 + 49a6332 commit f419c6d
Show file tree
Hide file tree
Showing 28 changed files with 1,237 additions and 620 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ vignettes/*.R
inst/doc
data-raw
README.Rmd
data-raw/LoadTestdata.R
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PHEindicatormethods
Type: Package
Version: 2.0.1
Version: 2.0.2
Title: Common Public Health Statistics and their Confidence Intervals
Description: Functions to calculate commonly used public health statistics and
their confidence intervals using methods approved for use in the production
Expand All @@ -21,12 +21,13 @@ Description: Functions to calculate commonly used public health statistics and
Silcocks PBS et al (2001) <doi:10.1136/jech.55.1.38>.
Low and Low (2004) <doi:10.1093/pubmed/fdh175>.
Authors@R: c(person("Anderson", "Georgina", email = "georgina.anderson@dhsc.gov.uk", role = c("aut", "cre")),
person("Fox", "Sebastian", email = "sebastian.fox@dhsc.gov.uk", role = c("ctb")),
person("Fox", "Sebastian", role = c("ctb")),
person("Francis", "Matthew", role = c("ctb")),
person("Fryers", "Paul", email = "paul.fryers@dhsc.gov.uk", role = c("ctb")),
person("Clegg", "Emma", role = c("ctb")),
person("Westermann", "Annabel", email = "annabel.westermann@dhsc.gov.uk", role = c("ctb")),
person("Woolner", "Joshua", role = c("ctb"))
person("Woolner", "Joshua", role = c("ctb")),
person("Fellows", "Charlotte", email = "charlotte.fellows@dhsc.gov.uk", role = c("ctb"))
)
BugReports: https://github.com/ukhsa-collaboration/PHEindicatormethods/issues
Depends: R (>= 3.1.0)
Expand All @@ -46,8 +47,9 @@ Suggests:
knitr,
readxl,
rmarkdown,
testthat,
testthat (>= 3.0.0),
withr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,quo_name)
importFrom(rlang,quo_text)
importFrom(rlang,sym)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## PHEindicatormethods v2.0.2
* Amended phe_quantile function so it will not produce quantiles when the number of small areas within a group is less than the number of quantiles requested. A warning will be generated when quantiles cannot be produced for this reason.
* removed the highergeog argument from phe_quantile function, previously soft-deprecated in v1.2.0.
* `phe_sii` amended to allow data to be transformed prior to calculation of the SII, and to allow the intercept value to be output.

## PHEindicatormethods v2.0.1
* `calculate_ISRate` and `calculate_ISRatio` updated so observed events can be passed as total without age breakdowns
* amended GitHub referencing as code repository now owned and hosted by UKHSA-collaboration not PublicHealthEngland
Expand Down
2 changes: 1 addition & 1 deletion R/Proportions.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ phe_proportion <- function(data, x, n, type="full", confidence=0.95, multiplier=
uppercl = wilson_upper(({{ x }}),({{ n }}),confidence) * multiplier,
confidence = paste(confidence*100,"%",sep=""),
method = "Wilson") |>
relocate(.data$statistic, .after = confidence)
relocate("statistic", .after = "confidence")
}


Expand Down
138 changes: 81 additions & 57 deletions R/Quantiles.R
Original file line number Diff line number Diff line change
@@ -1,48 +1,54 @@
# -------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
#' Assign Quantiles using phe_quantile
#'
#' Assigns data to quantiles based on numeric data rankings.
#'
#' @param data a data frame containing the quantitative data to be assigned to quantiles.
#' If pre-grouped, separate sets of quantiles will be assigned for each grouping set;
#' unquoted string; no default
#' @param values field name from data containing the numeric values to rank data by and assign quantiles from;
#' unquoted string; no default
#' @param highergeog deprecated - functionality replaced by pre-grouping the input data frame
#' @param nquantiles the number of quantiles to separate each grouping set into; numeric; default=10L
#' @param invert whether the quantiles should be directly (FALSE) or inversely (TRUE) related to the numerical value order;
#' logical (to apply same value to all grouping sets) OR unquoted string referencing field name from data
#' that stores logical values for each grouping set; default = TRUE (ie highest values assigned to quantile 1)
#' @param inverttype whether the invert argument has been specified as a logical value or a field name from data;
#' quoted string "field" or "logical"; default = "logical"
#' @param type defines whether to include metadata columns in output to reference the arguments passed;
#' can be "standard" or "full"; quoted string; default = "full"
#' @param data a data frame containing the quantitative data to be assigned to
#' quantiles. If pre-grouped, separate sets of quantiles will be assigned for
#' each grouping set; unquoted string; no default
#' @param values field name from data containing the numeric values to rank data
#' by and assign quantiles from; unquoted string; no default
#' @param nquantiles the number of quantiles to separate each grouping set into;
#' numeric; default=10L
#' @param invert whether the quantiles should be directly (FALSE) or inversely
#' (TRUE) related to the numerical value order; logical (to apply same value
#' to all grouping sets) OR unquoted string referencing field name from data
#' that stores logical values for each grouping set; default = TRUE (ie
#' highest values assigned to quantile 1)
#' @param inverttype whether the invert argument has been specified as a logical
#' value or a field name from data; quoted string "field" or "logical";
#' default = "logical"
#' @param type defines whether to include metadata columns in output to
#' reference the arguments passed; can be "standard" or "full"; quoted string;
#' default = "full"
#'
#' @import dplyr
#' @importFrom rlang sym quo_name
#' @export
#'
#' @return When type = "full", returns the original data.frame with quantile (quantile value),
#' nquantiles (number of quantiles requested), groupvars (grouping sets quantiles assigned within)
#' and invert (indicating direction of quantile assignment) fields appended.
#'
#' @return When type = "full", returns the original data.frame with quantile
#' (quantile value), nquantiles (number of quantiles requested), groupvars
#' (grouping sets quantiles assigned within) and invert (indicating direction
#' of quantile assignment) fields appended.
#'
#' @section Notes: See [PHE Technical Guide - Assigning Deprivation Quintiles](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods) for methodology.
#' In particular, note that this function strictly applies the algorithm defined but some manual
#' review, and potentially adjustment, is advised in some cases where multiple small areas with equal rank
#' fall across a natural quantile boundary.
#' @section Notes: See [OHID Technical Guide - Assigning Deprivation Categories](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods)
#' for methodology. In particular, note that this function strictly applies
#' the algorithm defined but some manual review, and potentially adjustment,
#' is advised in some cases where multiple small areas with equal rank fall
#' across a natural quantile boundary.
#'
#' @examples
#'
#' df <- data.frame(region = as.character(rep(c("Region1","Region2","Region3","Region4"), each=250)),
#' smallarea = as.character(paste0("Area",seq_along(1:1000))),
#' vals = as.numeric(sample(200, 1000, replace = TRUE)),
#' stringsAsFactors=FALSE)
#' df <- data.frame(
#' region = as.character(rep(c("Region1","Region2","Region3","Region4"),
#' each=250)),
#' smallarea = as.character(paste0("Area",seq_along(1:1000))),
#' vals = as.numeric(sample(200, 1000, replace = TRUE)),
#' stringsAsFactors = FALSE)
#'
#' # assign small areas to deciles across whole data frame
#' phe_quantile(df, vals)
#'
#' # assign small area to deciles within regions by pre-grouping the input data frame
#' # assign small areas to deciles within regions by pre-grouping the data frame
#' library(dplyr)
#' df_grp <- df %>% group_by(region)
#' phe_quantile(df_grp, vals)
Expand All @@ -53,36 +59,34 @@
#' @family PHEindicatormethods package functions
# -------------------------------------------------------------------------------------------------


# create phe_quantile function using PHE method
phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L,
invert=TRUE, inverttype = "logical", type = "full") {
phe_quantile <- function(data,
values,
nquantiles = 10L,
invert = TRUE,
inverttype = "logical",
type = "full") {


# check required arguments present
if (missing(data)|missing(values)) {
stop("function phe_quantile requires at least 2 arguments: data and values")
if (missing(data) | missing(values)) {
stop(paste0("function phe_quantile requires at least 2 arguments: ",
"data and values"))
}

# give useful error if deprecated highergeog argument used
if (!missing(highergeog)) {
stop("highergeog argument is deprecated - pregroup input dataframe to replace this functionality")
}


# check invert is valid and append to data
if (!(inverttype %in% c("logical","field"))) {
if (!(inverttype %in% c("logical", "field"))) {
stop("valid values for inverttype are logical and field")

} else if (inverttype == "logical") {
if (!(invert %in% c(TRUE,FALSE))) {
if (!(invert %in% c(TRUE, FALSE))) {
stop("invert expressed as a logical must equal TRUE or FALSE")
}
data <- mutate(data,invert_calc = invert)
data <- mutate(data, invert_calc = invert)

} else if (inverttype == "field") {
if (deparse(substitute(invert)) %in% colnames(data)) {
data <- mutate(data,invert_calc = {{ invert }})
data <- mutate(data, invert_calc = {{ invert }})
} else stop("invert is not a field name from data")
}

Expand All @@ -93,34 +97,54 @@ phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L,
}

#check all invert values are identical within groups
if (!n_groups(data) == nrow(unique(select(data,"invert_calc")))) {
stop("invert field values must take the same logical value for each data grouping set")
if (!n_groups(data) == nrow(unique(select(data, "invert_calc")))) {
stop(paste0("invert field values must take the same logical value for ",
"each data grouping set"))
}


# assign quantiles
# assign quantiles - unless number of small areas within a group is less
# than number of quantiles requested. Ignore areas with no value present
phe_quantile <- data %>%
mutate(naflag = if_else(is.na({{ values }}),0,1)) %>%
add_count(name = "na_flag", wt = .data$naflag) %>%
mutate(adj_value = if_else(.data$invert_calc == TRUE, max({{ values }}, na.rm=TRUE)-{{ values }},{{ values }}),
rank = rank(.data$adj_value, ties.method="min", na.last = "keep"),
quantile = floor((nquantiles+1)-ceiling(((.data$na_flag+1)-rank)/(.data$na_flag/nquantiles))),
quantile = if_else(.data$quantile == 0, 1, .data$quantile)) %>%
select(!c("naflag", "na_flag", "adj_value", "rank")) %>%
mutate(num_small_areas = sum(!is.na({{ values }})),
rank = case_when(
.data$invert_calc == TRUE ~ rank(- {{ values }},
ties.method = "min",
na.last = "keep"),
.default = rank({{ values }},
ties.method = "min",
na.last = "keep")
),
quantile = if_else(.data$num_small_areas < nquantiles,
NA_real_,
floor((nquantiles + 1) - ceiling(((.data$num_small_areas + 1) - rank) /
(.data$num_small_areas / nquantiles)
)
)
),
quantile = if_else(.data$quantile == 0, 1, .data$quantile)
) %>%
select(!c("num_small_areas", "rank")) %>%
mutate(nquantiles= nquantiles,
groupvars = paste0(group_vars(data),collapse = ", "),
groupvars = paste0(group_vars(data), collapse = ", "),
qinverted = if_else(.data$invert_calc == TRUE,
"lowest quantile represents highest values",
"lowest quantile represents lowest values"))

# warn if any groups had too few snall areas with values to assign quantiles
if (nrow(filter(phe_quantile, all(is.na(.data$quantile)))) > 0
) {
warning(paste0("One or more groups had too few small areas with values to ",
"allow quantiles to be assigned"))
}

# remove columns if not required based on value of type argument
if (type == "standard") {
phe_quantile <- phe_quantile %>%
select(!c("nquantiles", "groupvars", "qinverted", "invert_calc"))
select(!c("nquantiles", "groupvars", "qinverted", "invert_calc"))
} else {
phe_quantile <- phe_quantile %>%
select(!c("invert_calc"))
select(!c("invert_calc"))
}


Expand Down
Loading

0 comments on commit f419c6d

Please sign in to comment.