From 02df52deb10564535054f65c14200a7756a23869 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Wed, 12 Jun 2024 13:12:09 -0700 Subject: [PATCH 01/23] Create ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 R/ard_categorical.survey.design.R diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R new file mode 100644 index 00000000..dd121a12 --- /dev/null +++ b/R/ard_categorical.survey.design.R @@ -0,0 +1,36 @@ + + +ard_categorical.survey.design <- function(data, + variables, + by = NULL, + statistic = everything() ~ c("n", "N", "p"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ cards::default_stat_labels(), + ...) { + set_cli_abort_call() + + # process arguments ---------------------------------------------------------- + cards::process_selectors( + data = data$variables, + variables = {{ variables }}, + by = {{ by }} + ) + cards::process_formula_selectors( + data = data$variables[variables], + statistic = statistic, + fmt_fn = fmt_fn, + stat_label = stat_label + ) + denominator <- arg_match(denominator) + + # calculate counts ----------------------------------------------------------- + lst_svytable_counts <- + lapply( + variables, + \(variable) { + survey::svytable(formula = reformulate2(c(by, variable))) + } + ) + +} From 73e9b8ca7be138542182f47d2f74e9627845c06e Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Wed, 12 Jun 2024 22:01:47 -0700 Subject: [PATCH 02/23] in progress --- R/ard_categorical.survey.design.R | 314 +++++++++++++++++++++++++++++- R/ard_continuous.survey.design.R | 2 +- R/import-standalone-tibble.R | 48 +++++ 3 files changed, 357 insertions(+), 7 deletions(-) create mode 100644 R/import-standalone-tibble.R diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index dd121a12..d51d5577 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -1,14 +1,45 @@ +#' ARD survey tabulation +#' +#' Compute tabulations on survey-weighted data. +#' +#' @param data (`survey.design`)\cr +#' a design object often created with [`survey::svydesign()`]. +#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to include in summaries. Default is `everything()`. +#' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' results are calculated for **all combinations** of the column specified +#' and the variables. A single column may be specified. +#' @param statistic ([`formula-list-selector`][syntax])\cr +#' a named list, a list of formulas, +#' or a single formula where the list element is a character vector of +#' statistic names to include. See default value for options. +#' @param fmt_fn ([`formula-list-selector`][syntax])\cr +#' a named list, a list of formulas, +#' or a single formula where the list element is a named list of functions +#' (or the RHS of a formula), +#' e.g. `list(mpg = list(mean = \(x) round(x, digits = 2) |> as.character))`. +#' @param stat_label ([`formula-list-selector`][syntax])\cr +#' a named list, a list of formulas, or a single formula where +#' the list element is either a named list or a list of formulas defining the +#' statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or +#' `everything() ~ list(mean ~ "Mean", sd ~ "SD")`. +#' @inheritParams rlang::args_dots_empty - +#' +#' @return +#' @export +#' +#' @examples ard_categorical.survey.design <- function(data, - variables, + variables = everything(), by = NULL, - statistic = everything() ~ c("n", "N", "p"), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff"), denominator = c("column", "row", "cell"), fmt_fn = NULL, - stat_label = everything() ~ cards::default_stat_labels(), + stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect"), ...) { set_cli_abort_call() + deff <- TRUE # we may update in the future to make this an argument for users # process arguments ---------------------------------------------------------- cards::process_selectors( @@ -16,21 +47,292 @@ ard_categorical.survey.design <- function(data, variables = {{ variables }}, by = {{ by }} ) + variables <- setdiff(variables, by) + check_scalar(by, allow_empty = TRUE) + cards::process_formula_selectors( data = data$variables[variables], statistic = statistic, fmt_fn = fmt_fn, stat_label = stat_label ) + cards::fill_formula_selectors( + data = data$variables[variables], + statistic = formals(asNamespace("cardx")[["ard_categorical.survey.design"]])[["statistic"]] |> eval(), + ) + accepted_svy_stats <- c("n", "N", "p", "p.std.error", "deff") + cards::check_list_elements( + x = statistic, + predicate = \(x) all(x %in% accepted_svy_stats), + error_msg = c("Error in the values of the {.arg statistic} argument.", + i = "Values must be in {.val {accepted_svy_stats}}" + ) + ) denominator <- arg_match(denominator) # calculate counts ----------------------------------------------------------- - lst_svytable_counts <- + # this tabulation accounts for unobserved combinations + browser() + svytable_counts <- .svytable_counts(data, variables, by, denominator) + + # calculate rates along with SE and DEFF ------------------------------------- + svytable_rates <- .svytable_rates(data, variables, by, denominator, deff) + + # convert results into a proper ARD object ----------------------------------- + browser() + cards <- + svytable_counts |> + dplyr::left_join( + svytable_rates |> dplyr::select(-"p"), + by = intersect(c("group1", "group1_level", "variable", "variable_level"), names(svytable_counts)) + ) |> + dplyr::mutate(across(-any_of(c("group1", "variable")), as.list)) |> + tidyr::pivot_longer( + cols = -c(cards::all_ard_groups(), cards::all_ard_variables()), + names_to = "stat_name", + values_to = "stat" + ) |> + # keep statistics requested by user + dplyr::inner_join( + statistic |> enframe("variable", "stat_name") |> tidyr::unnest(cols = "stat_name"), + by = c("variable", "stat_name") + ) |> + # merge in statistic labels + dplyr::left_join( + stat_label |> enframe("variable", "stat_label") |> tidyr::unnest(cols = "stat_label"), + by = c("variable", "stat_label") + ) |> + dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> + dplyr::mutate(context = "categorical") |> + cards::tidy_ard_column_order() %>% + {structure(., class = c("card", class(.)))} # styler: off +} + +.svytable_rates <- function(data, variables, by, denominator, deff) { + lapply( + variables, + \(variable) { + case_switch( + # first chunk with by variable specified + !is_empty(by) && denominator == "column" ~ + .one_svytable_rates_by_column(data, variable, by, deff), + !is_empty(by) && denominator == "row" ~ + .one_svytable_rates_by_row(data, variable, by, deff), + !is_empty(by) && denominator == "cell" ~ + .one_svytable_rates_by_cell(data, variable, by, deff), + # this chunk without a by variable + denominator %in% c("column", "cell") ~ + .one_svytable_rates_no_by_column_and_cell(data, variable, deff), + denominator == "row" ~ + .one_svytable_rates_no_by_row(data, variable, deff) + ) + } + ) |> + dplyr::bind_rows() +} + +.one_svytable_rates_no_by_row <- function(data, variable, deff) { + dplyr::tibble( + variable = .env$variable, + variable_level = unique(data$variables[[variable]]) |> sort(), + p = 1, + p.std.error = 0, + deff = NaN + ) +} + +.one_svytable_rates_no_by_column_and_cell <- function(data, variable, deff) { + browser() + survey::svymean(reformulate2(variable), design = data, na.rm = TRUE, deff = deff) |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::mutate( + variable_level = stringr::str_remove(.data$var_level, pattern = paste0("^", .env$variable)), + variable = .env$variable + ) |> + dplyr::select("variable", "variable_level", p = "mean", p.std.error = "SE", any_of("deff")) +} + +.one_svytable_rates_by_cell <- function(data, variable, by, deff) { + browser() + + df_interaction_id <- + .df_all_combos(data, variable, by) |> + dplyr::mutate( + var_level = + glue::glue("interaction({.env$by}, {.env$variable}){.data$group1_level}.{.data$variable_level}") + ) + + survey::svymean( + x = inject(~interaction(!!sym(bt(by)), !!sym(bt(variable)))), + design = data, + na.rm = TRUE, + deff = deff + ) |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::left_join(df_interaction_id, by = "var_level") |> + dplyr::select( + cards::all_ard_groups(), cards::all_ard_variables(), + p = "mean", p.std.error = "SE", any_of("deff") + ) +} + +.one_svytable_rates_by_row <- function(data, variable, by, deff) { + survey::svyby( + formula = reformulate2(by), + by = reformulate2(variable), + design = data, + FUN = survey::svymean, + na.rm = TRUE, + deff = deff + ) |> + dplyr::as_tibble() |> + tidyr::pivot_longer(-all_of(variable)) |> + dplyr::mutate( + stat = + dplyr::case_when( + startsWith(.data$name, paste0("se.", by)) | startsWith(.data$name, paste0("se.`", by, "`")) ~ "p.std.error", + startsWith(.data$name, paste0("DEff.", by)) | startsWith(.data$name, paste0("DEff.`", by, "`")) ~ "deff", + TRUE ~ "p" + ), + name = + str_remove_all(.data$name, "se\\.") %>% + stringr::str_remove_all("DEff\\.") %>% + str_remove_all(by) %>% + str_remove_all("`") + ) |> + tidyr::pivot_wider(names_from = "stat", values_from = "value") |> + set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |> + dplyr::mutate( + group1 = .env$by, + variable = .env$variable, + across(c("group1_level", "variable_level"), as.character) + ) +} + +.one_svytable_rates_by_column <- function(data, variable, by, deff) { + survey::svyby( + formula = reformulate2(variable), + by = reformulate2(by), + design = data, + FUN = survey::svymean, + na.rm = TRUE, + deff = deff + ) |> + dplyr::as_tibble() |> + tidyr::pivot_longer(-all_of(by)) |> + dplyr::mutate( + stat = + dplyr::case_when( + startsWith(.data$name, paste0("se.", variable)) | startsWith(.data$name, paste0("se.`", variable, "`")) ~ "p.std.error", + startsWith(.data$name, paste0("DEff.", variable)) | startsWith(.data$name, paste0("DEff.`", variable, "`")) ~ "deff", + TRUE ~ "p" + ), + name = + str_remove_all(.data$name, "se\\.") %>% + str_remove_all("DEff\\.") %>% + str_remove_all(variable) %>% + str_remove_all("`") + ) |> + tidyr::pivot_wider(names_from = "stat", values_from = "value") |> + set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |> + dplyr::mutate( + group1 = .env$by, + variable = .env$variable, + across(c("group1_level", "variable_level"), as.character) + ) +} + +.svytable_counts <- function(data, variables, by, denominator) { + df_counts <- lapply( variables, \(variable) { - survey::svytable(formula = reformulate2(c(by, variable))) + # perform weighted tabulation + df_count <- + survey::svytable(formula = reformulate2(c(by, variable)), design = data) |> + dplyr::as_tibble() + if (is_empty(by)) { + names(df_count) <- c("variable_level", "n") + df_count$variable <- variable + } + else { + names(df_count) <- c("group1_level", "variable_level", "n") + df_count$variable <- variable + df_count$group1 <- by + } + + # adding unobserved levels + .df_all_combos(data, variable, by) %>% + dplyr::left_join( + df_count, + by = names(.) + ) } + ) |> + dplyr::bind_rows() + + # add big N and p, then return data frame of results + switch( + denominator, + "column" = + df_counts |> + dplyr::mutate( + .by = any_of("group1_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "row" = + df_counts |> + dplyr::mutate( + .by = any_of("variable_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "cell" = + df_counts |> + dplyr::mutate( + .by = any_of(c("group1_level", "variable_level")), + N = sum(.data$n), + p = .data$n / .data$N + ) + ) +} + +.df_all_combos <- function(data, variable, by) { + df <- cards::nest_for_ard( + data = data$variables, + by = c(by, variable), + list_columns = FALSE, + include_data = FALSE + ) + + # renaming with variable colnames + if (!is_empty(by)) { + df <- dplyr::rename(df, variable = "group2", variable_level = "group2_level") + } + else { + df <- dplyr::rename(df, variable = "group1", variable_level = "group1_level") + } + + # convert levels to character for merging later + df |> + dplyr::mutate( + across( + c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), + as.character + ) ) +} + +case_switch <- function(..., .default = NULL) { + dots <- dots_list(...) + for (f in dots) { + if (isTRUE(eval(f_lhs(f), envir = attr(f, ".Environment")))) { + return(eval(f_rhs(f), envir = attr(f, ".Environment"))) + } + } + + return(.default) } + diff --git a/R/ard_continuous.survey.design.R b/R/ard_continuous.survey.design.R index e96def3b..e6b53380 100644 --- a/R/ard_continuous.survey.design.R +++ b/R/ard_continuous.survey.design.R @@ -141,7 +141,7 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, # add class and return ARD object -------------------------------------------- df_stats |> - dplyr::mutate(context = "survey_svycontinuous") |> + dplyr::mutate(context = "continuous") |> cards::tidy_ard_column_order() %>% {structure(., class = c("card", class(.)))} # styler: off } diff --git a/R/import-standalone-tibble.R b/R/import-standalone-tibble.R new file mode 100644 index 00000000..f4c7b001 --- /dev/null +++ b/R/import-standalone-tibble.R @@ -0,0 +1,48 @@ +# Standalone file: do not edit by hand +# Source: +# ---------------------------------------------------------------------- +# +# --- +# file: standalone-tibble.R +# last-updated: 2024-05-07 +# license: https://unlicense.org +# imports: [dplyr] +# --- +# +# This file provides a minimal shim to provide a tibble-like API on top of +# base R functions. They are not drop-in replacements but allow a similar style +# of programming. +# +# ## Changelog +# +# nocov start +# styler: off + +deframe <- function(x) { + if (ncol(x) == 1L) return(x[[1]]) + x[[2]] |> stats::setNames(x[[1]]) +} + +enframe <- function(x, name = "name", value = "value") { + if (!is.null(names(x))) { + lst <- list(names(x), unname(x)) |> stats::setNames(c(name, value)) + } + else { + lst <- list(seq_along(x), unname(x)) |> stats::setNames(c(name, value)) + } + dplyr::tibble(!!!lst) +} + +remove_rownames <- function(.data) { + rownames(.data) <- NULL + .data +} + +rownames_to_column <- function(.data, var = "rowname") { + .data[[var]] <- rownames(.data) + + dplyr::relocate(.data, dplyr::all_of(var), .before = 1L) +} + +# nocov end +# styler: on From 76cef0ed19bd9140775e17c7504254a75ec1cf10 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 10:41:50 -0700 Subject: [PATCH 03/23] Update ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 116 +++++++++++++++++++++++++++--- 1 file changed, 107 insertions(+), 9 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index d51d5577..0520bb71 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -72,14 +72,12 @@ ard_categorical.survey.design <- function(data, # calculate counts ----------------------------------------------------------- # this tabulation accounts for unobserved combinations - browser() svytable_counts <- .svytable_counts(data, variables, by, denominator) # calculate rates along with SE and DEFF ------------------------------------- svytable_rates <- .svytable_rates(data, variables, by, denominator, deff) # convert results into a proper ARD object ----------------------------------- - browser() cards <- svytable_counts |> dplyr::left_join( @@ -96,11 +94,22 @@ ard_categorical.survey.design <- function(data, dplyr::inner_join( statistic |> enframe("variable", "stat_name") |> tidyr::unnest(cols = "stat_name"), by = c("variable", "stat_name") + ) + + # final processing of fmt_fn ------------------------------------------------- + cards <- cards |> + .process_nested_list_as_df( + arg = fmt_fn, + new_column = "fmt_fn" ) |> - # merge in statistic labels - dplyr::left_join( - stat_label |> enframe("variable", "stat_label") |> tidyr::unnest(cols = "stat_label"), - by = c("variable", "stat_label") + .default_svy_cat_fmt_fn() + + # merge in statistic labels + cards <- cards |> + .process_nested_list_as_df( + arg = stat_label, + new_column = "stat_label", + unlist = TRUE ) |> dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> dplyr::mutate(context = "categorical") |> @@ -142,7 +151,6 @@ ard_categorical.survey.design <- function(data, } .one_svytable_rates_no_by_column_and_cell <- function(data, variable, deff) { - browser() survey::svymean(reformulate2(variable), design = data, na.rm = TRUE, deff = deff) |> dplyr::as_tibble(rownames = "var_level") |> dplyr::mutate( @@ -153,8 +161,6 @@ ard_categorical.survey.design <- function(data, } .one_svytable_rates_by_cell <- function(data, variable, by, deff) { - browser() - df_interaction_id <- .df_all_combos(data, variable, by) |> dplyr::mutate( @@ -336,3 +342,95 @@ case_switch <- function(..., .default = NULL) { return(.default) } +.default_svy_cat_fmt_fn <- function(x) { + x |> + dplyr::mutate( + fmt_fn = + pmap( + list(.data$stat_name, .data$stat, .data$fmt_fn), + function(stat_name, stat, fmt_fn) { + if (!is_empty(fmt_fn)) { + return(fmt_fn) + } + if (stat_name %in% c("p", "p_miss", "p_nonmiss")) { + return(cards::label_cards(digits = 1, scale = 100)) + } + if (stat_name %in% c("n", "N", "N_miss", "N_nonmiss", "N_obs")) { + return(cards::label_cards(digits = 0)) + } + if (is.integer(stat)) { + return(0L) + } + if (is.numeric(stat)) { + return(1L) + } + return(as.character) + } + ) + ) +} + +#' Convert Nested Lists to Column +#' +#' Some arguments, such as `stat_label`, are passed as nested lists. This +#' function properly unnests these lists and adds them to the results data frame. +#' +#' @param x (`data.frame`)\cr +#' result data frame +#' @param arg (`list`)\cr +#' the nested list +#' @param new_column (`string`)\cr +#' new column name +#' @param unlist (`logical`)\cr +#' whether to fully unlist final results +#' +#' @return a data frame +#' @keywords internal +#' +#' @examples +#' ard <- ard_categorical(ADSL, by = "ARM", variables = "AGEGR1") +#' +#' cards:::.process_nested_list_as_df(ard, NULL, "new_col") +.process_nested_list_as_df <- function(x, arg, new_column, unlist = FALSE) { + # add fmt_fn column if not already present + if (!new_column %in% names(x)) { + x[[new_column]] <- list(NULL) + } + + # process argument if not NULL, and update new column + if (!is_empty(arg)) { + df_argument <- + imap( + arg, + function(enlst_arg, variable) { + lst_stat_names <- + x[c("variable", "stat_name")] |> + dplyr::filter(.data$variable %in% .env$variable) |> + unique() %>% + {stats::setNames(as.list(.[["stat_name"]]), .[["stat_name"]])} # styler: off + + cards::compute_formula_selector( + data = lst_stat_names, + x = enlst_arg + ) %>% + # styler: off + {dplyr::tibble( + variable = variable, + stat_name = names(.), + "{new_column}" := unname(.) + )} + # styler: on + } + ) |> + dplyr::bind_rows() + + x <- x |> dplyr::rows_update(df_argument, by = c("variable", "stat_name"), unmatched = "ignore") + } + + if (isTRUE(unlist)) { + x[[new_column]] <- lapply(x[[new_column]], function(x) x %||% NA) |> unlist() + } + + x +} + From 2994779cdabd7f4b494dfaf1aba46ab2049f13e0 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 10:55:10 -0700 Subject: [PATCH 04/23] updates --- NAMESPACE | 1 + R/ard_categorical.survey.design.R | 24 +++++++--- man/ard_categorical.survey.design.Rd | 65 ++++++++++++++++++++++++++++ man/dot-process_nested_list_as_df.Rd | 34 +++++++++++++++ 4 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 man/ard_categorical.survey.design.Rd create mode 100644 man/dot-process_nested_list_as_df.Rd diff --git a/NAMESPACE b/NAMESPACE index ca9bbf79..9c95ccf4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(ard_attributes,survey.design) +S3method(ard_categorical,survey.design) S3method(ard_continuous,survey.design) S3method(ard_regression,default) S3method(ard_stats_anova,anova) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 0520bb71..1b337f3d 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -1,7 +1,12 @@ #' ARD survey tabulation #' +#' @description #' Compute tabulations on survey-weighted data. #' +#' The counts and proportion (`"N"`, `"n"`, `"p"`) are calculated using `survey::svytable()`, +#' and the standard errors and design effect (`"p.std.error"`, `"deff"`) are +#' calculated using `survey::svymean()`. +#' #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr @@ -24,12 +29,14 @@ #' statistic labels, e.g. `everything() ~ list(mean = "Mean", sd = "SD")` or #' `everything() ~ list(mean ~ "Mean", sd ~ "SD")`. #' @inheritParams rlang::args_dots_empty - #' -#' @return +#' @return an ARD data frame of class 'card' #' @export #' -#' @examples +#' @examplesIf cardx:::is_pkg_installed("survey", reference_pkg = "cardx") +#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) +#' +#' ard_categorical(svy_titanic, variables = Class, by = Survived) ard_categorical.survey.design <- function(data, variables = everything(), by = NULL, @@ -104,14 +111,17 @@ ard_categorical.survey.design <- function(data, ) |> .default_svy_cat_fmt_fn() - # merge in statistic labels + # merge in statistic labels -------------------------------------------------- cards <- cards |> .process_nested_list_as_df( arg = stat_label, new_column = "stat_label", unlist = TRUE ) |> - dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) |> + dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) + + # return final object -------------------------------------------------------- + cards |> dplyr::mutate(context = "categorical") |> cards::tidy_ard_column_order() %>% {structure(., class = c("card", class(.)))} # styler: off @@ -154,7 +164,7 @@ ard_categorical.survey.design <- function(data, survey::svymean(reformulate2(variable), design = data, na.rm = TRUE, deff = deff) |> dplyr::as_tibble(rownames = "var_level") |> dplyr::mutate( - variable_level = stringr::str_remove(.data$var_level, pattern = paste0("^", .env$variable)), + variable_level = str_remove(.data$var_level, pattern = paste0("^", .env$variable)), variable = .env$variable ) |> dplyr::select("variable", "variable_level", p = "mean", p.std.error = "SE", any_of("deff")) @@ -202,7 +212,7 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - stringr::str_remove_all("DEff\\.") %>% + str_remove_all("DEff\\.") %>% str_remove_all(by) %>% str_remove_all("`") ) |> diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd new file mode 100644 index 00000000..e021d7c0 --- /dev/null +++ b/man/ard_categorical.survey.design.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_categorical.survey.design.R +\name{ard_categorical.survey.design} +\alias{ard_categorical.survey.design} +\title{ARD survey tabulation} +\usage{ +\method{ard_categorical}{survey.design}( + data, + variables = everything(), + by = NULL, + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = + "Design Effect"), + ... +) +} +\arguments{ +\item{data}{(\code{survey.design})\cr +a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries. Default is \code{everything()}.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +results are calculated for \strong{all combinations} of the column specified +and the variables. A single column may be specified.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a character vector of +statistic names to include. See default value for options.} + +\item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a named list of functions +(or the RHS of a formula), +e.g. \verb{list(mpg = list(mean = \\(x) round(x, digits = 2) |> as.character))}.} + +\item{stat_label}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, or a single formula where +the list element is either a named list or a list of formulas defining the +statistic labels, e.g. \code{everything() ~ list(mean = "Mean", sd = "SD")} or +\code{everything() ~ list(mean ~ "Mean", sd ~ "SD")}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +Compute tabulations on survey-weighted data. + +The counts and proportion (\code{"N"}, \code{"n"}, \code{"p"}) are calculated using \code{survey::svytable()}, +and the standard errors and design effect (\code{"p.std.error"}, \code{"deff"}) are +calculated using \code{survey::svymean()}. +} +\examples{ +\dontshow{if (cardx:::is_pkg_installed("survey", reference_pkg = "cardx")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + +ard_categorical(svy_titanic, variables = Class, by = Survived) +\dontshow{\}) # examplesIf} +} diff --git a/man/dot-process_nested_list_as_df.Rd b/man/dot-process_nested_list_as_df.Rd new file mode 100644 index 00000000..2a9b2d67 --- /dev/null +++ b/man/dot-process_nested_list_as_df.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_categorical.survey.design.R +\name{.process_nested_list_as_df} +\alias{.process_nested_list_as_df} +\title{Convert Nested Lists to Column} +\usage{ +.process_nested_list_as_df(x, arg, new_column, unlist = FALSE) +} +\arguments{ +\item{x}{(\code{data.frame})\cr +result data frame} + +\item{arg}{(\code{list})\cr +the nested list} + +\item{new_column}{(\code{string})\cr +new column name} + +\item{unlist}{(\code{logical})\cr +whether to fully unlist final results} +} +\value{ +a data frame +} +\description{ +Some arguments, such as \code{stat_label}, are passed as nested lists. This +function properly unnests these lists and adds them to the results data frame. +} +\examples{ +ard <- ard_categorical(ADSL, by = "ARM", variables = "AGEGR1") + +cards:::.process_nested_list_as_df(ard, NULL, "new_col") +} +\keyword{internal} From 6398b8a2ef0b07d910952e7991e14777f456f247 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 11:01:07 -0700 Subject: [PATCH 05/23] Update ard_continuous.survey.design.md --- .../_snaps/ard_continuous.survey.design.md | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/testthat/_snaps/ard_continuous.survey.design.md b/tests/testthat/_snaps/ard_continuous.survey.design.md index 34862192..dfa6ed90 100644 --- a/tests/testthat/_snaps/ard_continuous.survey.design.md +++ b/tests/testthat/_snaps/ard_continuous.survey.design.md @@ -6,16 +6,16 @@ {cards} data frame: 10 x 8 Output variable context stat_name stat_label stat fmt_fn - 1 api00 survey_s… mean Mean 644.169 1 - 2 api00 survey_s… median Median 652 1 - 3 api00 survey_s… min Minimum 411 1 - 4 api00 survey_s… max Maximum 905 1 - 5 api00 survey_s… sum Sum 3989985 1 - 6 api00 survey_s… var Variance 11182.82 1 - 7 api00 survey_s… sd Standard… 105.749 1 - 8 api00 survey_s… mean.std.error SE(Mean) 23.542 1 - 9 api00 survey_s… deff Design E… 9.346 1 - 10 api00 survey_s… p75 75% Perc… 719 1 + 1 api00 continuo… mean Mean 644.169 1 + 2 api00 continuo… median Median 652 1 + 3 api00 continuo… min Minimum 411 1 + 4 api00 continuo… max Maximum 905 1 + 5 api00 continuo… sum Sum 3989985 1 + 6 api00 continuo… var Variance 11182.82 1 + 7 api00 continuo… sd Standard… 105.749 1 + 8 api00 continuo… mean.std.error SE(Mean) 23.542 1 + 9 api00 continuo… deff Design E… 9.346 1 + 10 api00 continuo… p75 75% Perc… 719 1 Message i 2 more variables: warning, error @@ -28,10 +28,10 @@ {cards} data frame: 4 x 8 Output variable context stat_name stat_label stat fmt_fn - 1 api00 survey_s… mean Mean 644.169 2 - 2 api00 survey_s… median Median 652 xx.xx - 3 api00 survey_s… min Minimum 411 - 4 api00 survey_s… max Maximum 905 1 + 1 api00 continuo… mean Mean 644.169 2 + 2 api00 continuo… median Median 652 xx.xx + 3 api00 continuo… min Minimum 411 + 4 api00 continuo… max Maximum 905 1 Message i 2 more variables: warning, error @@ -45,10 +45,10 @@ {cards} data frame: 4 x 8 Output variable context stat_name stat_label stat fmt_fn - 1 api00 survey_s… mean MeAn 644.169 1 - 2 api00 survey_s… median MEDian 652 1 - 3 api00 survey_s… min MINimum 411 1 - 4 api00 survey_s… max Maximum 905 1 + 1 api00 continuo… mean MeAn 644.169 1 + 2 api00 continuo… median MEDian 652 1 + 3 api00 continuo… min MINimum 411 1 + 4 api00 continuo… max Maximum 905 1 Message i 2 more variables: warning, error From af2057d7964b79d1e84a0f7693b33f72b3dc104c Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 11:26:08 -0700 Subject: [PATCH 06/23] updates --- R/ard_categorical.survey.design.R | 5 ++++- inst/WORDLIST | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 1b337f3d..42f76800 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -14,6 +14,9 @@ #' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr #' results are calculated for **all combinations** of the column specified #' and the variables. A single column may be specified. +#' @param denominator (`string`)\cr +#' a string indicating the type proportions to calculate. Must be one of +#' `"column"` (the default), `"row"`, and `"cell"`. #' @param statistic ([`formula-list-selector`][syntax])\cr #' a named list, a list of formulas, #' or a single formula where the list element is a character vector of @@ -398,7 +401,7 @@ case_switch <- function(..., .default = NULL) { #' @keywords internal #' #' @examples -#' ard <- ard_categorical(ADSL, by = "ARM", variables = "AGEGR1") +#' ard <- ard_categorical(cards::ADSL, by = "ARM", variables = "AGEGR1") #' #' cards:::.process_nested_list_as_df(ard, NULL, "new_col") .process_nested_list_as_df <- function(x, arg, new_column, unlist = FALSE) { diff --git a/inst/WORDLIST b/inst/WORDLIST index 5d725809..54fc0715 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -37,6 +37,8 @@ quosures rlang's sd strat +unlist +unnests vif wald waldcc From bd964d1e548e6b23fd8663711e05a9a3c918475c Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 13:16:27 -0700 Subject: [PATCH 07/23] progress --- R/ard_categorical.survey.design.R | 18 ++++++++++++++++-- man/ard_categorical.survey.design.Rd | 6 +++++- man/dot-process_nested_list_as_df.Rd | 2 +- .../test-ard_categorical.survey.design.R | 15 +++++++++++++++ 4 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-ard_categorical.survey.design.R diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 42f76800..bf3929e2 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -1,4 +1,4 @@ -#' ARD survey tabulation +#' ARD Categorical Survey Statistics #' #' @description #' Compute tabulations on survey-weighted data. @@ -80,6 +80,19 @@ ard_categorical.survey.design <- function(data, ) denominator <- arg_match(denominator) + # check the missingness + walk( + variables, + \(.x) { + if (all(is.na(data$variables[[.x]])) && !inherits(.x, c("logical", "factor"))) { + cli::cli_abort( + c("Column {.val {.x}} is all missing and cannot be tabulated.", + i = "Only columns of class {.cls logical} and {.cls factor} can be tabulated when all values are missing.") + ) + } + } + ) + # calculate counts ----------------------------------------------------------- # this tabulation accounts for unobserved combinations svytable_counts <- .svytable_counts(data, variables, by, denominator) @@ -285,7 +298,8 @@ ard_categorical.survey.design <- function(data, dplyr::left_join( df_count, by = names(.) - ) + ) |> + tidyr::replace_na(list(n = 0)) # unobserved levels assigned zero count } ) |> dplyr::bind_rows() diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index e021d7c0..ef5d8fc9 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ard_categorical.survey.design.R \name{ard_categorical.survey.design} \alias{ard_categorical.survey.design} -\title{ARD survey tabulation} +\title{ARD Categorical Survey Statistics} \usage{ \method{ard_categorical}{survey.design}( data, @@ -32,6 +32,10 @@ a named list, a list of formulas, or a single formula where the list element is a character vector of statistic names to include. See default value for options.} +\item{denominator}{(\code{string})\cr +a string indicating the type proportions to calculate. Must be one of +\code{"column"} (the default), \code{"row"}, and \code{"cell"}.} + \item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr a named list, a list of formulas, or a single formula where the list element is a named list of functions diff --git a/man/dot-process_nested_list_as_df.Rd b/man/dot-process_nested_list_as_df.Rd index 2a9b2d67..6cda6106 100644 --- a/man/dot-process_nested_list_as_df.Rd +++ b/man/dot-process_nested_list_as_df.Rd @@ -27,7 +27,7 @@ Some arguments, such as \code{stat_label}, are passed as nested lists. This function properly unnests these lists and adds them to the results data frame. } \examples{ -ard <- ard_categorical(ADSL, by = "ARM", variables = "AGEGR1") +ard <- ard_categorical(cards::ADSL, by = "ARM", variables = "AGEGR1") cards:::.process_nested_list_as_df(ard, NULL, "new_col") } diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R new file mode 100644 index 00000000..8154fe30 --- /dev/null +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -0,0 +1,15 @@ +# Items to test +# - First, everything needs to be tested independently for denominator='column'|'row'|'cell' AND by whether there is a by variable +# This is because these 6 scenarios are calculated entirely separately +# - What happens with a variable that is all NA? How does that behavior compare to `ard_categorical()` for data frames +# The function _should_ work if the underlying type is factor or logical +# - Do we get results for unobserved factor levels in the `by` and `variable` variables? +# - Do we get results for unobserved logical levels in the `by` and `variable` variables, e.g. if there are only TRUE, we should have FALSE rows too? +# - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. Then all the results should equal the unweighted summariess +# dplyr::tibble(y = rep(FALSE, 15), x = rep(TRUE, 15)) |> +# survey::svydesign(ids = ~1, data = _, weights = ~1) |> +# ard_categorical(by = y) + +test_that("multiplication works", { + expect_equal(2 * 2, 4) +}) From 715ea63cdf39fcf305772a6a5e9146fb3b510618 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 13:46:58 -0700 Subject: [PATCH 08/23] update --- R/ard_categorical.survey.design.R | 8 ++++++-- man/dot-process_nested_list_as_df.Rd | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index bf3929e2..796e1da1 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -138,7 +138,11 @@ ard_categorical.survey.design <- function(data, # return final object -------------------------------------------------------- cards |> - dplyr::mutate(context = "categorical") |> + dplyr::mutate( + context = "categorical", + warning = list(NULL), + error = list(NULL), + ) |> cards::tidy_ard_column_order() %>% {structure(., class = c("card", class(.)))} # styler: off } @@ -417,7 +421,7 @@ case_switch <- function(..., .default = NULL) { #' @examples #' ard <- ard_categorical(cards::ADSL, by = "ARM", variables = "AGEGR1") #' -#' cards:::.process_nested_list_as_df(ard, NULL, "new_col") +#' cardx:::.process_nested_list_as_df(ard, NULL, "new_col") .process_nested_list_as_df <- function(x, arg, new_column, unlist = FALSE) { # add fmt_fn column if not already present if (!new_column %in% names(x)) { diff --git a/man/dot-process_nested_list_as_df.Rd b/man/dot-process_nested_list_as_df.Rd index 6cda6106..0555ecaa 100644 --- a/man/dot-process_nested_list_as_df.Rd +++ b/man/dot-process_nested_list_as_df.Rd @@ -29,6 +29,6 @@ function properly unnests these lists and adds them to the results data frame. \examples{ ard <- ard_categorical(cards::ADSL, by = "ARM", variables = "AGEGR1") -cards:::.process_nested_list_as_df(ard, NULL, "new_col") +cardx:::.process_nested_list_as_df(ard, NULL, "new_col") } \keyword{internal} From bc54a4a145b34319ebfbcaf1a09b97b867645cb6 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 13:56:46 -0700 Subject: [PATCH 09/23] Update ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 59 +++++++++++++++---------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 796e1da1..a96aa878 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -75,7 +75,7 @@ ard_categorical.survey.design <- function(data, x = statistic, predicate = \(x) all(x %in% accepted_svy_stats), error_msg = c("Error in the values of the {.arg statistic} argument.", - i = "Values must be in {.val {accepted_svy_stats}}" + i = "Values must be in {.val {accepted_svy_stats}}" ) ) denominator <- arg_match(denominator) @@ -87,7 +87,8 @@ ard_categorical.survey.design <- function(data, if (all(is.na(data$variables[[.x]])) && !inherits(.x, c("logical", "factor"))) { cli::cli_abort( c("Column {.val {.x}} is all missing and cannot be tabulated.", - i = "Only columns of class {.cls logical} and {.cls factor} can be tabulated when all values are missing.") + i = "Only columns of class {.cls logical} and {.cls factor} can be tabulated when all values are missing." + ) ) } } @@ -199,7 +200,7 @@ ard_categorical.survey.design <- function(data, ) survey::svymean( - x = inject(~interaction(!!sym(bt(by)), !!sym(bt(variable)))), + x = inject(~ interaction(!!sym(bt(by)), !!sym(bt(variable)))), design = data, na.rm = TRUE, deff = deff @@ -232,9 +233,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(by) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(by) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |> @@ -265,9 +266,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(variable) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(variable) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |> @@ -290,8 +291,7 @@ ard_categorical.survey.design <- function(data, if (is_empty(by)) { names(df_count) <- c("variable_level", "n") df_count$variable <- variable - } - else { + } else { names(df_count) <- c("group1_level", "variable_level", "n") df_count$variable <- variable df_count$group1 <- by @@ -309,29 +309,28 @@ ard_categorical.survey.design <- function(data, dplyr::bind_rows() # add big N and p, then return data frame of results - switch( - denominator, + switch(denominator, "column" = df_counts |> - dplyr::mutate( - .by = any_of("group1_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), + dplyr::mutate( + .by = any_of("group1_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), "row" = df_counts |> - dplyr::mutate( - .by = any_of("variable_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), + dplyr::mutate( + .by = any_of("variable_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), "cell" = df_counts |> - dplyr::mutate( - .by = any_of(c("group1_level", "variable_level")), - N = sum(.data$n), - p = .data$n / .data$N - ) + dplyr::mutate( + .by = any_of(c("group1_level", "variable_level")), + N = sum(.data$n), + p = .data$n / .data$N + ) ) } @@ -346,8 +345,7 @@ ard_categorical.survey.design <- function(data, # renaming with variable colnames if (!is_empty(by)) { df <- dplyr::rename(df, variable = "group2", variable_level = "group2_level") - } - else { + } else { df <- dplyr::rename(df, variable = "group1", variable_level = "group1_level") } @@ -464,4 +462,3 @@ case_switch <- function(..., .default = NULL) { x } - From ebc661068fb3a071da37a32f2c41d92d86e2e96c Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 14:08:33 -0700 Subject: [PATCH 10/23] progress --- R/ard_categorical.survey.design.R | 23 +++++++++++++++++------ man/ard_categorical.survey.design.Rd | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index a96aa878..dfab6467 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -39,7 +39,7 @@ #' @examplesIf cardx:::is_pkg_installed("survey", reference_pkg = "cardx") #' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) #' -#' ard_categorical(svy_titanic, variables = Class, by = Survived) +#' ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived) ard_categorical.survey.design <- function(data, variables = everything(), by = NULL, @@ -60,6 +60,9 @@ ard_categorical.survey.design <- function(data, variables <- setdiff(variables, by) check_scalar(by, allow_empty = TRUE) + # if no variables selected, return empty data frame + if (is_empty(variables)) return(dplyr::tibble()) # styler: off + cards::process_formula_selectors( data = data$variables[variables], statistic = statistic, @@ -98,16 +101,18 @@ ard_categorical.survey.design <- function(data, # this tabulation accounts for unobserved combinations svytable_counts <- .svytable_counts(data, variables, by, denominator) - # calculate rates along with SE and DEFF ------------------------------------- - svytable_rates <- .svytable_rates(data, variables, by, denominator, deff) + # calculate rate SE and DEFF ------------------------------------------------- + svytable_rates <- .svytable_rate_stats(data, variables, by, denominator, deff) # convert results into a proper ARD object ----------------------------------- cards <- svytable_counts |> + # merge in the SE(p) and DEFF dplyr::left_join( svytable_rates |> dplyr::select(-"p"), by = intersect(c("group1", "group1_level", "variable", "variable_level"), names(svytable_counts)) ) |> + # make columns list columns dplyr::mutate(across(-any_of(c("group1", "variable")), as.list)) |> tidyr::pivot_longer( cols = -c(cards::all_ard_groups(), cards::all_ard_variables()), @@ -148,21 +153,27 @@ ard_categorical.survey.design <- function(data, {structure(., class = c("card", class(.)))} # styler: off } -.svytable_rates <- function(data, variables, by, denominator, deff) { + +# this function returns a tibble with the SE(p) and DEFF +.svytable_rate_stats <- function(data, variables, by, denominator, deff) { lapply( variables, \(variable) { + # each combination of denominator and whether there is a by variable is handled separately case_switch( - # first chunk with by variable specified + # by variable and column percentages !is_empty(by) && denominator == "column" ~ .one_svytable_rates_by_column(data, variable, by, deff), + # by variable and row percentages !is_empty(by) && denominator == "row" ~ .one_svytable_rates_by_row(data, variable, by, deff), + # by variable and cell percentages !is_empty(by) && denominator == "cell" ~ .one_svytable_rates_by_cell(data, variable, by, deff), - # this chunk without a by variable + # no by variable and column/cell percentages denominator %in% c("column", "cell") ~ .one_svytable_rates_no_by_column_and_cell(data, variable, deff), + # no by variable and row percentages denominator == "row" ~ .one_svytable_rates_no_by_row(data, variable, deff) ) diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index ef5d8fc9..4d05d0e5 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -64,6 +64,6 @@ calculated using \code{survey::svymean()}. \dontshow{if (cardx:::is_pkg_installed("survey", reference_pkg = "cardx")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) -ard_categorical(svy_titanic, variables = Class, by = Survived) +ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived) \dontshow{\}) # examplesIf} } From bc7c3c3c3c2bd75f07f637f41b6d384a38529949 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 13 Jun 2024 15:04:41 -0700 Subject: [PATCH 11/23] more updates --- NEWS.md | 1 + R/ard_categorical.survey.design.R | 28 +++++++++++++++++++++++----- _pkgdown.yml | 1 + man/ard_categorical.survey.design.Rd | 7 +++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 22917183..de7f9a82 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,7 @@ ard_moodtest() -> ard_stats_mood_test() - `ard_smd_smd()` for calculating standardized mean differences using `smd::smd()`. (#4) - `ard_survival_survfit()` for survival analyses using `survival::survfit()`. (#43) - `ard_continuous.survey.design()` for calculating univariate summary statistics from weighted/survey data using many functions from the {survey} package. (#68) + - `ard_categorical.survey.design()` for tabulating summary statistics from weighted/survey data using many functions from the {survey} package. (#140) - `ard_attributes.survey.design()` for summarizing labels and attributes from weighted/survey data using many functions from the {survey} package. - `ard_survey_svychisq()` for weighted/survey chi-squared test using `survey::svychisq()`. (#72) - `ard_survey_svyttest()` for weighted/survey t-tests using `survey::svyttest()`. (#70) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index dfab6467..d8f02cda 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -7,6 +7,8 @@ #' and the standard errors and design effect (`"p.std.error"`, `"deff"`) are #' calculated using `survey::svymean()`. #' +#' The unweighted statistics are calculated with `cards::ard_categorical.data.frame()`. +#' #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr @@ -43,10 +45,11 @@ ard_categorical.survey.design <- function(data, variables = everything(), by = NULL, - statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff"), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), fmt_fn = NULL, - stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect"), + stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect", + "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), ...) { set_cli_abort_call() deff <- TRUE # we may update in the future to make this an argument for users @@ -73,7 +76,7 @@ ard_categorical.survey.design <- function(data, data = data$variables[variables], statistic = formals(asNamespace("cardx")[["ard_categorical.survey.design"]])[["statistic"]] |> eval(), ) - accepted_svy_stats <- c("n", "N", "p", "p.std.error", "deff") + accepted_svy_stats <- c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted") cards::check_list_elements( x = statistic, predicate = \(x) all(x %in% accepted_svy_stats), @@ -125,6 +128,21 @@ ard_categorical.survey.design <- function(data, by = c("variable", "stat_name") ) + # add unweighted statistics -------------------------------------------------- + cards_unweighted <- + ard_categorical( + data = data[["variables"]], + variables = all_of(variables), + by = any_of(by), + denominator = denominator + ) |> + dplyr::select(-c("stat_label", "fmt_fn", "warning", "error")) |> + dplyr::mutate( + stat_name = + dplyr::case_match(.data$stat_name, "n" ~ "n_unweighted", "N" ~ "N_unweighted", "p" ~ "p_unweighted") + ) + cards <- cards |> dplyr::bind_rows(cards_unweighted) # styler: off + # final processing of fmt_fn ------------------------------------------------- cards <- cards |> .process_nested_list_as_df( @@ -392,10 +410,10 @@ case_switch <- function(..., .default = NULL) { if (!is_empty(fmt_fn)) { return(fmt_fn) } - if (stat_name %in% c("p", "p_miss", "p_nonmiss")) { + if (stat_name %in% c("p", "p_miss", "p_nonmiss", "p_unweighted")) { return(cards::label_cards(digits = 1, scale = 100)) } - if (stat_name %in% c("n", "N", "N_miss", "N_nonmiss", "N_obs")) { + if (stat_name %in% c("n", "N", "N_miss", "N_nonmiss", "N_obs", "n_unweighted", "N_unweighted")) { return(cards::label_cards(digits = 0)) } if (is.integer(stat)) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 7300e850..dd1e1700 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -63,6 +63,7 @@ reference: - subtitle: "{survey} package" - contents: - ard_continuous.survey.design + - ard_categorical.survey.design - ard_attributes.survey.design - ard_survey_svychisq - ard_survey_svyranktest diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index 4d05d0e5..51c65374 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -8,11 +8,12 @@ data, variables = everything(), by = NULL, - statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff"), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", + "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), fmt_fn = NULL, stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = - "Design Effect"), + "Design Effect", n = "Unweighted n", N = "Unweighted N", p = "Unweighted \%"), ... ) } @@ -59,6 +60,8 @@ Compute tabulations on survey-weighted data. The counts and proportion (\code{"N"}, \code{"n"}, \code{"p"}) are calculated using \code{survey::svytable()}, and the standard errors and design effect (\code{"p.std.error"}, \code{"deff"}) are calculated using \code{survey::svymean()}. + +The unweighted statistics are calculated with \code{cards::ard_categorical.data.frame()}. } \examples{ \dontshow{if (cardx:::is_pkg_installed("survey", reference_pkg = "cardx")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From 9c5e3e35db1c6e8cb8c5fa8be237a6db4e55fe69 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 08:38:28 -0700 Subject: [PATCH 12/23] in progress --- NAMESPACE | 2 + R/ard_categorical.survey.design.R | 137 ++++++++++------ R/ard_continuous.survey.design.R | 4 +- R/ard_dichotomous.survey.design.R | 146 ++++++++++++++++++ R/ard_missing.survey.design.R | 114 ++++++++++++++ man/ard_categorical.survey.design.Rd | 4 +- man/ard_continuous.survey.design.Rd | 2 +- man/ard_dichotomous.survey.design.Rd | 69 +++++++++ man/ard_missing.survey.design.Rd | 61 ++++++++ man/dot-check_dichotomous_value.Rd | 25 +++ man/dot-unique_and_sorted.Rd | 29 ++++ .../test-ard_categorical.survey.design.R | 3 + 12 files changed, 541 insertions(+), 55 deletions(-) create mode 100644 R/ard_dichotomous.survey.design.R create mode 100644 R/ard_missing.survey.design.R create mode 100644 man/ard_dichotomous.survey.design.Rd create mode 100644 man/ard_missing.survey.design.Rd create mode 100644 man/dot-check_dichotomous_value.Rd create mode 100644 man/dot-unique_and_sorted.Rd diff --git a/NAMESPACE b/NAMESPACE index 9c95ccf4..3675bcc7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ S3method(ard_attributes,survey.design) S3method(ard_categorical,survey.design) S3method(ard_continuous,survey.design) +S3method(ard_dichotomous,survey.design) +S3method(ard_missing,survey.design) S3method(ard_regression,default) S3method(ard_stats_anova,anova) S3method(ard_stats_anova,data.frame) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index d8f02cda..108b6537 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -12,7 +12,7 @@ #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr -#' columns to include in summaries. Default is `everything()`. +#' columns to include in summaries. #' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr #' results are calculated for **all combinations** of the column specified #' and the variables. A single column may be specified. @@ -43,7 +43,7 @@ #' #' ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived) ard_categorical.survey.design <- function(data, - variables = everything(), + variables, by = NULL, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), @@ -52,9 +52,12 @@ ard_categorical.survey.design <- function(data, "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), ...) { set_cli_abort_call() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + check_dots_empty() deff <- TRUE # we may update in the future to make this an argument for users # process arguments ---------------------------------------------------------- + check_not_missing(variables) cards::process_selectors( data = data$variables, variables = {{ variables }}, @@ -81,7 +84,7 @@ ard_categorical.survey.design <- function(data, x = statistic, predicate = \(x) all(x %in% accepted_svy_stats), error_msg = c("Error in the values of the {.arg statistic} argument.", - i = "Values must be in {.val {accepted_svy_stats}}" + i = "Values must be in {.val {accepted_svy_stats}}" ) ) denominator <- arg_match(denominator) @@ -136,6 +139,13 @@ ard_categorical.survey.design <- function(data, by = any_of(by), denominator = denominator ) |> + # all the survey levels are reported as character, so we do the same here. + dplyr::mutate( + across( + c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), + ~map(.x, as.character) + ) + ) |> dplyr::select(-c("stat_label", "fmt_fn", "warning", "error")) |> dplyr::mutate( stat_name = @@ -166,9 +176,10 @@ ard_categorical.survey.design <- function(data, context = "categorical", warning = list(NULL), error = list(NULL), - ) |> + ) |> cards::tidy_ard_column_order() %>% - {structure(., class = c("card", class(.)))} # styler: off + {structure(., class = c("card", class(.)))} |> # styler: off + cards::tidy_ard_row_order() } @@ -177,24 +188,52 @@ ard_categorical.survey.design <- function(data, lapply( variables, \(variable) { + # convert the variable to a factor if not already one or a lgl, so we get the correct rate stats from svymean + if (!inherits(data$variables[[variable]], c("factor", "logical"))) { + data$variables[[variable]] <- factor(data$variables[[variable]]) + } + + # there are issues with svymean() when a variable has only one level. adding a second as needed + variable_lvls <- .unique_values_sort(data$variables, variable) + if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off + if (length(variable_lvls) == 1L) { + data$variables[[variable]] <- + factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) + } + if (!is_empty(by) && length(by_lvls) == 1L) { + data$variables[[by]] <- + factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) + } + # each combination of denominator and whether there is a by variable is handled separately - case_switch( - # by variable and column percentages - !is_empty(by) && denominator == "column" ~ - .one_svytable_rates_by_column(data, variable, by, deff), - # by variable and row percentages - !is_empty(by) && denominator == "row" ~ - .one_svytable_rates_by_row(data, variable, by, deff), - # by variable and cell percentages - !is_empty(by) && denominator == "cell" ~ - .one_svytable_rates_by_cell(data, variable, by, deff), - # no by variable and column/cell percentages - denominator %in% c("column", "cell") ~ - .one_svytable_rates_no_by_column_and_cell(data, variable, deff), - # no by variable and row percentages - denominator == "row" ~ - .one_svytable_rates_no_by_row(data, variable, deff) - ) + result <- + case_switch( + # by variable and column percentages + !is_empty(by) && denominator == "column" ~ + .one_svytable_rates_by_column(data, variable, by, deff), + # by variable and row percentages + !is_empty(by) && denominator == "row" ~ + .one_svytable_rates_by_row(data, variable, by, deff), + # by variable and cell percentages + !is_empty(by) && denominator == "cell" ~ + .one_svytable_rates_by_cell(data, variable, by, deff), + # no by variable and column/cell percentages + denominator %in% c("column", "cell") ~ + .one_svytable_rates_no_by_column_and_cell(data, variable, deff), + # no by variable and row percentages + denominator == "row" ~ + .one_svytable_rates_no_by_row(data, variable, deff) + ) + + # if a level was added, remove the fake level + if (length(variable_lvls) == 1L) { + result <- result |> dplyr::filter(.data$variable_level %in% variable_lvls) + } + if (!is_empty(by) && length(by_lvls) == 1L) { + result <- result |> dplyr::filter(.data$group1_level %in% by_lvls) + } + + result } ) |> dplyr::bind_rows() @@ -262,9 +301,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(by) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(by) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |> @@ -295,9 +334,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(variable) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(variable) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |> @@ -339,27 +378,27 @@ ard_categorical.survey.design <- function(data, # add big N and p, then return data frame of results switch(denominator, - "column" = - df_counts |> - dplyr::mutate( - .by = any_of("group1_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "row" = - df_counts |> - dplyr::mutate( - .by = any_of("variable_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "cell" = - df_counts |> - dplyr::mutate( - .by = any_of(c("group1_level", "variable_level")), - N = sum(.data$n), - p = .data$n / .data$N - ) + "column" = + df_counts |> + dplyr::mutate( + .by = any_of("group1_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "row" = + df_counts |> + dplyr::mutate( + .by = any_of("variable_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "cell" = + df_counts |> + dplyr::mutate( + .by = any_of(c("group1_level", "variable_level")), + N = sum(.data$n), + p = .data$n / .data$N + ) ) } diff --git a/R/ard_continuous.survey.design.R b/R/ard_continuous.survey.design.R index e6b53380..5e8d1a31 100644 --- a/R/ard_continuous.survey.design.R +++ b/R/ard_continuous.survey.design.R @@ -5,7 +5,7 @@ #' @param data (`survey.design`)\cr #' a design object often created with [`survey::svydesign()`]. #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr -#' columns to include in summaries. Default is `everything()`. +#' columns to include in summaries. #' @param by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr #' results are calculated for **all combinations** of the columns specified, #' including unobserved combinations and unobserved factor levels. @@ -56,8 +56,6 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, check_pkg_installed(pkg = "survey", reference_pkg = "cardx") # check inputs --------------------------------------------------------------- - check_not_missing(data) - check_class(data, cls = "survey.design") check_not_missing(variables) # process inputs ------------------------------------------------------------- diff --git a/R/ard_dichotomous.survey.design.R b/R/ard_dichotomous.survey.design.R new file mode 100644 index 00000000..53cc8bd6 --- /dev/null +++ b/R/ard_dichotomous.survey.design.R @@ -0,0 +1,146 @@ +#' ARD Dichotomous Survey Statistics +#' +#' Compute Analysis Results Data (ARD) for dichotomous summary statistics. +#' +#' @inheritParams ard_categorical.survey.design +#' @param value (named `list`)\cr +#' named list of dichotomous values to tabulate. +#' Default is `cards::maximum_variable_value(data$variables)`, +#' which returns the largest/last value after a sort. +#' +#' @return an ARD data frame of class 'card' +#' @export +#' +#' @examples +#' survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |> +#' ard_dichotomous(by = vs, variables = c(cyl, am), value = list(cyl = 4)) +ard_dichotomous.survey.design <- function(data, + variables, + by = NULL, + value = cards::maximum_variable_value(data$variables[variables]), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect", + "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), + ...) { + set_cli_abort_call() + check_dots_empty() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + + # check inputs --------------------------------------------------------------- + check_not_missing(variables) + + # process inputs ------------------------------------------------------------- + cards::process_selectors(data$variables, variables = {{ variables }}) + cards::process_formula_selectors(data$variables[variables], value = value) + cards::fill_formula_selectors( + data$variables[variables], + value = formals(asNamespace("cardx")[["ard_dichotomous.survey.design"]])[["value"]] |> eval() + ) + .check_dichotomous_value(data$variables, value) + + # return empty tibble if no variables selected ------------------------------- + if (is_empty(variables)) { + return(dplyr::tibble()) + } + + # calculate summary statistics ----------------------------------------------- + ard_categorical( + data = data, + variables = all_of(variables), + by = {{ by }}, + statistic = statistic, + denominator = denominator, + fmt_fn = fmt_fn, + stat_label = stat_label + ) |> + dplyr::filter( + pmap( + list(.data$variable, .data$variable_level), + function(variable, variable_level) { + variable_level %in% .env$value[[variable]] + } + ) |> + unlist() + ) |> + dplyr::mutate(context = "dichotomous") +} + +#' Perform Value Checks +#' +#' Check the validity of the values passed in `ard_dichotomous(value)`. +#' +#' @param data (`data.frame`)\cr +#' a data frame +#' @param value (named `list`)\cr +#' a named list +#' +#' @return returns invisible if check is successful, throws an error message if not. +#' @keywords internal +#' +#' @examples +#' cardx:::.check_dichotomous_value(mtcars, list(cyl = 4)) +.check_dichotomous_value <- function(data, value) { + imap( + value, + function(value, column) { + accepted_values <- .unique_and_sorted(data[[column]]) + if (length(value) != 1L || !value %in% accepted_values) { + message <- "Error in argument {.arg value} for variable {.val {column}}." + cli::cli_abort( + if (length(value) != 1L) { + c(message, "i" = "The value must be one of {.val {accepted_values}}.") + } else { + c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.") + }, + call = get_cli_abort_call() + ) + } + } + ) |> + invisible() +} + +#' ARD-flavor of unique() +#' +#' Essentially a wrapper for `unique(x) |> sort()` with `NA` levels removed. +#' For factors, all levels are returned even if they are unobserved. +#' Similarly, logical vectors always return `c(TRUE, FALSE)`, even if +#' both levels are not observed. +#' +#' @param x (`any`)\cr +#' a vector +#' +#' @return a vector +#' @keywords internal +#' +#' @examples +#' cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters)) +#' +#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE)) +#' +#' cards:::.unique_and_sorted(c(5, 5:1)) +.unique_and_sorted <- function(x, useNA = c("no", "always")) { + # styler: off + useNA <- match.arg(useNA) + # if a factor return a factor that includes the same levels (including unobserved levels) + if (inherits(x, "factor")) { + return( + factor( + if (useNA == "no") levels(x) + else c(levels(x), NA_character_), + levels = levels(x) + ) + ) + } + if (inherits(x, "logical")) { + if (useNA == "no") return(c(TRUE, FALSE)) + else return(c(TRUE, FALSE, NA)) + } + + # otherwise, return a simple unique and sort of the vector + if (useNA == "no") return(unique(x) |> sort()) + else return(unique(x) |> sort() |> c(NA)) + # styler: on +} diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R new file mode 100644 index 00000000..b0a27763 --- /dev/null +++ b/R/ard_missing.survey.design.R @@ -0,0 +1,114 @@ +#' ARD Missing Survey Statistics +#' +#' Compute Analysis Results Data (ARD) for statistics related to data missingness for survey objects +#' +#' @inheritParams ard_categorical.survey.design +#' +#' @return an ARD data frame of class 'card' +#' @export +#' +#' @examplesIf cardx:::is_pkg_installed("survey", reference_pkg = "cardx") +#' svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) +#' +#' ard_missing(svy_titanic, variables = c(Class, Age), by = Survived) +ard_missing.survey.design <- function(data, + variables, + by = NULL, + statistic = + everything() ~ c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted"), + fmt_fn = NULL, + stat_label = + everything() ~ list("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted"), + ...) { + set_cli_abort_call() + check_dots_empty() + check_pkg_installed(pkg = "survey", reference_pkg = "cardx") + + # process inputs ------------------------------------------------------------- + check_not_missing(variables) + cards::process_selectors( + data = data$variables, + variables = {{ variables }}, + by = {{ by }} + ) + + # convert all variables to T/F whether it's missing -------------------------- + data$variables <- data$variables |> + dplyr::mutate(across(all_of(variables), Negate(is.na))) + + cards::process_formula_selectors( + data$variables[variables], + statistic = statistic, + fmt_fn = fmt_fn, + stat_label = stat_label + ) + cards::fill_formula_selectors( + data$variables[variables], + statistic = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["statistic"]] |> eval() + ) + + stats_available <- c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted") + cards::check_list_elements( + x = statistic, + predicate = \(x) is.character(x) && all(x %in% stats_available), + error_msg = "Elements passed in the {.arg statistic} argument must be one or more of {.val {stats_available}}" + ) + + # calculate results ---------------------------------------------------------- + result <- + ard_categorical( + data = data, + variables = all_of(variables), + by = any_of(by), + statistic = everything() ~ c("n", "N", "p", "n_unweighted", "N_unweighted", "p_unweighted") + ) + + # rename the stats for missingness ------------------------------------------- + result <- result |> + dplyr::mutate( + stat_name = + dplyr::case_when( + .data$stat_name %in% "N" ~ "N_obs", + .data$stat_name %in% "n" & .data$variable_level %in% FALSE ~ "N_miss", + .data$stat_name %in% "n" & .data$variable_level %in% TRUE ~ "N_nonmiss", + .data$stat_name %in% "p" & .data$variable_level %in% FALSE ~ "p_miss", + .data$stat_name %in% "p" & .data$variable_level %in% TRUE ~ "p_nonmiss", + .data$stat_name %in% "N_unweighted" ~ "N_obs_unweighted", + .data$stat_name %in% "n_unweighted" & .data$variable_level %in% FALSE ~ "N_miss_unweighted", + .data$stat_name %in% "n_unweighted" & .data$variable_level %in% TRUE ~ "N_nonmiss_unweighted", + .data$stat_name %in% "p_unweighted" & .data$variable_level %in% FALSE ~ "p_miss_unweighted", + .data$stat_name %in% "p_unweighted" & .data$variable_level %in% TRUE ~ "p_nonmiss_unweighted" + ) + ) |> + dplyr::select(-cards::all_ard_variables("levels"), -"stat_label", -"fmt_fn") |> + dplyr::slice(1L, .by = "stat_name") + + # final processing of fmt_fn ------------------------------------------------- + result <- result |> + .process_nested_list_as_df( + arg = fmt_fn, + new_column = "fmt_fn" + ) |> + .default_svy_cat_fmt_fn() + + # merge in statistic labels -------------------------------------------------- + result <- result |> + .process_nested_list_as_df( + arg = stat_label, + new_column = "stat_label", + unlist = TRUE + ) |> + dplyr::mutate(stat_label = dplyr::coalesce(.data$stat_label, .data$stat_name)) + + # return final object -------------------------------------------------------- + result |> + dplyr::mutate(context = "missing") |> + cards::tidy_ard_column_order() %>% + {structure(., class = c("card", class(.)))} # styler: off +} diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index 51c65374..a96a43e9 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -6,7 +6,7 @@ \usage{ \method{ard_categorical}{survey.design}( data, - variables = everything(), + variables, by = NULL, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), @@ -22,7 +22,7 @@ a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the column specified diff --git a/man/ard_continuous.survey.design.Rd b/man/ard_continuous.survey.design.Rd index 6ba18722..690238cb 100644 --- a/man/ard_continuous.survey.design.Rd +++ b/man/ard_continuous.survey.design.Rd @@ -19,7 +19,7 @@ a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the columns specified, diff --git a/man/ard_dichotomous.survey.design.Rd b/man/ard_dichotomous.survey.design.Rd new file mode 100644 index 00000000..86247263 --- /dev/null +++ b/man/ard_dichotomous.survey.design.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{ard_dichotomous.survey.design} +\alias{ard_dichotomous.survey.design} +\title{ARD Dichotomous Survey Statistics} +\usage{ +\method{ard_dichotomous}{survey.design}( + data, + variables, + by = NULL, + value = cards::maximum_variable_value(data$variables[variables]), + statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", + "N_unweighted", "p_unweighted"), + denominator = c("column", "row", "cell"), + fmt_fn = NULL, + stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = + "Design Effect", n = "Unweighted n", N = "Unweighted N", p = "Unweighted \%"), + ... +) +} +\arguments{ +\item{data}{(\code{survey.design})\cr +a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +results are calculated for \strong{all combinations} of the column specified +and the variables. A single column may be specified.} + +\item{value}{(named \code{list})\cr +named list of dichotomous values to tabulate. +Default is \code{cards::maximum_variable_value(data$variables)}, +which returns the largest/last value after a sort.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a character vector of +statistic names to include. See default value for options.} + +\item{denominator}{(\code{string})\cr +a string indicating the type proportions to calculate. Must be one of +\code{"column"} (the default), \code{"row"}, and \code{"cell"}.} + +\item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a named list of functions +(or the RHS of a formula), +e.g. \verb{list(mpg = list(mean = \\(x) round(x, digits = 2) |> as.character))}.} + +\item{stat_label}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, or a single formula where +the list element is either a named list or a list of formulas defining the +statistic labels, e.g. \code{everything() ~ list(mean = "Mean", sd = "SD")} or +\code{everything() ~ list(mean ~ "Mean", sd ~ "SD")}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +Compute Analysis Results Data (ARD) for dichotomous summary statistics. +} +\examples{ +survey::svydesign(ids = ~1, data = mtcars, weights = ~1) |> + ard_dichotomous(by = vs, variables = c(cyl, am), value = list(cyl = 4)) +} diff --git a/man/ard_missing.survey.design.Rd b/man/ard_missing.survey.design.Rd new file mode 100644 index 00000000..616cbb22 --- /dev/null +++ b/man/ard_missing.survey.design.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_missing.survey.design.R +\name{ard_missing.survey.design} +\alias{ard_missing.survey.design} +\title{ARD Missing Survey Statistics} +\usage{ +\method{ard_missing}{survey.design}( + data, + variables, + by = NULL, + statistic = everything() ~ c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", "p_miss_unweighted", + "p_nonmiss_unweighted"), + fmt_fn = NULL, + stat_label = everything() ~ default_stat_labels(), + ... +) +} +\arguments{ +\item{data}{(\code{survey.design})\cr +a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} + +\item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to include in summaries.} + +\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +results are calculated for \strong{all combinations} of the column specified +and the variables. A single column may be specified.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a character vector of +statistic names to include. See default value for options.} + +\item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, +or a single formula where the list element is a named list of functions +(or the RHS of a formula), +e.g. \verb{list(mpg = list(mean = \\(x) round(x, digits = 2) |> as.character))}.} + +\item{stat_label}{(\code{\link[=syntax]{formula-list-selector}})\cr +a named list, a list of formulas, or a single formula where +the list element is either a named list or a list of formulas defining the +statistic labels, e.g. \code{everything() ~ list(mean = "Mean", sd = "SD")} or +\code{everything() ~ list(mean ~ "Mean", sd ~ "SD")}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +Compute Analysis Results Data (ARD) for statistics related to data missingness for survey objects +} +\examples{ +\dontshow{if (cardx:::is_pkg_installed("survey", reference_pkg = "cardx")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + +ard_missing(svy_titanic, variables = c(Class, Age), by = Survived) +\dontshow{\}) # examplesIf} +} diff --git a/man/dot-check_dichotomous_value.Rd b/man/dot-check_dichotomous_value.Rd new file mode 100644 index 00000000..b008619e --- /dev/null +++ b/man/dot-check_dichotomous_value.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{.check_dichotomous_value} +\alias{.check_dichotomous_value} +\title{Perform Value Checks} +\usage{ +.check_dichotomous_value(data, value) +} +\arguments{ +\item{data}{(\code{data.frame})\cr +a data frame} + +\item{value}{(named \code{list})\cr +a named list} +} +\value{ +returns invisible if check is successful, throws an error message if not. +} +\description{ +Check the validity of the values passed in \code{ard_dichotomous(value)}. +} +\examples{ +cardx:::.check_dichotomous_value(mtcars, list(cyl = 4)) +} +\keyword{internal} diff --git a/man/dot-unique_and_sorted.Rd b/man/dot-unique_and_sorted.Rd new file mode 100644 index 00000000..b030838b --- /dev/null +++ b/man/dot-unique_and_sorted.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_dichotomous.survey.design.R +\name{.unique_and_sorted} +\alias{.unique_and_sorted} +\title{ARD-flavor of unique()} +\usage{ +.unique_and_sorted(x, useNA = c("no", "always")) +} +\arguments{ +\item{x}{(\code{any})\cr +a vector} +} +\value{ +a vector +} +\description{ +Essentially a wrapper for \code{unique(x) |> sort()} with \code{NA} levels removed. +For factors, all levels are returned even if they are unobserved. +Similarly, logical vectors always return \code{c(TRUE, FALSE)}, even if +both levels are not observed. +} +\examples{ +cards:::.unique_and_sorted(factor(letters[c(5, 5:1)], levels = letters)) + +cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE)) + +cards:::.unique_and_sorted(c(5, 5:1)) +} +\keyword{internal} diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R index 8154fe30..a7dc25a4 100644 --- a/tests/testthat/test-ard_categorical.survey.design.R +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -5,6 +5,9 @@ # The function _should_ work if the underlying type is factor or logical # - Do we get results for unobserved factor levels in the `by` and `variable` variables? # - Do we get results for unobserved logical levels in the `by` and `variable` variables, e.g. if there are only TRUE, we should have FALSE rows too? +# - It turns out variables (both by and variables) that only have one level are problematic in some ways. +# I've coded around these issues, but we need thorough testing when either by or a variable has a single level. +# We need tests for when these variables are factor, logical, and other to ensure every case is handled properly. # - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. Then all the results should equal the unweighted summariess # dplyr::tibble(y = rep(FALSE, 15), x = rep(TRUE, 15)) |> # survey::svydesign(ids = ~1, data = _, weights = ~1) |> From 43ee82e414df271b40006a3ad89d575de4abeeb5 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 08:46:06 -0700 Subject: [PATCH 13/23] updates --- R/ard_missing.survey.design.R | 13 ++++++++++--- _pkgdown.yml | 2 ++ man/ard_missing.survey.design.Rd | 7 ++++++- tests/testthat/test-ard_categorical.survey.design.R | 3 ++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R index b0a27763..09f53d53 100644 --- a/R/ard_missing.survey.design.R +++ b/R/ard_missing.survey.design.R @@ -20,9 +20,16 @@ ard_missing.survey.design <- function(data, "p_miss_unweighted", "p_nonmiss_unweighted"), fmt_fn = NULL, stat_label = - everything() ~ list("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", - "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", - "p_miss_unweighted", "p_nonmiss_unweighted"), + everything() ~ list(N_obs = "Total N", + N_miss = "N Missing", + N_nonmiss = "N not Missing", + p_miss = "% Missing", + p_nonmiss = "% not Missing", + N_obs_unweighted = "Total N (unweighted)", + N_miss_unweighted = "N Missing (unweighted)", + N_nonmiss_unweighted = "N not Missing (unweighted)", + p_miss_unweighted = "% Missing (unweighted)", + p_nonmiss_unweighted = "% not Missing (unweighted)"), ...) { set_cli_abort_call() check_dots_empty() diff --git a/_pkgdown.yml b/_pkgdown.yml index dd1e1700..3b90b6e5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -64,6 +64,8 @@ reference: - contents: - ard_continuous.survey.design - ard_categorical.survey.design + - ard_dichotomous.survey.design + - ard_missing.survey.design - ard_attributes.survey.design - ard_survey_svychisq - ard_survey_svyranktest diff --git a/man/ard_missing.survey.design.Rd b/man/ard_missing.survey.design.Rd index 616cbb22..056849c1 100644 --- a/man/ard_missing.survey.design.Rd +++ b/man/ard_missing.survey.design.Rd @@ -12,7 +12,12 @@ "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", "p_miss_unweighted", "p_nonmiss_unweighted"), fmt_fn = NULL, - stat_label = everything() ~ default_stat_labels(), + stat_label = everything() ~ list(N_obs = "Total N", N_miss = "N Missing", N_nonmiss = + "N not Missing", p_miss = "\% Missing", p_nonmiss = "\% not Missing", + N_obs_unweighted = "Total N (unweighted)", N_miss_unweighted = + "N Missing (unweighted)", N_nonmiss_unweighted = "N not Missing (unweighted)", + p_miss_unweighted = "\% Missing (unweighted)", p_nonmiss_unweighted = + "\% not Missing (unweighted)"), ... ) } diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R index a7dc25a4..826717f5 100644 --- a/tests/testthat/test-ard_categorical.survey.design.R +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -8,7 +8,8 @@ # - It turns out variables (both by and variables) that only have one level are problematic in some ways. # I've coded around these issues, but we need thorough testing when either by or a variable has a single level. # We need tests for when these variables are factor, logical, and other to ensure every case is handled properly. -# - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. Then all the results should equal the unweighted summariess +# - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. +# Then all the results should equal the unweighted summaries and we can perform expect_equal() checks against the data.frame S3 methods. # dplyr::tibble(y = rep(FALSE, 15), x = rep(TRUE, 15)) |> # survey::svydesign(ids = ~1, data = _, weights = ~1) |> # ard_categorical(by = y) From d1a777bcb70c2057fb1babc33dba63b9ae05aafc Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 09:00:44 -0700 Subject: [PATCH 14/23] style --- R/ard_categorical.survey.design.R | 66 ++++++++++++++++--------------- R/ard_dichotomous.survey.design.R | 6 ++- R/ard_missing.survey.design.R | 40 +++++++++++-------- 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 108b6537..ec9f1cfa 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -48,8 +48,10 @@ ard_categorical.survey.design <- function(data, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), fmt_fn = NULL, - stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect", - "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), + stat_label = everything() ~ list( + p = "%", p.std.error = "SE(%)", deff = "Design Effect", + "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %" + ), ...) { set_cli_abort_call() check_pkg_installed(pkg = "survey", reference_pkg = "cardx") @@ -84,7 +86,7 @@ ard_categorical.survey.design <- function(data, x = statistic, predicate = \(x) all(x %in% accepted_svy_stats), error_msg = c("Error in the values of the {.arg statistic} argument.", - i = "Values must be in {.val {accepted_svy_stats}}" + i = "Values must be in {.val {accepted_svy_stats}}" ) ) denominator <- arg_match(denominator) @@ -143,7 +145,7 @@ ard_categorical.survey.design <- function(data, dplyr::mutate( across( c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), - ~map(.x, as.character) + ~ map(.x, as.character) ) ) |> dplyr::select(-c("stat_label", "fmt_fn", "warning", "error")) |> @@ -176,7 +178,7 @@ ard_categorical.survey.design <- function(data, context = "categorical", warning = list(NULL), error = list(NULL), - ) |> + ) |> cards::tidy_ard_column_order() %>% {structure(., class = c("card", class(.)))} |> # styler: off cards::tidy_ard_row_order() @@ -301,9 +303,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(by) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(by) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("variable_level", "group1_level", "p", "p.std.error", "deff")) |> @@ -334,9 +336,9 @@ ard_categorical.survey.design <- function(data, ), name = str_remove_all(.data$name, "se\\.") %>% - str_remove_all("DEff\\.") %>% - str_remove_all(variable) %>% - str_remove_all("`") + str_remove_all("DEff\\.") %>% + str_remove_all(variable) %>% + str_remove_all("`") ) |> tidyr::pivot_wider(names_from = "stat", values_from = "value") |> set_names(c("group1_level", "variable_level", "p", "p.std.error", "deff")) |> @@ -378,27 +380,27 @@ ard_categorical.survey.design <- function(data, # add big N and p, then return data frame of results switch(denominator, - "column" = - df_counts |> - dplyr::mutate( - .by = any_of("group1_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "row" = - df_counts |> - dplyr::mutate( - .by = any_of("variable_level"), - N = sum(.data$n), - p = .data$n / .data$N - ), - "cell" = - df_counts |> - dplyr::mutate( - .by = any_of(c("group1_level", "variable_level")), - N = sum(.data$n), - p = .data$n / .data$N - ) + "column" = + df_counts |> + dplyr::mutate( + .by = any_of("group1_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "row" = + df_counts |> + dplyr::mutate( + .by = any_of("variable_level"), + N = sum(.data$n), + p = .data$n / .data$N + ), + "cell" = + df_counts |> + dplyr::mutate( + .by = any_of(c("group1_level", "variable_level")), + N = sum(.data$n), + p = .data$n / .data$N + ) ) } diff --git a/R/ard_dichotomous.survey.design.R b/R/ard_dichotomous.survey.design.R index 53cc8bd6..dd96f2b3 100644 --- a/R/ard_dichotomous.survey.design.R +++ b/R/ard_dichotomous.survey.design.R @@ -21,8 +21,10 @@ ard_dichotomous.survey.design <- function(data, statistic = everything() ~ c("n", "N", "p", "p.std.error", "deff", "n_unweighted", "N_unweighted", "p_unweighted"), denominator = c("column", "row", "cell"), fmt_fn = NULL, - stat_label = everything() ~ list(p = "%", p.std.error = "SE(%)", deff = "Design Effect", - "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %"), + stat_label = everything() ~ list( + p = "%", p.std.error = "SE(%)", deff = "Design Effect", + "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %" + ), ...) { set_cli_abort_call() check_dots_empty() diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R index 09f53d53..40d7613f 100644 --- a/R/ard_missing.survey.design.R +++ b/R/ard_missing.survey.design.R @@ -15,21 +15,25 @@ ard_missing.survey.design <- function(data, variables, by = NULL, statistic = - everything() ~ c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", - "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", - "p_miss_unweighted", "p_nonmiss_unweighted"), + everything() ~ c( + "N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted" + ), fmt_fn = NULL, stat_label = - everything() ~ list(N_obs = "Total N", - N_miss = "N Missing", - N_nonmiss = "N not Missing", - p_miss = "% Missing", - p_nonmiss = "% not Missing", - N_obs_unweighted = "Total N (unweighted)", - N_miss_unweighted = "N Missing (unweighted)", - N_nonmiss_unweighted = "N not Missing (unweighted)", - p_miss_unweighted = "% Missing (unweighted)", - p_nonmiss_unweighted = "% not Missing (unweighted)"), + everything() ~ list( + N_obs = "Total N", + N_miss = "N Missing", + N_nonmiss = "N not Missing", + p_miss = "% Missing", + p_nonmiss = "% not Missing", + N_obs_unweighted = "Total N (unweighted)", + N_miss_unweighted = "N Missing (unweighted)", + N_nonmiss_unweighted = "N not Missing (unweighted)", + p_miss_unweighted = "% Missing (unweighted)", + p_nonmiss_unweighted = "% not Missing (unweighted)" + ), ...) { set_cli_abort_call() check_dots_empty() @@ -58,9 +62,11 @@ ard_missing.survey.design <- function(data, statistic = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["statistic"]] |> eval() ) - stats_available <- c("N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", - "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", - "p_miss_unweighted", "p_nonmiss_unweighted") + stats_available <- c( + "N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", + "N_obs_unweighted", "N_miss_unweighted", "N_nonmiss_unweighted", + "p_miss_unweighted", "p_nonmiss_unweighted" + ) cards::check_list_elements( x = statistic, predicate = \(x) is.character(x) && all(x %in% stats_available), @@ -115,7 +121,7 @@ ard_missing.survey.design <- function(data, # return final object -------------------------------------------------------- result |> - dplyr::mutate(context = "missing") |> + dplyr::mutate(context = "missing") |> cards::tidy_ard_column_order() %>% {structure(., class = c("card", class(.)))} # styler: off } From 20d2dca2ad4df10db4e24d253f0dcc28cf5f2379 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 09:01:51 -0700 Subject: [PATCH 15/23] Update ard_missing.survey.design.R --- R/ard_missing.survey.design.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R index 40d7613f..30323ac2 100644 --- a/R/ard_missing.survey.design.R +++ b/R/ard_missing.survey.design.R @@ -61,6 +61,10 @@ ard_missing.survey.design <- function(data, data$variables[variables], statistic = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["statistic"]] |> eval() ) + cards::fill_formula_selectors( + data$variables[variables], + stat_label = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["stat_label"]] |> eval() + ) stats_available <- c( "N_obs", "N_miss", "N_nonmiss", "p_miss", "p_nonmiss", From 3afa330c74965b5314d152df82479f9c7fef2523 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 09:16:01 -0700 Subject: [PATCH 16/23] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index de7f9a82..f7a2039e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -34,6 +34,8 @@ ard_moodtest() -> ard_stats_mood_test() - `ard_survival_survfit()` for survival analyses using `survival::survfit()`. (#43) - `ard_continuous.survey.design()` for calculating univariate summary statistics from weighted/survey data using many functions from the {survey} package. (#68) - `ard_categorical.survey.design()` for tabulating summary statistics from weighted/survey data using many functions from the {survey} package. (#140) + - `ard_dichotomous.survey.design()` for tabulating dichotomous summary statistics from weighted/survey data using many functions from the {survey} package. (#2) + - `ard_missing.survey.design()` for tabulating missing summary statistics from weighted/survey data using many functions from the {survey} package. (#2) - `ard_attributes.survey.design()` for summarizing labels and attributes from weighted/survey data using many functions from the {survey} package. - `ard_survey_svychisq()` for weighted/survey chi-squared test using `survey::svychisq()`. (#72) - `ard_survey_svyttest()` for weighted/survey t-tests using `survey::svyttest()`. (#70) From ae8c862af99d68584749ffa0253b8daea0d3897a Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Fri, 14 Jun 2024 19:54:36 -0700 Subject: [PATCH 17/23] Update test-ard_categorical.survey.design.R --- tests/testthat/test-ard_categorical.survey.design.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R index 826717f5..86e6e7d8 100644 --- a/tests/testthat/test-ard_categorical.survey.design.R +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -1,10 +1,13 @@ # Items to test -# - First, everything needs to be tested independently for denominator='column'|'row'|'cell' AND by whether there is a by variable +# - First, everything needs to be tested independently for +# denominator='column'|'row'|'cell' AND by whether there is a by variable # This is because these 6 scenarios are calculated entirely separately -# - What happens with a variable that is all NA? How does that behavior compare to `ard_categorical()` for data frames +# - What happens with a variable that is all NA? How does that behavior +# compare to `ard_categorical()` for data frames # The function _should_ work if the underlying type is factor or logical # - Do we get results for unobserved factor levels in the `by` and `variable` variables? -# - Do we get results for unobserved logical levels in the `by` and `variable` variables, e.g. if there are only TRUE, we should have FALSE rows too? +# - Do we get results for unobserved logical levels in the `by` and `variable` +# variables, e.g. if there are only TRUE, we should have FALSE rows too? # - It turns out variables (both by and variables) that only have one level are problematic in some ways. # I've coded around these issues, but we need thorough testing when either by or a variable has a single level. # We need tests for when these variables are factor, logical, and other to ensure every case is handled properly. From e48f6878496c4c3cc009ff4f13ab2e6e09318855 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Wed, 19 Jun 2024 13:29:10 -0700 Subject: [PATCH 18/23] review updates --- R/ard_categorical.survey.design.R | 19 +++++++++++++++---- man/ard_categorical.survey.design.Rd | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index ec9f1cfa..11c5c5ca 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -49,8 +49,12 @@ ard_categorical.survey.design <- function(data, denominator = c("column", "row", "cell"), fmt_fn = NULL, stat_label = everything() ~ list( - p = "%", p.std.error = "SE(%)", deff = "Design Effect", - "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %" + p = "%", + p.std.error = "SE(%)", + deff = "Design Effect", + "n_unweighted" = "Unweighted n", + "N_unweighted" = "Unweighted N", + "p_unweighted" = "Unweighted %" ), ...) { set_cli_abort_call() @@ -200,11 +204,18 @@ ard_categorical.survey.design <- function(data, if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off if (length(variable_lvls) == 1L) { data$variables[[variable]] <- - factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) + case_switch( + inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[variable]], paste("not", variable_lvls)), + .default = factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) + ) + } if (!is_empty(by) && length(by_lvls) == 1L) { data$variables[[by]] <- - factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) + case_switch( + inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[by]], paste("not", by_lvls)), + .default = factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) + ) } # each combination of denominator and whether there is a by variable is handled separately diff --git a/man/ard_categorical.survey.design.Rd b/man/ard_categorical.survey.design.Rd index a96a43e9..08042390 100644 --- a/man/ard_categorical.survey.design.Rd +++ b/man/ard_categorical.survey.design.Rd @@ -13,7 +13,8 @@ denominator = c("column", "row", "cell"), fmt_fn = NULL, stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = - "Design Effect", n = "Unweighted n", N = "Unweighted N", p = "Unweighted \%"), + "Design Effect", n_unweighted = "Unweighted n", N_unweighted = "Unweighted N", + p_unweighted = "Unweighted \%"), ... ) } From 07cd51e5d9c2d82fcc74fe0ad9cc4e4449fe8fd3 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 20 Jun 2024 07:57:05 -0700 Subject: [PATCH 19/23] added check for NA factor levels --- R/ard_categorical.survey.design.R | 16 ++++++++++++++++ R/ard_continuous.survey.design.R | 2 ++ 2 files changed, 18 insertions(+) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 11c5c5ca..64e9f620 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -75,6 +75,8 @@ ard_categorical.survey.design <- function(data, # if no variables selected, return empty data frame if (is_empty(variables)) return(dplyr::tibble()) # styler: off + check_na_factor_levels(data$variables, c(by, variables)) + cards::process_formula_selectors( data = data$variables[variables], statistic = statistic, @@ -188,6 +190,20 @@ ard_categorical.survey.design <- function(data, cards::tidy_ard_row_order() } +# check for functions with NA factor levels (these are not allowed) +check_na_factor_levels <- function(data, variables) { + walk( + variables, + \(variable) { + if (is.factor(data[[variable]]) && any(is.na(levels(data[[variable]])))) { + cli::cli_abort( + "Factors with {.val {NA}} levels are not allowed, which are present in column {.val {variable}}.", + call = get_cli_abort_call() + ) + } + } + ) +} # this function returns a tibble with the SE(p) and DEFF .svytable_rate_stats <- function(data, variables, by, denominator, deff) { diff --git a/R/ard_continuous.survey.design.R b/R/ard_continuous.survey.design.R index 5e8d1a31..f02bef32 100644 --- a/R/ard_continuous.survey.design.R +++ b/R/ard_continuous.survey.design.R @@ -61,6 +61,8 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, # process inputs ------------------------------------------------------------- cards::process_selectors(data$variables, variables = {{ variables }}, by = {{ by }}) variables <- setdiff(variables, by) + check_na_factor_levels(data$variables, by) + cards::process_formula_selectors( data$variables[variables], statistic = statistic, From 7812a2643f487463520505317083261a24007523 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 20 Jun 2024 08:08:24 -0700 Subject: [PATCH 20/23] Update ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index 64e9f620..b54f77e4 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -197,7 +197,7 @@ check_na_factor_levels <- function(data, variables) { \(variable) { if (is.factor(data[[variable]]) && any(is.na(levels(data[[variable]])))) { cli::cli_abort( - "Factors with {.val {NA}} levels are not allowed, which are present in column {.val {variable}}.", + "Column {.val {variable}} is a factor with {.val {NA}} levels, which are not allowed.", call = get_cli_abort_call() ) } From ef5308d77ce64bbd96f5473062c1231d062c536c Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Thu, 20 Jun 2024 09:23:36 -0700 Subject: [PATCH 21/23] Update ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index b54f77e4..c99f8651 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -271,7 +271,7 @@ check_na_factor_levels <- function(data, variables) { .one_svytable_rates_no_by_row <- function(data, variable, deff) { dplyr::tibble( variable = .env$variable, - variable_level = unique(data$variables[[variable]]) |> sort(), + variable_level = unique(data$variables[[variable]]) |> sort() |> as.character(), p = 1, p.std.error = 0, deff = NaN From 0385f7e294bdc55ddf40a792c641e57882492da2 Mon Sep 17 00:00:00 2001 From: Daniel Sjoberg Date: Tue, 25 Jun 2024 18:09:41 -0700 Subject: [PATCH 22/23] Update ard_categorical.survey.design.R --- R/ard_categorical.survey.design.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index c99f8651..ea231d7b 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -224,7 +224,6 @@ check_na_factor_levels <- function(data, variables) { inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[variable]], paste("not", variable_lvls)), .default = factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) ) - } if (!is_empty(by) && length(by_lvls) == 1L) { data$variables[[by]] <- From e943b8799c652f0a7b9607b238373e348524f34d Mon Sep 17 00:00:00 2001 From: Abinaya Yogasekaram <73252787+ayogasekaram@users.noreply.github.com> Date: Wed, 3 Jul 2024 19:42:46 -0400 Subject: [PATCH 23/23] Svy ard categorical unit tests (#171) **What changes are proposed in this pull request?** * Style this entry in a way that can be copied directly into `NEWS.md`. (#, @) Provide more detail here as needed. **Reference GitHub issue associated with pull request.** _e.g., 'closes #'_ -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [ ] **All** GitHub Action workflows pass with a :white_check_mark: - [ ] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [ ] If a bug was fixed, a unit test was added. - [ ] If a new `ard_*()` function was added, it passes the ARD structural checks from `cards::check_ard_structure()`. - [ ] If a new `ard_*()` function was added, `set_cli_abort_call()` has been set. - [ ] If a new `ard_*()` function was added and it depends on another package (such as, `broom`), `is_pkg_installed("broom", reference_pkg = "cardx")` has been set in the function call and the following added to the roxygen comments: `@examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"", reference_pkg = "cardx"))` - [ ] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cardx (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a :white_check_mark: - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --------- Co-authored-by: Daniel Sjoberg --- R/ard_categorical.survey.design.R | 43 +- R/ard_continuous.survey.design.R | 21 +- R/ard_dichotomous.survey.design.R | 8 +- R/ard_missing.survey.design.R | 6 +- man/ard_dichotomous.survey.design.Rd | 3 +- man/ard_survey_categorical_ci.Rd | 2 +- man/ard_survey_continuous_ci.Rd | 2 +- .../_snaps/ard_categorical.survey.design.md | 30 + .../_snaps/ard_continuous.survey.design.md | 47 +- .../_snaps/ard_dichotomous.survey.design.md | 30 + .../test-ard_categorical.survey.design.R | 1192 ++++++++++++++++- .../test-ard_continuous.survey.design.R | 11 +- .../test-ard_dichotomous.survey.design.R | 450 +++++++ .../testthat/test-ard_missing.survey.design.R | 105 ++ 14 files changed, 1878 insertions(+), 72 deletions(-) create mode 100644 tests/testthat/_snaps/ard_categorical.survey.design.md create mode 100644 tests/testthat/_snaps/ard_dichotomous.survey.design.md create mode 100644 tests/testthat/test-ard_dichotomous.survey.design.R create mode 100644 tests/testthat/test-ard_missing.survey.design.R diff --git a/R/ard_categorical.survey.design.R b/R/ard_categorical.survey.design.R index ea231d7b..82d4b928 100644 --- a/R/ard_categorical.survey.design.R +++ b/R/ard_categorical.survey.design.R @@ -97,6 +97,11 @@ ard_categorical.survey.design <- function(data, ) denominator <- arg_match(denominator) + # return empty tibble if no variables selected ------------------------------- + if (is_empty(variables)) { + return(dplyr::tibble()) + } + # check the missingness walk( variables, @@ -105,7 +110,8 @@ ard_categorical.survey.design <- function(data, cli::cli_abort( c("Column {.val {.x}} is all missing and cannot be tabulated.", i = "Only columns of class {.cls logical} and {.cls factor} can be tabulated when all values are missing." - ) + ), + call = get_cli_abort_call() ) } } @@ -207,6 +213,21 @@ check_na_factor_levels <- function(data, variables) { # this function returns a tibble with the SE(p) and DEFF .svytable_rate_stats <- function(data, variables, by, denominator, deff) { + if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off + if (!is_empty(by) && length(by_lvls) == 1L) { + data$variables[[by]] <- + case_switch( + inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[by]], paste("not", by_lvls)), + .default = factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) + ) + } + if (!is_empty(by) && inherits(data$variables[[by]], "logical")) { + data$variables[[by]] <- factor(data$variables[[by]], levels = c(TRUE, FALSE)) + } + if (!is_empty(by) && !inherits(data$variables[[by]], "factor")) { + data$variables[[by]] <- factor(data$variables[[by]]) + } + lapply( variables, \(variable) { @@ -217,20 +238,18 @@ check_na_factor_levels <- function(data, variables) { # there are issues with svymean() when a variable has only one level. adding a second as needed variable_lvls <- .unique_values_sort(data$variables, variable) - if (!is_empty(by)) by_lvls <- .unique_values_sort(data$variables, by) # styler: off if (length(variable_lvls) == 1L) { data$variables[[variable]] <- case_switch( - inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[variable]], paste("not", variable_lvls)), + inherits(data$variables[[variable]], "factor") ~ fct_expand(data$variables[[variable]], paste("not", variable_lvls)), .default = factor(data$variables[[variable]], levels = c(variable_lvls, paste("not", variable_lvls))) ) } - if (!is_empty(by) && length(by_lvls) == 1L) { - data$variables[[by]] <- - case_switch( - inherits(data$variables[[by]], "factor") ~ fct_expand(data$variables[[by]], paste("not", by_lvls)), - .default = factor(data$variables[[by]], levels = c(by_lvls, paste("not", by_lvls))) - ) + if (inherits(data$variables[[variable]], "logical")) { + data$variables[[variable]] <- factor(data$variables[[variable]], levels = c(TRUE, FALSE)) + } + if (!inherits(data$variables[[variable]], "factor")) { + data$variables[[variable]] <- factor(data$variables[[variable]]) } # each combination of denominator and whether there is a by variable is handled separately @@ -409,21 +428,21 @@ check_na_factor_levels <- function(data, variables) { "column" = df_counts |> dplyr::mutate( - .by = any_of("group1_level"), + .by = c(cards::all_ard_groups(), cards::all_ard_variables("names")), N = sum(.data$n), p = .data$n / .data$N ), "row" = df_counts |> dplyr::mutate( - .by = any_of("variable_level"), + .by = cards::all_ard_variables(), N = sum(.data$n), p = .data$n / .data$N ), "cell" = df_counts |> dplyr::mutate( - .by = any_of(c("group1_level", "variable_level")), + .by = c(cards::all_ard_groups("names"), cards::all_ard_variables("names")), N = sum(.data$n), p = .data$n / .data$N ) diff --git a/R/ard_continuous.survey.design.R b/R/ard_continuous.survey.design.R index f02bef32..0459836f 100644 --- a/R/ard_continuous.survey.design.R +++ b/R/ard_continuous.survey.design.R @@ -76,11 +76,16 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, cards::check_list_elements( x = statistic, predicate = \(x) all(x %in% accepted_svy_stats()), - error_msg = c("Error in the values of the {.arg statistic} argument.", - i = "Values must be in {.val {accepted_svy_stats(FALSE)}}" + error_msg = c("Error in the values of the {.arg statistic} argument for variable {.val {variable}}.", + i = "Values must be in {.val {cardx:::accepted_svy_stats(FALSE)}}" ) ) + # return empty tibble if no variables selected ------------------------------- + if (is_empty(variables)) { + return(dplyr::tibble()) + } + # compute the weighted statistics -------------------------------------------- df_stats <- map( @@ -117,7 +122,7 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, unlist()) ) |> tidyr::unnest(cols = c("stat_name", "stat_label")), - by = "stat_name", + by = c("variable", "stat_name"), unmatched = "ignore" ) } @@ -134,7 +139,7 @@ ard_continuous.survey.design <- function(data, variables, by = NULL, fmt_fn = map(.data$variable, ~ fmt_fn[[.x]] |> unname()) ) |> tidyr::unnest(cols = c("stat_name", "fmt_fn")), - by = "stat_name", + by = c("variable", "stat_name"), unmatched = "ignore" ) } @@ -306,5 +311,11 @@ accepted_svy_stats <- function(expand_quantiles = TRUE) { } df_stat |> - dplyr::mutate(stat_name = .env$stat_name) + dplyr::mutate( + stat_name = .env$stat_name, + across( + c(cards::all_ard_groups("levels"), cards::all_ard_variables("levels")), + ~ map(.x, as.character) + ) + ) } diff --git a/R/ard_dichotomous.survey.design.R b/R/ard_dichotomous.survey.design.R index dd96f2b3..0bc62085 100644 --- a/R/ard_dichotomous.survey.design.R +++ b/R/ard_dichotomous.survey.design.R @@ -22,8 +22,12 @@ ard_dichotomous.survey.design <- function(data, denominator = c("column", "row", "cell"), fmt_fn = NULL, stat_label = everything() ~ list( - p = "%", p.std.error = "SE(%)", deff = "Design Effect", - "n" = "Unweighted n", "N" = "Unweighted N", "p" = "Unweighted %" + p = "%", + p.std.error = "SE(%)", + deff = "Design Effect", + "n_unweighted" = "Unweighted n", + "N_unweighted" = "Unweighted N", + "p_unweighted" = "Unweighted %" ), ...) { set_cli_abort_call() diff --git a/R/ard_missing.survey.design.R b/R/ard_missing.survey.design.R index 30323ac2..8cb4ad58 100644 --- a/R/ard_missing.survey.design.R +++ b/R/ard_missing.survey.design.R @@ -59,11 +59,11 @@ ard_missing.survey.design <- function(data, ) cards::fill_formula_selectors( data$variables[variables], - statistic = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["statistic"]] |> eval() + statistic = formals(asNamespace("cardx")[["ard_missing.survey.design"]])[["statistic"]] |> eval() ) cards::fill_formula_selectors( data$variables[variables], - stat_label = formals(asNamespace("cards")[["ard_missing.survey.design"]])[["stat_label"]] |> eval() + stat_label = formals(asNamespace("cardx")[["ard_missing.survey.design"]])[["stat_label"]] |> eval() ) stats_available <- c( @@ -104,7 +104,7 @@ ard_missing.survey.design <- function(data, ) ) |> dplyr::select(-cards::all_ard_variables("levels"), -"stat_label", -"fmt_fn") |> - dplyr::slice(1L, .by = "stat_name") + dplyr::slice(1L, .by = c(cards::all_ard_groups(), cards::all_ard_variables(), "stat_name")) # final processing of fmt_fn ------------------------------------------------- result <- result |> diff --git a/man/ard_dichotomous.survey.design.Rd b/man/ard_dichotomous.survey.design.Rd index 86247263..7dd2ce69 100644 --- a/man/ard_dichotomous.survey.design.Rd +++ b/man/ard_dichotomous.survey.design.Rd @@ -14,7 +14,8 @@ denominator = c("column", "row", "cell"), fmt_fn = NULL, stat_label = everything() ~ list(p = "\%", p.std.error = "SE(\%)", deff = - "Design Effect", n = "Unweighted n", N = "Unweighted N", p = "Unweighted \%"), + "Design Effect", n_unweighted = "Unweighted n", N_unweighted = "Unweighted N", + p_unweighted = "Unweighted \%"), ... ) } diff --git a/man/ard_survey_categorical_ci.Rd b/man/ard_survey_categorical_ci.Rd index 97375461..ba66bdbe 100644 --- a/man/ard_survey_categorical_ci.Rd +++ b/man/ard_survey_categorical_ci.Rd @@ -19,7 +19,7 @@ ard_survey_categorical_ci( a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the columns specified, diff --git a/man/ard_survey_continuous_ci.Rd b/man/ard_survey_continuous_ci.Rd index 330d8862..bf367f62 100644 --- a/man/ard_survey_continuous_ci.Rd +++ b/man/ard_survey_continuous_ci.Rd @@ -19,7 +19,7 @@ ard_survey_continuous_ci( a design object often created with \code{\link[survey:svydesign]{survey::svydesign()}}.} \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -columns to include in summaries. Default is \code{everything()}.} +columns to include in summaries.} \item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr results are calculated for \strong{all combinations} of the columns specified, diff --git a/tests/testthat/_snaps/ard_categorical.survey.design.md b/tests/testthat/_snaps/ard_categorical.survey.design.md new file mode 100644 index 00000000..c72bebd0 --- /dev/null +++ b/tests/testthat/_snaps/ard_categorical.survey.design.md @@ -0,0 +1,30 @@ +# ard_categorical.survey.design() returns an error when variables have all NAs + + Code + ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived, + denominator = "row") + Condition + Error in `ard_categorical()`: + ! Column "Class" is all missing and cannot be tabulated. + i Only columns of class and can be tabulated when all values are missing. + +--- + + Code + ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived, + denominator = "column") + Condition + Error in `ard_categorical()`: + ! Column "Class" is all missing and cannot be tabulated. + i Only columns of class and can be tabulated when all values are missing. + +--- + + Code + ard_categorical(svy_titanic, variables = c(Class, Age), by = Survived, + denominator = "cell") + Condition + Error in `ard_categorical()`: + ! Column "Class" is all missing and cannot be tabulated. + i Only columns of class and can be tabulated when all values are missing. + diff --git a/tests/testthat/_snaps/ard_continuous.survey.design.md b/tests/testthat/_snaps/ard_continuous.survey.design.md index dfa6ed90..545a0ccf 100644 --- a/tests/testthat/_snaps/ard_continuous.survey.design.md +++ b/tests/testthat/_snaps/ard_continuous.survey.design.md @@ -22,33 +22,34 @@ # ard_continuous.survey.design(fmt_fn) Code - ard_continuous(dclus1, variables = api00, statistic = ~ c("mean", "median", - "min", "max"), fmt_fn = list(api00 = list(mean = 2, median = "xx.xx", min = as.character))) - Message - {cards} data frame: 4 x 8 + as.data.frame(dplyr::select(ard_continuous(dclus1, variables = c(api99, api00), + statistic = ~ c("mean", "median", "min", "max"), fmt_fn = list(api00 = list( + mean = 2, median = "xx.xx", min = as.character))), -warning, -error)) Output - variable context stat_name stat_label stat fmt_fn - 1 api00 continuo… mean Mean 644.169 2 - 2 api00 continuo… median Median 652 xx.xx - 3 api00 continuo… min Minimum 411 - 4 api00 continuo… max Maximum 905 1 - Message - i 2 more variables: warning, error + variable context stat_name stat_label stat fmt_fn + 1 api99 continuous mean Mean 606.9781 1 + 2 api99 continuous median Median 615 1 + 3 api99 continuous min Minimum 365 1 + 4 api99 continuous max Maximum 890 1 + 5 api00 continuous mean Mean 644.1694 2 + 6 api00 continuous median Median 652 xx.xx + 7 api00 continuous min Minimum 411 .Primitive("as.character") + 8 api00 continuous max Maximum 905 1 # ard_continuous.survey.design(stat_label) Code - ard_continuous(dclus1, variables = api00, statistic = ~ c("mean", "median", - "min", "max"), stat_label = list(api00 = list(mean = "MeAn", median = "MEDian", - min = "MINimum"))) - Message - {cards} data frame: 4 x 8 + as.data.frame(ard_continuous(dclus1, variables = c(api00, api99), statistic = ~ + c("mean", "median", "min", "max"), stat_label = list(api00 = list(mean = "MeAn", + median = "MEDian", min = "MINimum")))) Output - variable context stat_name stat_label stat fmt_fn - 1 api00 continuo… mean MeAn 644.169 1 - 2 api00 continuo… median MEDian 652 1 - 3 api00 continuo… min MINimum 411 1 - 4 api00 continuo… max Maximum 905 1 - Message - i 2 more variables: warning, error + variable context stat_name stat_label stat fmt_fn warning error + 1 api00 continuous mean MeAn 644.1694 1 NULL NULL + 2 api00 continuous median MEDian 652 1 NULL NULL + 3 api00 continuous min MINimum 411 1 NULL NULL + 4 api00 continuous max Maximum 905 1 NULL NULL + 5 api99 continuous mean Mean 606.9781 1 NULL NULL + 6 api99 continuous median Median 615 1 NULL NULL + 7 api99 continuous min Minimum 365 1 NULL NULL + 8 api99 continuous max Maximum 890 1 NULL NULL diff --git a/tests/testthat/_snaps/ard_dichotomous.survey.design.md b/tests/testthat/_snaps/ard_dichotomous.survey.design.md new file mode 100644 index 00000000..547ff4be --- /dev/null +++ b/tests/testthat/_snaps/ard_dichotomous.survey.design.md @@ -0,0 +1,30 @@ +# ard_dichotomous.survey.design() returns an error with erroneous input + + Code + ard_dichotomous(svy_dicho, by = vs, variables = c(cyl, am), value = list(cyl = 4), + denominator = "row") + Condition + Error in `ard_dichotomous()`: + ! Error in argument `value` for variable "cyl". + i A value of 4 was passed, but must be one of TRUE and FALSE. + +--- + + Code + ard_dichotomous(svy_dicho, by = cyl, variables = c(vs, am), value = list(vs = 4), + denominator = "row") + Condition + Error in `ard_dichotomous()`: + ! Error in argument `value` for variable "vs". + i A value of 4 was passed, but must be one of 0 and 1. + +--- + + Code + ard_dichotomous(svy_dicho, by = cyl, variables = c(vs, disp), value = list( + disp = "turn"), denominator = "row") + Condition + Error in `ard_dichotomous()`: + ! Error in argument `value` for variable "disp". + i A value of "turn" was passed, but must be one of 71.1, 75.7, 78.7, 79, 95.1, 108, 120.1, 120.3, 121, 140.8, 145, 146.7, 160, 167.6, 225, 258, 275.8, 301, ..., 460, and 472. + diff --git a/tests/testthat/test-ard_categorical.survey.design.R b/tests/testthat/test-ard_categorical.survey.design.R index 86e6e7d8..7ba6333e 100644 --- a/tests/testthat/test-ard_categorical.survey.design.R +++ b/tests/testthat/test-ard_categorical.survey.design.R @@ -1,22 +1,1174 @@ -# Items to test -# - First, everything needs to be tested independently for -# denominator='column'|'row'|'cell' AND by whether there is a by variable -# This is because these 6 scenarios are calculated entirely separately -# - What happens with a variable that is all NA? How does that behavior -# compare to `ard_categorical()` for data frames -# The function _should_ work if the underlying type is factor or logical +skip_if_not(is_pkg_installed("survey", reference_pkg = "cardx")) + +# Test survey.design working (2x3) +test_that("ard_categorical.survey.design() works", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + + # setup for value checks + df_titanic <- as.data.frame(Titanic) |> tidyr::uncount(weights = Freq) + + # denom = row, with by + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + # denom = column, with by + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_col, method = FALSE)) + + # denom = cell, with by + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # check the calculated stats are correct + + # section 1: by variable, row denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "n") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "N") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "p") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Age"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedYes", "se.SurvivedNo")] |> unlist(), + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Class"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedYes", "se.SurvivedNo")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "row") |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) + + # section 2: by variable, column denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "n") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "N") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "p") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + unname(cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "p.std.error")) |> unlist() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Age"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.AgeAdult", "se.AgeChild")] |> unlist(), + survey::svyby( + formula = reformulate2("Class"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.Class1st", "se.Class2nd", "se.Class3rd", "se.ClassCrew")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) + + # section 3: by variable, cell denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "n") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "N") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "p") |> unlist(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Class")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist(), + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Age")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p_unweighted") |> unlist() |> unname(), + ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) + + + # denom = row, without by + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + # denom = column, without by + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # denom = cell, without by + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # check the calculated stats are correct + + # section 4: without by variable, row denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + c(dplyr::tibble( + variable_level = unique(svy_titanic$variables[["Class"]]) |> sort() |> as.character(), + p = 1, + p.std.error = 0, + deff = NaN + ) |> dplyr::select("p.std.error") |> unlist() |> unname(), dplyr::tibble( + variable_level = unique(svy_titanic$variables[["Age"]]) |> sort() |> as.character(), + p = 1, + p.std.error = 0, + deff = NaN + ) |> dplyr::select("p.std.error") |> unlist() |> unname()) + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p_unweighted") |> unlist() |> unname(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) + + # section 5: without by variable, column denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p.std.error") |> unlist() |> unname(), + c( + survey::svymean(reformulate2("Class"), design = svy_titanic, na.rm = TRUE, deff = "Design Effect") |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::select("SE") |> unlist() |> unname(), + survey::svymean(reformulate2("Age"), design = svy_titanic, na.rm = TRUE, deff = "Design Effect") |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::select("SE") |> unlist() |> unname() + ) + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) + + # section 6: without by variable, cell denominator + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p.std.error") |> unlist() |> unname(), + c( + survey::svymean(reformulate2("Class"), design = svy_titanic, na.rm = TRUE, deff = "Design Effect") |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::select("SE") |> unlist() |> unname(), + survey::svymean(reformulate2("Age"), design = svy_titanic, na.rm = TRUE, deff = "Design Effect") |> + dplyr::as_tibble(rownames = "var_level") |> + dplyr::select("SE") |> unlist() |> unname() + ) + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p_unweighted") |> unlist() |> unname(), + cards::ard_categorical(as.data.frame(Titanic), variables = c(Class, Age), by = NULL, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() + ) +}) + +test_that("ard_categorical.survey.design() returns an error when variables have all NAs", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + + # row denom + svy_titanic$variables$Class <- NA + + expect_snapshot( + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + error = TRUE + ) + + # column denom + expect_snapshot( + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + error = TRUE + ) + + # cell denom + expect_snapshot( + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + error = TRUE + ) +}) + # - Do we get results for unobserved factor levels in the `by` and `variable` variables? -# - Do we get results for unobserved logical levels in the `by` and `variable` -# variables, e.g. if there are only TRUE, we should have FALSE rows too? -# - It turns out variables (both by and variables) that only have one level are problematic in some ways. -# I've coded around these issues, but we need thorough testing when either by or a variable has a single level. -# We need tests for when these variables are factor, logical, and other to ensure every case is handled properly. -# - A trick to test survey data is to take a normal data frame, convert it to survey using equal weights. -# Then all the results should equal the unweighted summaries and we can perform expect_equal() checks against the data.frame S3 methods. -# dplyr::tibble(y = rep(FALSE, 15), x = rep(TRUE, 15)) |> -# survey::svydesign(ids = ~1, data = _, weights = ~1) |> -# ard_categorical(by = y) - -test_that("multiplication works", { - expect_equal(2 * 2, 4) +test_that("ard_categorical.survey.design() works for unobserved factor levels", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Survived <- fct_expand(svy_titanic$variables$Survived, "Unknown") + + # data setup for equality checks + df_titanic <- as.data.frame(Titanic) |> tidyr::uncount(weights = Freq) + df_titanic$Survived <- fct_expand(df_titanic$Survived, "Unknown") + + # for unweighted <- + df_uw <- as.data.frame(Titanic) + df_uw$Survived <- fct_expand(df_uw$Survived, "Unknown") + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_col, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n") |> unlist() |> sort(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Age"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedYes", "se.SurvivedNo", "se.SurvivedUnknown")] |> unlist(), + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Class"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedYes", "se.SurvivedNo", "se.SurvivedUnknown")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + unname(cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "p.std.error")) |> unlist() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Age"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.AgeAdult", "se.AgeChild")] |> unlist(), + survey::svyby( + formula = reformulate2("Class"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.Class1st", "se.Class2nd", "se.Class3rd", "se.ClassCrew")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Class")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist(), + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Age")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + # variables have unobserved levels, no by variable + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Class <- fct_expand(svy_titanic$variables$Survived, "Peasant") + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # variable AND by have unobserved levels + svy_titanic$variables$Survived <- fct_expand(svy_titanic$variables$Survived, "Unknown") + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) +}) + +# - Do we get results for unobserved logical levels in the `by` and `variable` variables? +test_that("ard_categorical.survey.design() works for unobserved logical levels", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Survived <- rep(TRUE, length(svy_titanic$variables$Survived)) + + df_titanic <- as.data.frame(Titanic) |> tidyr::uncount(weights = Freq) + df_titanic$Survived <- rep(TRUE, length(df_titanic$Survived)) + + # for unweighted + df_uw <- as.data.frame(Titanic) + df_uw$Survived <- rep(TRUE, length(df_uw$Survived)) + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_col, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n") |> unlist() |> sort(), + ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Age"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedFALSE", "se.SurvivedTRUE")] |> unlist(), + survey::svyby( + formula = reformulate2("Survived"), + by = reformulate2("Class"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.SurvivedFALSE", "se.SurvivedTRUE")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_row, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + unname(cards::get_ard_statistics(ard_svy_cat_col |> dplyr::arrange_all(), stat_name %in% "p.std.error")) |> unlist() |> sort(), + unname(c( + survey::svyby( + formula = reformulate2("Age"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.AgeAdult", "se.AgeChild")] |> unlist(), + survey::svyby( + formula = reformulate2("Class"), + by = reformulate2("Survived"), + design = svy_titanic, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c("se.Class1st", "se.Class2nd", "se.Class3rd", "se.ClassCrew")] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_col, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N") |> unlist(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p") |> unlist() |> sort(), + cards::ard_categorical(df_titanic, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell |> dplyr::arrange_all(), stat_name %in% "p.std.error" & group1_level == TRUE) |> unlist() |> unname() |> sort(), + unname(c( + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Class")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist(), + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("Survived")), !!sym(bt("Age")))), + design = svy_titanic, + na.rm = TRUE, + deff = "Design Effect" + ))[, "SE"] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "n_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "N_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_cat_cell, stat_name %in% "p_unweighted") |> unlist() |> unname() |> sort(), + cards::ard_categorical(df_uw, variables = c(Class, Age), by = Survived, denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() |> unname() |> sort() + ) + + # variables have unobserved levels, no by variable + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Age <- rep(TRUE, length(svy_titanic$variables$Age)) + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # variable AND by have unobserved levels + svy_titanic$variables$Survived <- rep(TRUE, length(svy_titanic$variables$Survived)) + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) +}) + +# - Does the work around apply for variables with only 1 level +test_that("ard_categorical.survey.design() works with variables with only 1 level", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Survived <- rep("Yes", length(svy_titanic$variables$Survived)) + + # by variable only has 1 level + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_col, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # variables have only 1 level, no by variable + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + svy_titanic$variables$Age <- as.factor(rep("Child", length(svy_titanic$variables$Age))) + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + # variable AND by have only 1 level + svy_titanic$variables$Survived <- as.factor(rep("Yes", length(svy_titanic$variables$Survived))) + + expect_error( + ard_svy_cat_row <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_row, method = FALSE)) + + expect_error( + ard_svy_cat_col <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) + + expect_error( + ard_svy_cat_cell <- + ard_categorical( + svy_titanic, + variables = c(Class, Age), + by = Survived, + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_cat_cell, method = FALSE)) }) diff --git a/tests/testthat/test-ard_continuous.survey.design.R b/tests/testthat/test-ard_continuous.survey.design.R index 5b999d91..e5a6e1a1 100644 --- a/tests/testthat/test-ard_continuous.survey.design.R +++ b/tests/testthat/test-ard_continuous.survey.design.R @@ -322,10 +322,12 @@ test_that("ard_continuous.survey.design(fmt_fn)", { expect_snapshot( ard_continuous( dclus1, - variables = api00, + variables = c(api99, api00), statistic = ~ c("mean", "median", "min", "max"), fmt_fn = list(api00 = list(mean = 2, median = "xx.xx", min = as.character)) - ) + ) |> + dplyr::select(-warning, -error) |> + as.data.frame() ) }) @@ -336,10 +338,11 @@ test_that("ard_continuous.survey.design(stat_label)", { expect_snapshot( ard_continuous( dclus1, - variables = api00, + variables = c(api00, api99), statistic = ~ c("mean", "median", "min", "max"), stat_label = list(api00 = list(mean = "MeAn", median = "MEDian", min = "MINimum")) - ) + ) |> + as.data.frame() ) }) diff --git a/tests/testthat/test-ard_dichotomous.survey.design.R b/tests/testthat/test-ard_dichotomous.survey.design.R new file mode 100644 index 00000000..1fdc49eb --- /dev/null +++ b/tests/testthat/test-ard_dichotomous.survey.design.R @@ -0,0 +1,450 @@ +skip_if_not(is_pkg_installed("survey", reference_pkg = "cardx")) + +# Test survey.design works +test_that("ard_dichotomous.survey.design() works", { + svy_dicho <- survey::svydesign(ids = ~1, data = mtcars, weights = ~1) + # convert variables to factor + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across(c("cyl", "am", "vs"), as.factor)) + + # row denom with by var + expect_error( + ard_dichotomous_row <- + ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_row, method = FALSE)) + + # col denom with by var + expect_error( + ard_dichotomous_col <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_col, method = FALSE)) + + # cell denom with by var + expect_error( + ard_dichotomous_cell <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_cell, method = FALSE)) + + + # test individual stats + + # section 1: by variable, row denominator + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "n") |> unlist(), + ard_dichotomous(mtcars, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "row" + ) |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "N") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "p") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + c( + survey::svyby( + formula = reformulate2("vs"), + by = reformulate2("am"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[2, c(4, 5)] |> unlist() |> unname(), + survey::svyby( + formula = reformulate2("vs"), + by = reformulate2("cyl"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[1, c(4, 5)] |> unlist() |> unname() + ) |> + sort() + ) + + + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "deff") |> unlist() |> unname() |> sort(), + c( + survey::svyby( + formula = reformulate2("vs"), + by = reformulate2("am"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[2, c(6, 7)] |> unlist() |> unname(), + survey::svyby( + formula = reformulate2("vs"), + by = reformulate2("cyl"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[1, c(6, 7)] |> unlist() |> unname() + ) |> + sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_row, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "row") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + # section 2: by variable, column denominator + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col, stat_name %in% "n") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col, stat_name %in% "N") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col, stat_name %in% "p") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + c( + survey::svyby( + formula = reformulate2("am"), + by = reformulate2("vs"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c(4)] |> unlist() |> unname(), + survey::svyby( + formula = reformulate2("cyl"), + by = reformulate2("vs"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[, c(5)] |> unlist() |> unname() + ) |> + sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col |> dplyr::arrange_all(), stat_name %in% "deff") |> unlist() |> unname() |> sort(), + c( + survey::svyby( + formula = reformulate2("am"), + by = reformulate2("vs"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[1, c(6, 7)] |> unlist() |> unname(), + survey::svyby( + formula = reformulate2("cyl"), + by = reformulate2("vs"), + design = svy_dicho, + FUN = survey::svymean, + na.rm = TRUE, + deff = "Design Effect" + )[2, c(8, 9)] |> unlist() |> unname() + ) |> + sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col, stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + expect_equal( + cards::get_ard_statistics(ard_dichotomous_col, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "column") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + # section 3: by variable, cell denominator + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell, stat_name %in% "n") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell, stat_name %in% "N") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell, stat_name %in% "p") |> unlist(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "p") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell |> dplyr::arrange_all(), stat_name %in% "p.std.error") |> unlist() |> unname() |> sort(), + unname(c( + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("vs")), !!sym(bt("cyl")))), + design = svy_dicho, + na.rm = TRUE, + deff = "Design Effect" + ))[1:2, "SE"] |> unlist(), + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("vs")), !!sym(bt("am")))), + design = svy_dicho, + na.rm = TRUE, + deff = "Design Effect" + ))[2:3, "SE"] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell |> dplyr::arrange_all(), stat_name %in% "deff") |> unlist() |> unname() |> sort(), + unname(c( + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("vs")), !!sym(bt("cyl")))), + design = svy_dicho, + na.rm = TRUE, + deff = "Design Effect" + ))[1:2, "deff"] |> unlist(), + as.data.frame(survey::svymean( + x = inject(~ interaction(!!sym(bt("vs")), !!sym(bt("am")))), + design = svy_dicho, + na.rm = TRUE, + deff = "Design Effect" + ))[2:3, "deff"] |> unlist() + )) |> sort() + ) + + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell, stat_name %in% "n_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "n") |> unlist() |> unname() + ) + expect_equal( + cards::get_ard_statistics(ard_dichotomous_cell, stat_name %in% "N_unweighted") |> unlist() |> unname(), + ard_dichotomous(mtcars, by = vs, variables = c(cyl, am), value = list(cyl = 4), denominator = "cell") |> + cards::get_ard_statistics(stat_name %in% "N") |> unlist() |> unname() + ) + + # row denom without by var + expect_error( + ard_dichotomous_row <- ard_dichotomous(svy_dicho, + by = NULL, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_row, method = FALSE)) + + # col denom without by var + expect_error( + ard_dichotomous_col <- ard_dichotomous(svy_dicho, + by = NULL, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_col, method = FALSE)) + + # col denom without by var + expect_error( + ard_dichotomous_cell <- ard_dichotomous(svy_dicho, + by = NULL, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_cell, method = FALSE)) +}) + +test_that("ard_dichotomous.survey.design() works with various input types", { + svy_dicho <- survey::svydesign(ids = ~1, data = mtcars, weights = ~1) + + # logical variables + # convert variables to logical + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across(c("cyl", "am", "vs"), as.logical)) + + + # row denom with by var + expect_error( + ard_dichotomous_row <- + ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = TRUE), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_row, method = FALSE)) + + # col denom with by var + expect_error( + ard_dichotomous_col <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = TRUE), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_col, method = FALSE)) + + # cell denom with by var + expect_error( + ard_dichotomous_cell <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = TRUE), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_cell, method = FALSE)) + + + # variables that are neither logical or factor + svy_dicho <- survey::svydesign(ids = ~1, data = mtcars, weights = ~1) + + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across(c("cyl", "am"), as.numeric)) |> + dplyr::mutate(across("vs", as.character)) + + # row denom with by var + expect_error( + ard_dichotomous_row <- + ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "row" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_row, method = FALSE)) + + # col denom with by var + expect_error( + ard_dichotomous_col <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "column" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_col, method = FALSE)) + + # cell denom with by var + expect_error( + ard_dichotomous_cell <- ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "cell" + ), + NA + ) + expect_invisible(cards::check_ard_structure(ard_dichotomous_cell, method = FALSE)) +}) + + +test_that("ard_dichotomous.survey.design() returns an error with erroneous input", { + # value passed in is not logical should return an error + svy_dicho <- survey::svydesign(ids = ~1, data = mtcars, weights = ~1) + + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across(c("cyl", "am"), as.logical)) + + expect_snapshot( + ard_dichotomous(svy_dicho, + by = vs, + variables = c(cyl, am), + value = list(cyl = 4), + denominator = "row" + ), + error = TRUE + ) + + # supplied factor value is not a level + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across("vs", as.factor)) + + svy_dicho$variables$vs + expect_snapshot( + ard_dichotomous(svy_dicho, + by = cyl, + variables = c(vs, am), + value = list(vs = 4), + denominator = "row" + ), + error = TRUE + ) + + svy_dicho$variables <- svy_dicho$variables |> + dplyr::mutate(across("disp", as.numeric)) + + expect_snapshot( + ard_dichotomous(svy_dicho, + by = cyl, + variables = c(vs, disp), + value = list(disp = "turn"), + denominator = "row" + ), + error = TRUE + ) +}) diff --git a/tests/testthat/test-ard_missing.survey.design.R b/tests/testthat/test-ard_missing.survey.design.R new file mode 100644 index 00000000..74b15d5e --- /dev/null +++ b/tests/testthat/test-ard_missing.survey.design.R @@ -0,0 +1,105 @@ +skip_if_not(is_pkg_installed("survey", reference_pkg = "cardx")) + +# Test survey.design working (2x3) +test_that("ard_missing.survey.design() works", { + data(api, package = "survey") + svy_titanic <- survey::svydesign(~1, data = as.data.frame(Titanic), weights = ~Freq) + df_titanic <- as.data.frame(Titanic) |> tidyr::uncount(weights = Freq) + + + # without by + expect_error( + ard_svy_missing <- ard_missing(svy_titanic, variables = c(Class, Age), by = NULL), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_missing, method = FALSE)) + + # without by, single var + expect_error( + ard_svy_missing <- ard_missing(svy_titanic, variables = Class, by = NULL), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_missing, method = FALSE)) + + # with by, single var + expect_error( + ard_svy_missing <- ard_missing(svy_titanic, variables = Class, by = Survived), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_missing, method = FALSE)) + + # with by + expect_error( + ard_svy_missing <- ard_missing(svy_titanic, variables = c(Class, Age), by = Survived), + NA + ) + expect_invisible(cards::check_ard_structure(ard_svy_missing, method = FALSE)) + + + # check the calculated stats are correct + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_obs") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_obs") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_nonmiss") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_nonmiss") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_nonmiss") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_nonmiss") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "p_nonmiss") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p_nonmiss") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_miss") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_miss") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "p_miss") |> unlist(), + ard_missing(df_titanic, variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p_miss") |> unlist() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_miss_unweighted") |> unlist() |> unname(), + ard_missing(as.data.frame(Titanic), variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_miss") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_obs_unweighted") |> unlist() |> unname(), + ard_missing(as.data.frame(Titanic), variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_obs") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "p_miss_unweighted") |> unlist() |> unname(), + ard_missing(as.data.frame(Titanic), variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p_miss") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "N_nonmiss_unweighted") |> unlist() |> unname(), + ard_missing(as.data.frame(Titanic), variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "N_nonmiss") |> unlist() |> unname() + ) + + expect_equal( + cards::get_ard_statistics(ard_svy_missing |> dplyr::arrange_all(), stat_name %in% "p_nonmiss_unweighted") |> unlist() |> unname(), + ard_missing(as.data.frame(Titanic), variables = c(Class, Age), by = Survived) |> dplyr::arrange_all() |> + cards::get_ard_statistics(stat_name %in% "p_nonmiss") |> unlist() |> unname() + ) +})