From 1494900a705a5f29ac39cf53028ea85c0e8ec917 Mon Sep 17 00:00:00 2001 From: Mark Seeto <39866968+markseeto@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:05:35 +1000 Subject: [PATCH] Add total.label argument for groupingsets, cube, rollup (#5973) * Add 'label' argument Add 'label' argument to the groupingsets.data.table(), cube.data.table(), and rollup.data.table() functions. * Add tests for groupingsets/cube/rollup 'label' argument * Add information for 'label' argument Add to the Usage, Arguments, Details, and Examples sections. * Add item for groupingsets/cube/rollup 'label' argument * Make changes following linter warnings * avoid stop(paste0), use brackify() * update test for brackify() * lowercase 'r' in code gate hint * style on long if condition * Use .shallow() over a full copy * save names(label) for reuse; more .shallow() usage * simplify with mapply * Build info with gettextf() for i18n * short names * More restricted scoping, building message with gettextf * consistency: name 'info' --------- Co-authored-by: Michael Chirico Co-authored-by: Michael Chirico --- NEWS.md | 36 ++ R/groupingsets.R | 71 ++- inst/tests/tests.Rraw | 1299 +++++++++++++++++++++++++++++++++++++++++ man/groupingsets.Rd | 18 +- 4 files changed, 1416 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index d418445f9..99e4c8598 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,42 @@ rowwiseDT( 2. Limited support for subsetting or aggregating columns of type `expression`, [#5596](https://github.com/Rdatatable/data.table/issues/5596). Thanks to @tsp for the report, and @ben-schwen for the fix. +3. `groupingsets.data.table()`, `cube.data.table()`, and `rollup.data.table()` gain a `label` argument, which allows the user to specify a label for each grouping variable, to be included in the grouping variable column in the output in rows where the variable has been aggregated, [#5351](https://github.com/Rdatatable/data.table/issues/5351). 
Thanks to @markseeto for the request, @jangorecki and @markseeto for specifying the desired behaviour, and @markseeto for implementing. + + ```r + DT = data.table(V1 = rep(c("a1", "a2"), each = 5), + V2 = rep(rep(c("b1", "b2"), c(3, 2)), 2), + V3 = rep(c("c1", "c2"), c(3, 7)), + V4 = rep(1:2, c(6, 4)), + V5 = rep(1:2, c(9, 1)), + V6 = rep(c(1.1, 1.2), c(2, 8))) + + # Call groupingsets() and specify a label for V1, a different label for the other character grouping + # variables, a label for the integer grouping variables, and a label for the numeric grouping variable. + + groupingsets(DT, .N, by = c("V1", "V2", "V3", "V4", "V5", "V6"), + sets = list(c("V1", "V2", "V3"), c("V1", "V4"), c("V4", "V6"), "V2", "V5", character()), + label = list(V1 = "All values", character = "Total", integer = 999L, numeric = NaN)) + + # V1 V2 V3 V4 V5 V6 N + # <char> <char> <char> <int> <int> <num> <int> + # 1: a1 b1 c1 999 999 NaN 3 + # 2: a1 b2 c2 999 999 NaN 2 + # 3: a2 b1 c2 999 999 NaN 3 + # 4: a2 b2 c2 999 999 NaN 2 + # 5: a1 Total Total 1 999 NaN 5 + # 6: a2 Total Total 1 999 NaN 1 + # 7: a2 Total Total 2 999 NaN 4 + # 8: All values Total Total 1 999 1.1 2 + # 9: All values Total Total 1 999 1.2 4 + # 10: All values Total Total 2 999 1.2 4 + # 11: All values b1 Total 999 999 NaN 6 + # 12: All values b2 Total 999 999 NaN 4 + # 13: All values Total Total 999 1 NaN 9 + # 14: All values Total Total 999 2 NaN 1 + # 15: All values Total Total 999 999 NaN 10 + ``` + ## BUG FIXES 1. Using `print.data.table()` with character truncation using `datatable.prettyprint.char` no longer errors with `NA` entries, [#6441](https://github.com/Rdatatable/data.table/issues/6441). Thanks to @r2evans for the bug report, and @joshhwuu for the fix. diff --git a/R/groupingsets.R b/R/groupingsets.R index 96940497c..2997e34b5 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -1,7 +1,7 @@ rollup = function(x, ...) { UseMethod("rollup") } -rollup.data.table = function(x, j, by, .SDcols, id = FALSE, ...)
{ +rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) stopf("Argument 'x' must be a data.table object") @@ -13,13 +13,13 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, ...) { sets = lapply(length(by):0L, function(i) by[0L:i]) # redirect to workhorse function jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label) } cube = function(x, ...) { UseMethod("cube") } -cube.data.table = function(x, j, by, .SDcols, id = FALSE, ...) { +cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) stopf("Argument 'x' must be a data.table object") @@ -35,13 +35,13 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, ...) { sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) # redirect to workhorse function jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label) } groupingsets = function(x, ...) { UseMethod("groupingsets") } -groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) { +groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) stopf("Argument 'x' must be a data.table object") @@ -57,6 +57,14 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) 
stopf("Argument 'sets' must be a list of character vectors.") if (!is.logical(id)) stopf("Argument 'id' must be a logical scalar.") + if (!(is.null(label) || + (is.atomic(label) && length(label) == 1L) || + (is.list(label) && all(vapply_1b(label, is.atomic)) && all(lengths(label) == 1L) && !is.null(names(label))))) + stopf("Argument 'label', if not NULL, must be a scalar or a named list of scalars.") + if (is.list(label) && !is.null(names(label)) && ("" %chin% names(label) || anyNA(names(label)))) + stopf("When argument 'label' is a list, all of the list elements must be named.") + if (is.list(label) && anyDuplicated(names(label))) + stopf("When argument 'label' is a list, the element names must not contain duplicates.") # logic constraints validation if (!all((sets.all.by <- unique(unlist(sets))) %chin% by)) stopf("All columns used in 'sets' argument must be in 'by' too. Columns used in 'sets' but not present in 'by': %s", brackify(setdiff(sets.all.by, by))) @@ -66,6 +74,36 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) stopf("Character vectors in 'sets' list must not have duplicated column names within a single grouping set.") if (length(sets) > 1L && (idx<-anyDuplicated(lapply(sets, sort)))) warningf("'sets' contains a duplicate (i.e., equivalent up to sorting) element at index %d; as such, there will be duplicate rows in the output -- note that grouping by A,B and B,A will produce the same aggregations. 
Use `sets=unique(lapply(sets, sort))` to eliminate duplicates.", idx) + if (is.list(label)) { + other.allowed.names = c("character", "integer", "numeric", "factor", "Date", "IDate") + allowed.label.list.names = c(by, vapply_1c(.shallow(x, by), function(u) class(u)[1]), + other.allowed.names) + label.names = names(label) + if (!all(label.names %in% allowed.label.list.names)) + stopf("When argument 'label' is a list, all element names must be (1) in 'by', or (2) the first element of the class in the data.table 'x' of a variable in 'by', or (3) one of %s. Element names not satisfying this condition: %s", + brackify(other.allowed.names), brackify(setdiff(label.names, allowed.label.list.names))) + label.classes = lapply(label, class) + label.names.in.by = intersect(label.names, by) + label.names.not.in.by = setdiff(label.names, label.names.in.by) + label.names.in.by.classes = label.classes[label.names.in.by] + x.label.names.in.by.classes = lapply(.shallow(x, label.names.in.by), class) + label.names.not.in.by.classes1 = vapply_1c(label.classes[label.names.not.in.by], function(u) u[1]) + if (!all(idx <- mapply(identical, label.names.in.by.classes, x.label.names.in.by.classes))) { + info = gettextf( + "%s (label: %s; data: %s)", + label.names.in.by[!idx], + vapply_1c(label.names.in.by.classes[!idx], toString), + vapply_1c(x.label.names.in.by.classes[!idx], toString)) + stopf("When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: %s", brackify(info)) + } + if (!all(idx <- label.names.not.in.by == label.names.not.in.by.classes1)) { + info = gettextf( + "(label name: %s; label class[1]: %s)", + label.names.not.in.by[!idx], + label.names.not.in.by.classes1[!idx]) + stopf("When argument 'label' is a list, the name of each element of 'label' not in 'by' must match the first element of the class of the element value. 
Mismatches: %s", brackify(info)) + } + } # input arguments handling jj = if (!missing(jj)) jj else substitute(j) av = all.vars(jj, TRUE) @@ -85,6 +123,27 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) set(empty, j = "grouping", value = integer()) setcolorder(empty, c("grouping", by, setdiff(names(empty), c("grouping", by)))) } + # Define variables related to label + if (!is.null(label)) { + total.vars = intersect(by, unlist(lapply(sets, function(u) setdiff(by, u)))) + if (is.list(label)) { + by.vars.not.in.label = setdiff(by, names(label)) + by.vars.not.in.label.class1 = vapply_1c(x, function(u) class(u)[1L])[by.vars.not.in.label] + labels.by.vars.not.in.label = label[by.vars.not.in.label.class1[by.vars.not.in.label.class1 %in% label.names.not.in.by]] + names(labels.by.vars.not.in.label) <- by.vars.not.in.label[by.vars.not.in.label.class1 %in% label.names.not.in.by] + label.expanded = c(label[label.names.in.by], labels.by.vars.not.in.label) + label.expanded = label.expanded[intersect(by, names(label.expanded))] # reorder + } else { + by.vars.matching.scalar.class1 = by[vapply_1c(x, function(u) class(u)[1L])[by] == class(label)[1L]] + label.expanded = as.list(rep(label, length(by.vars.matching.scalar.class1))) + names(label.expanded) <- by.vars.matching.scalar.class1 + } + label.use = label.expanded[intersect(total.vars, names(label.expanded))] + if (any(idx <- vapply_1b(names(label.expanded), function(u) label.expanded[[u]] %in% x[[u]]))) { + info = gettextf("%s (label: %s)", names(label.expanded)[idx], vapply_1c(label.expanded[idx], as.character)) + warningf("For the following variables, the 'label' value was already in the data: %s", brackify(info)) + } + } # workaround for rbindlist fill=TRUE on integer64 #1459 int64.cols = vapply_1b(empty, inherits, "integer64") int64.cols = names(int64.cols)[int64.cols] @@ -105,6 +164,8 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) 
missing.int64.by.cols = setdiff(int64.by.cols, by.set) if (length(missing.int64.by.cols)) r[, (missing.int64.by.cols) := bit64::as.integer64(NA)] } + if (!is.null(label) && length(by.label.use.vars <- intersect(setdiff(by, by.set), names(label.use))) > 0L) + r[, (by.label.use.vars) := label.use[by.label.use.vars]] r } # actually processing everything here diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 50276c087..ff24b1f8d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19275,3 +19275,1302 @@ y = data.frame(id = "bbb") test(2293.1, x[!y, on='id'], x) class(y) = c("tbl_df", "tbl", "data.frame") test(2293.2, x[!y, on = "id"], x) + +# groupingsets/cube/rollup 'label' argument, #5351 +DT1 = data.table(VCharA = rep(c("C1", "C2", "C3"), each = 12), + VCharB = c(rep(c("C3", "C4"), c(6, 6)), + rep(c("C3", "C4"), c(8, 4)), + rep(c("C3", "C4"), c(4, 8))), + VFacA = factor(rep(c("F1", "F2", "F3"), c(14, 12, 10))), + VFacB = factor(rep(c("F3", "F4"), c(30, 6))), + VIntA = rep(1:2, c(24, 12)), + VIntB = rep(2:3, c(6, 30))) +DT1[, `:=`(VDate = as.Date(sub("F", "2024-01-0", VFacA)), + VIDate = as.IDate(sub("F", "2024-01-0", VFacB)), + VNumA = as.numeric(VIntA), + VNumB = as.numeric(VIntB), + integer = VCharB, + Y = 1:36)] +DT1[35:36, VCharA := NA] +DT1[36, VFacA := NA] +DT1[36, VDate := NA] +# Ans1: character, factor, integer grouping variables. 
+Ans1 = rbind(DT1[, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB")], + DT1[, .(VFacB = factor("Total"), VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VIntA")], + DT1[, .(VFacB = factor("Total"), VIntA = 999L, VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA")], + DT1[, .(VFacA = factor("Total"), VIntA = 999L, VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacB")], + DT1[, .(VCharB = "Total", VFacA = factor("Total"), VIntA = 999L, VIntB = 999L, + Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VFacB")], + DT1[, .(VCharA = "Total", VFacB = factor("Total"), VIntA = 999L, VIntB = 999L, + Count = .N, Y_Sum = sum(Y)), + by = c("VCharB", "VFacA")], + DT1[, .(VCharB = "Total", VFacA = factor("Total"), VFacB = factor("Total"), + VIntA = 999L, VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = "VCharA"], + DT1[, .(VCharA = "Total", VCharB = "Total", VFacA = factor("Total"), VFacB = factor("Total"), + VIntA = 999L, VIntB = 999L, Count = .N, Y_Sum = sum(Y))] + ) +# Ans2: character, factor, integer grouping variables; some variables have no 'total' rows. +Ans2 = rbind(DT1[0, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB")], + DT1[, .(VFacB = factor("Total"), VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VIntA")], + DT1[, .(VFacB = factor("Total"), VIntA = 999L, VIntB = 999L, Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA")]) +# Ans3: character, Date, IDate, numeric grouping variables. +Ans3 = Ans1[, .(VCharA, VCharB, + VDate = fifelse(VFacA == "Total", as.Date("3000-01-01"), as.Date(sub("F", "2024-01-0", VFacA))), + VIDate = fifelse(VFacB == "Total", as.IDate("3000-01-01"), as.IDate(sub("F", "2024-01-0", VFacB))), + VNumA = as.numeric(VIntA), + VNumB = as.numeric(VIntB), + Count, Y_Sum)] +# 01. label is an atomic vector with length = 0. 
+test(2294.01, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = character(0)), + error = "Argument 'label', if not NULL, must be a scalar or a named list of scalars.") +# 02. label is an atomic vector with length > 1. +test(2294.02, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = c("Total A", "Total B")), + error = "Argument 'label', if not NULL, must be a scalar or a named list of scalars.") +# 03. label is a list and one of the elements is a list. +test(2294.03, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(character = list(VCharA = "Total VCharA", VCharB = "Total VCharB"))), + error = "Argument 'label', if not NULL, must be a scalar or a named list of scalars.") +# 04. label is a list and one of the elements is an atomic vector with length > 1. 
+test(2294.04, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", VCharB = c("Total VCharB", "Total"))), + error = "Argument 'label', if not NULL, must be a scalar or a named list of scalars.") +# 05. label is a list with no elements named. +test(2294.05, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list("Total VCharA", "Total VCharB")), + error = "Argument 'label', if not NULL, must be a scalar or a named list of scalars.") +# 06. label is a list with some but not all elements named. +test(2294.06, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", "Total VCharB")), + error = "When argument 'label' is a list, all of the list elements must be named.") +# 07. label is a list with an NA name. 
+test(2294.07, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = structure(list("Total VCharA", "Total VCharB"), names = c("VCharA", NA))), + error = "When argument 'label' is a list, all of the list elements must be named.") +# 08. label is a list with duplicate names. +test(2294.08, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", VCharA = "Total VCharA", VCharB = "Total VCharB")), + error = "When argument 'label' is a list, the element names must not contain duplicates.") +# 09. label is a list with a name not in 'by', not first element of class in 'x' of a variable in 'by', and not one of the other allowed names. 
+test(2294.09, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", z = "Total", VNumA = 1)), + error = "When argument 'label' is a list, all element names must be (1) in 'by', or (2) the first element of the class in the data.table 'x' of a variable in 'by', or (3) one of [character, integer, numeric, factor, Date, IDate]. Element names not satisfying this condition: [z, VNumA]") +# 10. label is a list and an element with name in 'by' has class not matching class in 'x'. +test(2294.10, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", VCharB = 999L, + VFacA = "Total VFacA", VFacB = 1L, + VIntA = 100L, VIntB = 200)), + error = "When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: [VCharB (label: integer; data: character), VFacA (label: character; data: factor), VFacB (label: integer; data: factor), VIntB (label: numeric; data: integer)]") +# 11. label is a list and an element with name in 'by' has class not matching class in 'x' with classes that have length > 1. 
+test(2294.11, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VDate", "VIDate"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VDate", "VIDate"), + c("VCharA", "VCharB", "VFacA", "VDate"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", VCharB = "Total VCharB", + VFacA = factor("Total VFacA"), VFacB = factor("Total VFacB"), + VDate = as.IDate("3000-01-01"), VIDate = as.Date("3000-01-01"))), + error = "When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: [VDate (label: IDate, Date; data: Date), VIDate (label: Date; data: IDate, Date)]") +# 12. label is a list and an element with name in 'by' has class not matching class in 'x', and the variable name is also first element of the class of other variables in 'by'. +test(2294.12, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "integer", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "integer", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "integer", "VFacA", "VIntA"), + c("VCharA", "integer", "VFacA"), + c("VCharA", "integer", "VFacB"), + c("VCharA", "VFacB"), + c("integer", "VFacA"), + "VCharA", + character()), + label = list(integer = 999L)), + error = "When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: [integer (label: integer; data: character)]") +# 13. label is a list and an element with name not in 'by' has name not matching first element of class of the element. 
+test(2294.13, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(character = "Total", factor = "Total", integer = 999, + Date = as.IDate("3000-01-01"), IDate = as.Date("3000-01-01"))), + error = "When argument 'label' is a list, the name of each element of 'label' not in 'by' must match the first element of the class of the element value. Mismatches: [(label name: factor; label class[1]: character), (label name: integer; label class[1]: numeric), (label name: Date; label class[1]: IDate), (label name: IDate; label class[1]: Date)]") +# 14. label specified by variable for each variable. +test(2294.14, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total", VCharB = "Total", + VFacA = factor("Total"), VFacB = factor("Total"), + VIntA = 999L, VIntB = 999L)), + Ans1) +# 15. label specified by variable for each variable with different values for different variables. 
+test(2294.15, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", VCharB = "Total VCharB", + VFacA = factor("Total VFacA"), VFacB = factor("Total VFacB"), + VIntA = 901L, VIntB = 902L)), + Ans1[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA), + VCharB = fifelse(VCharB == "Total", "Total VCharB", VCharB), + VFacA = factor(VFacA, levels = levels(VFacA), + labels = sub("Total", "Total VFacA", levels(VFacA))), + VFacB = factor(VFacB, levels = levels(VFacB), + labels = sub("Total", "Total VFacB", levels(VFacB))), + VIntA = fifelse(VIntA == 999L, 901L, VIntA), + VIntB = fifelse(VIntB == 999L, 902L, VIntB), + Count, Y_Sum)]) +# 16. label specified by variable but not for all variables. +test(2294.16, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total", VFacA = factor("Total"))), + Ans1[, .(VCharA, + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VFacA, + VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB), + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), + VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB), + Count, Y_Sum)]) +# 17. label specified by variable with some label values being NA. 
+test(2294.17, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total", VCharB = NA_character_, + VFacA = factor("Total"), VFacB = factor(NA), + VIntA = NA_integer_, VIntB = 999L)), + Ans1[, .(VCharA, + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VFacA, + VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB), + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), VIntB, + Count, Y_Sum)]) +# 18. label specified by first element of class. +test(2294.18, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(character = "Total", factor = factor("Total"), integer = 999L)), + Ans1) +# 19. label specified by first element of class but not for all classes of variable in 'by'. 
+test(2294.19, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(factor = factor("Total"))), + Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA), + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VFacA, VFacB, + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), + VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB), + Count, Y_Sum)]) +# 20. label specified by first element of class but for none of the classes of variables in 'by'. +test(2294.20, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(Date = as.Date("3000-01-01"))), + Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA), + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA), + VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB), + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), + VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB), + Count, Y_Sum)]) +# 21. label specified by variable for some classes and by class for other classes. 
+test(2294.21, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total", VCharB = "Total", factor = factor("Total"), integer = 999L)), + Ans1) +# 22. label specified by variable for some classes and by class for other classes. +test(2294.22, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VFacA = factor("Total"), VFacB = factor("Total"), + VIntA = 999L, VIntB = 999L, character = "Total")), + Ans1) +# 23. label specified by variable for some variables having a particular class and by class for other variables having that class. 
+test(2294.23, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", character = "Total", + factor = factor("Total"), integer = 999L, + VFacB = factor("Total VFacB"), VIntA = 901L)), + Ans1[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA), + VCharB, VFacA, + VFacB = factor(VFacB, levels = levels(VFacB), + labels = sub("Total", "Total VFacB", levels(VFacB))), + VIntA = fifelse(VIntA == 999L, 901L, VIntA), + VIntB, Count, Y_Sum)]) +# 24. label specified by variable for some variables having a particular class and by class for other variables having that class, and specified for a class not in 'by'. +test(2294.24, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = list(VCharA = "Total VCharA", character = "Total", + factor = factor("Total"), integer = 999L, + VFacB = factor("Total VFacB"), VIntA = 901L, Date = as.Date("3000-01-01"))), + Ans1[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA), + VCharB, VFacA, + VFacB = factor(VFacB, levels = levels(VFacB), + labels = sub("Total", "Total VFacB", levels(VFacB))), + VIntA = fifelse(VIntA == 999L, 901L, VIntA), + VIntB, Count, Y_Sum)]) +# 25. label specified by variable and by class, with class specification not used because all matching variables are specified by variable. 
+test(2294.25,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(character = "Total", VCharA = "Total VCharA", VCharB = "Total VCharB",
+      VFacA = factor("Total VFacA"), VFacB = factor("Total VFacB"),
+      integer = 999L, VIntA = 901L, VIntB = 902L, factor = factor("Total"))),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA),
+    VCharB = fifelse(VCharB == "Total", "Total VCharB", VCharB),
+    VFacA = factor(VFacA, levels = levels(VFacA),
+      labels = sub("Total", "Total VFacA", levels(VFacA))),
+    VFacB = factor(VFacB, levels = levels(VFacB),
+      labels = sub("Total", "Total VFacB", levels(VFacB))),
+    VIntA = fifelse(VIntA == 999L, 901L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 902L, VIntB),
+    Count, Y_Sum)])
+# 26. label specified by variable for a variable whose name ("integer") is also the first element of the class of other variables in 'by'.
+test(2294.26,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "integer", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "integer", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "integer", "VFacA", "VIntA"),
+      c("VCharA", "integer", "VFacA"),
+      c("VCharA", "integer", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("integer", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "Total", integer = "Total", factor = factor("Total"))),
+  Ans1[, .(VCharA, integer = VCharB, VFacA, VFacB,
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)])
+# 27. label specified by variable and by class, with id=TRUE.
+# Get correct 'grouping' values by using groupingsets() without 'label'.
+grouping_correct = groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+  by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+  sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    c("VCharA", "VCharB", "VFacA", "VIntA"),
+    c("VCharA", "VCharB", "VFacA"),
+    c("VCharA", "VCharB", "VFacB"),
+    c("VCharA", "VFacB"),
+    c("VCharB", "VFacA"),
+    "VCharA",
+    character()),
+  id=TRUE)$grouping
+test(2294.27,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    id=TRUE,
+    label = list(VCharA = "Total", VCharB = "Total", factor = factor("Total"), integer = 999L)),
+  data.table(grouping = grouping_correct, Ans1))
+# 28. label specified by variable and by class, with id=TRUE and .SDcols used.
+test(2294.28,
+  groupingsets(DT1, lapply(.SD, sum), .SDcols = "Y",
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    id=TRUE,
+    label = list(VCharA = "Total", VCharB = "Total", factor = factor("Total"), integer = 999L)),
+  Ans1[, .(grouping = grouping_correct,
+    VCharA, VCharB, VFacA, VFacB, VIntA, VIntB, Y = Y_Sum)])
+# 29. label specified by variable with label value in the data for one variable.
+test(2294.29,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C2", VCharB = "Total",
+      VFacA = factor("Total"), VFacB = factor("Total"),
+      VIntA = 999L, VIntB = 999L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB, VFacA, VFacB, VIntA, VIntB, Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2)]")
+# 30. label specified by variable with label value in the data for more than one variable.
+test(2294.30,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C2", VCharB = "Total",
+      VFacA = factor("Total"), VFacB = factor("F3"),
+      VIntA = 1L, VIntB = 3L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB, VFacA,
+    VFacB = fifelse(VFacB == "Total", factor("F3", levels = levels(VFacB)), VFacB),
+    VIntA = fifelse(VIntA == 999L, 1L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 3L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2), VFacB (label: F3), VIntA (label: 1), VIntB (label: 3)]")
+# 31. label specified by first element of class with label value in the data.
+test(2294.31,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(character = "C2", factor = factor("Total"), integer = 999L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C2", VCharB),
+    VFacA, VFacB, VIntA, VIntB, Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2)]")
+# 32. label specified by first element of class with label value in the data (variables of every class are reported).
+test(2294.32,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(character = "C3", factor = factor("F4"), integer = 3L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VFacA = factor(VFacA, levels = levels(VFacA), labels = sub("Total", "F4", levels(VFacA))),
+    VFacB = fifelse(VFacB == "Total", factor("F4", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA = fifelse(VIntA == 999L, 3L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 3L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3), VFacB (label: F4), VIntB (label: 3)]")
+# 33. label specified by first element of class with label value in the data.
+test(2294.33,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(character = "C3", factor = factor("F3"), integer = 2L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VFacA = fifelse(VFacA == "Total", factor("F3", levels = setdiff(levels(VFacA), "Total")),
+      factor(VFacA, levels = setdiff(levels(VFacA), "Total"))),
+    VFacB = fifelse(VFacB == "Total", factor("F3", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA = fifelse(VIntA == 999L, 2L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 2L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3), VFacA (label: F3), VFacB (label: F3), VIntA (label: 2), VIntB (label: 2)]")
+# 34. label specified by variable and class with label value in the data.
+test(2294.34,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C3", character = "Total",
+      factor = factor("F3"), integer = 2L,
+      VFacA = factor("Total"), VIntA = 999L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA),
+    VCharB, VFacA,
+    VFacB = fifelse(VFacB == "Total", factor("F3", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA,
+    VIntB = fifelse(VIntB == 999L, 2L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VFacB (label: F3), VIntB (label: 2)]")
+# 35. label specified by variable and class with label value in the data.
+test(2294.35,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "Total", character = "C3",
+      factor = factor("Total"), integer = 999L,
+      VFacA = factor("F3"), VIntA = 2L)),
+  Ans1[, .(VCharA,
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VFacA = fifelse(VFacA == "Total", factor("F3", levels = setdiff(levels(VFacA), "Total")),
+      factor(VFacA, levels = setdiff(levels(VFacA), "Total"))),
+    VFacB,
+    VIntA = fifelse(VIntA == 999L, 2L, VIntA),
+    VIntB,
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VFacA (label: F3), VIntA (label: 2)]")
+# 36. label specified by variable and class with label value in the data.
+test(2294.36,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C2", character = "C3",
+      factor = factor("F4"), integer = 3L,
+      VFacA = factor("F3"), VIntA = 2L)),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VFacA = fifelse(VFacA == "Total", factor("F3", levels = setdiff(levels(VFacA), "Total")),
+      factor(VFacA, levels = setdiff(levels(VFacA), "Total"))),
+    VFacB = fifelse(VFacB == "Total", factor("F4", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA = fifelse(VIntA == 999L, 2L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 3L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2), VCharB (label: C3), VFacA (label: F3), VFacB (label: F4), VIntA (label: 2), VIntB (label: 3)]")
+# 37. Some variables have no 'total' rows, label specified by variable.
+test(2294.37,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA")),
+    label = list(VCharA = "Total", VCharB = "Total",
+      VFacA = factor("Total"), VFacB = factor("Total"),
+      VIntA = 999L, VIntB = 999L)),
+  Ans2)
+# 38. Some variables have no 'total' rows, label specified by first element of class.
+test(2294.38,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA")),
+    label = list(character = "Total", factor = factor("Total"), integer = 999L)),
+  Ans2)
+# 39. Some variables have no 'total' rows, label specified by variable and by first element of class.
+test(2294.39,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA")),
+    label = list(VCharA = "Total VCharA", character = "Total",
+      factor = factor("Total"), integer = 999L,
+      VFacA = factor("Total VFacA"), VIntA = 901L)),
+  Ans2[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA),
+    VCharB,
+    VFacA = factor(VFacA, levels = levels(VFacA),
+      labels = sub("Total", "Total VFacA", levels(VFacA))),
+    VFacB,
+    VIntA = fifelse(VIntA == 999L, 901L, VIntA),
+    VIntB, Count, Y_Sum)])
+# 40. Some variables have no 'total' rows, label specified by variable with label value in the data.
+test(2294.40,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA")),
+    label = list(VCharA = "C2", VCharB = "C4",
+      VFacA = factor("F2"), VFacB = factor("F4"),
+      VIntA = 2L, VIntB = 3L)),
+  Ans2[, .(VCharA, VCharB, VFacA,
+    VFacB = fifelse(VFacB == "Total", factor("F4", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA = fifelse(VIntA == 999L, 2L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 3L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2), VCharB (label: C4), VFacA (label: F2), VFacB (label: F4), VIntA (label: 2), VIntB (label: 3)]")
+# 41. Some variables have no 'total' rows, label specified by first element of class with label value in the data.
+test(2294.41,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA")),
+    label = list(character = "C3", factor = factor("F3"), integer = 2L)),
+  Ans2[, .(VCharA, VCharB, VFacA,
+    VFacB = fifelse(VFacB == "Total", factor("F3", levels = setdiff(levels(VFacB), "Total")),
+      factor(VFacB, levels = setdiff(levels(VFacB), "Total"))),
+    VIntA = fifelse(VIntA == 999L, 2L, VIntA),
+    VIntB = fifelse(VIntB == 999L, 2L, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3), VFacA (label: F3), VFacB (label: F3), VIntA (label: 2), VIntB (label: 2)]")
+# 42. Date, IDate, numeric: label specified by variable for each variable.
+test(2294.42,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "Total", VCharB = "Total",
+      VDate = as.Date("3000-01-01"), VIDate = as.IDate("3000-01-01"),
+      VNumA = 999, VNumB = 999)),
+  Ans3)
+# 43. Date, IDate, numeric: label specified by first element of class.
+test(2294.43,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(character = "Total", Date = as.Date("3000-01-01"),
+      IDate = as.IDate("3000-01-01"), numeric = 999)),
+  Ans3)
+# 44. Date, IDate, numeric: label specified by first element of class but not for all classes of variable in 'by'.
+test(2294.44,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(Date = as.Date("3000-01-01"))),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VDate,
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate(NA), VIDate),
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, NA_real_, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, NA_real_, VNumB),
+    Count, Y_Sum)])
+# 45. Date, IDate, numeric: label specified by variable for some classes (character) and by class for other classes (Date, IDate, numeric).
+test(2294.45,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "Total", VCharB = "Total",
+      Date = as.Date("3000-01-01"), IDate = as.IDate("3000-01-01"),
+      numeric = 999)),
+  Ans3)
+# 46. Date, IDate, numeric: label specified by variable for some classes and by class for other classes.
+test(2294.46,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VDate = as.Date("3000-01-01"), VIDate = as.IDate("3000-01-01"),
+      VNumA = 999, VNumB = 999, character = "Total")),
+  Ans3)
+# 47. Date, IDate, numeric: label specified by variable for some variables having a particular class and by class for other variables having that class.
+test(2294.47,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "Total VCharA", character = "Total",
+      Date = as.Date("3000-01-01"), IDate = as.IDate("3000-01-01"),
+      numeric = 999, VNumA = 901)),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA),
+    VCharB, VDate, VIDate,
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 901, VNumA),
+    VNumB, Count, Y_Sum)])
+# 48. Date, IDate, numeric: label specified by variable and by class, with class specification not used because all matching variables are specified by variable.
+test(2294.48,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(character = "Total", VCharA = "Total VCharA", VCharB = "Total VCharB",
+      VDate = as.Date("2999-01-01"), VIDate = as.IDate("2999-01-01"),
+      numeric = 999, VNumA = 901, VNumB = 902,
+      Date = as.Date("3000-01-01"), IDate = as.IDate("3000-01-01"))),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "Total VCharA", VCharA),
+    VCharB = fifelse(VCharB == "Total", "Total VCharB", VCharB),
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date("2999-01-01"), VDate),
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate("2999-01-01"), VIDate),
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 901, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, 902, VNumB),
+    Count, Y_Sum)])
+# 49. Date, IDate, numeric: label specified by variable with label value in the data.
+test(2294.49,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C2", VCharB = "Total",
+      VDate = as.Date("2024-01-03"), VIDate = as.IDate("2024-01-03"),
+      VNumA = 1, VNumB = 3)),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB,
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date("2024-01-03"), VDate),
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate("2024-01-03"), VIDate),
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 1, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, 3, VNumB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2), VDate (label: 2024-01-03), VIDate (label: 2024-01-03), VNumA (label: 1), VNumB (label: 3)]")
+# 50. Date, IDate, numeric: label specified by first element of class with label value in the data.
+test(2294.50,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(character = "C3", Date = as.Date("2024-01-03"),
+      IDate = as.IDate("3000-01-01"), numeric = 3)),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date("2024-01-03"), VDate),
+    VIDate,
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 3, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, 3, VNumB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3), VDate (label: 2024-01-03), VNumB (label: 3)]")
+# 51. Date, IDate, numeric: label specified by first element of class with label value in the data.
+test(2294.51,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(character = "C3", Date = as.Date("2024-01-03"),
+      IDate = as.IDate("2024-01-03"), numeric = 2)),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date("2024-01-03"), VDate),
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate("2024-01-03"), VIDate),
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 2, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, 2, VNumB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3), VDate (label: 2024-01-03), VIDate (label: 2024-01-03), VNumA (label: 2), VNumB (label: 2)]")
+# 52. Date, IDate, numeric: label specified by variable and class with label value in the data.
+test(2294.52,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = list(VCharA = "C2", character = "C3",
+      Date = as.Date("2024-01-03"), IDate = as.IDate("3000-01-01"),
+      numeric = 3, VDate = as.Date("3000-01-01"),
+      VNumA = 2)),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C3", VCharB),
+    VDate, VIDate,
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, 2, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, 3, VNumB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2), VCharB (label: C3), VNumA (label: 2), VNumB (label: 3)]")
+# 53. label is a character scalar.
+test(2294.53,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = "Total"),
+  Ans1[, .(VCharA, VCharB,
+    VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA),
+    VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB),
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)])
+# 54. label is a named character scalar.
+test(2294.54,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = c(z = "Total")),
+  Ans1[, .(VCharA, VCharB,
+    VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA),
+    VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB),
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)])
+# 55. label is a factor scalar.
+test(2294.55,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = factor("Total")),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VFacA, VFacB,
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)])
+# 56. label is an integer scalar.
+test(2294.56,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = 999L),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA),
+    VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB),
+    VIntA, VIntB,
+    Count, Y_Sum)])
+# 57. label is a Date scalar.
+test(2294.57,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = as.Date("3000-01-01")),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VDate,
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate(NA), VIDate),
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, NA_real_, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, NA_real_, VNumB),
+    Count, Y_Sum)])
+# 58. label is an IDate scalar.
+test(2294.58,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = as.IDate("3000-01-01")),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date(NA), VDate),
+    VIDate,
+    VNumA = fifelse(abs(VNumA - 999) < 0.1, NA_real_, VNumA),
+    VNumB = fifelse(abs(VNumB - 999) < 0.1, NA_real_, VNumB),
+    Count, Y_Sum)])
+# 59. label is a numeric scalar.
+test(2294.59,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+    sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"),
+      c("VCharA", "VCharB", "VDate", "VNumA"),
+      c("VCharA", "VCharB", "VDate"),
+      c("VCharA", "VCharB", "VIDate"),
+      c("VCharA", "VIDate"),
+      c("VCharB", "VDate"),
+      "VCharA",
+      character()),
+    label = 999),
+  Ans3[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date(NA), VDate),
+    VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate(NA), VIDate),
+    VNumA, VNumB, Count, Y_Sum)])
+# 60. label is a scalar with class not matching any variable in 'by'.
+test(2294.60,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = as.Date("3000-01-01")),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA),
+    VCharB = fifelse(VCharB == "Total", NA_character_, VCharB),
+    VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA),
+    VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB),
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)])
+# 61. label is a character scalar with value in the data.
+# The expected warning is written out in full, closing bracket included, like the other warning tests in this file.
+test(2294.61,
+  groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)),
+    by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+    sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"),
+      c("VCharA", "VCharB", "VFacA", "VIntA"),
+      c("VCharA", "VCharB", "VFacA"),
+      c("VCharA", "VCharB", "VFacB"),
+      c("VCharA", "VFacB"),
+      c("VCharB", "VFacA"),
+      "VCharA",
+      character()),
+    label = "C2"),
+  Ans1[, .(VCharA = fifelse(VCharA == "Total", "C2", VCharA),
+    VCharB = fifelse(VCharB == "Total", "C2", VCharB),
+    VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA),
+    VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB),
+    VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA),
+    VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB),
+    Count, Y_Sum)],
+  warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C2)]")
+# 62. label is a factor scalar with value in the data.
+test(2294.62, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB", "VFacB"), + c("VCharA", "VFacB"), + c("VCharB", "VFacA"), + "VCharA", + character()), + label = factor("F3")), + Ans1[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA), + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VFacA = fifelse(VFacA == "Total", factor("F3", levels = setdiff(levels(VFacA), "Total")), + factor(VFacA, levels = setdiff(levels(VFacA), "Total"))), + VFacB = fifelse(VFacB == "Total", factor("F3", levels = setdiff(levels(VFacB), "Total")), + factor(VFacB, levels = setdiff(levels(VFacB), "Total"))), + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), + VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB), + Count, Y_Sum)], + warning = "For the following variables, the 'label' value was already in the data: [VFacA (label: F3), VFacB (label: F3)") +# 63. label is a numeric scalar with value in the data. 
+test(2294.63, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"), + sets = list(c("VCharA", "VCharB", "VDate", "VIDate", "VNumA", "VNumB"), + c("VCharA", "VCharB", "VDate", "VNumA"), + c("VCharA", "VCharB", "VDate"), + c("VCharA", "VCharB", "VIDate"), + c("VCharA", "VIDate"), + c("VCharB", "VDate"), + "VCharA", + character()), + label = 2), + Ans3[, .(VCharA = fifelse(VCharA == "Total", NA_character_, VCharA), + VCharB = fifelse(VCharB == "Total", NA_character_, VCharB), + VDate = fifelse(VDate == as.Date("3000-01-01"), as.Date(NA), VDate), + VIDate = fifelse(VIDate == as.IDate("3000-01-01"), as.IDate(NA), VIDate), + VNumA = fifelse(abs(VNumA - 999) < 0.1, 2, VNumA), + VNumB = fifelse(abs(VNumB - 999) < 0.1, 2, VNumB), + Count, Y_Sum)], + warning = "For the following variables, the 'label' value was already in the data: [VNumA (label: 2), VNumB (label: 2)") +# 64. label is a character scalar with value in the data and the character variables have no 'total' rows. +test(2294.64, + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), + by = c("VCharA", "VCharB", "VFacA", "VFacB", "VIntA", "VIntB"), + sets = list(c("VCharA", "VCharB", "VFacA", "VIntA"), + c("VCharA", "VCharB", "VFacA")), + label = "C3"), + Ans2[, .(VCharA = fifelse(VCharA == "Total", "C3", VCharA), + VCharB = fifelse(VCharB == "Total", "C3", VCharB), + VFacA = fifelse(VFacA == "Total", factor(NA, levels = levels(VFacA)), VFacA), + VFacB = fifelse(VFacB == "Total", factor(NA, levels = levels(VFacB)), VFacB), + VIntA = fifelse(VIntA == 999L, NA_integer_, VIntA), + VIntB = fifelse(VIntB == 999L, NA_integer_, VIntB), + Count, Y_Sum)], + warning = "For the following variables, the 'label' value was already in the data: [VCharA (label: C3), VCharB (label: C3)]") +# 65. cube: label is a list and an element with name in 'by' has class not matching class in 'x'. 
+test(2294.65, + cube(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + label = list(VCharA = "Total", VCharB = 999, VFacA = "Total")), + error = "When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: [VCharB (label: numeric; data: character), VFacA (label: character; data: factor)]") +# 66. cube: label specified by variable for each variable. +test(2294.66, + cube(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + label = list(VCharA = "Total", VCharB = "Total", VFacA = factor("Total"))), + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + sets = list(c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB"), + c("VCharA", "VFacA"), + "VCharA", + c("VCharB", "VFacA"), + "VCharB", "VFacA", + character(0)), + label = list(VCharA = "Total", VCharB = "Total", VFacA = factor("Total")))) +# 67. cube: label specified by variable and by class. +test(2294.67, + cube(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + label = list(character = "Total", VCharA = "Total VIntA", integer = 999L)), + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + sets = list(c("VCharA", "VCharB", "VIntA"), + c("VCharA", "VCharB"), + c("VCharA", "VIntA"), + "VCharA", + c("VCharB", "VIntA"), + "VCharB", "VIntA", + character(0)), + label = list(character = "Total", VCharA = "Total VIntA", integer = 999L))) +# 68. cube: label specified by variable and by class with label value in the data. 
+test(2294.68, + cube(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + label = list(character = "C3", VCharA = "Total", integer = 2L)), + suppressWarnings( + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + sets = list(c("VCharA", "VCharB", "VIntA"), + c("VCharA", "VCharB"), + c("VCharA", "VIntA"), + "VCharA", + c("VCharB", "VIntA"), + "VCharB", "VIntA", + character(0)), + label = list(character = "C3", VCharA = "Total", integer = 2L))), + warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]") +# 69: rollup: label is a list and an element with name in 'by' has class not matching class in 'x'. +test(2294.69, + rollup(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + label = list(VCharA = "Total", VCharB = 999, VFacA = "Total")), + error = "When argument 'label' is a list, the class of each 'label' element with name in 'by' must match the class of the corresponding column of the data.table 'x'. Class mismatch for: [VCharB (label: numeric; data: character), VFacA (label: character; data: factor)]") +# 70. rollup: label specified by variable for each variable. +test(2294.70, + rollup(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + label = list(VCharA = "Total", VCharB = "Total", VFacA = factor("Total"))), + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VFacA"), + sets = list(c("VCharA", "VCharB", "VFacA"), + c("VCharA", "VCharB"), + "VCharA", + character(0)), + label = list(VCharA = "Total", VCharB = "Total", VFacA = factor("Total")))) +# 71. rollup: label specified by variable and by class. 
+test(2294.71, + rollup(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + label = list(character = "Total", VCharA = "Total VIntA", integer = 999L)), + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + sets = list(c("VCharA", "VCharB", "VIntA"), + c("VCharA", "VCharB"), + "VCharA", + character(0)), + label = list(character = "Total", VCharA = "Total VIntA", integer = 999L))) +# 72. rollup: label specified by variable and by class with label value in the data. +test(2294.72, + rollup(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + label = list(character = "C3", VCharA = "Total", integer = 2L)), + suppressWarnings( + groupingsets(DT1, .(Count = .N, Y_Sum = sum(Y)), by = c("VCharA", "VCharB", "VIntA"), + sets = list(c("VCharA", "VCharB", "VIntA"), + c("VCharA", "VCharB"), + "VCharA", + character(0)), + label = list(character = "C3", VCharA = "Total", integer = 2L))), + warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]") diff --git a/man/groupingsets.Rd b/man/groupingsets.Rd index 6ae02779c..2989ba55e 100644 --- a/man/groupingsets.Rd +++ b/man/groupingsets.Rd @@ -11,11 +11,11 @@ } \usage{ rollup(x, \dots) -\method{rollup}{data.table}(x, j, by, .SDcols, id = FALSE, \dots) +\method{rollup}{data.table}(x, j, by, .SDcols, id = FALSE, label = NULL, \dots) cube(x, \dots) -\method{cube}{data.table}(x, j, by, .SDcols, id = FALSE, \dots) +\method{cube}{data.table}(x, j, by, .SDcols, id = FALSE, label = NULL, \dots) groupingsets(x, \dots) -\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, \dots) +\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, \dots) } \arguments{ \item{x}{\code{data.table}.} @@ -26,9 +26,14 @@ groupingsets(x, \dots) \item{.SDcols}{columns to be used in \code{j} expression in \code{.SD} object.} \item{id}{logical default \code{FALSE}. 
If \code{TRUE} it will add leading column with bit mask of grouping sets.} \item{jj}{quoted version of \code{j} argument, for convenience. When provided function will ignore \code{j} argument.} + \item{label}{label(s) to be used in the 'total' rows in the grouping variable columns of the output, that is, in rows where the grouping variable has been aggregated. Can be a named list of scalars, or a scalar, or \code{NULL}. Defaults to \code{NULL}, which results in the grouping variables having \code{NA} in their 'total' rows. See Details.} } \details{ All three functions \code{rollup, cube, groupingsets} are generic methods, \code{data.table} methods are provided. + + The \code{label} argument can be a named list of scalars, or a scalar, or \code{NULL}. When \code{label} is a list, each element name must be (1) a variable name in \code{by}, or (2) the first element of the class in the data.table \code{x} of a variable in \code{by}, or (3) one of 'character', 'integer', 'numeric', 'factor', 'Date', 'IDate'. The order of the list elements is not important. A label specified by variable name will apply only to that variable, while a label specified by first element of a class will apply to all variables in \code{by} for which the first element of the class of the variable in \code{x} matches the \code{label} element name, except for variables that have a label specified by variable name (that is, specification by variable name takes precedence over specification by class). For \code{label} elements with name in \code{by}, the class of the label value must be the same as the class of the variable in \code{x}. For \code{label} elements with name not in \code{by}, the first element of the class of the label value must be the same as the \code{label} element name. 
For example, \code{label = list(integer = 999, IDate = as.Date("3000-01-01"))} would produce an error because \code{class(999)[1]} is not \code{"integer"} and \code{class(as.Date("3000-01-01"))[1]} is not \code{"IDate"}. A corrected specification would be \code{label = list(integer = 999L, IDate = as.IDate("3000-01-01"))}. + + The \code{label = <scalar>} option provides a shorter alternative in the case where only one class of grouping variable requires a label. For example, \code{label = list(character = "Total")} can be shortened to \code{label = "Total"}. When this option is used, the label will be applied to all variables in \code{by} for which the first element of the class of the variable in \code{x} matches the first element of the class of the scalar. } \value{ A data.table with various aggregates. @@ -55,6 +60,13 @@ rollup(DT, j = sum(value), by = c("color","year","status")) # default id=FALSE rollup(DT, j = sum(value), by = c("color","year","status"), id=TRUE) rollup(DT, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols="value") rollup(DT, j = c(list(count=.N), lapply(.SD, sum)), by = c("color","year","status"), id=TRUE) +rollup(DT, j = sum(value), by = c("color","year","status"), + label = list(color = "total", year = as.Date("3000-01-01"), status = factor("total"))) # specify label by variable name +rollup(DT, j = sum(value), by = c("color","year","status"), + label = list(color = "total", Date = as.Date("3000-01-01"), factor = factor("total"))) # specify label by variable name and first element of class +rollup(DT, j = sum(value), by = c("color","year","status"), label = "total") # label is character scalar so applies to color only +rollup(DT, j = .N, by = c("color","year","status","value"), + label = list(color = NA_character_, year = as.Date(NA), status = factor(NA), value = NaN)) # label can be explicitly specified as NA or NaN # cube cube(DT, j = sum(value), by = c("color","year","status"), id=TRUE)