Merge pull request #198 from Crunch-io/develop

WIP: Release 1.30
Crunch-io · Oct 26, 2020 · a46a9e1 · a46a9e1
2 parents bc0b91a + 7a63ed2
commit a46a9e1
Show file tree

Hide file tree

Showing 137 changed files with 16,232 additions and 13,751 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
     - name: Add kableExtra
       run: Rscript -e 'devtools::install_github("haozhu233/kableExtra")'
     - name: CRAN crunch
-      run: Rscript -e 'install.packages("crunch")'
+      run: Rscript -e 'devtools::install_github("crunch-io/rcrunch", ref = "gfe-multi-weight-tabbook")'
     - name: Build
       run: R CMD build --no-build-vignettes --no-manual .
     - name: Check

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,7 +6,7 @@ Description: In order to generate custom survey reports, this package provides
     'banners' (cross-tabulations) of datasets in the Crunch
     (<https://crunch.io/>) web service. Reports can be written in 'PDF' format
     using 'LaTeX' or in Microsoft Excel '.xlsx' files.
-Version: 1.2.9
+Version: 1.3.0
 Authors@R: c(
     person("Persephone", "Tsebelis", role="aut"),
     person("Kamil", "Sedrowicz", role="aut"),
@@ -19,7 +19,7 @@ Depends:
     R (>= 3.5.0),
     crunch
 Imports:
-    kableExtra,
+    kableExtra (>= 1.1.0.9000),
     rlang,
     openxlsx,
     digest,
@@ -31,9 +31,9 @@ Suggests:
     httptest (>= 2.0.0),
     jsonlite,
     knitr,
+    httpcache,
     rmarkdown,
-    testthat (>= 2.1.0),
-    kableExtra (>= 1.1.0.9000)
+    testthat (>= 2.1.0)
 RoxygenNote: 7.1.1
 VignetteBuilder: knitr
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
@@ -53,6 +53,7 @@ export(themeHuffPoCrosstabs)
 export(themeHuffPoToplines)
 export(themeNew)
 export(themeUKPolitical)
+export(toplines)
 export(with_api_fixture)
 export(writeCodeBookLatex)
 export(writeExcel)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+## crunchtabs 1.3.0
+
+- Fixes problems with enforce_onehundred (#195)
+- Adds option to remove page numbers from toplines/crosstabs (#200)
+- Add toplines() function as an alias to crosstabs() (#201)
+- Add functionality for flipping grids and presenting recontact questions (#103 )
+- Fixes two new issues with codebooks where kableExtra added breaking elements and an issue with the basename of a dataaset containing special characters (#204, #205 )
+- Adds support for custom weights (#209)
+- Adds support for recontact_toplines (#199)
+- Adds functionality for flipping grids on an exceptional basis (#212)
+- Fixes a bug with include_q_number = FALSE (#207)
+- Vignettes for recontact toplines, custom weights and other (#218)
+
 ## crunchtabs 1.2.9
 
 - Codebook question descriptions now appropriately escape special characters (#187)

diff --git a/R/codebookLatex.R b/R/codebookLatex.R
@@ -184,12 +184,14 @@ codeBookItemBody.CategoricalVariable <- function(x, ...) {
 
 
     if (max(nchar(k$`{Label}`)) > 80) {
-      kab %>% kableExtra::column_spec(2, width = "5.25in") %>%
+      kab <- kab %>% kableExtra::column_spec(2, width = "5.25in") %>%
         kable_styling_defaults(...)
     } else {
-      kab %>%
+      kab <- kab %>%
         kable_styling_defaults(...)
     }
+    # Fix for square braces in options
+    gsub("\\hspace*{0in}", "", kab, fixed = TRUE)
 
   }
 
@@ -436,11 +438,17 @@ curlyWrap <- function(x) paste0("{", x, "}")
 #' @param k A data.frame to be printed using \link[kableExtra]{kable}
 #' @param alignment A string vector of alignments
 scolumnAlign <- function(k, alignment) {
-  nchars <- unlist(lapply(k, function(x) max(nchar(x), na.rm = TRUE)))
+  nchars <- unlist(lapply(k, function(x) suppressWarnings(max(nchar(x), na.rm = TRUE))))
 
   for (i in 1:ncol(k)) {
     if (alignment[i] == "d") {
-      maxnchar <- max(nchar(k[[i]]), na.rm = TRUE)
+      # If entire column is NA, set to two
+      if (all(is.na(k[[i]]))) {
+        maxnchar <- 2
+      } else {
+        maxnchar <- max(nchar(k[[i]]), na.rm = TRUE)
+      }
+
       if (maxnchar > 6) {
         alignment[i] <- sprintf("S[table-format=%s]", maxnchar)
       } else {

diff --git a/R/crosstabs.R b/R/crosstabs.R
@@ -6,14 +6,18 @@
 #' @param dataset A Crunch dataset.
 #' @param vars An optional vector of aliases of the non-hidden variables that shoulds be used.
 #' Defaults to all non-hidden variables.
-#' @param weight The alias of a numeric variable that should be used for data weighting.
-#' Defaults to current weight variable. For unweighted, set to \code{NULL}
+#' @param weight The alias of a numeric variable that should be used for data
+#' weighting. Alternatively a named list where the name is the alias of the weight
+#' and the contents of the list component are a character vector of aliases to
+#' which that weight should apply. Defaults to current weight variable. For
+#' unweighted, set to \code{NULL}
 #' @param banner An optional object of class \code{Banner} that should be used to generate
 #' a Crosstabs summary. Defaults to \code{NULL} - a Toplines summary is produced and returned.
 #' @param codebook If \code{TRUE}, codebook data summaries are prepared. Defaults to \code{FALSE}.
 #' @param include_numeric Logical. Should we include numeric questions? Defaults to FALSE. Implemented for Toplines only.
 #' @param include_datetime Logical. Should we include date time questions? Defaults to FALSE. Implemented for Toplines only.
 #' @param include_verbatims Logical. Should we include a sample text varaibles? Defaults to FALSE. Implemented for Toplines only.
+#' @param include_original_weighted Logical. When providing list of weights to apply, should we include the default weighted vars? Defaults to TRUE.
 #' @param num_verbatims An integer identifying the number of examples to extract from a text variable. Defaults to 10. Implemented for Toplines only.
 #' @return A Toplines (when no banner is provided) or Crosstabs (when a banner is provided)
 #' summary of the input dataset.
@@ -26,9 +30,8 @@
 #' @importFrom crunch name aliases allVariables is.Numeric is.dataset weight alias weightVariables is.variable
 #' @importFrom methods is
 #' @export
-crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10) {
+crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(dataset), banner = NULL, codebook = FALSE, include_numeric = FALSE, include_datetime = FALSE, include_verbatims = FALSE, num_verbatims = 10, include_original_weighted = TRUE) {
 
-  # TODO: open ends
   wrong_class_error(dataset, "CrunchDataset", "dataset")
 
   all_types = crunch::types(crunch::allVariables(dataset))
@@ -41,35 +44,42 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
     quotes = TRUE
   )
 
-  if (!is.null(weight)) {
-    if (crunch::is.variable(weight)) { weight <- crunch::alias(weight) }
+  if (!is.null(weight) & !is.list(weight)) {
+
+    if (crunch::is.variable(weight)) {
+      weight <- crunch::alias(weight)
+    }
+
     if (!weight %in% all_aliases) {
       stop("`weight`, if provided, must be a valid variable in `dataset`. '",
            weight, "' is not found."
       )
     }
+
     if (!weight %in% weightVariables(dataset)) {
       stop(
         "`weight`, if provided, must be a valid weight variable in `dataset`. '",
         weight,
         "' is not a weight variable."
       )
     }
+
   }
   if (!is.null(banner) && !is(banner, "Banner")) {
     stop("`banner`, if provided, must be an object of class 'Banner'.")
   }
 
-  weight_var <- if (!is.null(weight)) dataset[[weight]]
-
-  vars_out <- if (codebook) { vars } else {
-    intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array", "numeric")]) }
+  if (!is.null(weight) & !is.list(weight)) {
+    weight_var <- dataset[[weight]]
+  } else {
+    weight_var <- weight
+  }
 
+  # TODO: Add check here to verify variables in weight are included
+  # in vars and the ds
 
-  # error_if_items(
-  #   unique(types(allVariables(dataset[setdiff(vars, vars_out)]))),
-  #   "`vars` of type(s) {items} are not supported and have been skipped.",
-  #   and = TRUE, error = FALSE)
+  vars_out <- if (codebook) { vars } else {
+    intersect(vars, all_aliases[all_types %in% c("categorical", "multiple_response", "categorical_array")]) }
 
   if (length(vars_out) == 0) {
     stop("No variables provided.")
@@ -89,7 +99,8 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
     vars = vars_out,
     banner = banner_use,
     weight = weight_var,
-    topline = is.null(banner)
+    topline = is.null(banner),
+    include_original_weighted = include_original_weighted
   )
 
   if (codebook) {
@@ -217,3 +228,7 @@ crosstabs <- function(dataset, vars = names(dataset), weight = crunch::weight(da
 
   return(summary_data)
 }
+
+#' @describeIn crosstabs An alias for \code{crosstabs}
+#' @export
+toplines <- crosstabs
diff --git a/R/recontactQuestion.R b/R/recontactQuestion.R
@@ -0,0 +1,135 @@
+#' Recontact Toplines
+#'
+#' Allows the user to create a simple report that shows recontact question.
+#' @param dataset A crunch dataset
+#' @param questions A character vector of aliases that should be included in the
+#' report. If your recontact has been named using a suffix such as _pre, _post
+#' leave that out.
+#' @param suffixes The suffixes of recontact questions, for example _pre, _post
+#' @param labels Formal labels for
+#' the election", "After the election".
+#' @param weights A character vector of equal to the length of suffixes. You may
+#' specify a unique weight per recontact period. The default would return
+#' all variables with the default survey weighting `weight(ds)`. Your weights
+#' should be in the same order as your suffixies.
+#' @param default_weight The default weight of the dataset, if any.
+recontact_toplines <- function(dataset, questions, suffixes, labels,
+                               weights = crunch::weight(dataset), default_weight = crunch::alias(crunch::weight(dataset))) {
+
+  stopifnot(is.dataset(dataset))
+  stopifnot(is.character(questions))
+  stopifnot(is.character(suffixes))
+  stopifnot(is.character(labels))
+
+  groupings <- lapply(questions, function(x) paste0(x, suffixes))
+  names(groupings) <- questions
+  vars <- unlist(groupings)
+
+  if (length(weights) > 1) {
+    weight_spec <- lapply(suffixes, function(x) vars[grepl(x, vars)])
+    names(weight_spec) <- weights
+  }
+
+  ct <- crosstabs(
+    dataset,
+    vars = c(unlist(groupings), names(weight_spec)),
+    weight = weight_spec,
+    include_original_weighted = FALSE
+  )
+
+  for (question in questions) {
+
+    if (!is.null(weights)) {
+
+      if (weights[1] == default_weight) {
+        p1 <- groupings[[question]][1]
+      } else {
+        p1 <- paste0(groupings[[question]][1],"_", weights[1])
+      }
+
+      if (weights[2] == default_weight) {
+        p2 <- groupings[[question]][2]
+      } else {
+        p2 <- paste0(groupings[[question]][2],"_", weights[2])
+      }
+
+    } else {
+
+      p1 <- groupings[[question]][1]
+      p2 <- groupings[[question]][2]
+    }
+
+    ct$results[[question]] <- as.ToplineCategoricalArray(
+      ct$results[[p1]],
+      ct$results[[p2]],
+      question,
+      labels,
+      weights
+    )
+
+    ct$results[[p1]] <- NULL
+    ct$results[[p2]] <- NULL
+
+  }
+
+  ct
+}
+
+#' Combine two questions as a categorical array
+#'
+#' Here we manipulate the tabBook results so that they match the layout
+#' of a categoricalArray, which has the benefit of already having
+#' distinct code to write it to latex.
+#'
+#' @param q1 The results object for the first question
+#' @param q2 The results object for the second question
+#' @param question_alias A string specifying the resulting alias.
+#' @param labels Two character strings used to describe the pre and post waves
+#' @param weights A single alias, list, or NULL
+as.ToplineCategoricalArray <- function(q1, q2, question_alias = NULL, labels = c("Pre", "Post"), weights) {
+
+  q1$alias <- question_alias
+  q1$subnames <- labels
+  q1$notes <- paste0(labels, " is weighted by ", weights, collapse = " : ")
+  q1$type <- "categorical_array"
+
+  matrix_rows <- length(
+    attr(q1$crosstabs$Results$`___total___`$counts, "dimnames")[[1]]
+  )
+
+  # Build counts
+  m <- matrix(
+    c(
+      as.numeric(q1$crosstabs$Results$`___total___`$counts),
+      as.numeric(q2$crosstabs$Results$`___total___`$counts)),
+      nrow = matrix_rows
+  )
+
+  dimnames(m) <- list(
+    attr(q1$crosstabs$Results$`___total___`$counts, "dimnames")[[1]],
+    labels
+  )
+
+  q1$crosstabs$Results$`___total___`$counts <- m
+
+  # Build proportions
+  m <- matrix(
+    c(
+      as.numeric(q1$crosstabs$Results$`___total___`$proportions),
+      as.numeric(q2$crosstabs$Results$`___total___`$proportions)
+      ),
+      nrow = matrix_rows
+  )
+
+  dimnames(m) <- list(
+    attr(q1$crosstabs$Results$`___total___`$counts, "dimnames")[[1]],
+    labels
+  )
+
+  q1$crosstabs$Results$`___total___`$proportions <- m
+
+  class(q1) <- c("ToplineCategoricalArray", "ToplineVar", "CrossTabVar")
+
+  q1
+
+}
diff --git a/R/reformatResults.R b/R/reformatResults.R
@@ -308,6 +308,7 @@ reformatVar <- function(var, banner_name, theme, proportions, banner_info, latex
 #' @param var The crunch variable
 #' @param theme The theme object from \link{themeNew}
 getVarInfo <- function(var, theme) {
+
   if_there <- function(str) {
     if (!is.null(str) && !is.na(str) && str != "") {
       return(str)
@@ -322,8 +323,13 @@ getVarInfo <- function(var, theme) {
     format_var_filtertext = if_there(var[["notes"]]),
     format_var_subname = if_there(var[["subname"]])
   )
+
+  if (is.null(var_info$format_var_description))
+    var_info$format_var_description <- var_info$format_var_name
+
   number <- if_there(var[["number"]])
   var_info2 <- list()
+
   for (info_name in intersect(names(theme), names(var_info))) {
     if (!is.null(theme[[info_name]]) && (var$type != "categorical_array" ||
                                          (is.null(theme[[info_name]]$repeat_for_subs) ||