From fcad956f63230c9e7a4e5c18d849e975ca73dcdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:50:09 +0100 Subject: [PATCH] Allow non-standard datanames in code dependency (#340) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Pull Request Fixes https://github.com/insightsengineering/teal/issues/1366 Related: - https://github.com/insightsengineering/teal/pull/1382 - https://github.com/insightsengineering/teal.slice/pull/622 - https://github.com/insightsengineering/teal.data/pull/340 ### Changes description - [x] Adds support for non-standard names in code dependency - [x] Support backtick symbols in code dependency
Reproducible code for backtick support in code parser `%add_column%` definition is not detected ```r pkgload::load_all("teal.data") #> ℹ Loading teal.data #> Loading required package: teal.code td <- teal_data() |> within({ IRIS <- iris IRIS2 <- iris MTCARS <- mtcars `%add_column%` <- function(lhs, rhs) dplyr::bind_cols(lhs, rhs) # @ add_column <- function(lhs, rhs) dplyr::bind_cols(lhs, rhs) IRIS <- IRIS %add_column% dplyr::tibble(yada = IRIS2$Species) IRIS <- add_column(IRIS, dplyr::tibble(yada2 = IRIS2$Species)) }) td |> get_code(datanames = "IRIS") |> cat() #> IRIS <- iris #> IRIS2 <- iris #> add_column <- function(lhs, rhs) dplyr::bind_cols(lhs, rhs) #> IRIS <- IRIS %add_column% dplyr::tibble(yada = IRIS2$Species) #> IRIS <- add_column(IRIS, dplyr::tibble(yada2 = IRIS2$Species)) td2 <- td |> within({ IRIS <- `%add_column%`(IRIS, dplyr::tibble(yada2 = IRIS2$Species)) }) td2 |> get_code(datanames = "IRIS") |> cat() #> IRIS <- iris #> IRIS2 <- iris #> add_column <- function(lhs, rhs) dplyr::bind_cols(lhs, rhs) #> IRIS <- IRIS %add_column% dplyr::tibble(yada = IRIS2$Species) #> IRIS <- add_column(IRIS, dplyr::tibble(yada2 = IRIS2$Species)) #> IRIS <- IRIS %add_column% dplyr::tibble(yada2 = IRIS2$Species) ``` Created on 2024-10-15 with [reprex v2.1.1](https://reprex.tidyverse.org)
--------- Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com> --- R/teal_data-get_code.R | 5 ++ R/utils-get_code_dependency.R | 25 ++++++- tests/testthat/test-get_code.R | 131 +++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 3 deletions(-) diff --git a/R/teal_data-get_code.R b/R/teal_data-get_code.R index d74435bd6..5cc5c6494 100644 --- a/R/teal_data-get_code.R +++ b/R/teal_data-get_code.R @@ -107,6 +107,11 @@ setMethod("get_code", signature = "teal_data", definition = function(object, dep checkmate::assert_character(datanames, min.len = 1L, null.ok = TRUE) checkmate::assert_flag(deparse) + # Normalize in case special it is backticked + if (!is.null(datanames)) { + datanames <- gsub("^`(.*)`$", "\\1", datanames) + } + code <- if (!is.null(datanames)) { get_code_dependency(object@code, datanames, ...) } else { diff --git a/R/utils-get_code_dependency.R b/R/utils-get_code_dependency.R index 48fe6b122..1f549c360 100644 --- a/R/utils-get_code_dependency.R +++ b/R/utils-get_code_dependency.R @@ -42,8 +42,10 @@ get_code_dependency <- function(code, names, check_names = TRUE) { code <- parse(text = code, keep.source = TRUE) pd <- utils::getParseData(code) + pd <- normalize_pd(pd) calls_pd <- extract_calls(pd) + if (check_names) { # Detect if names are actually in code. symbols <- unlist(lapply(calls_pd, function(call) call[call$token == "SYMBOL", "text"])) @@ -175,7 +177,7 @@ fix_arrows <- function(calls) { sub_arrows <- function(call) { checkmate::assert_data_frame(call) map <- data.frame( - row.names = c("`<-`", "`<<-`", "`=`"), + row.names = c("<-", "<<-", "="), token = rep("LEFT_ASSIGN", 3), text = rep("<-", 3) ) @@ -297,7 +299,7 @@ extract_occurrence <- function(calls_pd) { # What occurs in a function body is not tracked. x <- call_pd[!is_in_function(call_pd), ] - sym_cond <- which(x$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL")) + sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL")) if (length(sym_cond) == 0) { return(character(0L)) @@ -381,7 +383,8 @@ extract_side_effects <- function(calls_pd) { #' @noRd graph_parser <- function(x, graph) { occurrence <- vapply( - graph, function(call) { + graph, + function(call) { ind <- match("<-", call, nomatch = length(call) + 1L) x %in% call[seq_len(ind - 1L)] }, @@ -434,3 +437,19 @@ detect_libraries <- function(calls_pd) { ) ) } + +#' Normalize parsed data removing backticks from symbols +#' +#' @param pd `data.frame` resulting from `utils::getParseData()` call. +#' +#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens. +#' +#' @keywords internal +#' @noRd +normalize_pd <- function(pd) { + # Remove backticks from SYMBOL tokens + symbol_index <- grepl("^SYMBOL.*$", pd$token) + pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"]) + + pd +} diff --git a/tests/testthat/test-get_code.R b/tests/testthat/test-get_code.R index 40095f1bc..114dee932 100644 --- a/tests/testthat/test-get_code.R +++ b/tests/testthat/test-get_code.R @@ -695,3 +695,134 @@ testthat::test_that("data() call is returned when data name is provided as a cha ) ) }) + +testthat::describe("Backticked symbol", { + testthat::it("code can be retrieved with get_code", { + td <- within( + teal_data(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, datanames = "%cbind%"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("code can be retrieved with get_code", { + td <- within( + teal_data(), + { + `%cbind%` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- iris %cbind% data.frame(new_col = "new column") + } + ) + + testthat::expect_identical( + get_code(td, datanames = "`%cbind%`"), + "`%cbind%` <- function(lhs, rhs) cbind(lhs, rhs)" + ) + }) + + testthat::it("starting with underscore is detected in code dependency", { + td <- within( + teal_data(), + { + `_add_column_` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `_add_column_`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`_add_column_` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `_add_column_`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with space character is detected in code dependency", { + td <- within( + teal_data(), + { + `add column` <- function(lhs, rhs) cbind(lhs, rhs) # nolint: object_name. + iris_ds <- `add column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`add column` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- `add column`(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("without special characters is cleaned and detected in code dependency", { + td <- within( + teal_data(), + { + `add_column` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `add_column`(iris, data.frame(new_col = "new column")) + } + ) + + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "add_column <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- add_column(iris, data.frame(new_col = \"new column\"))" + ) + ) + }) + + testthat::it("with non-native pipe used as function is detected code dependency", { + td <- within( + teal_data(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- `%add_column%`(iris, data.frame(new_col = "new column")) + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) + + testthat::it("with non-native pipe is detected code dependency", { + td <- within( + teal_data(), + { + `%add_column%` <- function(lhs, rhs) cbind(lhs, rhs) + iris_ds <- iris %add_column% data.frame(new_col = "new column") + } + ) + + # Note that the original code is changed to use the non-native pipe operator + # correctly. + testthat::expect_identical( + get_code(td, datanames = "iris_ds"), + paste( + sep = "\n", + "`%add_column%` <- function(lhs, rhs) cbind(lhs, rhs)", + "iris_ds <- iris %add_column% data.frame(new_col = \"new column\")" + ) + ) + }) +})