From 9a65f98bfe32bbe441967c6c461147cca7a4c4ce Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 14:52:11 -0700 Subject: [PATCH 1/8] add get_protected() --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 7 +++ R/asis-nodes.R | 7 +-- R/class-yarn.R | 26 ++++++++++++ R/get_protected.R | 47 ++++++++++++++++++++ man/get_protected.Rd | 44 +++++++++++++++++++ man/protect_unescaped.Rd | 2 +- man/yarn.Rd | 66 +++++++++++++++++++++++++++++ tests/testthat/test-get_protected.R | 30 +++++++++++++ 10 files changed, 227 insertions(+), 5 deletions(-) create mode 100644 R/get_protected.R create mode 100644 man/get_protected.Rd create mode 100644 tests/testthat/test-get_protected.R diff --git a/DESCRIPTION b/DESCRIPTION index 1c487b2..298b9ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,5 +55,5 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 93033f3..df82f09 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(find_between) +export(get_protected) export(md_ns) export(protect_curly) export(protect_math) diff --git a/NEWS.md b/NEWS.md index 45d39ff..f9f5afd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # tinkr 0.2.0.9000 +## NEW FEATURES + +* `get_protected()` function (and yarn method) will return nodes which have + been protected in some way by {tinkr} via one of the `protect_` family of + functions. Adopting this pattern is preferred over using + `md:text[@asis='true']` as the attribute names may change in the future. 
+ ## BUG FIX * Inline math with single characters will no longer cause an error (issue: #101, diff --git a/R/asis-nodes.R b/R/asis-nodes.R index e3264dc..d2a033d 100644 --- a/R/asis-nodes.R +++ b/R/asis-nodes.R @@ -178,8 +178,8 @@ fix_partial_inline <- function(tag, body, ns) { # paste the lines together and create new nodes n <- length(math_lines) char <- as.character(math_lines) - char[[1]] <- sub("[$]", "$", char[[1]]) - char[[n]] <- sub("[<]text ", "", char[[1]]) + char[[n]] <- sub("[<]text ", "this is $\LaTeX$ text char <- gsub( pattern = inline_dollars_regex("full"), - replacement = "\\1", + replacement = "\\1", x = char, perl = TRUE ) @@ -259,6 +259,7 @@ protect_block_math <- function(body, ns) { # get all of the internal nodes bm <- xml2::xml_find_all(bm, ".//descendant-or-self::md:*", ns = ns) set_asis(bm) + xml2::xml_set_attr(bm, "math", "true") } # TICK BOXES ------------------------------------------------------------------- diff --git a/R/class-yarn.R b/R/class-yarn.R index 10721c9..a775ccc 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -199,6 +199,32 @@ yarn <- R6::R6Class("yarn", message("to use the `protect_unescaped()` method, you will need to re-read your document with `yarn$new(sourcepos = TRUE)`") } invisible(self) + }, + #' @description Return nodes whose contents are protected from being escaped + #' @param type a character vector listing the protections to be included. 
+ #' Defaults to `NULL`, which includes all protected nodes: + #' - math: via the `protect_math()` function + #' - curly: via the `protect_curly()` function + #' - unescaped: via the `protect_unescaped()` function + #' + #' @examples + #' path <- system.file("extdata", "basic-curly.md", package = "tinkr") + #' ex <- tinkr::yarn$new(path, sourcepos = TRUE) + #' # protect curly braces + #' ex$protect_curly() + #' # add math and protect it + #' ex$add_md(c("## math\n", + #' "$c^2 = a^2 + b^2$\n", + #' "$$", + #' "\\sum_{i}^k = x_i + 1", + #' "$$\n") + #' ) + #' ex$protect_math() + #' # get protected now shows all the protected nodes + #' ex$get_protected() + #' ex$get_protected(c("math", "curly")) # only show the math and curly + get_protected = function(type = NULL) { + get_protected(self$body, type = type, self$ns) } ), private = list( diff --git a/R/get_protected.R b/R/get_protected.R new file mode 100644 index 0000000..a3c5b36 --- /dev/null +++ b/R/get_protected.R @@ -0,0 +1,47 @@ +#' Get protected nodes +#' +#' @param body an `xml_document` object +#' @param type a character vector listing the protections to be included. +#' Defaults to `NULL`, which includes all protected nodes: +#' - math: via the `protect_math()` function +#' - curly: via the `protect_curly()` function +#' - unescaped: via the `protect_unescaped()` function +#' @param ns the namespace of the document (defaults to [md_ns()]) +#' @return an `xml_nodelist` object. 
+#' @export +#' @examples +#' path <- system.file("extdata", "basic-curly.md", package = "tinkr") +#' ex <- tinkr::yarn$new(path, sourcepos = TRUE) +#' # protect curly braces +#' ex$protect_curly() +#' # add math and protect it +#' ex$add_md(c("## math\n", +#' "$c^2 = a^2 + b^2$\n", +#' "$$", +#' "\\sum_{i}^k = x_i + 1", +#' "$$\n") +#' ) +#' ex$protect_math() +#' # get protected now shows all the protected nodes +#' get_protected(ex$body) +#' get_protected(ex$body, c("math", "curly")) # only show the math and curly +get_protected <- function(body, type = NULL, ns = md_ns()) { + protections <- c( + math = "@math", + curly = "@curly", + unescaped = "(@asis and text()='[' or text()=']')" + ) + if (!is.null(type)) { + keep <- match.arg(type, names(protections), several.ok = TRUE) + missing <- setdiff(type, keep) + if (length(missing) > 0) { + be <- if (length(missing) > 1) "are" else "is" + missing <- glue::glue_collapse(missing, sep = ", ", last = ", and ") + message(glue::glue("the type options {missing} {be} not one of math, curly, or unescaped")) + } + } else { + keep <- TRUE + } + xpath <- sprintf(".//node()[%s]", paste(protections[keep], collapse = " or ")) + xml2::xml_find_all(body, xpath, ns = ns) +} diff --git a/man/get_protected.Rd b/man/get_protected.Rd new file mode 100644 index 0000000..4f3377d --- /dev/null +++ b/man/get_protected.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_protected.R +\name{get_protected} +\alias{get_protected} +\title{Get protected nodes} +\usage{ +get_protected(body, type = NULL, ns = md_ns()) +} +\arguments{ +\item{body}{an \code{xml_document} object} + +\item{type}{a character vector listing the protections to be included. 
+Defaults to \code{NULL}, which includes all protected nodes: +\itemize{ +\item math: via the \code{protect_math()} function +\item curly: via the \code{protect_curly()} function +\item unescaped: via the \code{protect_unescaped()} function +}} + +\item{ns}{the namespace of the document (defaults to \code{\link[=md_ns]{md_ns()}})} +} +\value{ +an \code{xml_nodelist} object. +} +\description{ +Get protected nodes +} +\examples{ +path <- system.file("extdata", "basic-curly.md", package = "tinkr") +ex <- tinkr::yarn$new(path, sourcepos = TRUE) +# protect curly braces +ex$protect_curly() +# add math and protect it +ex$add_md(c("## math\n", + "$c^2 = a^2 + b^2$\n", + "$$", + "\\\\sum_{i}^k = x_i + 1", + "$$\n") +) +ex$protect_math() +# get protected now shows all the protected nodes +get_protected(ex$body) +get_protected(ex$body, c("math", "curly")) # only show the math and curly +} diff --git a/man/protect_unescaped.Rd b/man/protect_unescaped.Rd index 6d31652..44a9de4 100644 --- a/man/protect_unescaped.Rd +++ b/man/protect_unescaped.Rd @@ -79,7 +79,7 @@ md <- yarn$new(f, sourcepos = TRUE, unescaped = FALSE) md$show() if (requireNamespace("withr")) { lines <- readLines(f)[-length(md$yaml)] -lnks <- withr::with_namespace("tinkr", +lnks <- withr::with_namespace("tinkr", protect_unescaped(body = md$body, txt = lines)) md$body <- lnks md$show() diff --git a/man/yarn.Rd b/man/yarn.Rd index c872c3b..d142d40 100644 --- a/man/yarn.Rd +++ b/man/yarn.Rd @@ -113,6 +113,26 @@ path <- system.file("extdata", "basic-curly.md", package = "tinkr") ex <- tinkr::yarn$new(path, sourcepos = TRUE, unescaped = FALSE) ex$tail() ex$protect_unescaped()$tail() + +## ------------------------------------------------ +## Method `yarn$get_protected` +## ------------------------------------------------ + +path <- system.file("extdata", "basic-curly.md", package = "tinkr") +ex <- tinkr::yarn$new(path, sourcepos = TRUE) +# protect curly braces +ex$protect_curly() +# add math and protect it 
+ex$add_md(c("## math\n", + "$c^2 = a^2 + b^2$\n", + "$$", + "\\\\sum_{i}^k = x_i + 1", + "$$\n") +) +ex$protect_math() +# get protected now shows all the protected nodes +ex$get_protected() +ex$get_protected(c("math", "curly")) # only show the math and curly } \section{Public fields}{ \if{html}{\out{
}} @@ -141,6 +161,7 @@ commonmark.} \item \href{#method-yarn-protect_math}{\code{yarn$protect_math()}} \item \href{#method-yarn-protect_curly}{\code{yarn$protect_curly()}} \item \href{#method-yarn-protect_unescaped}{\code{yarn$protect_unescaped()}} +\item \href{#method-yarn-get_protected}{\code{yarn$get_protected()}} \item \href{#method-yarn-clone}{\code{yarn$clone()}} } } @@ -432,6 +453,51 @@ ex$protect_unescaped()$tail() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-yarn-get_protected}{}}} +\subsection{Method \code{get_protected()}}{ +Return nodes whose contents are protected from being escaped +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{yarn$get_protected(type = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{type}}{a character vector listing the protections to be included. +Defaults to \code{NULL}, which includes all protected nodes: +\itemize{ +\item math: via the \code{protect_math()} function +\item curly: via the \code{protect_curly()} function +\item unescaped: via the \code{protect_unescaped()} function +}} +} +\if{html}{\out{
}} +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{path <- system.file("extdata", "basic-curly.md", package = "tinkr") +ex <- tinkr::yarn$new(path, sourcepos = TRUE) +# protect curly braces +ex$protect_curly() +# add math and protect it +ex$add_md(c("## math\n", + "$c^2 = a^2 + b^2$\n", + "$$", + "\\\\sum_{i}^k = x_i + 1", + "$$\n") +) +ex$protect_math() +# get protected now shows all the protected nodes +ex$get_protected() +ex$get_protected(c("math", "curly")) # only show the math and curly +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} +\if{html}{\out{}} diff --git a/tests/testthat/test-get_protected.R b/tests/testthat/test-get_protected.R new file mode 100644 index 0000000..4c6c272 --- /dev/null +++ b/tests/testthat/test-get_protected.R @@ -0,0 +1,30 @@ +test_that("protected nodes can be accessed", { + path <- system.file("extdata", "basic-curly.md", package = "tinkr") + ex <- tinkr::yarn$new(path, sourcepos = TRUE) + # we should have two protected elements right off due to the braces + expect_length(ex$get_protected(), 2) + + # protect curly braces + ex$protect_curly() + # we should have six protected curly nodes + expect_length(ex$get_protected(), 2 + 6) + # add math and protect it + ex$add_md(c("## math\n", + "$c^2 = a^2 + b^2$\n", + "$$", + "\\sum_{i}^k = x_i + 1", + "$$\n") + ) + ex$protect_math() + # one inline math, two softbreaks, one line of block + expect_length(ex$get_protected(), 2 + 6 + 4) + + expect_length(ex$get_protected("curly"), 6) + expect_length(ex$get_protected("math"), 4) + expect_length(ex$get_protected("unescaped"), 2) + + expect_message(ex$get_protected(c("curly", "shemp", "moe")), + "shemp, and moe are not" + ) +}) + From 86dd647abfd9112e5d5946067a8160a9003e4a5d Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 21 May 2024 16:02:20 -0700 Subject: [PATCH 2/8] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maëlle Salmon --- R/class-yarn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/class-yarn.R b/R/class-yarn.R index a775ccc..017c49e 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -203,7 +203,7 @@ yarn <- R6::R6Class("yarn", #' @description Return nodes whose contents are protected from being escaped #' @param type a character vector listing the protections to be included. 
#' Defaults to `NULL`, which includes all protected nodes: - #' - math: via the `protect_math()` function + #' - math: via the [protect_math()] function #' - curly: via the `protect_curly()` function #' - unescaped: via the `protect_unescaped()` function #' From 8fdb6d18f97d3563e86730971f8cd063f84a6239 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 21 May 2024 16:23:49 -0700 Subject: [PATCH 3/8] set the softbreaks as asis --- R/asis-nodes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/asis-nodes.R b/R/asis-nodes.R index d2a033d..b83ee77 100644 --- a/R/asis-nodes.R +++ b/R/asis-nodes.R @@ -29,7 +29,7 @@ protect_math <- function(body, ns = md_ns()) { } set_asis <- function(nodes) { - xml2::xml_set_attr(nodes[xml2::xml_name(nodes) != "softbreak"], "asis", "true") + xml2::xml_set_attr(nodes, "asis", "true") } # INLINE MATH ------------------------------------------------------------------ From f5daaa396afba0fc2eb0f4ee2cfe7b7432f4e668 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 21 May 2024 16:24:07 -0700 Subject: [PATCH 4/8] include math block delimiters as protected nodes --- R/asis-nodes.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/asis-nodes.R b/R/asis-nodes.R index b83ee77..5b9838b 100644 --- a/R/asis-nodes.R +++ b/R/asis-nodes.R @@ -246,7 +246,12 @@ make_text_nodes <- function(txt) { # BLOCK MATH ------------------------------------------------------------------ find_block_math <- function(body, ns) { - find_between(body, ns, pattern = "md:text[contains(text(), '$$')]", include = FALSE) + # https://github.com/ropensci/tinkr/issues/113#issue-2302065427 + find_between(body, + ns, + pattern = "md:text[contains(text(), '$$')]", + include = TRUE + ) } find_between_inlines <- function(body, ns, tag) { From 39d0aab199ec0b86c730403665dea7ca9e2d98c6 Mon Sep 17 00:00:00 2001 From: "Zhian N. 
Kamvar" Date: Tue, 21 May 2024 16:24:36 -0700 Subject: [PATCH 5/8] Update math tests to include delimiters --- tests/testthat/test-asis-nodes.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/testthat/test-asis-nodes.R b/tests/testthat/test-asis-nodes.R index 206548d..c9134cc 100644 --- a/tests/testthat/test-asis-nodes.R +++ b/tests/testthat/test-asis-nodes.R @@ -68,6 +68,12 @@ test_that("block math can be protected", { expect_snapshot(show_user(m$protect_math()$tail(48), force = TRUE)) expect_length(xml2::xml_ns(m$body), 1L) expect_equal(md_ns()[[1]], xml2::xml_ns(m$body)[[1]]) + # 3 math blocks with code examples + expect_length(grep("$$", m$show(), fixed = TRUE), 12) + # 3 math delimiters included in the get_protected + expect_equal(sum(xml2::xml_text(m$get_protected("math")) == "$$"), 6) + + }) test_that("tick boxes are protected by default", { From 4a1228e33e485ad9253e7a5dbf018739ead5b3f1 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 21 May 2024 16:39:39 -0700 Subject: [PATCH 6/8] update test to clearly indicate protections see https://github.com/ropensci/tinkr/pull/111#discussion_r1604451163 --- tests/testthat/test-get_protected.R | 47 +++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/tests/testthat/test-get_protected.R b/tests/testthat/test-get_protected.R index 4c6c272..3238abe 100644 --- a/tests/testthat/test-get_protected.R +++ b/tests/testthat/test-get_protected.R @@ -1,26 +1,41 @@ test_that("protected nodes can be accessed", { - path <- system.file("extdata", "basic-curly.md", package = "tinkr") + path <- withr::local_tempfile() + # five protected curly elements + curlies <- c("## curlies", + "\nThis line has {xml2} one and {tinkr} two curlies!", + "\n![a pretty kitten](https://placekitten.com/200/300){#kitteh alt='a picture of a kitten'}", + "\n![a pretty puppy](https://placedog.net/200/300){#dog alt=\"a picture", + "of a dog\"}", + # two protected unescaped elements + "\n[span with 
attributes]{.span-with-attributes ", + "style='color: red'}", + "" + ) + # six protected math elements + math <- c("## math", + "\n$c^2 = a^2 + b^2$", # 1 + "\n$$", # 2 + # 3 + "\\sum_{i}^k = x_i + 1", # 4 + # 5 + "$$", # 6 + "" + ) + writeLines(c(curlies, "\n", math), path) ex <- tinkr::yarn$new(path, sourcepos = TRUE) # we should have two protected elements right off due to the braces expect_length(ex$get_protected(), 2) - # protect curly braces - ex$protect_curly() - # we should have six protected curly nodes - expect_length(ex$get_protected(), 2 + 6) - # add math and protect it - ex$add_md(c("## math\n", - "$c^2 = a^2 + b^2$\n", - "$$", - "\\sum_{i}^k = x_i + 1", - "$$\n") - ) - ex$protect_math() # one inline math, two softbreaks, one line of block - expect_length(ex$get_protected(), 2 + 6 + 4) + ex$protect_math() + expect_length(ex$get_protected(), 2 + 6) + + # we should have six protected curly nodes + ex$protect_curly() + expect_length(ex$get_protected(), 2 + 6 + 5) - expect_length(ex$get_protected("curly"), 6) - expect_length(ex$get_protected("math"), 4) + expect_length(ex$get_protected("curly"), 5) + expect_length(ex$get_protected("math"), 6) expect_length(ex$get_protected("unescaped"), 2) expect_message(ex$get_protected(c("curly", "shemp", "moe")), From d566cfee5f05b4478511ef09eab913bf3e6b0042 Mon Sep 17 00:00:00 2001 From: "Zhian N. 
Kamvar" Date: Tue, 21 May 2024 16:54:18 -0700 Subject: [PATCH 7/8] add {rlang} as dependency for arg match options see https://github.com/ropensci/tinkr/pull/111#discussion_r1604450567 --- DESCRIPTION | 1 + NEWS.md | 11 ++++++++++- R/get_protected.R | 8 +------- tests/testthat/test-get_protected.R | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 298b9ff..6b514e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,6 +42,7 @@ Imports: magrittr, purrr, R6, + rlang (>= 0.4.5), xml2, xslt, yaml diff --git a/NEWS.md b/NEWS.md index c87a559..8d406b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,16 @@ * `get_protected()` function (and yarn method) will return nodes which have been protected in some way by {tinkr} via one of the `protect_` family of functions. Adopting this pattern is preferred over using - `md:text[@asis='true']` as the attribute names may change in the future. + `md:text[@asis='true']` as the attribute names may change in the future + (@zkamvar, #111; reviewed: @maelle) +* Block math will now include the delimiters and the softbreaks for protection + (issue/review: #113, @maelle; implemented: #111, @zkamvar) + +## NEW IMPORTS + +* We now import {rlang} for error handling. Because we already import {purrr}, + this does not impact the dependency footprint (suggested: @maelle, #111; + implemented: @zkamvar, #111). 
## BUG FIX diff --git a/R/get_protected.R b/R/get_protected.R index a3c5b36..82bd9e4 100644 --- a/R/get_protected.R +++ b/R/get_protected.R @@ -32,13 +32,7 @@ get_protected <- function(body, type = NULL, ns = md_ns()) { unescaped = "(@asis and text()='[' or text()=']')" ) if (!is.null(type)) { - keep <- match.arg(type, names(protections), several.ok = TRUE) - missing <- setdiff(type, keep) - if (length(missing) > 0) { - be <- if (length(missing) > 1) "are" else "is" - missing <- glue::glue_collapse(missing, sep = ", ", last = ", and ") - message(glue::glue("the type options {missing} {be} not one of math, curly, or unescaped")) - } + keep <- rlang::arg_match(type, names(protections), multiple = TRUE) } else { keep <- TRUE } diff --git a/tests/testthat/test-get_protected.R b/tests/testthat/test-get_protected.R index 3238abe..843cc20 100644 --- a/tests/testthat/test-get_protected.R +++ b/tests/testthat/test-get_protected.R @@ -38,8 +38,8 @@ test_that("protected nodes can be accessed", { expect_length(ex$get_protected("math"), 6) expect_length(ex$get_protected("unescaped"), 2) - expect_message(ex$get_protected(c("curly", "shemp", "moe")), - "shemp, and moe are not" + expect_error(ex$get_protected(c("curly", "shemp")), + "not \"shemp\"" ) }) From 419e6ce8c7c83832b017cf35d246132e749fa702 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 21 May 2024 16:57:15 -0700 Subject: [PATCH 8/8] redocument --- man/yarn.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/yarn.Rd b/man/yarn.Rd index d142d40..673caa0 100644 --- a/man/yarn.Rd +++ b/man/yarn.Rd @@ -469,7 +469,7 @@ Return nodes whose contents are protected from being escaped \item{\code{type}}{a character vector listing the protections to be included. 
Defaults to \code{NULL}, which includes all protected nodes: \itemize{ -\item math: via the \code{protect_math()} function +\item math: via the \code{\link[=protect_math]{protect_math()}} function \item curly: via the \code{protect_curly()} function \item unescaped: via the \code{protect_unescaped()} function }}