-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #76 from ropensci/curly
feat: protect_curly()
- Loading branch information
Showing
16 changed files
with
375 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# CURLY ------------------ | ||
# Find text nodes that contain curly-brace content.
#
# A curly span that is split over two source lines ends up in two text nodes
# (the opening one has `{` but no `}`). Those pairs are merged in place so
# that each returned node holds both the opening and closing brace.
#
# @param body an xml document or node to search (modified in place when an
#   opening node has to be merged with its closing node)
# @param ns the XML namespace used to resolve the `md:` prefix
# @return an xml nodeset of text nodes (not marked `asis`) that contain `{`
find_curly <- function(body, ns) {
  i <- ".//md:text[not(@asis) and contains(text(), '{')]"
  curlies <- xml2::xml_find_all(body, i, ns = ns)
  # https://github.com/carpentries/pegboard/blob/a8db02ab037f2ffeab6e13cc3b662ea8c42822ad/R/get_images.R#L84
  attr_texts <- xml2::xml_text(curlies)
  no_closing <- !grepl("[}]", attr_texts)
  if (!any(no_closing)) {
    return(curlies)
  }

  close_xpath <- "./self::*/following-sibling::md:text[contains(text(), '}')]"
  for (not_closed in curlies[no_closing]) {
    # Only the *first* following sibling with a `}` closes this span; taking
    # all of them would drop every later one from the document.
    closing <- xml2::xml_find_first(not_closed, close_xpath, ns)
    if (inherits(closing, "xml_missing")) {
      # Unbalanced brace: leave the node untouched rather than appending an
      # empty line to it.
      next
    }
    xml2::xml_text(not_closed) <- paste(
      xml2::xml_text(not_closed),
      xml2::xml_text(closing),
      sep = "\n"
    )
    xml2::xml_remove(closing)
  }

  # Re-query: a removed closing node may itself have contained `{` and would
  # otherwise be returned even though it is no longer part of `body`.
  xml2::xml_find_all(body, i, ns = ns)
}
|
||
# Split the markup of one text node around its `{...}` spans.
#
# Each `{...}` span is wrapped in its own `<text curly='true'>` element; when
# the span carries an `alt=` fragment, its quoted value is copied to an `alt`
# attribute on that element. The rewritten markup is handed to
# `make_text_nodes()`.
#
# @param curly a single xml text node containing curly-brace content
# @param ns not used in this function; kept so callers can pass the same
#   arguments as for the sibling helpers
# @return the result of `make_text_nodes()` on the rewritten markup
digest_curly <- function(curly, ns) {
  char <- as.character(curly)
  # Non-greedy so every `{...}` span in the node is a separate match.
  matches <- gregexpr("\\{.*?\\}", char)
  curlies <- regmatches(char, matches)[[1]]

  wrap_curly <- function(curl) {
    attributes <- "curly='true'"

    alt_fragment <- regmatches(curl, gregexpr("alt=['\"].*?['\"]", curl))[[1]]
    if (length(alt_fragment) > 0) {
      # Use the first fragment only so the replacement stays a single string.
      alt_text <- sub("^alt=", "", alt_fragment[[1]])
      attributes <- sprintf("%s alt=%s", attributes, alt_text)
    }

    sprintf("</text><text %s>%s</text><text>", attributes, curl)
  }

  if (length(curlies) > 0) {
    # Replace by match position instead of by content: a content-based
    # `sub()` always hits the first occurrence, so a span repeated in the
    # same node (e.g. `{pkg} ... {pkg}`) would be wrapped twice while the
    # second occurrence stayed unprotected.
    wrapped <- vapply(curlies, wrap_curly, character(1), USE.NAMES = FALSE)
    regmatches(char, matches) <- list(wrapped)
  }

  make_text_nodes(char)
}
|
||
#' Protect curly elements for further processing
#'
#' @inheritParams protect_math
#' @return a copy of the modified XML object
#' @details Commonmark will render text such as `{.unnumbered}`
#' (Pandoc/Quarto option) or
#' `{#hello .greeting .message style="color: red;"}`
#' (Markdown custom block)
#' as normal text, which might be problematic when trying to extract
#' real text from the XML.
#'
#' If sending the XML to, say, a translation API that allows some tags
#' to be ignored, you could first transform the text tags with the
#' attribute `curly` to `curly` tags, and then transform them back
#' to text tags before using `to_md()`.
#'
#' @note this function is also a method in the [tinkr::yarn] object.
#'
#' @export
#' @examples
#' m <- tinkr::to_xml(system.file("extdata", "basic-curly.md", package = "tinkr"))
#' xml2::xml_child(m$body)
#' m$body <- protect_curly(m$body)
#' xml2::xml_child(m$body)
protect_curly <- function(body, ns = md_ns()) {
  # Work on a copy so the caller's document is not modified.
  body <- copy_xml(body)
  curly <- find_curly(body, ns)
  new_nodes <- purrr::map(curly, digest_curly, ns = ns)
  # since we split up the nodes, we have to do this node by node
  for (i in seq_along(new_nodes)) {
    add_node_siblings(curly[[i]], new_nodes[[i]], remove = TRUE)
  }
  copy_xml(body)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,4 +19,5 @@ reference: | |
- '`find_between`' | ||
- '`md_ns`' | ||
- '`protect_math`' | ||
- '`protect_curly`' | ||
- '`stylesheet`' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
--- | ||
title: basic curly | ||
--- | ||
|
||
# preface {#pre-face .unnumbered} | ||
|
||
hello | ||
|
||
I like {xml2} but of course {tinkr} is even cooler! | ||
|
||
Images that use pandoc style will have curlies with content that should be translated and should be protected. | ||
|
||
![a pretty kitten](https://placekitten.com/200/300){#kitteh alt='a picture of a kitten'} | ||
|
||
![a pretty puppy](https://placedog.net/200/300){#dog alt="a picture | ||
of a dog"} | ||
|
||
[a span with attributes]{.span-with-attributes | ||
style='color: red;'} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
--- | ||
title: "Untitled" | ||
author: "M. Salmon" | ||
date: "September 6, 2018" | ||
output: html_document | ||
--- | ||
|
||
```{r setup, include=FALSE, eval = TRUE} | ||
knitr::opts_chunk$set(echo = TRUE) | ||
``` | ||
|
||
# preface {#pre-face .unnumbered} | ||
|
||
hello | ||
|
||
I like {xml2} but of course {tinkr} is even cooler! | ||
|
||
Images that use pandoc style will have curlies with content that should be translated and should be protected. | ||
|
||
![a pretty kitten](https://placekitten.com/200/300){#kitteh alt='a picture of a kitten'} | ||
|
||
![a pretty puppy](https://placedog.net/200/300){#dog alt="a picture | ||
of a dog"} | ||
|
||
[a span with attributes]{.span-with-attributes | ||
style='color: red;'} |
Oops, something went wrong.