Skip to content

Commit

Permalink
Make print page (print.html) links link to anchors on the print page
Browse files Browse the repository at this point in the history
Give every anchor id on the print page a path id prefix so that it can
be located unambiguously.

e.g. bar/foo.md#abc -> #bar-foo-abc

Also prepend a dummy div to each original page to make sure that links
to that page without an anchor can also be located.

Signed-off-by: Hollow Man <[email protected]>
  • Loading branch information
HollowMan6 committed Feb 2, 2022
1 parent 2213312 commit faef05d
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 23 deletions.
57 changes: 48 additions & 9 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,19 @@ impl HtmlHandlebars {
print_content
.push_str(r#"<div style="break-before: page; page-break-before: always;"></div>"#);
}
print_content.push_str(&fixed_content);
let path_id = {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., "");
}
&base.replace("/", "-").replace("\\", "-")
};

// We have to build header links in advance so that we can know the ranges
// for the headers in one page.
// Insert a dummy div to make sure that we can locate the specific page.
print_content.push_str(&(format!(r#"<div id="{}"></div>"#, &path_id)));
print_content.push_str(&build_header_links(&fixed_content, Some(path_id)));

// Update the context with data for this file
let ctx_path = path
Expand Down Expand Up @@ -181,19 +193,31 @@ impl HtmlHandlebars {
}

#[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))]
fn post_process(
fn post_process_print(
&self,
rendered: String,
playground_config: &Playground,
edition: Option<RustEdition>,
) -> String {
let rendered = build_header_links(&rendered);
let rendered = fix_code_blocks(&rendered);
let rendered = add_playground_pre(&rendered, playground_config, edition);

rendered
}

#[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))]
fn post_process(
&self,
rendered: String,
playground_config: &Playground,
edition: Option<RustEdition>,
) -> String {
let rendered = build_header_links(&rendered, None);
let rendered = self.post_process_print(rendered, &playground_config, edition);

rendered
}

fn copy_static_files(
&self,
destination: &Path,
Expand Down Expand Up @@ -547,7 +571,7 @@ impl Renderer for HtmlHandlebars {
let rendered = handlebars.render("index", &data)?;

let rendered =
self.post_process(rendered, &html_config.playground, ctx.config.rust.edition);
self.post_process_print(rendered, &html_config.playground, ctx.config.rust.edition);

utils::fs::write_file(destination, "print.html", rendered.as_bytes())?;
debug!("Creating print.html ✓");
Expand Down Expand Up @@ -746,7 +770,7 @@ fn make_data(

/// Goes through the rendered HTML, making sure all header tags have
/// an anchor respectively so people can link to sections directly.
fn build_header_links(html: &str) -> String {
fn build_header_links(html: &str, path_id: Option<&str>) -> String {
let regex = Regex::new(r"<h(\d)>(.*?)</h\d>").unwrap();
let mut id_counter = HashMap::new();

Expand All @@ -756,25 +780,40 @@ fn build_header_links(html: &str) -> String {
.parse()
.expect("Regex should ensure we only ever get numbers here");

insert_link_into_header(level, &caps[2], &mut id_counter)
insert_link_into_header(level, &caps[2], &mut id_counter, path_id)
})
.into_owned()
}

/// Insert a single link into a header, making sure each link gets its own
/// unique ID by appending an auto-incremented number (if necessary).
///
/// For `print.html`, we will add a path id prefix.
fn insert_link_into_header(
level: usize,
content: &str,
id_counter: &mut HashMap<String, usize>,
path_id: Option<&str>,
) -> String {
let raw_id = utils::id_from_content(content);

let id_count = id_counter.entry(raw_id.clone()).or_insert(0);

let id = match *id_count {
0 => raw_id,
other => format!("{}-{}", raw_id, other),
0 => {
if let Some(path_id) = path_id {
format!("{}-{}", path_id, raw_id)
} else {
raw_id
}
}
other => {
if let Some(path_id) = path_id {
format!("{}-{}-{}", path_id, raw_id, other)
} else {
format!("{}-{}", raw_id, other)
}
}
};

*id_count += 1;
Expand Down Expand Up @@ -980,7 +1019,7 @@ mod tests {
];

for (src, should_be) in inputs {
let got = build_header_links(src);
let got = build_header_links(src, None);
assert_eq!(got, should_be);
}
}
Expand Down
101 changes: 90 additions & 11 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};

use std::borrow::Cow;
use std::fmt::Write;
use std::path::Path;
use std::path::{Component, Path, PathBuf};

pub use self::string::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
Expand Down Expand Up @@ -63,30 +63,87 @@ pub fn id_from_content(content: &str) -> String {
normalize_id(trimmed)
}

/// Lexically normalizes a relative path and turns it into a path id.
///
/// `..` components are resolved against the preceding normal component
/// (`a/b/../c` becomes `a/c`), then every directory separator (`/` or `\`)
/// is replaced with a hyphen, so `a/b/../c` yields `a-c`.
///
/// The resolution is purely textual (adapted from
/// https://stackoverflow.com/a/68233480): the file system is never
/// consulted, so the result may differ from what the OS would resolve when
/// a component is a symbolic link.
///
/// A `..` that has no normal component left to cancel is kept in the
/// output (`../../a` yields `..-..-a`), so a path that climbs above its
/// starting directory can still be recognised by the `..` in its id.
///
/// A path ending with '/' yields an id ending with '-'. Non-UTF-8 sequences
/// are replaced with U+FFFD rather than causing a panic.
pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    let ends_with_slash = path.to_str().map_or(false, |s| s.ends_with('/'));

    let mut normalized = PathBuf::new();
    for component in path.components() {
        // Only a normal component may be cancelled by `..`. Popping
        // unconditionally would also remove a previously kept `..`
        // (`PathBuf::pop` succeeds on `..`), silently turning `../../a`
        // into `a`.
        let cancels_previous = component == Component::ParentDir
            && matches!(
                normalized.components().next_back(),
                Some(Component::Normal(_))
            );
        if cancels_previous {
            normalized.pop();
        } else {
            normalized.push(component);
        }
    }
    if ends_with_slash {
        // Pushing an empty component re-adds the trailing separator.
        normalized.push("");
    }

    normalized
        .to_string_lossy()
        .replace(&['\\', '/'][..], "-")
}

/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
///
/// `path` is the path to the page being rendered relative to the root of the
/// book. This is used for the `print.html` page so that links on the print
/// page go to the original location. Normal page rendering sets `path` to
/// None. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
/// page go to anchors that have a path id prefix. Normal page rendering
/// sets `path` to None.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
lazy_static! {
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
static ref HTML_MD_LINK: Regex =
Regex::new(r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?").unwrap();
}

fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}
fixed_link.push_str(&dest);
return CowStr::from(fixed_link);
}
dest
}

fn fix_a_links<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
base.replace_range(base.len() - 3.., "");
}
return format!("{}{}", base, dest).into();
return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into();
} else {
return dest;
}
Expand All @@ -106,7 +163,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
}
}

if let Some(caps) = MD_LINK.captures(&dest) {
if let Some(caps) = HTML_MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
Expand All @@ -115,6 +172,21 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
} else {
fixed_link.push_str(&dest);
};

let path_id = normalize_path_id(&fixed_link)
.replace(".html", "")
.replace("#", "-");
// Check whether the html link points inside the book.
if !path_id.contains("..") {
if let Some(_) = path {
// In `print.html`, print page links would all link to anchors on the print page.
let mut fixed_anchor_for_print = String::new();
fixed_anchor_for_print.push_str("#");
fixed_anchor_for_print.push_str(&path_id);
return CowStr::from(fixed_anchor_for_print);
}
}
// In normal page rendering, links to anchors on another page.
return CowStr::from(fixed_link);
}
dest
Expand All @@ -130,22 +202,29 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
// feel free to add more tags if desired; these are the only ones I
// care about right now.
lazy_static! {
static ref HTML_LINK: Regex =
Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
static ref A_LINK: Regex = Regex::new(r#"(<a [^>]*?href=")([^"]+?)""#).unwrap();
static ref HTML_LINK: Regex = Regex::new(r#"(<img [^>]*?src=")([^"]+?)""#).unwrap();
}

HTML_LINK
let temp_html = HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned();

A_LINK
.replace_all(&temp_html, |caps: &regex::Captures<'_>| {
let fixed = fix_a_links(caps[2].into(), path);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
.into()
}

match event {
Event::Start(Tag::Link(link_type, dest, title)) => {
Event::Start(Tag::Link(link_type, fix(dest, path), title))
Event::Start(Tag::Link(link_type, fix_a_links(dest, path), title))
}
Event::Start(Tag::Image(link_type, dest, title)) => {
Event::Start(Tag::Image(link_type, fix(dest, path), title))
Expand Down
6 changes: 3 additions & 3 deletions tests/rendered_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ fn check_correct_relative_links_in_print_page() {
assert_contains_strings(
first.join("print.html"),
&[
r##"<a href="second/../first/nested.html">the first section</a>,"##,
r##"<a href="#first-nested">the first section</a>,"##,
r##"<a href="second/../../std/foo/bar.html">outside</a>"##,
r##"<img src="second/../images/picture.png" alt="Some image" />"##,
r##"<a href="second/nested.html#some-section">fragment link</a>"##,
r##"<a href="second/../first/markdown.html">HTML Link</a>"##,
r##"<a href="#second-nested-some-section">fragment link</a>"##,
r##"<a href="#first-markdown">HTML Link</a>"##,
r##"<img src="second/../images/picture.png" alt="raw html">"##,
],
);
Expand Down

0 comments on commit faef05d

Please sign in to comment.