diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs
index 8a13db9f09..76499b711c 100644
--- a/src/renderer/html_handlebars/hbs_renderer.rs
+++ b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -54,8 +54,12 @@ impl HtmlHandlebars {
let content = ch.content.clone();
let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);
- let fixed_content =
- utils::render_markdown_with_path(&ch.content, ctx.html_config.curly_quotes, Some(path));
+ let fixed_content = utils::render_markdown_with_path(
+ &ch.content,
+ ctx.html_config.curly_quotes,
+ Some(path),
+ ctx.html_config.redirect,
+ );
if !ctx.is_index && ctx.html_config.print.page_break {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@@ -63,7 +67,25 @@ impl HtmlHandlebars {
print_content
.push_str(r#"
"#);
}
- print_content.push_str(&fixed_content);
+        // Id prefix for this chapter: path minus `.md`, separators as `-`, lowercased.
+        let path_id = {
+            let mut base = path.display().to_string();
+            if base.ends_with(".md") {
+                base.truncate(base.len() - ".md".len());
+            }
+            base.replace("/", "-")
+                .replace("\\", "-")
+                .to_ascii_lowercase()
+        };
+
+        // We have to build header links in advance so that we can know the ranges
+        // for the headers in one page.
+        // Insert a dummy div to make sure that we can locate the specific page.
+        print_content.push_str(&format!(r#"<div id="{}"></div>"#, path_id));
+        print_content.push_str(&build_header_links(
+            &build_print_element_id(&fixed_content, &path_id),
+            Some(path_id.as_str()),
+        ));
// Update the context with data for this file
let ctx_path = path
@@ -188,19 +210,31 @@ impl HtmlHandlebars {
}
#[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))]
- fn post_process(
+ fn post_process_print( // runs fix_code_blocks + add_playground_pre only; header links are built by callers
&self,
rendered: String,
playground_config: &Playground,
edition: Option,
) -> String {
- let rendered = build_header_links(&rendered);
let rendered = fix_code_blocks(&rendered);
let rendered = add_playground_pre(&rendered, playground_config, edition);
rendered
}
+    /// Post-processes a regular (non-print) page: builds header anchors with
+    /// no path-id prefix (`None`), then delegates the remaining steps to
+    /// `post_process_print`.
+    fn post_process(
+        &self,
+        rendered: String,
+        playground_config: &Playground,
+        edition: Option<RustEdition>,
+    ) -> String {
+        let rendered = build_header_links(&rendered, None);
+        self.post_process_print(rendered, playground_config, edition)
+    }
+
fn copy_static_files(
&self,
destination: &Path,
@@ -560,7 +594,7 @@ impl Renderer for HtmlHandlebars {
let rendered = handlebars.render("index", &data)?;
let rendered =
- self.post_process(rendered, &html_config.playground, ctx.config.rust.edition);
+ self.post_process_print(rendered, &html_config.playground, ctx.config.rust.edition);
utils::fs::write_file(destination, "print.html", rendered.as_bytes())?;
debug!("Creating print.html ✓");
@@ -760,9 +794,44 @@ fn make_data(
Ok(data)
}
+/// Prefixes element ids in one chapter's rendered HTML for `print.html`.
+///
+/// Every `id="…"` attribute value in `html` becomes `{path_id}-{id}`, and the
+/// `href="#…"` fragment of every footnote-reference link gets the same
+/// prefix. An empty `path_id` returns `html` unchanged.
+///
+/// Capture 1 of both patterns is the text up to and including the opening
+/// quote; capture 2 is the id / fragment that receives the prefix.
+fn build_print_element_id(html: &str, path_id: &str) -> String {
+    lazy_static! {
+        // Compiled once instead of on every call.
+        static ref ALL_ID: Regex = Regex::new(r#"(<[^>]*?id=")([^"]+?)""#).unwrap();
+        static ref FOOTNOTE_ID: Regex = Regex::new(
+            r##"(<sup [^>]*?class="footnote-reference"[^>]*?>[^<]*?<a [^>]*?href="#)([^"]+?)""##,
+        )
+        .unwrap();
+    }
+
+    // Re-emits one match with `{path_id}-` inserted in front of capture 2.
+    fn prefixed(path_id: &str, caps: &Captures<'_>) -> String {
+        format!("{}{}-{}\"", &caps[1], path_id, &caps[2])
+    }
+
+    if path_id.is_empty() {
+        return html.to_string();
+    }
+
+    // Pass 1 rewrites ids; pass 2 rewrites footnote links on the output of pass 1.
+    let with_ids = ALL_ID.replace_all(html, |caps: &Captures<'_>| prefixed(path_id, caps));
+    FOOTNOTE_ID
+        .replace_all(&with_ids, |caps: &Captures<'_>| prefixed(path_id, caps))
+        .into_owned()
+}
+
/// Goes through the rendered HTML, making sure all header tags have
/// an anchor respectively so people can link to sections directly.
-fn build_header_links(html: &str) -> String {
+/// `path_id`, when given, is forwarded so each header id gets that prefix.
+fn build_header_links(html: &str, path_id: Option<&str>) -> String {
lazy_static! {
static ref BUILD_HEADER_LINKS: Regex = Regex::new(r"(.*?)").unwrap();
}
@@ -775,19 +844,26 @@ fn build_header_links(html: &str) -> String {
.parse()
.expect("Regex should ensure we only ever get numbers here");
- insert_link_into_header(level, &caps[2], &mut id_counter)
+ insert_link_into_header(level, &caps[2], &mut id_counter, path_id)
})
.into_owned()
}
/// Insert a sinle link into a header, making sure each link gets its own
/// unique ID by appending an auto-incremented number (if necessary).
+///
+/// For `print.html`, we will add a path id prefix.
fn insert_link_into_header(
level: usize,
content: &str,
id_counter: &mut HashMap,
+ path_id: Option<&str>,
) -> String {
- let id = utils::unique_id_from_content(content, id_counter);
+ let id = if let Some(path_id) = path_id {
+ utils::unique_id_from_content_with_path(content, id_counter, path_id)
+ } else {
+ utils::unique_id_from_content(content, id_counter)
+ };
format!(
r##""##,
@@ -998,7 +1074,7 @@ mod tests {
];
for (src, should_be) in inputs {
- let got = build_header_links(src);
+ let got = build_header_links(src, None);
assert_eq!(got, should_be);
}
}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 2000d661bc..ea1aeba88c 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -11,7 +11,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
-use std::path::Path;
+use std::path::{Component, Path, PathBuf};
pub use self::string::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
@@ -86,30 +86,117 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap,
+ path_id: &str,
+) -> String {
+ unique_id_from_content(&format!("{} {}", path_id, content), id_counter)
+}
+
+/// Lexically normalizes `path`: drops `.` and resolves `..` without touching
+/// the file system (adapted from https://stackoverflow.com/a/68233480).
+///
+/// `a/b/../c` becomes `a/c`, which may differ from what the OS would pick
+/// when `b` is a symlink. A `..` with no preceding normal component to cancel
+/// is kept (`../../x` stays as is). A trailing '/' is preserved and `\` is
+/// returned as `/`.
+///
+/// Panics if the normalized path is not valid UTF-8.
+pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
+    let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
+    let mut normalized = PathBuf::new();
+    for component in path.as_ref().components() {
+        match component {
+            Component::CurDir => {}
+            // Only a normal segment may be cancelled; popping blindly would
+            // also swallow an earlier `..` and collapse `../..` to nothing.
+            Component::ParentDir
+                if matches!(normalized.components().next_back(), Some(Component::Normal(_))) =>
+            {
+                normalized.pop();
+            }
+            _ => normalized.push(component),
+        }
+    }
+    if ends_with_slash {
+        normalized.push("");
+    }
+    normalized.to_str().expect("utf-8 paths only").replace("\\", "/")
+}
+
+/// Normalizes a link into a path id: `/`, `.html#` and `#` each become `-`,
+/// the text is ASCII-lowercased, and a trailing `.html` is then removed.
+/// The input string is consumed and the id is returned.
+pub fn normalize_path_id(mut path: String) -> String {
+    let dashed = path.replace("/", "-").replace(".html#", "-").replace("#", "-");
+    path = dashed.to_ascii_lowercase();
+    if path.ends_with(".html") {
+        let stem_len = path.len() - ".html".len();
+        path.truncate(stem_len);
+    }
+    path
+}
+
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
///
/// `path` is the path to the page being rendered relative to the root of the
/// book. This is used for the `print.html` page so that links on the print
-/// page go to the original location. Normal page rendering sets `path` to
-/// None. Ideally, print page links would link to anchors on the print page,
-/// but that is very difficult.
-fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
+/// page go to the anchors that has a path id prefix. Normal page rendering
+/// sets `path` to None.
+fn adjust_links<'a>(
+ event: Event<'a>,
+ path: Option<&Path>,
+ redirects: HashMap,
+) -> Event<'a> {
lazy_static! {
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
static ref MD_LINK: Regex = Regex::new(r"(?P.*)\.md(?P#.*)?").unwrap();
+ static ref HTML_MD_LINK: Regex =
+ Regex::new(r"(?P.*)\.(html|md)(?P#.*)?").unwrap();
}
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+ // Don't modify links with schemes like `https`.
+ if !SCHEME_LINK.is_match(&dest) {
+ // This is a relative link, adjust it as necessary.
+ let mut fixed_link = String::new();
+ if let Some(path) = path {
+ let base = path
+ .parent()
+ .expect("path can't be empty")
+ .to_str()
+ .expect("utf-8 paths only");
+ if !base.is_empty() {
+ write!(fixed_link, "{}/", base).unwrap();
+ }
+ }
+ fixed_link.push_str(&dest);
+ return CowStr::from(fixed_link);
+ }
+ dest
+ }
+
+ fn fix_a_links<'a>(
+ dest: CowStr<'a>,
+ path: Option<&Path>,
+ redirects: HashMap,
+ ) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
let mut base = path.display().to_string();
if base.ends_with(".md") {
- base.replace_range(base.len() - 3.., ".html");
+ base.replace_range(base.len() - 3.., "");
}
- return format!("{}{}", base, dest).into();
+ return format!(
+ "#{}{}",
+ normalize_path_id(normalize_path(base)),
+ dest.replace("#", "-")
+ )
+ .into();
} else {
return dest;
}
@@ -129,7 +216,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
}
}
- if let Some(caps) = MD_LINK.captures(&dest) {
+ if let Some(caps) = HTML_MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
@@ -138,12 +225,84 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
} else {
fixed_link.push_str(&dest);
};
+
+ let mut normalized_path = normalize_path(&fixed_link);
+
+ // Judge if the html link is inside the book.
+ if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
+ // In `print.html`, print page links would all link to anchors on the print page.
+ if let Some(_) = path {
+ // Fix redirect links
+ let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
+ for (original, redirect) in &redirects {
+ if normalize_path(original.trim_start_matches('/'))
+ .eq_ignore_ascii_case(&normalized_path)
+ || normalize_path(original.trim_start_matches('/'))
+ .eq_ignore_ascii_case(&normalized_path_split[0])
+ {
+ let mut unnormalized_path = String::new();
+ if SCHEME_LINK.is_match(&redirect) {
+ unnormalized_path = redirect.to_string();
+ } else {
+ let base = PathBuf::from(normalized_path_split[0])
+ .parent()
+ .expect("path can't be empty")
+ .to_str()
+ .expect("utf-8 paths only")
+ .to_owned();
+
+ let normalized_base =
+ normalize_path(base).trim_matches('/').to_owned();
+ if !normalized_base.is_empty() {
+ write!(unnormalized_path, "{}/{}", normalized_base, redirect)
+ .unwrap();
+ } else {
+ unnormalized_path =
+ redirect.to_string().trim_start_matches('/').to_string();
+ }
+ }
+
+ // original without anchors, need to append link anchors
+ if !original.contains("#") {
+ for i in 1..normalized_path_split.len() {
+ if !unnormalized_path.contains("#") {
+ unnormalized_path.push('#');
+ } else {
+ unnormalized_path.push('-');
+ }
+ unnormalized_path.push_str(normalized_path_split[i]);
+ }
+ }
+
+ if !SCHEME_LINK.is_match(&redirect) {
+ normalized_path = normalize_path(unnormalized_path);
+ } else {
+ return CowStr::from(unnormalized_path);
+ }
+ break;
+ }
+ }
+ // Check again to make sure anchors are the html links inside the book.
+ if normalized_path.starts_with("../") || normalized_path.contains("/../") {
+ return CowStr::from(normalized_path);
+ }
+ let mut fixed_anchor_for_print = String::new();
+ fixed_anchor_for_print.push_str("#");
+ fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
+ return CowStr::from(fixed_anchor_for_print);
+ }
+ }
+ // In normal page rendering, links to anchors on another page.
return CowStr::from(fixed_link);
}
dest
}
- fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+ fn fix_html<'a>(
+ html: CowStr<'a>,
+ path: Option<&Path>,
+ redirects: HashMap,
+ ) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
@@ -153,34 +312,43 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
// feel free to add more tags if desired; these are the only ones I
// care about right now.
lazy_static! {
- static ref HTML_LINK: Regex =
- Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
+ static ref A_LINK: Regex = Regex::new(r#"(]*?href=")([^"]+?)""#).unwrap();
+ static ref HTML_LINK: Regex = Regex::new(r#"(]*?src=")([^"]+?)""#).unwrap();
}
- HTML_LINK
+ let temp_html = HTML_LINK
.replace_all(&html, |caps: ®ex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
format!("{}{}\"", &caps[1], fixed)
})
+ .into_owned();
+
+ A_LINK
+ .replace_all(&temp_html, |caps: ®ex::Captures<'_>| {
+ let fixed = fix_a_links(caps[2].into(), path, redirects.clone());
+ format!("{}{}\"", &caps[1], fixed)
+ })
.into_owned()
.into()
}
match event {
- Event::Start(Tag::Link(link_type, dest, title)) => {
- Event::Start(Tag::Link(link_type, fix(dest, path), title))
- }
+ Event::Start(Tag::Link(link_type, dest, title)) => Event::Start(Tag::Link(
+ link_type,
+ fix_a_links(dest, path, redirects),
+ title,
+ )),
Event::Start(Tag::Image(link_type, dest, title)) => {
Event::Start(Tag::Image(link_type, fix(dest, path), title))
}
- Event::Html(html) => Event::Html(fix_html(html, path)),
+ Event::Html(html) => Event::Html(fix_html(html, path, redirects)),
_ => event,
}
}
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
- render_markdown_with_path(text, curly_quotes, None)
+ render_markdown_with_path(text, curly_quotes, None, HashMap::new()) // no path, so an empty redirect map
}
pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
@@ -195,12 +363,17 @@ pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
Parser::new_ext(text, opts)
}
-pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
+pub fn render_markdown_with_path(
+ text: &str,
+ curly_quotes: bool,
+ path: Option<&Path>,
+ redirects: HashMap,
+) -> String {
let mut s = String::with_capacity(text.len() * 3 / 2);
let p = new_cmark_parser(text, curly_quotes);
let events = p
.map(clean_codeblock_headers)
- .map(|event| adjust_links(event, path));
+ .map(|event| adjust_links(event, path, redirects.clone()));
html::push_html(&mut s, events);
s
diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs
index 873a622df5..92450cdfae 100644
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@@ -127,11 +127,11 @@ fn check_correct_relative_links_in_print_page() {
assert_contains_strings(
first.join("print.html"),
&[
- r##"the first section,"##,
+ r##"the first section,"##,
r##"outside"##,
r##""##,
- r##"fragment link"##,
- r##"HTML Link"##,
+ r##"fragment link"##,
+ r##"HTML Link"##,
r##""##,
],
);