Skip to content

Commit

Permalink
feat: programmatically infer redirects to ghost pages
Browse files: browse the repository at this point in the history.
  • Loading branch information
GetPsyched committed Nov 5, 2024
1 parent bdb2b66 commit f804b81
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up
@@ -676,7 +676,12 @@ def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) ->

TocEntry.collect_and_link(self._xref_targets, tokens)
self._redirects.validate(self._xref_targets)
self._redirects.export_server_redirects(self._xref_targets, outfile.parent)
server_redirects = self._redirects.get_server_redirects()
with open(outfile.parent / '_redirects', 'w') as server_redirects_file:
formatted_server_redirects = []
for from_path, to_path in server_redirects.items():
formatted_server_redirects.append(f"{from_path} {to_path} 301")
server_redirects_file.write("\n".join(formatted_server_redirects))


def _build_cli_html(p: argparse.ArgumentParser) -> None:
Expand Down
71 changes: 23 additions & 48 deletions pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/redirects.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path

from .manual_structure import XrefTarget
Expand All
@@ -8,36 +8,20 @@
class RedirectsError(Exception):
def __init__(
self,
client_paths_with_server_redirects=None,
conflicting_anchors=None,
divergent_redirects=None,
identifiers_missing_current_outpath=None,
identifiers_without_redirects=None,
orphan_identifiers=None
conflicting_anchors: set[str] = None,
divergent_redirects: set[str] = None,
identifiers_missing_current_outpath: set[str] = None,
identifiers_without_redirects: set[str] = None,
orphan_identifiers: set[str] = None
):
self.client_paths_with_server_redirects = client_paths_with_server_redirects or []
self.conflicting_anchors = conflicting_anchors or []
self.divergent_redirects = divergent_redirects or []
self.identifiers_missing_current_outpath = identifiers_missing_current_outpath or []
self.identifiers_without_redirects = identifiers_without_redirects or []
self.orphan_identifiers = orphan_identifiers or []
self.conflicting_anchors = conflicting_anchors or set()
self.divergent_redirects = divergent_redirects or set()
self.identifiers_missing_current_outpath = identifiers_missing_current_outpath or set()
self.identifiers_without_redirects = identifiers_without_redirects or set()
self.orphan_identifiers = orphan_identifiers or set()

def __str__(self):
error_messages = []
if self.client_paths_with_server_redirects:
error_messages.append(f"""
**Client Paths with Server Redirects Found**
A client redirect from a path that has a server-side redirect must not exist.
The following identifiers violate the above rule:
- {"\n- ".join(f"{source} -> {dest}" for source, dest in self.client_paths_with_server_redirects.items())}
This can generally happen when:
- A redirect was added that redirects to another redirect
This is problematic because:
- It could lead to undefined behaviour. If a user goes to such a link, the server-side redirect would activate asynchronously along with the client-side redirect which then would trigger another server-side redirect.
""")
if self.conflicting_anchors:
error_messages.append(f"""
**Conflicting Anchors Found**
Expand Down
Expand Up
@@ -113,6 +97,8 @@ class Redirects:
_raw_redirects: dict[str, list[str]]
_redirects_script: str

_xref_targets: dict[str, XrefTarget] = field(default_factory=dict)

def validate(self, xref_targets: dict[str, XrefTarget]):
"""
Validate redirection mappings against element locations in the output
Expand Down
Expand Up
@@ -160,55 +146,44 @@ def validate(self, xref_targets: dict[str, XrefTarget]):
else:
divergent_redirects.add(location)

client_paths_with_server_redirects = {}
for server_from, server_to in server_side_redirects.items():
for client_from, client_to in client_side_redirects.items():
path, anchor = client_from.split('#')
if server_from == path:
client_paths_with_server_redirects[client_from] = f"{server_to}#{anchor}"

if any([
client_paths_with_server_redirects,
conflicting_anchors,
divergent_redirects,
identifiers_missing_current_outpath,
identifiers_without_redirects,
orphan_identifiers
]):
raise RedirectsError(
client_paths_with_server_redirects=client_paths_with_server_redirects,
conflicting_anchors=conflicting_anchors,
divergent_redirects=divergent_redirects,
identifiers_missing_current_outpath=identifiers_missing_current_outpath,
identifiers_without_redirects=identifiers_without_redirects,
orphan_identifiers=orphan_identifiers
)

def get_client_redirects(self, redirection_target: str):
self._xref_targets = xref_targets

def get_client_redirects(self, target: str):
paths_to_target = {src for src, dest in self.get_server_redirects().items() if dest == target}
client_redirects = {}
for identifier, locations in self._raw_redirects.items():
for location in locations[1:]:
if '#' not in location:
continue
path, anchor = location.split('#')
if path != redirection_target:
if path not in [target, *paths_to_target]:
continue
client_redirects[anchor] = f"{locations[0]}#{identifier}"
return client_redirects

def export_server_redirects(self, xref_targets: dict[str, XrefTarget], outpath: Path):
def get_server_redirects(self):
server_redirects = {}
for identifier, locations in self._raw_redirects.items():
for location in locations[1:]:
if '#' not in location and location not in server_redirects:
server_redirects[location] = xref_targets[identifier].path

with open(outpath / '_redirects', 'w') as server_redirects_file:
formatted_server_redirects = []
for from_path, to_path in server_redirects.items():
formatted_server_redirects.append(f"{from_path} {to_path} 301")
server_redirects_file.write("\n".join(formatted_server_redirects))
server_redirects[location] = self._xref_targets[identifier].path
return server_redirects

def get_redirect_script(self, redirection_target: str) -> str:
client_redirects = self.get_client_redirects(redirection_target)
def get_redirect_script(self, target: str) -> str:
client_redirects = self.get_client_redirects(target)
return self._redirects_script.replace('REDIRECTS_PLACEHOLDER', json.dumps(client_redirects))
47 changes: 21 additions & 26 deletions pkgs/tools/nix/nixos-render-docs/src/tests/test_redirects.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import unittest
from pathlib import Path
from typing import Type

from nixos_render_docs.manual import HTMLConverter, HTMLParameters
from nixos_render_docs.redirects import Redirects, RedirectsError
Expand Down
Expand Up
@@ -181,48 +180,44 @@ def test_divergent_redirect(self):
)
self.assert_redirect_error({"divergent_redirects": ["old-foo.html"]}, md)

def test_client_path_with_server_redirect(self):
"""Test for client paths with server redirects."""
md = self.setup_test(
sources={"foo.md": "# Foo {#foo}"},
raw_redirects={"foo": ["foo.html", "bar.html", "bar.html#foo"]}
)
self.assert_redirect_error({"client_paths_with_server_redirects": ["bar.html#foo"]}, md)


class TestGetClientRedirects(unittest.TestCase):
def test_no_client_redirects(self):
"""Test fetching client side redirects and ignore server-side ones."""
redirects = Redirects({"foo": ["index.html"], "bar": ["index.html", "foo.html"]}, "")
self.assertEqual(redirects.get_client_redirects("index.html"), {})
md = self.setup_test(
sources={"foo.md": "# Foo {#foo}\n## Bar {#bar}"},
raw_redirects={"foo": ["foo.html"], "bar": ["foo.html", "bar.html"]}
)
self.run_test(md)
self.assertEqual(md._redirects.get_client_redirects("foo.html"), {})

def test_basic_redirect_matching(self):
redirects = Redirects(
{
'foo': ['index.html', 'index.html#some-section', 'index.html#another-section'],
'bar': ['index.html'],
md = self.setup_test(
sources={"foo.md": "# Foo {#foo}\n## Bar {#bar}"},
raw_redirects={
'foo': ['foo.html', 'foo.html#some-section', 'foo.html#another-section'],
'bar': ['foo.html'],
},
"",
)
self.run_test(md)

client_redirects = redirects.get_client_redirects("index.html")
expected_redirects = {'some-section': 'index.html#foo', 'another-section': 'index.html#foo'}
client_redirects = md._redirects.get_client_redirects("foo.html")
expected_redirects = {'some-section': 'foo.html#foo', 'another-section': 'foo.html#foo'}
self.assertEqual(client_redirects, expected_redirects)

def test_advanced_redirect_matching(self):
redirects = Redirects(
{
md = self.setup_test(
sources={"foo.md": "# Foo {#foo}", "bar.md": "# Bar {#bar}"},
raw_redirects={
'foo': ['foo.html', 'foo.html#some-section', 'bar.html#foo'],
'bar': ['bar.html', 'bar.html#another-section'],
},
"",
)
self.assertEqual(redirects.get_client_redirects("index.html"), {})
self.run_test(md)
self.assertEqual(md._redirects.get_client_redirects("index.html"), {})

client_redirects = redirects.get_client_redirects("foo.html")
client_redirects = md._redirects.get_client_redirects("foo.html")
expected_redirects = {'some-section': 'foo.html#foo'}
self.assertEqual(client_redirects, expected_redirects)

client_redirects = redirects.get_client_redirects("bar.html")
client_redirects = md._redirects.get_client_redirects("bar.html")
expected_redirects = {'foo': 'foo.html#foo', 'another-section': 'bar.html#bar'}
self.assertEqual(client_redirects, expected_redirects)

0 comments on commit f804b81

Please sign in to comment.