Skip to content

Commit

Permalink
Implement Canonical URLs (#821)
Browse files Browse the repository at this point in the history
* Fix #608
  • Loading branch information
tristanlatr authored Oct 15, 2024
1 parent 68cb9f8 commit 41b734b
Show file tree
Hide file tree
Showing 12 changed files with 107 additions and 7 deletions.
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ in development
^^^^^^^^^^^^^^

* Drop Python 3.7 and support Python 3.13.
* Implement canonical HTML element (``<link rel="canonical" href="..."/>``) to help search engines reduce outdated content.
Enable this feature by passing the base URL of the API documentation with option ``--html-base-url``.
* Improve collection of objects:
- Document objects declared in the ``else`` block of 'if' statements (previously they were ignored).
- Document objects declared in ``finalbody`` and ``else`` block of 'try' statements (previously they were ignored).
Expand Down
6 changes: 6 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
pydoctor_args = {
'main': [
'--html-output={outdir}/api/', # Make sure to have a trailing delimiter for better usage coverage.
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/api',
'--project-name=pydoctor',
f'--project-version={version}',
'--docformat=epytext',
Expand All @@ -108,6 +109,7 @@
] + _common_args,
'custom_template_demo': [
'--html-output={outdir}/custom_template_demo/',
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/custom_template_demo',
f'--project-version={version}',
f'--template-dir={_pydoctor_root}/docs/sample_template',
f'{_pydoctor_root}/pydoctor',
Expand All @@ -116,6 +118,7 @@
'-qqq' ], # we don't want to hear any warnings from this custom template demo.
'epydoc_demo': [
'--html-output={outdir}/docformat/epytext_demo',
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/docformat/epytext_demo',
'--project-name=pydoctor-epytext-demo',
'--project-version=1.3.0',
'--docformat=epytext',
Expand All @@ -126,6 +129,7 @@
] + _common_args,
'restructuredtext_demo': [
'--html-output={outdir}/docformat/restructuredtext_demo',
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/docformat/restructuredtext_demo',
'--project-name=pydoctor-restructuredtext-demo',
'--project-version=1.0.0',
'--docformat=restructuredtext',
Expand All @@ -136,6 +140,7 @@
] + _common_args,
'numpy_demo': [ # no need to pass --docformat here, we use __docformat__
'--html-output={outdir}/docformat/numpy_demo',
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/docformat/numpy_demo',
'--project-name=pydoctor-numpy-style-demo',
'--project-version=1.0.0',
'--project-url=../google-numpy.html',
Expand All @@ -145,6 +150,7 @@
] + _common_args,
'google_demo': [
'--html-output={outdir}/docformat/google_demo',
'--html-base-url=https://pydoctor.readthedocs.io/en/latest/docformat/google_demo',
'--project-name=pydoctor-google-style-demo',
'--project-version=1.0.0',
'--docformat=google',
Expand Down
1 change: 1 addition & 0 deletions docs/source/publish-github-action.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ with the appropriate information.
--project-name=(projectname) \
--project-url=https://github.com/$GITHUB_REPOSITORY \
--html-viewsource-base=https://github.com/$GITHUB_REPOSITORY/tree/$GITHUB_SHA \
--html-base-url=https://$GITHUB_REPOSITORY_OWNER.github.io/${GITHUB_REPOSITORY#*/} \
--html-output=./apidocs \
--docformat=restructuredtext \
--intersphinx=https://docs.python.org/3/objects.inv \
Expand Down
1 change: 1 addition & 0 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ The result looks like `this <api/index.html>`_.
--project-version=1.2.0 \
--project-url=https://github.com/twisted/pydoctor/ \
--html-viewsource-base=https://github.com/twisted/pydoctor/tree/20.7.2 \
--html-base-url=https://pydoctor.readthedocs.io/en/latest/api \
--html-output=docs/api \
--docformat=epytext \
--intersphinx=https://docs.python.org/3/objects.inv \
Expand Down
8 changes: 6 additions & 2 deletions docs/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def test_page_contains_infos():
- nav and links to modules, classes, names
- js script source
- pydoctor github link in the footer
- canonical link
"""

infos = (f'<meta name="generator" content="pydoctor {__version__}"',
Expand All @@ -101,7 +102,8 @@ def test_page_contains_infos():
'<a href="classIndex.html"',
'<a href="nameIndex.html"',
'<script src="pydoctor.js" type="text/javascript"></script>',
'<a href="https://github.com/twisted/pydoctor/">pydoctor</a>',)
'<a href="https://github.com/twisted/pydoctor/">pydoctor</a>',
'<link rel="canonical" href="https://pydoctor.readthedocs.io/en/latest/api/pydoctor.driver.html"',)

with open(BASE_DIR / 'api' / 'pydoctor.driver.html', 'r', encoding='utf-8') as stream:
page = stream.read()
Expand All @@ -117,6 +119,7 @@ def test_custom_template_contains_infos():
- pydoctor github link in the footer
- the custom header
- link to the extra.css
- canonical link
"""

infos = (f'<meta name="generator" content="pydoctor {__version__}"',
Expand All @@ -126,7 +129,8 @@ def test_custom_template_contains_infos():
'<a href="nameIndex.html"',
'<a href="https://github.com/twisted/pydoctor/">pydoctor</a>',
'<img src="https://twistedmatrix.com/trac/chrome/common/trac_banner.png" alt="Twisted" />',
'<link rel="stylesheet" type="text/css" href="extra.css" />',)
'<link rel="stylesheet" type="text/css" href="extra.css" />',
'<link rel="canonical" href="https://pydoctor.readthedocs.io/en/latest/custom_template_demo/index.html"',)

with open(BASE_DIR / 'custom_template_demo' / 'index.html', 'r', encoding='utf-8') as stream:
page = stream.read()
Expand Down
10 changes: 10 additions & 0 deletions pydoctor/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ def get_parser() -> ArgumentParser:
"The default behaviour auto detects most common providers like Github, Bitbucket, GitLab or SourceForge. "
"But in some cases you might have to override the template string, for instance to make it work with git-web, use: "
'--html-viewsource-template="{mod_source_href}#n{lineno}"'), metavar='SOURCETEMPLATE', default=Options.HTML_SOURCE_TEMPLATE_DEFAULT)
parser.add_argument(
'--html-base-url', dest='htmlbaseurl',
help=("A base URL used to include a canonical link in every html page. "
"This helps search engines link to the preferred version of "
"a web page to prevent duplicated or outdated content. "), default=None, metavar='BASEURL', )
parser.add_argument(
'--buildtime', dest='buildtime',
help=("Use the specified build time over the current time. "
Expand Down Expand Up @@ -297,6 +302,10 @@ def _convert_htmlwriter(s: str) -> Type['IWriter']:
error(str(e))
def _convert_privacy(l: List[str]) -> List[Tuple['model.PrivacyClass', str]]:
    """
    Convert the raw values of the C{--privacy} option.

    Every string of C{l} is handed to L{parse_privacy_tuple} with
    C{opt='--privacy'}; the results are returned as a list, in input order.
    """
    return [parse_privacy_tuple(raw, opt='--privacy') for raw in l]
def _convert_htmlbaseurl(url:str | None) -> str | None:
if url and not url.endswith('/'):
url += '/'
return url

_RECOGNIZED_SOURCE_HREF = {
# Sourceforge
Expand Down Expand Up @@ -361,6 +370,7 @@ class Options:
htmlwriter: Type['IWriter'] = attr.ib(converter=_convert_htmlwriter)
htmlsourcebase: Optional[str] = attr.ib()
htmlsourcetemplate: str = attr.ib()
htmlbaseurl: str | None = attr.ib(converter=_convert_htmlbaseurl)
buildtime: Optional[str] = attr.ib()
warnings_as_errors: bool = attr.ib()
verbosity: int = attr.ib()
Expand Down
24 changes: 22 additions & 2 deletions pydoctor/templatewriter/pages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
)
import ast
import abc
from urllib.parse import urljoin

from twisted.web.iweb import IRenderable, ITemplateLoader, IRequest
from twisted.web.template import Element, Tag, renderer, tags
Expand Down Expand Up @@ -146,9 +147,19 @@ class Head(TemplateElement):

filename = 'head.html'

def __init__(self, title: str, loader: ITemplateLoader) -> None:
def __init__(self, title: str, baseurl: str | None, pageurl: str,
loader: ITemplateLoader) -> None:
# title: stored as self._title.
# baseurl: stored as self._baseurl; when it is None or empty the
#   canonicalurl renderer emits no canonical link.
# pageurl: stored as self._pageurl; canonicalurl joins it onto baseurl.
# loader: template loader handed to the parent constructor.
super().__init__(loader)
self._title = title
self._baseurl = baseurl
self._pageurl = pageurl

@renderer
def canonicalurl(self, request: IRequest, tag: Tag) -> Flattenable:
    """
    Render the canonical link of the page.

    @return: An empty string when no base URL is set; otherwise a
        C{<link rel="canonical">} tag whose C{href} is the page URL
        joined onto the base URL with L{urljoin}.
    """
    base = self._baseurl
    if base:
        return tags.link(rel='canonical', href=urljoin(base, self._pageurl))
    return ''

@renderer
def title(self, request: IRequest, tag: Tag) -> str:
Expand All @@ -171,6 +182,14 @@ def __init__(self, system: model.System,
if not loader:
loader = self.lookup_loader(template_lookup)
super().__init__(loader)

@property
def page_url(self) -> str:
    """
    The relative page url.

    This default returns C{self.filename}; it MUST be overridden
    in CommonPage.
    """
    relative_url = self.filename
    return relative_url

def render(self, request: Optional[IRequest]) -> Tag:
return tags.transparent(super().render(request)).fillSlots(**self.slot_map)
Expand All @@ -197,7 +216,8 @@ def title(self) -> str:

@renderer
def head(self, request: IRequest, tag: Tag) -> IRenderable:
# Build the Head element for this page. Besides the title, it receives the
# htmlbaseurl option and this page's page_url, which Head uses to render
# the canonical link.
return Head(self.title(), Head.lookup_loader(self.template_lookup))
return Head(self.title(), self.system.options.htmlbaseurl, self.page_url,
loader=Head.lookup_loader(self.template_lookup))

@renderer
def nav(self, request: IRequest, tag: Tag) -> IRenderable:
Expand Down
17 changes: 17 additions & 0 deletions pydoctor/test/test_commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,20 @@ def test_index_hardlink(tmp_path: Path) -> None:
assert (tmp_path / 'basic.html').exists()
assert not (tmp_path / 'basic.html').is_symlink()
assert (tmp_path / 'basic.html').is_file()

def test_htmlbaseurl_option_all_pages(tmp_path: Path) -> None:
    """
    Check that the canonical link is included in all html pages, including summary pages.

    pydoctor is run on the C{basic} test package with C{--html-base-url}
    given without a trailing slash, then every C{.html} file written
    directly in the output directory is checked for its canonical link.
    """
    base_url = 'https://example.com.abcde'
    exit_code = driver.main(args=[
        f'--html-base-url={base_url}',
        '--html-output', str(tmp_path), 'pydoctor/test/testpackages/basic/'])
    assert exit_code == 0

    html_pages = [p for p in tmp_path.iterdir() if p.name.endswith('.html')]
    # Guard against a vacuous pass: without this, the loop below would
    # succeed even if no html page was found in the output directory.
    assert html_pages, f'no html pages were generated in {tmp_path}'

    for page in html_pages:
        # since we have only one module it's linked as index.html
        filename = 'index.html' if page.stem == 'basic' else page.name
        expected = f'<link rel="canonical" href="{base_url}/{filename}"'
        assert expected in page.read_text(encoding='utf-8')

36 changes: 36 additions & 0 deletions pydoctor/test/test_templatewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,3 +911,39 @@ class Stuff(socket):
index = flatten(ClassIndexPage(mod.system, TemplateLookup(template_dir)))
assert 'href="https://docs.python.org/3/library/socket.html#socket.socket"' in index

def test_canonical_links() -> None:
# Check the canonical link rendered for a module page and a class page
# when the system holds a single root module named 't'.
# The base URL is passed without a trailing slash; the expected hrefs
# below contain a slash between the base URL and the page name.
src = '''
var = True
class Cls:
foo = False
'''
mod = fromText(src, modname='t', system=model.System(model.Options.from_args(
['--html-base-url=https://example.org/t/docs']
)))
html1 = getHTMLOf(mod)
html2 = getHTMLOf(mod.contents['Cls'])

# With only one root module, the module page is expected at index.html.
assert '<link rel="canonical" href="https://example.org/t/docs/index.html"' in html1
assert '<link rel="canonical" href="https://example.org/t/docs/t.Cls.html"' in html2

def test_canonical_links_two_root_modules() -> None:
# Check the canonical links when the same system holds two root modules,
# 't' and 't2', both built from the same source text.
src = '''
var = True
class Cls:
foo = False
'''
mod = fromText(src, modname='t', system=model.System(model.Options.from_args(
['--html-base-url=https://example.org/t/docs']
)))
# The second module is added to the system created for the first one.
mod2 = fromText(src, modname='t2', system=mod.system)
html1 = getHTMLOf(mod)
html2 = getHTMLOf(mod.contents['Cls'])

# With two root modules, each module page is expected under its own name
# (t.html, t2.html) rather than index.html.
assert '<link rel="canonical" href="https://example.org/t/docs/t.html"' in html1
assert '<link rel="canonical" href="https://example.org/t/docs/t.Cls.html"' in html2

html3 = getHTMLOf(mod2)
html4 = getHTMLOf(mod2.contents['Cls'])

assert '<link rel="canonical" href="https://example.org/t/docs/t2.html"' in html3
assert '<link rel="canonical" href="https://example.org/t/docs/t2.Cls.html"' in html4
3 changes: 2 additions & 1 deletion pydoctor/themes/base/head.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<head xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1">
<meta name="pydoctor-template-version" content="2" />
<meta name="pydoctor-template-version" content="3" />
<title><t:transparent t:render="title">
The title of Something
</t:transparent></title>
Expand All @@ -10,4 +10,5 @@
<meta name="viewport" content="width=device-width, initial-scale=1 maximum-scale=1" />
<link rel="stylesheet" type="text/css" href="apidocs.css" />
<link rel="stylesheet" type="text/css" href="extra.css" />
<t:transparent t:render="canonicalurl">Canonical URL</t:transparent>
</head>
3 changes: 2 additions & 1 deletion pydoctor/themes/classic/head.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<head xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1">
<meta name="pydoctor-template-version" content="2" />
<meta name="pydoctor-template-version" content="3" />
<title><t:transparent t:render="title">
The title of Something
</t:transparent></title>
Expand All @@ -11,4 +11,5 @@
<link rel="stylesheet" type="text/css" href="bootstrap.min.css" />
<link rel="stylesheet" type="text/css" href="apidocs.css" />
<link rel="stylesheet" type="text/css" href="extra.css" />
<t:transparent t:render="canonicalurl">Canonical URL</t:transparent>
</head>
3 changes: 2 additions & 1 deletion pydoctor/themes/readthedocs/head.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<head xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1">
<meta name="pydoctor-template-version" content="2" />
<meta name="pydoctor-template-version" content="3" />
<title><t:transparent t:render="title">
The title of Something
</t:transparent></title>
Expand All @@ -11,4 +11,5 @@
<link rel="stylesheet" type="text/css" href="apidocs.css" />
<link rel="stylesheet" type="text/css" href="readthedocstheme.css" />
<link rel="stylesheet" type="text/css" href="extra.css" />
<t:transparent t:render="canonicalurl">Canonical URL</t:transparent>
</head>

0 comments on commit 41b734b

Please sign in to comment.