Skip to content

Commit

Permalink
Infrastructure: Fix failing link-checker on GitHub README links (#2931)
Browse files Browse the repository at this point in the history
Closes #2907

* Supports parsing GitHub's react-partial data when needed to find a URI's fragment
  • Loading branch information
evmiguel authored Feb 26, 2024
1 parent 16dbe83 commit 829499f
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 10 deletions.
53 changes: 50 additions & 3 deletions .link-checker.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
const HTMLParser = require('node-html-parser');

// Searches `obj` for an own property named `attribute` and returns its value.
// If `obj` itself does not own the property, the own values of `obj` (array
// elements or object property values) are searched recursively, depth-first,
// and the first value that is not undefined is returned.
//
// Returns undefined when `obj` is not an object/array, or when no nested
// object owns the property.
//
// The ownership check goes through Object.prototype rather than calling
// `obj.hasOwnProperty(...)` directly, because the data searched here comes
// from parsed JSON: a key literally named "hasOwnProperty" (or an object
// with a null prototype) would otherwise make the lookup throw.
const getAttributeValue = (obj, attribute) => {
  if (typeof obj !== 'object' || obj === null) return undefined;
  if (Object.prototype.hasOwnProperty.call(obj, attribute)) {
    return obj[attribute];
  }

  // Arrays are walked element by element; plain objects by their own
  // enumerable property values.
  const children = Array.isArray(obj) ? obj : Object.values(obj);
  for (const child of children) {
    const attributeValue = getAttributeValue(child, attribute);
    if (attributeValue !== undefined) return attributeValue;
  }

  return undefined;
};

module.exports = {
filesToIgnore: [
// For example:
Expand All @@ -18,13 +44,34 @@ module.exports = {
{
name: 'github',
pattern: /^https:\/\/github\.com\/.*/,
matchHash: (ids, hash) =>
ids.includes(hash) || ids.includes(`user-content-${hash}`),
matchHash: (ids, hash, { reactPartial }) => {
if (reactPartial) {
// This is where the react-partial keeps data about READMEs and other *.md files
const richText = getAttributeValue(reactPartial, 'richText');
if (richText !== undefined) {
const html = HTMLParser.parse(richText);
const githubIds = html
.querySelectorAll('[id]')
.map((idElement) => idElement.getAttribute('id'));
return githubIds.includes(`user-content-${hash}`);
}
}
return ids.includes(hash) || ids.includes(`user-content-${hash}`);
},
getPartial: (html) => {
return html
.querySelectorAll('react-partial')
.filter(
(partialElement) =>
partialElement.getAttribute('partial-name') === 'repos-overview' // This is the partial that handles the READMEs
)
.flatMap((element) => element.getElementsByTagName('script'))
.map((element) => JSON.parse(element.innerHTML))[0];
},
},
],
ignoreHashesOnExternalPagesMatchingRegex: [
// Some hash links are resolved with JS and are therefore difficult to check algorithmically
/^https:\/\/html\.spec\.whatwg\.org\/multipage\//,
'https://github.com/w3c/aria-practices#code-conformance', // TODO: Remove when #2907 is resolved
],
};
39 changes: 32 additions & 7 deletions scripts/link-checker.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,24 @@ async function checkLinks() {
return getLineNumber;
};

const checkPathForHash = (hrefOrSrc, ids = [], hash) => {
// Looks up the hash-check handler responsible for a URL: the first entry of
// `options.hashCheckHandlers` whose `pattern` matches `hrefOrSrc`.
// Returns undefined when no handler matches.
const getHashCheckHandler = (hrefOrSrc) => {
  for (const candidate of options.hashCheckHandlers) {
    const { pattern } = candidate;
    if (pattern.test(hrefOrSrc)) return candidate;
  }
  return undefined;
};

// Returns the handler-specific partial data for a fetched page, or undefined.
//
// hrefOrSrc - URL of the page; used to select the hash-check handler.
// html      - parsed document passed through to the handler's `getPartial`.
//
// `getPartial` is optional on a handler: handlers that only define
// `pattern`/`matchHash` yield undefined instead of raising a TypeError.
const getReactPartial = (hrefOrSrc, html) => {
  const handler = getHashCheckHandler(hrefOrSrc);
  if (!handler || typeof handler.getPartial !== 'function') return undefined;
  return handler.getPartial(html);
};

const checkPathForHash = (
hrefOrSrc,
ids = [],
hash,
{ reactPartial } = {}
) => {
// On some websites, the ids may not exactly match the hash included
// in the link.
// For example, GitHub will prepend client-facing ids with their own
Expand All @@ -43,10 +60,8 @@ async function checkLinks() {
// as being 'user-content-foo-bar' for its own page processing purposes.
//
// See https://github.com/w3c/aria-practices/issues/2809
const handler = options.hashCheckHandlers.find(({ pattern }) =>
pattern.test(hrefOrSrc)
);
if (handler) return handler.matchHash(ids, hash);
const handler = getHashCheckHandler(hrefOrSrc);
if (handler) return handler.matchHash(ids, hash, { reactPartial });
else return ids.includes(hash);
};

Expand Down Expand Up @@ -149,7 +164,15 @@ async function checkLinks() {
.querySelectorAll('[id]')
.map((idElement) => idElement.getAttribute('id'));

return { ok: response.ok, status: response.status, ids };
// Handle GitHub README links.
// The data for these READMEs is stored within a react-partial element
const reactPartial = getReactPartial(hrefOrSrc, html);
return {
ok: response.ok,
status: response.status,
ids,
reactPartial,
};
} catch (error) {
return {
errorMessage:
Expand Down Expand Up @@ -305,7 +328,9 @@ async function checkLinks() {
if (
!isHashCheckingDisabled &&
hash &&
!checkPathForHash(hrefOrSrc, pageData.ids, hash)
!checkPathForHash(hrefOrSrc, pageData.ids, hash, {
reactPartial: pageData.reactPartial,
})
) {
consoleError(
`Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}, ` +
Expand Down

0 comments on commit 829499f

Please sign in to comment.