Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use the MDN search API v1 instead of parsing HTML from mdn.io redirects. #40

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 12 additions & 77 deletions src/plugins/mdn/mdnPlugin.js
Original file line number Diff line number Diff line change
@@ -1,74 +1,8 @@
const url = require('url');
const superagent = require('superagent');
const cheerio = require('cheerio');

/**
 * Builds a URL slug from a list of words.
 *
 * Every word is trimmed and lower-cased, the words are joined with hyphens,
 * and each run of characters other than ASCII letters and digits collapses
 * into a single hyphen. Leading or trailing hyphens are not stripped.
 *
 * @param {string[]} words - Words to combine into the slug.
 * @returns {string} The hyphen-separated slug.
 */
function slugify(words) {
  const nonAlphanumericRun = /[^a-zA-Z0-9]+/g;
  const normalized = words.map((word) => word.trim().toLowerCase());
  const joined = normalized.join('-');
  // One pass is enough: after it, no non-alphanumeric run other than a single
  // hyphen can remain, so repeating the replacement would change nothing.
  return joined.replace(nonAlphanumericRun, '-');
}

/**
 * Error raised when the expected content cannot be extracted from an HTML
 * page. Callers distinguish it from unexpected failures with `instanceof`.
 */
class HtmlParseError extends Error {
  /**
   * @param {string} [message] - Human-readable description of the failure.
   * @param {object} [options] - Standard `Error` options (e.g. `cause`).
   */
  constructor(message, options) {
    super(message, options);
    // Without this the error reports itself as a generic "Error" in logs and
    // in `toString()` output.
    this.name = 'HtmlParseError';
  }
}

/**
 * Shortens an MDN page title by rewriting its " - <Section> | MDN" suffix.
 *
 * - "JavaScript" sections are dropped entirely.
 * - "Web APIs" sections become ", DOM".
 * - Any other section becomes ", <Section>".
 *
 * @param {string} title - Raw page title, e.g. "Element - Web APIs | MDN".
 * @returns {string} The title with the suffix rewritten.
 */
function getMdnTitle(title) {
  return title.replace(/\s*-\s*(\w+\s*\w*)\s*\|\s*MDN/gi, (_match, section) => {
    // The capture group greedily swallows the whitespace before "|" when the
    // section is a single word (e.g. "JavaScript "), so trim it before
    // comparing; otherwise the checks below never match one-word sections.
    const type = section.trim();
    if (type === 'JavaScript') return '';
    if (type === 'Web APIs') return ', DOM';
    return type ? `, ${type}` : '';
  });
}

/**
 * Pulls the page title and the first article paragraph out of an MDN page.
 *
 * @param {string} html - HTML source of the page.
 * @returns {{ text: string, title: string }} Text of the first `<p>` inside
 *   `#wikiArticle`, and the title shortened by `getMdnTitle`.
 * @throws {HtmlParseError} When no paragraph text is found; the message tells
 *   a "no search results" page apart from any other extraction failure.
 */
function extractFromHtml(html) {
  const $ = cheerio.load(html);
  const title = getMdnTitle($('head title').text());

  const firstParagraph = $('#wikiArticle').first().find('p').first();
  const text = firstParagraph.text();
  if (text) {
    return { text, title };
  }

  // No article text: inspect the whole body (whitespace collapsed) to see
  // whether we landed on a "no results" page instead of an article.
  const bodyText = $('body').text().replace(/\s+/g, ' ');
  const noResultsPattern = /did not match any documents|No results containing all your search terms were found/;
  const reason = noResultsPattern.test(bodyText)
    ? `No MDN page found with this search.`
    : `Failed to extract mdn text`;
  throw new HtmlParseError(reason);
}

/**
 * Re-fetches a page from its en-US URL when the final redirect pointed at a
 * different locale.
 *
 * @param {object} origRes - Response already obtained for the request.
 * @param {string} lastRedirect - URL of the last redirect that was followed.
 * @returns {Promise<object>} `origRes` when the URL needs no rewrite,
 *   otherwise the response fetched from the rewritten en-US URL.
 */
async function fixLanguage(origRes, lastRedirect) {
  // Rewrite the leading locale segment ("/fr/docs/...", "/pt-BR/docs/...") to
  // en-US. The character class includes "-" so hyphenated locales such as
  // pt-BR or zh-CN are rewritten too; plain \w+ silently skipped them.
  const urlParts = url.parse(lastRedirect);
  urlParts.pathname = urlParts.pathname.replace(
    /^\/[\w-]+(\/docs\/)/,
    (_match, rest) => `/en-US${rest}`,
  );

  const fixedUrl = url.format(urlParts);
  if (fixedUrl === lastRedirect) {
    // Nothing changed, so the original response is still the right one.
    return origRes;
  }

  // The URL changed, so another request is needed for the en-US page.
  console.error(`Translated MDN URL from "${lastRedirect}" to "${fixedUrl}"`);
  return superagent.get(fixedUrl).redirects(1);
}
// Origin of the MDN site; used to build the search endpoint below.
const mdnUrl = 'https://developer.mozilla.org';
// en-US endpoint of the MDN search API (v1).
const mdnSearchApiUrl = `${mdnUrl}/api/v1/search/en-US`;

const mdnPlugin = async (msg) => {
if (!msg.command) return;
Expand All @@ -79,8 +13,8 @@ const mdnPlugin = async (msg) => {
}
msg.handling();

const suffix = slugify(words.slice(1));
const initialUrl = `https://mdn.io/${suffix}`;
const query = new URLSearchParams({ q: words.slice(1).join(' '), topic: 'js' });
const initialUrl = `${mdnSearchApiUrl}?${query}`;

let lastRedirect = initialUrl;
let res = null;
Expand All @@ -89,6 +23,7 @@ const mdnPlugin = async (msg) => {
res = await superagent
.get(initialUrl)
.set('accept-language', 'en-US,en;q=0.5')
.set('Accept', 'application/json')
.redirects(5)
.on('redirect', (redirect) => {
lastRedirect = redirect.headers.location;
Expand All @@ -100,20 +35,20 @@ const mdnPlugin = async (msg) => {
}
}

if (res) {
res = await fixLanguage(res, lastRedirect).catch(() => null);
}

if (!res || !res.ok) {
msg.respondWithMention(`Try ${initialUrl} (couldn't fetch metadata)`);
return;
}

let pageData;
try {
pageData = extractFromHtml(res.text);
pageData = {
title: res.body.documents[0].title,
text: res.body.documents[0].excerpt.replace(/<\/?mark>/g, ''),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be stripping all HTML here?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you can pass highlight=false in the query to not get these <mark> elements.

https://developer.mozilla.org/en-US/docs/MDN/Contribute/Tools/Search

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to know. just fixed it.

url: `${mdnUrl}/${res.body.documents[0].slug}`,
};
} catch (e) {
if (!(e instanceof HtmlParseError)) throw e;
if (!(e instanceof TypeError)) throw e;

msg.respond(`${initialUrl} - ${e.message}`);
return;
Expand All @@ -123,7 +58,7 @@ const mdnPlugin = async (msg) => {
if (response.length > 400) {
response = `${response.slice(0, 350).trim()}…`;
}
response += ` ${initialUrl}`;
response += ` ${pageData.url || initialUrl}`;

msg.respondWithMention(response);
};
Expand Down