diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js index 98c6f6ad3..64186c407 100644 --- a/packages/metascraper-logo-favicon/src/index.js +++ b/packages/metascraper-logo-favicon/src/index.js @@ -21,9 +21,8 @@ const SIZE_REGEX_BY_X = /\d+x\d+/ const toLogo = toRule(logo) -const isValidContenType = (contentType, contentTypes) => { - return contentType && contentTypes.some(ct => contentType.includes(ct)) -} +const isValidContenType = (contentType, contentTypes) => + contentType && contentTypes.some(ct => contentType.includes(ct)) const toSize = (input, url) => { if (isEmpty(input)) return @@ -105,13 +104,16 @@ const firstReachable = async (domNodeSizes, gotOpts) => { const contentType = response.headers['content-type'] const urlExtension = extension(url) + const contentTypes = ALLOWED_EXTENSION_CONTENT_TYPES.find( ([ext]) => ext === urlExtension ) - if (contentTypes && !isValidContenType(contentType, contentTypes[1])) { - continue - } + if ( + contentTypes && + (!isValidContenType(contentType, contentTypes[1]) || + response.body.toString()[0] === '<') + ) { continue } return response.url } @@ -142,7 +144,14 @@ const createFavicon = ([ext, contentTypes]) => { const response = await reachableUrl(faviconUrl, gotOpts) if (!reachableUrl.isReachable(response)) return undefined const contentType = response.headers['content-type'] - return isValidContenType(contentType, contentTypes) && response.url + + if ( + contentTypes && + (!isValidContenType(contentType, contentTypes) || + response.body.toString()[0] === '<') + ) { return undefined } + + return response.url } } diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js index c77270433..4ff0f03c4 100644 --- a/packages/metascraper-logo-favicon/test/favicon.js +++ b/packages/metascraper-logo-favicon/test/favicon.js @@ -36,7 +36,7 @@ test("don't resolve favicon.ico with no valid content-type", async t => { res.setHeader('content-type', 'image/svg+xml; charset=utf-8') res.end('') }) - t.is(await faviconICO(url), false) + t.is(await faviconICO(url), undefined) }) test("favicon.png with 'image/png' content-type", async t => { diff --git a/packages/metascraper-logo-favicon/test/index.js b/packages/metascraper-logo-favicon/test/index.js index 2725ad47b..de5e8eb3b 100644 --- a/packages/metascraper-logo-favicon/test/index.js +++ b/packages/metascraper-logo-favicon/test/index.js @@ -267,6 +267,19 @@ test("favicon.ico detected in HTML markup can't be random content-type", async t t.is(metadata.logo, null) }) +test("don't trust in favicon.ico content-type", async t => { + const url = await runServer(t, async ({ res }) => { + res.setHeader('content-type', 'image/x-icon') + res.end('') + }) + + const html = + '' + const metascraper = createMetascraper() + const metadata = await metascraper({ url, html }) + t.is(metadata.logo, null) +}) + test('favicon.ico detected in HTML markup can be `image/x-icon` content-type', async t => { const url = await runServer(t, async ({ res }) => { res.setHeader('content-type', 'image/x-icon') diff --git a/packages/metascraper/test/integration/substack/index.js b/packages/metascraper/test/integration/substack/index.js index 368676e10..0d103a4ad 100644 --- a/packages/metascraper/test/integration/substack/index.js +++ b/packages/metascraper/test/integration/substack/index.js @@ -26,7 +26,8 @@ const url = test('substack', async t => { const html = await readFile(resolve(__dirname, 'input.html')) - const { date, ...metadata } = await metascraper({ html, url }) - t.is(typeof date, 'string') + const { date, logo, ...metadata } = await metascraper({ html, url }) t.snapshot(metadata) + t.is(typeof date, 'string') + t.true(logo.includes('gstatic')) }) diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.md b/packages/metascraper/test/integration/substack/snapshots/index.js.md index 06289385f..7646df509 100644 --- a/packages/metascraper/test/integration/substack/snapshots/index.js.md +++ b/packages/metascraper/test/integration/substack/snapshots/index.js.md @@ -14,7 +14,6 @@ Generated by [AVA](https://avajs.dev). description: 'The world is a very malleable place. When I read biographies, early lives leap out the most. Leonardo da Vinci was a studio apprentice to Verrocchio at 14. Walt Disney took on a number of jobs, chiefly delivering papers, from 11 years old. Vladimir Nabokov published his first book (a collection of poems) at 16, while still in school. Andrew Carnegie', image: 'https://substackcdn.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2Fef3bd0df-b9fa-4358-afee-116c23f4c55f_2560x1902.jpeg', lang: 'en', - logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://simonsarris.substack.com/p/the-most-precious-resource-is-agency&size=128', publisher: 'The Map is Mostly Water', title: 'The Most Precious Resource is Agency', url: 'https://map.simonsarris.com/p/the-most-precious-resource-is-agency', diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.snap b/packages/metascraper/test/integration/substack/snapshots/index.js.snap index 8988b1a95..9fddccdc7 100644 Binary files a/packages/metascraper/test/integration/substack/snapshots/index.js.snap and b/packages/metascraper/test/integration/substack/snapshots/index.js.snap differ