diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js
index 98c6f6ad3..64186c407 100644
--- a/packages/metascraper-logo-favicon/src/index.js
+++ b/packages/metascraper-logo-favicon/src/index.js
@@ -21,9 +21,8 @@ const SIZE_REGEX_BY_X = /\d+x\d+/
const toLogo = toRule(logo)
-const isValidContenType = (contentType, contentTypes) => {
- return contentType && contentTypes.some(ct => contentType.includes(ct))
-}
+const isValidContenType = (contentType, contentTypes) =>
+ contentType && contentTypes.some(ct => contentType.includes(ct)) // falsy when the header is missing; otherwise true if the header value contains any allowed type (substring match)
const toSize = (input, url) => {
if (isEmpty(input)) return
@@ -105,13 +104,16 @@ const firstReachable = async (domNodeSizes, gotOpts) => {
const contentType = response.headers['content-type']
const urlExtension = extension(url)
+
const contentTypes = ALLOWED_EXTENSION_CONTENT_TYPES.find(
([ext]) => ext === urlExtension
)
- if (contentTypes && !isValidContenType(contentType, contentTypes[1])) {
- continue
- }
+ if (
+ contentTypes &&
+ (!isValidContenType(contentType, contentTypes[1]) ||
+ response.body.toString()[0] === '<')
+ ) { continue }
return response.url
}
@@ -142,7 +144,14 @@ const createFavicon = ([ext, contentTypes]) => {
const response = await reachableUrl(faviconUrl, gotOpts)
if (!reachableUrl.isReachable(response)) return undefined
const contentType = response.headers['content-type']
- return isValidContenType(contentType, contentTypes) && response.url
+
+ if (
+ contentTypes &&
+ (!isValidContenType(contentType, contentTypes) ||
+ response.body.toString()[0] === '<')
+ ) { return undefined }
+
+ return response.url
}
}
diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js
index c77270433..4ff0f03c4 100644
--- a/packages/metascraper-logo-favicon/test/favicon.js
+++ b/packages/metascraper-logo-favicon/test/favicon.js
@@ -36,7 +36,7 @@ test("don't resolve favicon.ico with no valid content-type", async t => {
res.setHeader('content-type', 'image/svg+xml; charset=utf-8')
res.end('')
})
- t.is(await faviconICO(url), false)
+ t.is(await faviconICO(url), undefined)
})
test("favicon.png with 'image/png' content-type", async t => {
diff --git a/packages/metascraper-logo-favicon/test/index.js b/packages/metascraper-logo-favicon/test/index.js
index 2725ad47b..de5e8eb3b 100644
--- a/packages/metascraper-logo-favicon/test/index.js
+++ b/packages/metascraper-logo-favicon/test/index.js
@@ -267,6 +267,19 @@ test("favicon.ico detected in HTML markup can't be random content-type", async t
t.is(metadata.logo, null)
})
+test("don't trust in favicon.ico content-type", async t => {
+ const url = await runServer(t, async ({ res }) => {
+ res.setHeader('content-type', 'image/x-icon') // the header claims an icon regardless of what the body holds
+ res.end('') // NOTE(review): body is empty as rendered in this patch text; confirm the real fixture sends content starting with '<'
+ })
+
+ const html =
+ '' // NOTE(review): page markup is empty as rendered in this patch text; confirm against the real test file
+ const metascraper = createMetascraper()
+ const metadata = await metascraper({ url, html })
+ t.is(metadata.logo, null) // no logo may be resolved from this response
+})
+
test('favicon.ico detected in HTML markup can be `image/x-icon` content-type', async t => {
const url = await runServer(t, async ({ res }) => {
res.setHeader('content-type', 'image/x-icon')
diff --git a/packages/metascraper/test/integration/substack/index.js b/packages/metascraper/test/integration/substack/index.js
index 368676e10..0d103a4ad 100644
--- a/packages/metascraper/test/integration/substack/index.js
+++ b/packages/metascraper/test/integration/substack/index.js
@@ -26,7 +26,8 @@ const url =
test('substack', async t => {
const html = await readFile(resolve(__dirname, 'input.html'))
- const { date, ...metadata } = await metascraper({ html, url })
- t.is(typeof date, 'string')
+ const { date, logo, ...metadata } = await metascraper({ html, url }) // date and logo are pulled out so they are not part of the snapshot below
t.snapshot(metadata)
+ t.is(typeof date, 'string') // only the type of date is checked, not its value
+ t.true(logo.includes('gstatic')) // loose check on the logo URL; presumably the exact URL is not stable enough to snapshot (confirm)
})
diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.md b/packages/metascraper/test/integration/substack/snapshots/index.js.md
index 06289385f..7646df509 100644
--- a/packages/metascraper/test/integration/substack/snapshots/index.js.md
+++ b/packages/metascraper/test/integration/substack/snapshots/index.js.md
@@ -14,7 +14,6 @@ Generated by [AVA](https://avajs.dev).
description: 'The world is a very malleable place. When I read biographies, early lives leap out the most. Leonardo da Vinci was a studio apprentice to Verrocchio at 14. Walt Disney took on a number of jobs, chiefly delivering papers, from 11 years old. Vladimir Nabokov published his first book (a collection of poems) at 16, while still in school. Andrew Carnegie',
image: 'https://substackcdn.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2Fef3bd0df-b9fa-4358-afee-116c23f4c55f_2560x1902.jpeg',
lang: 'en',
- logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://simonsarris.substack.com/p/the-most-precious-resource-is-agency&size=128',
publisher: 'The Map is Mostly Water',
title: 'The Most Precious Resource is Agency',
url: 'https://map.simonsarris.com/p/the-most-precious-resource-is-agency',
diff --git a/packages/metascraper/test/integration/substack/snapshots/index.js.snap b/packages/metascraper/test/integration/substack/snapshots/index.js.snap
index 8988b1a95..9fddccdc7 100644
Binary files a/packages/metascraper/test/integration/substack/snapshots/index.js.snap and b/packages/metascraper/test/integration/substack/snapshots/index.js.snap differ