diff --git a/package.json b/package.json index 480e5c1..bec5b70 100644 --- a/package.json +++ b/package.json @@ -44,11 +44,11 @@ "html-encode": "~2.1.6", "html-urls": "~2.4.55", "is-html-content": "~1.0.0", + "localhost-url-regex": "~1.0.11", "lodash": "~4.17.21", "mri": "~1.2.0", "p-cancelable": "~2.1.0", "p-retry": "~4.6.0", - "replace-string": "~3.1.0", "tinyspawn": "~1.2.6", "top-sites": "~1.1.205" }, diff --git a/src/html.js b/src/html.js index 27b9fe2..d0942e8 100644 --- a/src/html.js +++ b/src/html.js @@ -1,8 +1,8 @@ 'use strict' const { get, split, nth, castArray, forEach } = require('lodash') +const localhostUrl = require('localhost-url-regex') const { TAGS: URL_TAGS } = require('html-urls') -const replaceString = require('replace-string') const isHTML = require('is-html-content') const cssUrl = require('css-url-regex') const execall = require('execall') @@ -95,7 +95,9 @@ const rewriteHtmlUrls = ({ $, url }) => { const el = $(this) const attr = el.attr(urlAttr) - if (typeof attr === 'string' && !attr.startsWith('http')) { + if (localhostUrl().test(attr)) { + el.remove() + } else if (typeof attr === 'string' && !attr.startsWith('http')) { try { const newAttr = new URL(attr, url).toString() el.attr(urlAttr, newAttr) @@ -117,7 +119,7 @@ const rewriteCssUrls = ({ html, url }) => { if (cssUrl.startsWith('/')) { try { const absoluteUrl = new URL(cssUrl, url).toString() - html = replaceString(html, `url(${cssUrl})`, `url(${absoluteUrl})`) + html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`) } catch (_) {} } }) diff --git a/test/html/index.js b/test/html/index.js index 01d0384..598e80d 100644 --- a/test/html/index.js +++ b/test/html/index.js @@ -1,12 +1,7 @@ -/* eslint-disable prefer-regex-literals */ - 'use strict' const cheerio = require('cheerio') -const execall = require('execall') -const path = require('path') const test = require('ava') -const fs = require('fs') const { prettyHtml } = require('../util') @@ -88,221 +83,6 @@ test('add video markup', t => { t.snapshot(prettyHtml(output)) }) -test("'`rewriteCssUrls` don't modify html markup", t => { - const output = html({ - rewriteUrls: true, - url: 'https://www.rubiomonocoatusa.com/blogs/blog/how-to-apply-oil-plus-2c-to-furniture', - html: ` - - - - - - -`, - headers: { - 'content-type': 'text/html; charset=utf-8' - } - }) - - t.true( - output.includes( - 'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"' - ) - ) - - t.true( - output.includes( - 'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)' - ) - ) - - t.snapshot(prettyHtml(output)) -}) - -test('`rewriteHtmlUrls` rewrites relative root URLs inside html markup', t => { - const output = html({ - rewriteUrls: true, - url: 'https://browserless.js.org', - html: fs.readFileSync( - path.resolve(__dirname, '../fixtures/browserless.html'), - 'utf8' - ), - headers: { - 'content-type': 'text/html; charset=utf-8' - } - }) - - t.true(output.includes('https://browserless.js.org/static/main.min.js')) - t.true(output.includes('https://unpkg.com/docsify/lib/docsify.min.js')) - - t.snapshot(prettyHtml(output)) -}) - -test('`rewriteHtmlUrls` rewrites relative URLs inside html markup', t => { - const output = html({ - rewriteUrls: true, - url: 'https://moovility.me/', - html: ` - - - - - - `, - headers: { - 'content-type': 'text/html; charset=utf-8' - } - }) - - t.true(output.includes('https://moovility.me/img/icons/MOV/icon2-76.png')) - - t.snapshot(prettyHtml(output)) -}) - -test('`rewriteHtmlUrls` rewrites relative URLs inside stylesheet', t => { - const output = html({ - rewriteUrls: true, - url: 'https://kikobeats.com', - html: ` - - -
-
- - - `, - headers: { - 'content-type': 'text/html; charset=utf-8' - } - }) - - const results = execall( - new RegExp('https://kikobeats.com/images/microlink.jpg', 'g'), - output - ) - - t.is(results.length, 2) - t.snapshot(prettyHtml(output)) -}) - -test("`rewriteHtmlUrls` don't modify inline javascript", t => { - const output = html({ - rewriteUrls: true, - url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police', - html: ` - - - - - - - -Print - -`, - headers: { - 'content-type': 'text/html;charset=UTF-8' - } - }) - - t.true( - output.includes( - 'Print' - ) - ) - - t.snapshot(prettyHtml(output)) -}) - -test("`rewriteHtmlUrls` don't modify non http protocols", t => { - const output = html({ - rewriteUrls: true, - url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police', - html: ` - - - - - - - - - - - - - - -`, - headers: { - 'content-type': 'text/html;charset=UTF-8' - } - }) - - t.true(output.includes('')) - t.true(output.includes('')) - t.true(output.includes('')) - t.true(output.includes('')) - t.true(output.includes('')) - - t.snapshot(prettyHtml(output)) -}) - -test("`rewriteHtmlUrls` don't modify data URIs", t => { - const output = html({ - rewriteUrls: true, - url: 'https://example.com', - html: ` - - - - - - - -star - -`, - headers: { - 'content-type': 'text/html;charset=UTF-8' - } - }) - - t.true( - output.includes( - 'star' - ) - ) - - t.snapshot(prettyHtml(output)) -}) - -test("`rewriteHtmlUrls` don't modify undefined attributes", t => { - const output = html({ - rewriteUrls: true, - url: 'https://moovility.me', - html: ` - - - - Document - - - - -`, - headers: { - 'content-type': 'text/html;charset=UTF-8' - } - }) - - t.true(output.includes("")) - - t.snapshot(prettyHtml(output)) -}) - test('styles injection', t => { const output = html({ url: 'https://kikobeats.com', diff --git a/test/html/rewrite-css-urls.js b/test/html/rewrite-css-urls.js new file mode 100644 index 0000000..e1baeca --- /dev/null +++ b/test/html/rewrite-css-urls.js @@ -0,0 +1,66 @@ +'use strict' + +const execall = require('execall') +const test = require('ava') + +const { prettyHtml } = require('../util') + +const html = require('../../src/html') + +test("don't modify html markup", t => { + const output = html({ + rewriteUrls: true, + url: 'https://www.rubiomonocoatusa.com/blogs/blog/how-to-apply-oil-plus-2c-to-furniture', + html: ` + + + + + + +`, + headers: { + 'content-type': 'text/html; charset=utf-8' + } + }) + + t.true( + output.includes( + 'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"' + ) + ) + + t.true( + output.includes( + 'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)' + ) + ) + + t.snapshot(prettyHtml(output)) +}) + +test('rewrites relative URLs inside stylesheet', t => { + const output = html({ + rewriteUrls: true, + url: 'https://kikobeats.com', + html: ` + + +
+
+ + + `, + headers: { + 'content-type': 'text/html; charset=utf-8' + } + }) + + const results = execall( + /https:\/\/kikobeats.com\/images\/microlink\.jpg/g, + output + ) + + t.is(results.length, 2) + t.snapshot(prettyHtml(output)) +}) diff --git a/test/html/rewrite-urls.js b/test/html/rewrite-urls.js new file mode 100644 index 0000000..ba4f820 --- /dev/null +++ b/test/html/rewrite-urls.js @@ -0,0 +1,190 @@ +'use strict' + +const path = require('path') +const test = require('ava') +const fs = require('fs') + +const { prettyHtml } = require('../util') + +const html = require('../../src/html') + +test('remove localhost alike URLs', async t => { + const output = html({ + rewriteUrls: true, + url: 'https://kikobeats.com', + html: ` + + + kikobeats.com + + + + + + + + + + `, + headers: { 'content-type': 'text/html; charset=utf-8' } + }) + + t.snapshot(prettyHtml(output)) +}) + +test('rewrites relative root URLs inside html markup', t => { + const output = html({ + rewriteUrls: true, + url: 'https://browserless.js.org', + html: fs.readFileSync( + path.resolve(__dirname, '../fixtures/browserless.html'), + 'utf8' + ), + headers: { + 'content-type': 'text/html; charset=utf-8' + } + }) + + t.true(output.includes('https://browserless.js.org/static/main.min.js')) + t.true(output.includes('https://unpkg.com/docsify/lib/docsify.min.js')) + + t.snapshot(prettyHtml(output)) +}) + +test('rewrites relative URLs inside html markup', t => { + const output = html({ + rewriteUrls: true, + url: 'https://moovility.me/', + html: ` + + + + + + `, + headers: { + 'content-type': 'text/html; charset=utf-8' + } + }) + + t.true(output.includes('https://moovility.me/img/icons/MOV/icon2-76.png')) + + t.snapshot(prettyHtml(output)) +}) + +test(" don't modify inline javascript", t => { + const output = html({ + rewriteUrls: true, + url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police', + html: ` + + + + + + + +Print + +`, + headers: { + 'content-type': 'text/html;charset=UTF-8' + } + }) + + t.true( + output.includes( + 'Print' + ) + ) + + t.snapshot(prettyHtml(output)) +}) + +test("don't modify non http protocols", t => { + const output = html({ + rewriteUrls: true, + url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police', + html: ` + + + + + + + + + + + + + + +`, + headers: { + 'content-type': 'text/html;charset=UTF-8' + } + }) + + t.true(output.includes('')) + t.true(output.includes('')) + t.true(output.includes('')) + t.true(output.includes('')) + t.true(output.includes('')) + + t.snapshot(prettyHtml(output)) +}) + +test("don't modify data URIs", t => { + const output = html({ + rewriteUrls: true, + url: 'https://example.com', + html: ` + + + + + + + +star + +`, + headers: { + 'content-type': 'text/html;charset=UTF-8' + } + }) + + t.true( + output.includes( + 'star' + ) + ) + + t.snapshot(prettyHtml(output)) +}) + +test("don't modify undefined attributes", t => { + const output = html({ + rewriteUrls: true, + url: 'https://moovility.me', + html: ` + + + + Document + + + + +`, + headers: { + 'content-type': 'text/html;charset=UTF-8' + } + }) + + t.true(output.includes("")) + + t.snapshot(prettyHtml(output)) +}) diff --git a/test/html/snapshots/index.js.md b/test/html/snapshots/index.js.md index c1df5b5..10974a3 100644 --- a/test/html/snapshots/index.js.md +++ b/test/html/snapshots/index.js.md @@ -110,205 +110,6 @@ Generated by [AVA](https://avajs.dev). ␊ ` -## '`rewriteCssUrls` don't modify html markup - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - ␊ - how-to-apply-oil-plus-2c-to-furniture␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ` - -## `rewriteHtmlUrls` rewrites relative root URLs inside html markup - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - browserless, a puppeter-like Node.js library for interacting with Headless production scenarios.␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ -
␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ` - -## `rewriteHtmlUrls` rewrites relative URLs inside html markup - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - moovility.me␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ` - -## `rewriteHtmlUrls` rewrites relative URLs inside stylesheet - -> Snapshot 1 - - `␊ - ␊ - ␊ - kikobeats.com␊ - ␊ - ␊ - ␊ - ␊ - ␊ -
␊ -
␊ - ␊ - ` - -## `rewriteHtmlUrls` don't modify inline javascript - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - ␊ - column-muralist-honors-african-americans-killed-by-police␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - Print␊ - ␊ - ` - -## `rewriteHtmlUrls` don't modify non http protocols - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - ␊ - column-muralist-honors-african-americans-killed-by-police␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ` - -## `rewriteHtmlUrls` don't modify data URIs - -> Snapshot 1 - - `␊ - ␊ - ␊ - ␊ - ␊ - example.com␊ - ␊ - ␊ - ␊ - ␊ - star␊ - ␊ - ` - -## `rewriteHtmlUrls` don't modify undefined attributes - -> Snapshot 1 - - `␊ - ␊ - ␊ - Document␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ␊ - ` - ## styles injection > Snapshot 1 diff --git a/test/html/snapshots/index.js.snap b/test/html/snapshots/index.js.snap index 2a80441..82f09ed 100644 Binary files a/test/html/snapshots/index.js.snap and b/test/html/snapshots/index.js.snap differ diff --git a/test/html/snapshots/rewrite-css-urls.js.md b/test/html/snapshots/rewrite-css-urls.js.md new file mode 100644 index 0000000..eeef8a8 --- /dev/null +++ b/test/html/snapshots/rewrite-css-urls.js.md @@ -0,0 +1,45 @@ +# Snapshot report for `test/html/rewrite-css-urls.js` + +The actual snapshot is saved in `rewrite-css-urls.js.snap`. + +Generated by [AVA](https://avajs.dev). + +## don't modify html markup + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + how-to-apply-oil-plus-2c-to-furniture␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## rewrites relative URLs inside stylesheet + +> Snapshot 1 + + `␊ + ␊ + ␊ + kikobeats.com␊ + ␊ + ␊ + ␊ + ␊ + ␊ +
␊ +
␊ + ␊ + ` diff --git a/test/html/snapshots/rewrite-css-urls.js.snap b/test/html/snapshots/rewrite-css-urls.js.snap new file mode 100644 index 0000000..fe2eaf8 Binary files /dev/null and b/test/html/snapshots/rewrite-css-urls.js.snap differ diff --git a/test/html/snapshots/rewrite-urls.js.md b/test/html/snapshots/rewrite-urls.js.md new file mode 100644 index 0000000..6270b57 --- /dev/null +++ b/test/html/snapshots/rewrite-urls.js.md @@ -0,0 +1,180 @@ +# Snapshot report for `test/html/rewrite-urls.js` + +The actual snapshot is saved in `rewrite-urls.js.snap`. + +Generated by [AVA](https://avajs.dev). + +## remove localhost alike URLs + +> Snapshot 1 + + `␊ + ␊ + ␊ + kikobeats.com␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## rewrites relative root URLs inside html markup + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + browserless, a puppeter-like Node.js library for interacting with Headless production scenarios.␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ +
␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## rewrites relative URLs inside html markup + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + moovility.me␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## don't modify inline javascript + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + column-muralist-honors-african-americans-killed-by-police␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + Print␊ + ␊ + ` + +## don't modify non http protocols + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + column-muralist-honors-african-americans-killed-by-police␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## don't modify data URIs + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + example.com␊ + ␊ + ␊ + ␊ + ␊ + star␊ + ␊ + ` + +## don't modify undefined attributes + +> Snapshot 1 + + `␊ + ␊ + ␊ + Document␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` diff --git a/test/html/snapshots/rewrite-urls.js.snap b/test/html/snapshots/rewrite-urls.js.snap new file mode 100644 index 0000000..6505a07 Binary files /dev/null and b/test/html/snapshots/rewrite-urls.js.snap differ