From 65a399da58c661d7e60c3446ddc68e426cd85d51 Mon Sep 17 00:00:00 2001 From: tmont Date: Fri, 2 Nov 2012 10:52:05 -0700 Subject: [PATCH] fixed bug where ignoring tags that were contained within other ignored tags made stuff explode --- src/parser.js | 50 ++++++++++++++++++++----------------- tests/integration-tests.js | 1 - tests/sanitization-tests.js | 12 +++++++-- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/src/parser.js b/src/parser.js index c47b024..ff4bf26 100644 --- a/src/parser.js +++ b/src/parser.js @@ -283,6 +283,10 @@ exports.sanitize = function(htmlString, removalCallbacks) { } } + function last(arr) { + return arr[arr.length - 1]; + } + var toRemove = { attributes: createArrayCallback('attributes'), elements: createArrayCallback('elements'), @@ -291,7 +295,7 @@ exports.sanitize = function(htmlString, removalCallbacks) { }; var sanitized = '', tagStack = []; - var ignoring = false; + var ignoreStack = []; var selfClosingTags = { meta: 1, br: 1, @@ -320,21 +324,22 @@ exports.sanitize = function(htmlString, removalCallbacks) { //if there is an unclosed self-closing tag in the stack, then //pop it off (assumed to be malformed html). if (tagStack.length) { - var scope = tagStack[tagStack.length - 1]; - //console.log(scope); - if (selfClosingTags[scope.name]) { + var scope = last(tagStack); + if (selfClosingTags[scope]) { tagStack.pop(); - if (scope === ignoring) { - ignoring = null; + if (scope === last(ignoreStack)) { + ignoreStack.pop(); } } } - tagStack.push({ name: name }); + if (ignoreStack.length) { + return; + } + + tagStack.push(name); if (toRemove.elements(name)) { - if (!ignoring) { - ignoring = tagStack[tagStack.length - 1]; - } + ignoreStack.push(name); return; } sanitized += '<' + name; @@ -345,11 +350,11 @@ exports.sanitize = function(htmlString, removalCallbacks) { if (token.length === 2) { //self closing var scope = tagStack.pop(); - if (scope === ignoring) { - ignoring = null; + if (scope === last(ignoreStack)) { + ignoreStack.pop(); } } - if (ignoring || toRemove.elements(name)) { + if (ignoreStack.length || toRemove.elements(name)) { return; } sanitized += token; @@ -357,20 +362,19 @@ exports.sanitize = function(htmlString, removalCallbacks) { closeElement: function(name) { name = name.toLowerCase(); - if (tagStack.length && tagStack[tagStack.length - 1].name === name) { - var scope = tagStack.pop(); - if (scope === ignoring) { - ignoring = null; + if (tagStack.length && last(tagStack) === name) { + if (tagStack.pop() === last(ignoreStack)) { + ignoreStack.pop(); } } - if (ignoring || toRemove.elements(name)) { + if (ignoreStack.length || toRemove.elements(name)) { return; } sanitized += ''; }, attribute: function(name, value) { - if (ignoring) { + if (ignoreStack.length) { return; } @@ -386,26 +390,26 @@ exports.sanitize = function(htmlString, removalCallbacks) { }, text: function(value) { - if (ignoring) { + if (ignoreStack.length) { return; } sanitized += value; }, comment: function(value) { - if (ignoring || toRemove.comments(value)) { + if (ignoreStack.length || toRemove.comments(value)) { return; } sanitized += ''; }, cdata: function(value) { - if (ignoring) { + if (ignoreStack.length) { return; } for (var i = tagStack.length - 1; i >= 0; i--) { - if (tagStack[i].name === 'script' || tagStack[i].name === 'xmp') { + if (tagStack[i] === 'script' || tagStack[i] === 'xmp') { sanitized += value; return; } diff --git a/tests/integration-tests.js b/tests/integration-tests.js index 6fc29ae..7b742e1 100644 --- a/tests/integration-tests.js +++ b/tests/integration-tests.js @@ -3,7 +3,6 @@ var helpers = require('./helpers'); var fs = require('fs'); describe('Integration', function() { - it('real life HTML document', function() { var string = fs.readFileSync(__dirname + '/files/good.html', 'utf8'); var expected = fs.readFileSync(__dirname + '/files/good-expected.html', 'utf8'); diff --git a/tests/sanitization-tests.js b/tests/sanitization-tests.js index 4c7d5e1..5cee246 100644 --- a/tests/sanitization-tests.js +++ b/tests/sanitization-tests.js @@ -118,7 +118,7 @@ describe('Sanitization', function() { sanitized.should.equal('

foo

'); }); - describe('self-closing tags that don\'t close', function() { + describe('self-closing tag', function() { var selfClosingTags = { meta: 1, br: 1, @@ -136,7 +136,7 @@ describe('Sanitization', function() { }; for (var tag in selfClosingTags) { - it(tag + ' tag without a "/>"', (function(tag) { + it('"' + tag + '" without a closing "/>"', (function(tag) { return function() { var html = '<' + tag + '>

foo

'; var sanitized = helpers.parser.sanitize(html, { @@ -148,4 +148,12 @@ describe('Sanitization', function() { }(tag))); } }); + + it('should remove tags within tags', function() { + var html = 'lolzoh hai there'; + var sanitized = helpers.parser.sanitize(html, { + elements: [ 'foo', 'bar' ] + }); + sanitized.should.equal('oh hai there'); + }); });