Skip to content

Commit

Permalink
fixed bug where ignoring tags that were contained within other ignore…
Browse files Browse the repository at this point in the history
…d tags made stuff explode
  • Loading branch information
tmont committed Nov 2, 2012
1 parent 8373c61 commit 65a399d
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 26 deletions.
50 changes: 27 additions & 23 deletions src/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,10 @@ exports.sanitize = function(htmlString, removalCallbacks) {
}
}

function last(arr) {
return arr[arr.length - 1];
}

var toRemove = {
attributes: createArrayCallback('attributes'),
elements: createArrayCallback('elements'),
Expand All @@ -291,7 +295,7 @@ exports.sanitize = function(htmlString, removalCallbacks) {
};

var sanitized = '', tagStack = [];
var ignoring = false;
var ignoreStack = [];
var selfClosingTags = {
meta: 1,
br: 1,
Expand Down Expand Up @@ -320,21 +324,22 @@ exports.sanitize = function(htmlString, removalCallbacks) {
//if there is an unclosed self-closing tag in the stack, then
//pop it off (assumed to be malformed html).
if (tagStack.length) {
var scope = tagStack[tagStack.length - 1];
//console.log(scope);
if (selfClosingTags[scope.name]) {
var scope = last(tagStack);
if (selfClosingTags[scope]) {
tagStack.pop();
if (scope === ignoring) {
ignoring = null;
if (scope === last(ignoreStack)) {
ignoreStack.pop();
}
}
}

tagStack.push({ name: name });
if (ignoreStack.length) {
return;
}

tagStack.push(name);
if (toRemove.elements(name)) {
if (!ignoring) {
ignoring = tagStack[tagStack.length - 1];
}
ignoreStack.push(name);
return;
}
sanitized += '<' + name;
Expand All @@ -345,32 +350,31 @@ exports.sanitize = function(htmlString, removalCallbacks) {
if (token.length === 2) {
//self closing
var scope = tagStack.pop();
if (scope === ignoring) {
ignoring = null;
if (scope === last(ignoreStack)) {
ignoreStack.pop();
}
}
if (ignoring || toRemove.elements(name)) {
if (ignoreStack.length || toRemove.elements(name)) {
return;
}
sanitized += token;
},

closeElement: function(name) {
name = name.toLowerCase();
if (tagStack.length && tagStack[tagStack.length - 1].name === name) {
var scope = tagStack.pop();
if (scope === ignoring) {
ignoring = null;
if (tagStack.length && last(tagStack) === name) {
if (tagStack.pop() === last(ignoreStack)) {
ignoreStack.pop();
}
}
if (ignoring || toRemove.elements(name)) {
if (ignoreStack.length || toRemove.elements(name)) {
return;
}
sanitized += '</' + name + '>';
},

attribute: function(name, value) {
if (ignoring) {
if (ignoreStack.length) {
return;
}

Expand All @@ -386,26 +390,26 @@ exports.sanitize = function(htmlString, removalCallbacks) {
},

text: function(value) {
if (ignoring) {
if (ignoreStack.length) {
return;
}
sanitized += value;
},

comment: function(value) {
if (ignoring || toRemove.comments(value)) {
if (ignoreStack.length || toRemove.comments(value)) {
return;
}
sanitized += '<!--' + value + '-->';
},

cdata: function(value) {
if (ignoring) {
if (ignoreStack.length) {
return;
}

for (var i = tagStack.length - 1; i >= 0; i--) {
if (tagStack[i].name === 'script' || tagStack[i].name === 'xmp') {
if (tagStack[i] === 'script' || tagStack[i] === 'xmp') {
sanitized += value;
return;
}
Expand Down
1 change: 0 additions & 1 deletion tests/integration-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ var helpers = require('./helpers');
var fs = require('fs');

describe('Integration', function() {

it('real life HTML document', function() {
var string = fs.readFileSync(__dirname + '/files/good.html', 'utf8');
var expected = fs.readFileSync(__dirname + '/files/good-expected.html', 'utf8');
Expand Down
12 changes: 10 additions & 2 deletions tests/sanitization-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ describe('Sanitization', function() {
sanitized.should.equal('<p novalue2>foo</p>');
});

describe('self-closing tags that don\'t close', function() {
describe('self-closing tag', function() {
var selfClosingTags = {
meta: 1,
br: 1,
Expand All @@ -136,7 +136,7 @@ describe('Sanitization', function() {
};

for (var tag in selfClosingTags) {
it(tag + ' tag without a "/>"', (function(tag) {
it('"' + tag + '" without a closing "/>"', (function(tag) {
return function() {
var html = '<' + tag + '><p>foo</p>';
var sanitized = helpers.parser.sanitize(html, {
Expand All @@ -148,4 +148,12 @@ describe('Sanitization', function() {
}(tag)));
}
});

it('should remove tags within tags', function() {
var html = '<foo><bar>lolz</bar><bat></bat></foo><baz>oh hai there</baz>';
var sanitized = helpers.parser.sanitize(html, {
elements: [ 'foo', 'bar' ]
});
sanitized.should.equal('<baz>oh hai there</baz>');
});
});

0 comments on commit 65a399d

Please sign in to comment.