From 15476bdf5719ac84bc2d1cd3e2b82e7eaf947e5f Mon Sep 17 00:00:00 2001 From: tmont Date: Tue, 5 Jun 2012 15:22:25 -0700 Subject: [PATCH] implemented doctype parsing --- src/context.js | 2 +- src/parser.js | 17 ++++++++-- tests/doctype-tests.js | 65 ++++++++++++++++++++++++++++++++++++++ tests/files/good.html | 41 ++++++++++++++++++++++++ tests/integration-tests.js | 8 +++++ 5 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 tests/doctype-tests.js create mode 100644 tests/files/good.html create mode 100644 tests/integration-tests.js diff --git a/src/context.js b/src/context.js index 23cf18e..e5c66a6 100644 --- a/src/context.js +++ b/src/context.js @@ -72,7 +72,7 @@ exports.create = function(raw, options) { }); context.callbacks = {}; - [ 'openElement', 'closeElement', 'attribute', 'comment', 'cdata', 'text' ].forEach(function(value) { + [ 'openElement', 'closeElement', 'attribute', 'comment', 'cdata', 'text', 'docType', 'xmlProlog', ].forEach(function(value) { context.callbacks[value] = options[value] || function() { }; }); diff --git a/src/parser.js b/src/parser.js index 6b1946d..f0396ae 100644 --- a/src/parser.js +++ b/src/parser.js @@ -51,7 +51,7 @@ function parseCData(context) { var match = /^([\s\S]*?)(?:$|]]>)/.exec(context.substring); var value = match[1]; - context.read(value.length + match[0].length); + context.read(match[0].length); context.callbacks.cdata(value); } @@ -61,10 +61,20 @@ function parseComment(context) { var match = /^([\s\S]*?)(?:$|-->)/.exec(context.substring); var value = match[1]; - context.read(value.length + match[0].length); + context.read(match[0].length); context.callbacks.comment(value); } +function parseDocType(context) { + //read "!doctype" + context.read(8); + + var match = /^\s*([\s\S]*?)(?:$|>)/.exec(context.substring); + var value = match[1]; + context.read(match[0].length); + context.callbacks.docType(value); +} + function appendText(value, context) { context.text += value; } @@ -97,6 +107,9 @@ function parseNext(context) { } else if (/^!--/.test(context.substring)) { callbackText(context); parseComment(context); + } else if (/^!doctype/i.test(context.substring)) { + callbackText(context); + parseDocType(context); } else { //malformed html context.read(); diff --git a/tests/doctype-tests.js b/tests/doctype-tests.js new file mode 100644 index 0000000..8edd5d2 --- /dev/null +++ b/tests/doctype-tests.js @@ -0,0 +1,65 @@ +var should = require('should'); +var helpers = require('./helpers'); + +describe('DocTypes', function() { + it('at beginning of document', function() { + var docTypeCount = 0; + helpers.parseString('', { + docType: function(value) { + value.should.equal('html'); + docTypeCount++; + } + }); + + docTypeCount.should.equal(1); + }); + + it('in middle of document', function() { + var docTypeCount = 0, openCount = 0, closeCount = 0; + helpers.parseString('', { + openElement: function(name) { + name.should.equal('foo'); + openCount++; + }, + closeElement: function(name) { + name.should.equal('foo'); + closeCount++; + }, + docType: function(value) { + openCount.should.equal(1); + closeCount.should.equal(0); + value.should.equal('html'); + docTypeCount++; + } + }); + + docTypeCount.should.equal(1); + openCount.should.equal(1); + closeCount.should.equal(1); + }); + + it('with line breaks', function() { + var docTypeCount = 0; + helpers.parseString('', { + docType: function(value) { + value.should.equal('foo\nbar'); + docTypeCount++; + } + }); + + docTypeCount.should.equal(1); + }); + + it('are case insensitive', function() { + var docTypeCount = 0; + helpers.parseString('', { + docType: function(value) { + value.should.equal('html'); + docTypeCount++; + } + }); + + docTypeCount.should.equal(1); + }); + +}); \ No newline at end of file diff --git a/tests/files/good.html b/tests/files/good.html new file mode 100644 index 0000000..31342e3 --- /dev/null +++ b/tests/files/good.html @@ -0,0 +1,41 @@ + + + + + tommy montgomery + + + + + + +
+ + +
+ + +
+
+ + + + + + + + + + diff --git a/tests/integration-tests.js b/tests/integration-tests.js new file mode 100644 index 0000000..948fd7f --- /dev/null +++ b/tests/integration-tests.js @@ -0,0 +1,8 @@ +require('should'); +var helpers = require('./helpers'); + +describe('Integration', function() { + it('real life HTML document', function() { + + }); +}); \ No newline at end of file