-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test: add test cases from html5lib-tests
- Loading branch information
Showing 6 changed files with 218 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "Tests/TokenizerTests/Resources/html5lib-tests"] | ||
path = Tests/TokenizerTests/Resources/html5lib-tests | ||
url = https://github.com/html5lib/html5lib-tests.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import FoundationEssentials | ||
public import Testing | ||
import Tokenizer | ||
|
||
/// Collects everything the tokenizer emits so a test can inspect it afterwards.
private struct TestSink {
    /// Non-error tokens, in emission order.
    var tokens: [Token] = []
    /// Parse errors, in emission order.
    var errors: [ParseError] = []
}
|
||
// MARK: - TokenSink

extension TestSink: TokenSink {
    /// Routes one emitted token: parse errors accumulate in `errors`,
    /// everything else is recorded in `tokens`.
    mutating func process(_ token: consuming Token) {
        switch consume token {
        case .error(let parseError):
            self.errors.append(consume parseError)
        case let other:
            self.tokens.append(other)
        }
    }
}
|
||
/// Runs the tokenizer against every html5lib tokenizer test case and checks
/// the emitted tokens and error count against the fixture's expectations.
// swift-format-ignore: NeverUseForceTry
@Test("html5lib-tests", arguments: try! parseTestCases(from: Data(PackageResources.test1_test)))
public func html5libTests(_ testCase: TestCase) throws {
    // TODO: Do not ignore any test cases
    // Cases the tokenizer cannot handle yet, keyed by html5lib description.
    // A Set makes the skip check O(1) and keeps this list trivially editable,
    // unlike the previous 26-arm switch.
    let ignoredTestCases: Set<String> = [
        "Simple comment",
        "Comment, Central dash no space",
        "Comment, two central dashes",
        "Comment, central less-than bang",
        "Unfinished comment",
        "Unfinished comment after start of nested comment",
        "Start of a comment",
        "Short comment",
        "Short comment two",
        "Short comment three",
        "< in comment",
        "<< in comment",
        "<! in comment",
        "<!- in comment",
        "Nested comment",
        "Nested comment with extra <",
        "Escaped script data",
        "< in script HTML comment",
        "</ in script HTML comment",
        "Start tag in script HTML comment",
        "End tag in script HTML comment",
        "- in script HTML comment double escaped",
        "-- in script HTML comment double escaped",
        "--- in script HTML comment double escaped",
        "- spaced in script HTML comment double escaped",
        "-- spaced in script HTML comment double escaped",
    ]
    guard !ignoredTestCases.contains(testCase.description) else { return }

    // Drive the tokenizer over the raw input; the sink records what it saw.
    var tokenizer = Tokenizer(sink: TestSink())
    var iterator = testCase.input.makeIterator()
    tokenizer.tokenize(&iterator)

    #expect(tokenizer.sink.tokens == testCase.tokens)
    // Only the error *count* is compared for now; codes/positions are not
    // yet matched against the fixture.
    #expect(tokenizer.sink.errors.count == testCase.errors.count)  // TODO: Make it stricter
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import FoundationEssentials | ||
import Tokenizer | ||
|
||
/// Top-level shape of one html5lib tokenizer test JSON file.
struct TestFile: Decodable {
    /// All test entries contained in the file.
    var tests: [TestFileEntry]
}
|
||
/// One raw test entry exactly as stored in the html5lib JSON fixtures.
struct TestFileEntry: Decodable {
    /// Human-readable name of the test.
    var description: String
    /// Markup fed to the tokenizer.
    var input: String
    /// Expected tokens; each token is a heterogeneous array whose first
    /// element is the type tag (e.g. "StartTag", "Character").
    var output: [[ExpectedTokenField?]]
    /// Tokenizer start states, when the test overrides the default
    /// (unused by the current harness — TODO confirm).
    var initialStates: [String]?
    /// A start tag to treat as already seen, when present
    /// (unused by the current harness — TODO confirm).
    var lastStartTag: String?
    /// Expected parse errors, if any.
    var errors: [ExpectedError]?
}
|
||
/// A single field of an expected token. The fixtures encode fields as a
/// JSON string, a boolean, or a string-to-string object (attribute map).
enum ExpectedTokenField {
    /// A string field (type tag, tag name, or character/comment data).
    case str(String)
    /// A boolean field (DOCTYPE correctness or the self-closing flag).
    case bool(Bool)
    /// An attribute map, attribute name to value.
    case dict([String: String])
}
|
||
// MARK: - Decodable

extension ExpectedTokenField: Decodable {
    /// Decodes a token field from whichever JSON shape it carries:
    /// a string, a boolean, or a `[String: String]` object.
    init(from decoder: any Decoder) throws {
        let container = try decoder.singleValueContainer()
        if let string = try? container.decode(String.self) {
            self = .str(string)
        } else if let flag = try? container.decode(Bool.self) {
            self = .bool(flag)
        } else if let dictionary = try? container.decode([String: String].self) {
            self = .dict(dictionary)
        } else {
            // Malformed fixture data is a recoverable decoding failure, not a
            // programmer error: throw instead of trapping the whole process
            // (previously `preconditionFailure()`).
            throw DecodingError.dataCorruptedError(
                in: container,
                debugDescription: "Expected a string, boolean, or [String: String] token field"
            )
        }
    }
}
|
||
/// A parse error the tokenizer is expected to report, as recorded in the fixtures.
public struct ExpectedError: Equatable, Sendable, Decodable {
    /// html5lib error code (e.g. "eof-in-comment").
    var code: String
    /// Line of the error location, per the fixture data.
    var line: Int
    /// Column of the error location, per the fixture data.
    var col: Int
}
|
||
/// A fully-parsed test case, ready to be fed to the tokenizer.
public struct TestCase: Equatable, CustomStringConvertible, Sendable {
    /// Human-readable test name; doubles as `CustomStringConvertible` output.
    public var description: String
    /// Markup to tokenize.
    var input: String
    /// Expected token stream, including the trailing `.eof`.
    var tokens: [Token]
    /// Expected parse errors.
    var errors: [ExpectedError]
}
|
||
/// Failures that can occur while translating fixture JSON into `Token` values.
enum TestParseError: Error {
    /// The token's type tag was not one of the known html5lib names.
    case invalidTokenType
    /// The token array had the wrong arity or field types for its tag.
    case invalidTokenFormat
}
|
||
/// Decodes html5lib tokenizer test JSON into `TestCase` values.
///
/// Each entry's expected `output` array is translated into the tokenizer's
/// `Token` representation, and a trailing `.eof` is appended because the
/// html5lib fixtures do not list it explicitly.
///
/// - Parameter data: Raw JSON bytes of a single html5lib test file.
/// - Returns: One `TestCase` per entry in the file.
/// - Throws: `TestParseError` when an entry has an unknown type tag or an
///   unexpected shape, plus any `DecodingError` from JSON decoding.
func parseTestCases(from data: Data) throws -> [TestCase] {
    try JSONDecoder().decode(TestFile.self, from: data).tests
        .map { entry in
            .init(
                description: entry.description,
                input: entry.input,
                tokens: try entry.output.flatMap { token -> [Token] in
                    switch token[0] {
                    case .str("DOCTYPE"):
                        // token[2]/token[3] (public/system identifiers) are not
                        // modeled by `Token.doctype` yet and are ignored.
                        guard case (.str(let name), _, _, .bool(let correctness)) = (token[1], token[2], token[3], token[4]) else {
                            throw TestParseError.invalidTokenFormat
                        }
                        return [.doctype(.init(name: name, forceQuirks: !correctness))]
                    case .str("StartTag"):
                        switch token.count {
                        case 4:
                            // A fourth field is the self-closing flag, which the
                            // fixtures only include when true.
                            guard case (.str(let name), .dict(let attrs), .bool(true)) = (token[1], token[2], token[3]) else {
                                throw TestParseError.invalidTokenFormat
                            }
                            return [.tag(.init(name: name, kind: .start, attrs: attrs.map { k, v in .init(name: k, value: v) }, selfClosing: true))]
                        case 3:
                            guard case (.str(let name), .dict(let attrs)) = (token[1], token[2]) else {
                                throw TestParseError.invalidTokenFormat
                            }
                            return [.tag(.init(name: name, kind: .start, attrs: attrs.map { k, v in .init(name: k, value: v) }))]
                        case _: throw TestParseError.invalidTokenFormat
                        }
                    case .str("EndTag"):
                        guard case .str(let name) = token[1] else { throw TestParseError.invalidTokenFormat }
                        return [.tag(.init(name: name, kind: .end))]
                    case .str("Comment"):
                        guard case .str(let data) = token[1] else { throw TestParseError.invalidTokenFormat }
                        return [.comment(data)]
                    case .str("Character"):
                        // html5lib concatenates consecutive character tokens into
                        // one string; expand it into one `.char` per character.
                        // This also fixes the previous `data.first!`, which trapped
                        // on an empty payload and dropped every character after
                        // the first.
                        guard case .str(let data) = token[1] else { throw TestParseError.invalidTokenFormat }
                        return data.map { .char($0) }
                    case _: throw TestParseError.invalidTokenType
                    }
                } + [.eof],
                errors: entry.errors ?? []
            )
        }
}
Submodule html5lib-tests
added at
a9f449