Skip to content

Commit

Permalink
test: add test cases from html5lib-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kkebo committed Oct 14, 2023
1 parent 04e5b1f commit 101ae90
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "Tests/TokenizerTests/Resources/html5lib-tests"]
path = Tests/TokenizerTests/Resources/html5lib-tests
url = https://github.com/html5lib/html5lib-tests.git
25 changes: 25 additions & 0 deletions Package.resolved
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
{
"pins" : [
{
"identity" : "swift-collections",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-collections",
"state" : {
"revision" : "d8003787efafa82f9805594bc51100be29ac6903"
}
},
{
"identity" : "swift-foundation",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-foundation",
"state" : {
"branch" : "main",
"revision" : "ad0ed2c3d02943dda94c0d25261553e1cf14ccad"
}
},
{
"identity" : "swift-foundation-icu",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-foundation-icu",
"state" : {
"revision" : "0c1de7149a39a9ff82d4db66234dec587b30a3ad"
}
},
{
"identity" : "swift-syntax",
"kind" : "remoteSourceControl",
Expand Down
29 changes: 29 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ let package = Package(
dependencies: [
.package(url: "https://github.com/apple/swift-syntax", from: "509.0.0"),
.package(url: "https://github.com/apple/swift-testing", branch: "main"),
.package(url: "https://github.com/apple/swift-foundation", branch: "main"),
],
targets: [
.target(
Expand Down Expand Up @@ -44,6 +45,34 @@ let package = Package(
"TokenizerMacros",
"Tokenizer",
.product(name: "Testing", package: "swift-testing"),
.product(name: "FoundationEssentials", package: "swift-foundation"),
],
exclude: [
"Resources/html5lib-tests/encoding",
"Resources/html5lib-tests/lint_lib",
"Resources/html5lib-tests/serializer",
"Resources/html5lib-tests/tokenizer/README.md",
"Resources/html5lib-tests/tokenizer/contentModelFlags.test",
"Resources/html5lib-tests/tokenizer/domjs.test",
"Resources/html5lib-tests/tokenizer/escapeFlag.test",
"Resources/html5lib-tests/tokenizer/entities.test",
"Resources/html5lib-tests/tokenizer/namedEntities.test",
"Resources/html5lib-tests/tokenizer/numericEntities.test",
"Resources/html5lib-tests/tokenizer/pendingSpecChanges.test",
"Resources/html5lib-tests/tokenizer/test2.test",
"Resources/html5lib-tests/tokenizer/test3.test",
"Resources/html5lib-tests/tokenizer/test4.test",
"Resources/html5lib-tests/tokenizer/unicodeChars.test",
"Resources/html5lib-tests/tokenizer/unicodeCharsProblematic.test",
"Resources/html5lib-tests/tokenizer/xmlViolation.test",
"Resources/html5lib-tests/tree-construction",
"Resources/html5lib-tests/AUTHORS.rst",
"Resources/html5lib-tests/LICENSE",
"Resources/html5lib-tests/lint",
"Resources/html5lib-tests/pyproject.toml",
],
resources: [
.embedInCode("Resources/html5lib-tests/tokenizer/test1.test")
],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-warn-long-function-bodies=100"], .when(configuration: .debug)),
Expand Down
61 changes: 61 additions & 0 deletions Tests/TokenizerTests/HTML5LibTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import FoundationEssentials
public import Testing
import Tokenizer

/// Collects everything passed to it as a sink so a test can inspect it afterwards.
private struct TestSink {
    /// Tokens received so far that were not `.error` tokens, in arrival order.
    var tokens: [Token] = []
    /// Payloads of the `.error` tokens received so far, in arrival order.
    var errors: [ParseError] = []
}

extension TestSink: TokenSink {
    /// Stores the incoming token: the payload of an `.error` token goes to `errors`,
    /// any other token is appended to `tokens` unchanged.
    mutating func process(_ token: consuming Token) {
        switch consume token {
        case .error(let parseError):
            errors.append(consume parseError)
        case let other:
            tokens.append(other)
        }
    }
}

/// Tokenizes the input of one parsed test case and compares the result with the expectation.
///
/// Token sequences must match exactly; for parse errors only the number of errors is compared.
/// Test cases whose description is in the skip list below return early without checking anything.
// swift-format-ignore: NeverUseForceTry
@Test("html5lib-tests", arguments: try! parseTestCases(from: Data(PackageResources.test1_test)))
public func html5libTests(_ testCase: TestCase) throws {
    // TODO: Do not ignore any test cases
    let skippedDescriptions: Set<String> = [
        "Simple comment",
        "Comment, Central dash no space",
        "Comment, two central dashes",
        "Comment, central less-than bang",
        "Unfinished comment",
        "Unfinished comment after start of nested comment",
        "Start of a comment",
        "Short comment",
        "Short comment two",
        "Short comment three",
        "< in comment",
        "<< in comment",
        "<! in comment",
        "<!- in comment",
        "Nested comment",
        "Nested comment with extra <",
        "Escaped script data",
        "< in script HTML comment",
        "</ in script HTML comment",
        "Start tag in script HTML comment",
        "End tag in script HTML comment",
        "- in script HTML comment double escaped",
        "-- in script HTML comment double escaped",
        "--- in script HTML comment double escaped",
        "- spaced in script HTML comment double escaped",
        "-- spaced in script HTML comment double escaped",
    ]
    guard !skippedDescriptions.contains(testCase.description) else { return }

    // Feed the whole input through a tokenizer that records into a `TestSink`.
    var tokenizer = Tokenizer(sink: TestSink())
    var inputIterator = testCase.input.makeIterator()
    tokenizer.tokenize(&inputIterator)

    let actualTokens = tokenizer.sink.tokens
    let actualErrors = tokenizer.sink.errors
    #expect(actualTokens == testCase.tokens)
    #expect(actualErrors.count == testCase.errors.count)  // TODO: Make it stricter
}
99 changes: 99 additions & 0 deletions Tests/TokenizerTests/HTML5LibTestsParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import FoundationEssentials
import Tokenizer

/// Top-level shape of a decoded test file: an object with a `tests` array.
struct TestFile: Decodable {
/// The entries decoded from the file's `tests` key.
var tests: [TestFileEntry]
}

/// One raw entry of a test file, before its expected output is converted to `Token` values.
struct TestFileEntry: Decodable {
/// Name of the test; copied into `TestCase.description` by `parseTestCases(from:)`.
var description: String
/// Text copied into `TestCase.input`.
var input: String
/// Expected tokens, each an array of fields whose first element names the token type.
/// Individual fields may be JSON `null`, hence the optional element type.
var output: [[ExpectedTokenField?]]
/// Decoded when present; not read by `parseTestCases(from:)`.
var initialStates: [String]?
/// Decoded when present; not read by `parseTestCases(from:)`.
var lastStartTag: String?
/// Expected parse errors; `parseTestCases(from:)` substitutes an empty array when absent.
var errors: [ExpectedError]?
}

/// A single field of an expected-token array in a test file.
///
/// A field is a JSON string, a JSON Boolean, or a JSON object mapping strings to strings.
enum ExpectedTokenField {
    case str(String)
    case bool(Bool)
    case dict([String: String])
}

extension ExpectedTokenField: Decodable {
    /// Decodes a field by trying `String`, then `Bool`, then `[String: String]`.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the value is none of the supported kinds,
    ///   so a malformed test file surfaces as a decoding error instead of a trap.
    init(from decoder: any Decoder) throws {
        let container = try decoder.singleValueContainer()
        if let string = try? container.decode(String.self) {
            self = .str(string)
        } else if let flag = try? container.decode(Bool.self) {
            self = .bool(flag)
        } else if let dictionary = try? container.decode([String: String].self) {
            self = .dict(dictionary)
        } else {
            throw DecodingError.dataCorruptedError(
                in: container,
                debugDescription: "Expected a string, a Boolean, or a string-to-string dictionary"
            )
        }
    }
}

/// One expected parse error as decoded from a test file's `errors` array.
public struct ExpectedError: Equatable, Sendable, Decodable {
/// Decoded from the `code` key.
var code: String
/// Decoded from the `line` key.
var line: Int
/// Decoded from the `col` key.
var col: Int
}

/// A test case ready to run: an input string plus the tokens and errors expected from it.
public struct TestCase: Equatable, CustomStringConvertible, Sendable {
/// Name of the test case; also satisfies `CustomStringConvertible`.
public var description: String
/// Text to feed to the tokenizer.
var input: String
/// Expected token sequence; `parseTestCases(from:)` appends `.eof` as the last element.
var tokens: [Token]
/// Expected parse errors; empty when the test file lists none.
var errors: [ExpectedError]
}

/// Errors thrown while converting decoded test-file entries into `TestCase` values.
enum TestParseError: Error {
/// The first field of an expected token does not name a supported token type.
case invalidTokenType
/// The fields of an expected token do not have the shape required for its type.
case invalidTokenFormat
}

/// Decodes a JSON test file and converts every entry into a `TestCase`.
///
/// Each entry's expected output is translated into `Token` values and terminated with `.eof`.
/// A missing `errors` array is treated as empty.
///
/// - Parameter data: The JSON contents of a test file.
/// - Returns: One `TestCase` per entry of the file's `tests` array, in file order.
/// - Throws: A decoding error when the JSON does not match `TestFile`, or a `TestParseError`
///   when an expected token has an unknown type or an unexpected shape.
func parseTestCases(from data: Data) throws -> [TestCase] {
    try JSONDecoder().decode(TestFile.self, from: data).tests.map { entry in
        TestCase(
            description: entry.description,
            input: entry.input,
            tokens: try entry.output.flatMap { try expectedTokens(from: $0) } + [.eof],
            errors: entry.errors ?? []
        )
    }
}

/// Converts one expected-token array (type name followed by its fields) into tokens.
///
/// A "Character" entry expands to one `.char` token per character of its string; every other
/// entry yields exactly one token.
private func expectedTokens(from fields: [ExpectedTokenField?]) throws -> [Token] {
    // An empty entry has no type name; report it instead of trapping on a subscript.
    guard let kind = fields.first else { throw TestParseError.invalidTokenFormat }

    switch kind {
    case .str("DOCTYPE"):
        // Fields 2 and 3 are present in the file but not used here.
        guard fields.count >= 5,
            case (.str(let name), .bool(let correctness)) = (fields[1], fields[4])
        else {
            throw TestParseError.invalidTokenFormat
        }
        return [.doctype(.init(name: name, forceQuirks: !correctness))]

    case .str("StartTag"):
        // NOTE(review): `attrs` comes from a dictionary, so the order of the mapped attributes
        // is unspecified — confirm the comparison with tokenizer output does not depend on it.
        switch fields.count {
        case 4:
            guard case (.str(let name), .dict(let attrs), .bool(true)) = (fields[1], fields[2], fields[3]) else {
                throw TestParseError.invalidTokenFormat
            }
            return [.tag(.init(name: name, kind: .start, attrs: attrs.map { k, v in .init(name: k, value: v) }, selfClosing: true))]
        case 3:
            guard case (.str(let name), .dict(let attrs)) = (fields[1], fields[2]) else {
                throw TestParseError.invalidTokenFormat
            }
            return [.tag(.init(name: name, kind: .start, attrs: attrs.map { k, v in .init(name: k, value: v) }))]
        default:
            throw TestParseError.invalidTokenFormat
        }

    case .str("EndTag"):
        guard fields.count >= 2, case .str(let name) = fields[1] else {
            throw TestParseError.invalidTokenFormat
        }
        return [.tag(.init(name: name, kind: .end))]

    case .str("Comment"):
        guard fields.count >= 2, case .str(let text) = fields[1] else {
            throw TestParseError.invalidTokenFormat
        }
        return [.comment(text)]

    case .str("Character"):
        guard fields.count >= 2, case .str(let text) = fields[1] else {
            throw TestParseError.invalidTokenFormat
        }
        // The test format stores a run of characters as one string, while `Token.char` carries a
        // single character. Emit one token per character; an empty string yields no tokens.
        return text.map { Token.char($0) }

    default:
        throw TestParseError.invalidTokenType
    }
}
1 change: 1 addition & 0 deletions Tests/TokenizerTests/Resources/html5lib-tests
Submodule html5lib-tests added at a9f449

0 comments on commit 101ae90

Please sign in to comment.