From f116ab75cf877a41a7f892482f4e460e8bf616d0 Mon Sep 17 00:00:00 2001 From: Kaz Yoshikawa Date: Wed, 17 Jan 2024 16:39:03 -0500 Subject: [PATCH 1/2] bugfix: when a node has two attributes "id" and "r:id", node["r:id"] did not find its attribute value. The lookup now checks ns (namespace) so that the right attribute is found even when it is obscured by an attribute with the same name in a different namespace. example: <p:sldId id="256" r:id="rId2"/> --- Sources/Kanna/libxmlHTMLNode.swift | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Sources/Kanna/libxmlHTMLNode.swift b/Sources/Kanna/libxmlHTMLNode.swift index 7892622..6e2a406 100755 --- a/Sources/Kanna/libxmlHTMLNode.swift +++ b/Sources/Kanna/libxmlHTMLNode.swift @@ -115,17 +115,17 @@ final class libxmlHTMLNode: XMLElement { get { var attr = nodePtr.pointee.properties while attr != nil { - let mem = attr?.pointee - if let tagName = String(validatingUTF8: UnsafeRawPointer((mem?.name)!).assumingMemoryBound(to: CChar.self)) { - if attributeName == tagName { - if let children = mem?.children { - return libxmlGetNodeContent(children) - } else { - return "" - } - } - } - attr = attr?.pointee.next + let mem = attr!.pointee + let prefix = mem.ns.flatMap { $0.pointee.prefix.string } + let tagName = [prefix, mem.name.string].compactMap { $0 }.joined(separator: ":") + if attributeName == tagName { + if let children = mem.children { + return libxmlGetNodeContent(children) + } else { + return "" + } + } + attr = attr!.pointee.next } return nil } @@ -233,3 +233,10 @@ private func escape(_ str: String) -> String { } return newStr } + +fileprivate extension UnsafePointer { + var string: String? { + let string = String(validatingUTF8: UnsafePointer(OpaquePointer(self))) + return string + } +} From c050bdab59786f75ac0169f559da479789ed575e Mon Sep 17 00:00:00 2001 From: Kaz Yoshikawa Date: Thu, 18 Jan 2024 22:54:11 -0500 Subject: [PATCH 2/2] by specifying nil for namespaces on xpath, it builds and uses a namespace dictionary of its own. 
add test cases about namespaces --- Kanna.xcodeproj/project.pbxproj | 30 ++++- Sources/Kanna/Kanna.swift | 9 ++ Sources/Kanna/libxmlHTMLDocument.swift | 2 + Tests/KannaTests/Data/pptx-presentation.xml | 118 ++++++++++++++++++ .../Data/pptx-presentation.xml.rels | 21 ++++ Tests/KannaTests/KannaXMLTests.swift | 29 +++++ 6 files changed, 207 insertions(+), 2 deletions(-) create mode 100755 Tests/KannaTests/Data/pptx-presentation.xml create mode 100755 Tests/KannaTests/Data/pptx-presentation.xml.rels diff --git a/Kanna.xcodeproj/project.pbxproj b/Kanna.xcodeproj/project.pbxproj index 5a8cae1..a7db722 100644 --- a/Kanna.xcodeproj/project.pbxproj +++ b/Kanna.xcodeproj/project.pbxproj @@ -26,6 +26,8 @@ 1EB4A01F204C1F240003D7A2 /* KannaCSSTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1EB4A01E204C1F240003D7A2 /* KannaCSSTests.swift */; }; 1EB4A021204C20760003D7A2 /* KannaXMLTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1EB4A020204C20760003D7A2 /* KannaXMLTests.swift */; }; 1EC805FA1FA2FB2F0067D3DA /* Deprecated.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1EC805F91FA2FB2F0067D3DA /* Deprecated.swift */; }; + CC02AE172B5A1A210075B74A /* pptx-presentation.xml.rels in Resources */ = {isa = PBXBuildFile; fileRef = CC02AE162B5A1A210075B74A /* pptx-presentation.xml.rels */; }; + CC02AE192B5A1A480075B74A /* pptx-presentation.xml in Resources */ = {isa = PBXBuildFile; fileRef = CC02AE182B5A1A480075B74A /* pptx-presentation.xml */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -66,6 +68,8 @@ 1EB4A01E204C1F240003D7A2 /* KannaCSSTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KannaCSSTests.swift; sourceTree = ""; }; 1EB4A020204C20760003D7A2 /* KannaXMLTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KannaXMLTests.swift; sourceTree = ""; }; 1EC805F91FA2FB2F0067D3DA /* Deprecated.swift */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.swift; path = Deprecated.swift; sourceTree = ""; }; + CC02AE162B5A1A210075B74A /* pptx-presentation.xml.rels */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = "pptx-presentation.xml.rels"; sourceTree = ""; }; + CC02AE182B5A1A480075B74A /* pptx-presentation.xml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = "pptx-presentation.xml"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -171,6 +175,8 @@ 1E7ADC9C1DF5907F006E1815 /* sample.html */, 1E7ADC9D1DF5907F006E1815 /* test_HTML4.html */, 1E7ADC9E1DF5907F006E1815 /* test_XML_ExcelWorkbook.xml */, + CC02AE162B5A1A210075B74A /* pptx-presentation.xml.rels */, + CC02AE182B5A1A480075B74A /* pptx-presentation.xml */, 1E7ADC9F1DF5907F006E1815 /* versions.xml */, ); name = Data; @@ -282,9 +288,11 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + CC02AE172B5A1A210075B74A /* pptx-presentation.xml.rels in Resources */, 1E7ADCA21DF5907F006E1815 /* test_HTML4.html in Resources */, 1E7ADCA11DF5907F006E1815 /* sample.html in Resources */, 1E7ADCA01DF5907F006E1815 /* libraries.xml in Resources */, + CC02AE192B5A1A480075B74A /* pptx-presentation.xml in Resources */, 1E7ADCA41DF5907F006E1815 /* versions.xml in Resources */, 1E7ADCA31DF5907F006E1815 /* test_XML_ExcelWorkbook.xml in Resources */, ); @@ -403,7 +411,8 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 10.9; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; + MACOSX_DEPLOYMENT_TARGET = 10.13; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; SDKROOT = macosx; @@ -411,8 +420,10 @@ SWIFT_INCLUDE_PATHS = ""; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; + WATCHOS_DEPLOYMENT_TARGET = 4.0; }; name = Debug; }; @@ 
-462,14 +473,17 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 10.9; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; + MACOSX_DEPLOYMENT_TARGET = 10.13; MTL_ENABLE_DEBUG_INFO = NO; SDKROOT = macosx; SWIFT_INCLUDE_PATHS = ""; SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; + WATCHOS_DEPLOYMENT_TARGET = 4.0; }; name = Release; }; @@ -487,12 +501,16 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = Sources/Kanna/Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.13; OTHER_LDFLAGS = "-lxml2"; PRODUCT_BUNDLE_IDENTIFIER = com.tid.Kanna; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; + WATCHOS_DEPLOYMENT_TARGET = 4.0; }; name = Debug; }; @@ -510,12 +528,16 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = Sources/Kanna/Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.13; OTHER_LDFLAGS = "-lxml2"; PRODUCT_BUNDLE_IDENTIFIER = com.tid.Kanna; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; + WATCHOS_DEPLOYMENT_TARGET = 4.0; }; name = Release; }; @@ -528,10 +550,12 @@ COMBINE_HIDPI_IMAGES = YES; DEVELOPMENT_TEAM = DP9Q5R8635; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.13; PRODUCT_BUNDLE_IDENTIFIER = com.tid.KannaTests; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; }; 
name = Debug; }; @@ -544,9 +568,11 @@ COMBINE_HIDPI_IMAGES = YES; DEVELOPMENT_TEAM = DP9Q5R8635; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + MACOSX_DEPLOYMENT_TARGET = 10.13; PRODUCT_BUNDLE_IDENTIFIER = com.tid.KannaTests; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_VERSION = 5.0; + TVOS_DEPLOYMENT_TARGET = 12.0; }; name = Release; }; diff --git a/Sources/Kanna/Kanna.swift b/Sources/Kanna/Kanna.swift index 7d306ae..4d507cb 100755 --- a/Sources/Kanna/Kanna.swift +++ b/Sources/Kanna/Kanna.swift @@ -180,6 +180,15 @@ public protocol XMLDocument: AnyObject, SearchableNode { var namespaces: [Namespace] { get } } +public extension XMLDocument { + var namespaceDictionary: [String: String]? { + let dictionary = self.namespaces.reduce(into: [:]) { + // when prefix is blank, treat prefix "" as "xmlns", or xpath cannot specify "" as prefix + $0[$1.prefix == "" ? "xmlns": $1.prefix] = $1.name + } + return dictionary.count > 0 ? dictionary : nil + } +} /** HTMLDocument */ diff --git a/Sources/Kanna/libxmlHTMLDocument.swift b/Sources/Kanna/libxmlHTMLDocument.swift index c1ec15e..8014bc0 100755 --- a/Sources/Kanna/libxmlHTMLDocument.swift +++ b/Sources/Kanna/libxmlHTMLDocument.swift @@ -307,6 +307,7 @@ final class libxmlXMLDocument: XMLDocument { } func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + let namespaces = namespaces ?? self.namespaceDictionary guard let docPtr = docPtr else { return .none } return XPath(doc: self, docPtr: docPtr).xpath(xpath, namespaces: namespaces) } @@ -336,6 +337,7 @@ struct XPath { guard let ctxt = xmlXPathNewContext(docPtr) else { return .none } defer { xmlXPathFreeContext(ctxt) } + let namespaces = namespaces ?? 
self.doc.namespaceDictionary if let nsDictionary = namespaces { for (ns, name) in nsDictionary { xmlXPathRegisterNs(ctxt, ns, name) diff --git a/Tests/KannaTests/Data/pptx-presentation.xml b/Tests/KannaTests/Data/pptx-presentation.xml new file mode 100755 index 0000000..2296577 --- /dev/null +++ b/Tests/KannaTests/Data/pptx-presentation.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Tests/KannaTests/Data/pptx-presentation.xml.rels b/Tests/KannaTests/Data/pptx-presentation.xml.rels new file mode 100755 index 0000000..65386eb --- /dev/null +++ b/Tests/KannaTests/Data/pptx-presentation.xml.rels @@ -0,0 +1,21 @@ + + + + + + + + + \ No newline at end of file diff --git a/Tests/KannaTests/KannaXMLTests.swift b/Tests/KannaTests/KannaXMLTests.swift index 6e31637..ac001e3 100644 --- a/Tests/KannaTests/KannaXMLTests.swift +++ b/Tests/KannaTests/KannaXMLTests.swift @@ -88,6 +88,35 @@ class KannaXMLTests: XCTestCase { XCTAssertEqual(namespaces.sorted(), arry.sorted()) } } + + func testNamespaces_multipleNamespaces() { + // namespaces: "xmlns:a", "xmlns:r", "xmlns:p" + let url = Bundle(for: KannaXMLTests.self).url(forResource: "pptx-presentation", withExtension: "xml") + XCTAssertNotNil(url) + let doc = try? XML(url: url!, encoding: .utf8) + XCTAssertNotNil(doc) + let nodes = Array(doc!.xpath("//p:sldId")) + XCTAssert(nodes.count == 1) + let sldId = nodes[0] + XCTAssert(sldId.tagName == "sldId") + XCTAssert(sldId["id"] == "256") + XCTAssert(sldId["r:id"] == "rId2") + } + + func testNamespaces_singleNamespace() { + // namespaces: "xmlns" + let url = Bundle(for: KannaXMLTests.self).url(forResource: "pptx-presentation", withExtension: "xml.rels") + XCTAssertNotNil(url) + let doc = try? 
XML(url: url!, encoding: .utf8) + XCTAssertNotNil(doc) + let nodes1 = Array(doc!.xpath("//Relationship")) + XCTAssert(nodes1.count == 0) + let nodes2 = Array(doc!.xpath("//xmlns:Relationship")) + XCTAssert(nodes2.count == 6) + let (relationship0, relationship1) = (nodes2[0], nodes2[1]) + XCTAssert(relationship0["Id"] == "rId3") + XCTAssert(relationship1["Id"] == "rId2") + } } extension KannaXMLTests {