Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated toc parsing #30

Merged
merged 3 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions Sources/ToucanSDK/Extensions/TocElement+Tree.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//
// TocElement+Tree.swift
// toucan
//
// Created by Viasz-Kádi Ferenc on 2024. 10. 14..
//

import Foundation

extension [TocElement] {

    /// Builds a tree of Table of Contents (ToC) nodes from a flat, document-ordered list of elements.
    ///
    /// Each element becomes a child of the nearest preceding element that has a strictly
    /// smaller `level`; elements without such a predecessor become root nodes.
    /// Nodes are attached by position only, so headings that share the same `fragment`
    /// keep their own children, and nesting works for any depth of levels.
    ///
    /// - Returns: The root `ToCNode` values, each carrying its nested `children`.
    func buildToCTree() -> [ToCNode] {
        // Index of the next element that has not been turned into a node yet.
        var cursor = startIndex

        /// Consumes consecutive elements that are deeper than `parentLevel`
        /// (every remaining element when `parentLevel` is `nil`) and returns them as siblings.
        func collectNodes(below parentLevel: Int?) -> [ToCNode] {
            var siblings: [ToCNode] = []

            while cursor < endIndex {
                let element = self[cursor]

                // An element at the same or a shallower level closes the current parent.
                if let parentLevel = parentLevel, element.level <= parentLevel {
                    break
                }
                cursor += 1

                var node = ToCNode(
                    level: element.level,
                    text: element.text,
                    fragment: element.fragment
                )
                // Children are gathered before the node is stored, so the stored value
                // is already complete and never needs to be looked up and patched later.
                let children = collectNodes(below: element.level)
                node.children.append(contentsOf: children)
                siblings.append(node)
            }

            return siblings
        }

        return collectNodes(below: nil)
    }
}
54 changes: 0 additions & 54 deletions Sources/ToucanSDK/Markdown/MarkdownRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,58 +51,4 @@ public struct MarkdownRenderer {
var htmlVisitor = MarkupToHTMLVisitor(delegate: delegate)
return htmlVisitor.visitDocument(document)
}

/// Renders the table of contents for a Markdown source.
///
/// - Parameter markdown: The Markdown text to parse.
/// - Returns: The `ToC` values that `buildToC` produces from the headings
///   collected by `MarkupToHXVisitor`.
public func renderToC(
    markdown: String
) -> [ToC] {
    var visitor = MarkupToHXVisitor()
    let parsed = Document(
        parsing: markdown
    )
    let headings = visitor.visitDocument(parsed)
    return Self.buildToC(headings)
}

// MARK: - private

/// Builds a tree of `ToC` nodes from a flat, document-ordered list of headings.
///
/// Each heading becomes a child of the nearest preceding heading that has a strictly
/// smaller `level`; headings without such a predecessor become root nodes.
/// Nodes are attached by position only, so headings that share the same `fragment`
/// keep their own children, and nesting works for any depth of levels.
///
/// - Parameter headings: The headings in the order they appear in the document.
/// - Returns: The root `ToC` values, each carrying its nested `children`.
static func buildToC(
    _ headings: [MarkupToHXVisitor.HX]
) -> [ToC] {
    // Index of the next heading that has not been turned into a node yet.
    var cursor = headings.startIndex

    /// Consumes consecutive headings that are deeper than `parentLevel`
    /// (every remaining heading when `parentLevel` is `nil`) and returns them as siblings.
    func collectNodes(below parentLevel: Int?) -> [ToC] {
        var siblings: [ToC] = []

        while cursor < headings.endIndex {
            let heading = headings[cursor]

            // A heading at the same or a shallower level closes the current parent.
            if let parentLevel = parentLevel, heading.level <= parentLevel {
                break
            }
            cursor += 1

            var node = ToC(
                level: heading.level,
                text: heading.text,
                fragment: heading.fragment
            )
            // Children are gathered before the node is stored, so the stored value
            // is already complete and never needs to be looked up and patched later.
            let children = collectNodes(below: heading.level)
            node.children.append(contentsOf: children)
            siblings.append(node)
        }

        return siblings
    }

    return collectNodes(below: nil)
}
}
36 changes: 36 additions & 0 deletions Sources/ToucanSDK/Markdown/MarkupHeadingVisitor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//
// MarkupHeadingVisitor.swift
//
//
// Created by Tibor Bodecs on 03/05/2024.
//

import Markdown

/// A markup visitor that collects a `TocElement` for every heading whose level is listed in `levels`.
struct MarkupHeadingVisitor: MarkupVisitor {

    typealias Result = [TocElement]

    /// The heading levels that are collected; headings at any other level are ignored.
    let levels: [Int]

    /// Creates a visitor for the given heading levels.
    ///
    /// - Parameter levels: The heading levels to collect, `[2, 3]` by default.
    init(levels: [Int] = [2, 3]) {
        self.levels = levels
    }

    // MARK: - visitor functions

    /// Visits every child of `markup` and concatenates the collected elements in order.
    mutating func defaultVisit(_ markup: any Markup) -> Result {
        var collected: Result = []
        for child in markup.children {
            collected.append(contentsOf: visit(child))
        }
        return collected
    }

    // MARK: - elements

    /// Returns a single element for a heading with a listed level, otherwise an empty result.
    /// The children of the heading are not visited.
    mutating func visitHeading(_ heading: Heading) -> Result {
        if levels.contains(heading.level) {
            return [TocElement(heading)]
        }
        return []
    }
}
55 changes: 0 additions & 55 deletions Sources/ToucanSDK/Markdown/MarkupToHXVisitor.swift

This file was deleted.

59 changes: 59 additions & 0 deletions Sources/ToucanSDK/Parsers/ToC/HTMLToCParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//
// HTMLToCParser.swift
// toucan
//
// Created by Viasz-Kádi Ferenc on 2024. 10. 14..
//

import Foundation
import Logging
import SwiftSoup

/// Extracts table of contents (ToC) elements from the heading tags of an HTML string.
/// Conforms to the `ToCElementParser` protocol.
struct HTMLToCParser: ToCElementParser {

    /// Receives the error message whenever parsing fails.
    let logger: Logger

    /// Collects a `TocElement` for each `<h2>` and `<h3>` tag found in the given HTML.
    ///
    /// Headings for which `TocElement` cannot be created are left out of the result.
    ///
    /// - Parameter value: The HTML string to parse.
    /// - Returns: The extracted elements in selection order, or `nil` if parsing throws
    ///   (the error is logged).
    func parse(from value: String) -> [TocElement]? {
        do {
            let html: SwiftSoup.Document = try SwiftSoup.parse(value)
            let headings = try html.select("h2, h3")

            var elements: [TocElement] = []
            for heading in headings {
                if let element = TocElement(heading) {
                    elements.append(element)
                }
            }
            return elements
        }
        catch Exception.Error(_, let message) {
            logger.error("\(message)")
            return nil
        }
        catch {
            logger.error("\(error.localizedDescription)")
            return nil
        }
    }
}

extension TocElement {

    /// Initializes a new instance from an HTML heading element.
    ///
    /// The level is read from the tag name (`h1` … `h6`), the text from the element's
    /// text content and the fragment from its `id` attribute.
    /// Initialization fails if the element is not a heading tag, if the `id` attribute
    /// is empty, or if an error is thrown while reading the element.
    ///
    /// - Parameter element: The HTML element to convert.
    init?(_ element: SwiftSoup.Element) {
        // Derive the level from the tag itself instead of assuming "h2, otherwise h3",
        // so a non-h2 element is never silently reported as level 3.
        let tagName = element.nodeName().lowercased()
        guard
            tagName.count == 2,
            tagName.hasPrefix("h"),
            let headingLevel = tagName.last?.wholeNumberValue,
            (1...6).contains(headingLevel)
        else {
            return nil
        }

        do {
            text = try element.text()
            level = headingLevel
            fragment = try element.attr("id")

            // A heading without an id cannot be linked to from the table of contents.
            guard !fragment.isEmpty else {
                return nil
            }
        }
        catch {
            return nil
        }
    }
}
35 changes: 35 additions & 0 deletions Sources/ToucanSDK/Parsers/ToC/MarkdownToCParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//
// MarkdownToCParser.swift
// toucan
//
// Created by Viasz-Kádi Ferenc on 2024. 10. 14..
//

import Foundation
import Markdown

/// Extracts table of contents (ToC) elements from the headings of a Markdown document.
struct MarkdownToCParser: ToCElementParser {

    /// Parses Markdown content and collects its headings with a default `MarkupHeadingVisitor`.
    ///
    /// - Parameter value: A string containing Markdown content.
    /// - Returns: The collected `TocElement` values. The visitor result is a non-optional
    ///   array, so this implementation never returns `nil`.
    func parse(from value: String) -> [TocElement]? {
        var visitor = MarkupHeadingVisitor()
        let parsed = Markdown.Document(parsing: value)
        return visitor.visitDocument(parsed)
    }
}

extension TocElement {

    /// Creates a `TocElement` from a `Markdown.Heading`.
    ///
    /// The text is the heading's plain text, the level is the heading's level and the
    /// fragment is the lowercased text passed through `slugify()`.
    ///
    /// - Parameter element: The heading to convert.
    init(_ element: Markdown.Heading) {
        let title = element.plainText
        text = title
        level = element.level
        fragment = title.lowercased().slugify()
    }
}
18 changes: 18 additions & 0 deletions Sources/ToucanSDK/Parsers/ToC/ToCParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//
// ToCParser.swift
// toucan
//
// Created by Viasz-Kádi Ferenc on 2024. 10. 11..
//

import Foundation

/// A protocol for types that extract table of contents (ToC) elements from source text.
///
/// `HTMLToCParser` and `MarkdownToCParser` are the conforming types in this module;
/// the array they return can be turned into a tree with `buildToCTree()`.
protocol ToCElementParser {

/// Parses the given string value into an array of `TocElement` objects.
///
/// - Parameter value: The source text to extract the elements from
///   (HTML for `HTMLToCParser`, Markdown for `MarkdownToCParser`).
/// - Returns: An array of `TocElement` objects if parsing is successful, or `nil` if parsing fails.
func parse(from value: String) -> [TocElement]?
}
44 changes: 10 additions & 34 deletions Sources/ToucanSDK/Site/SiteRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ struct SiteRenderer {
let destinationUrl: URL

let fileManager: FileManager = .default

let htmlToCParser: HTMLToCParser
let markdownToCParser: MarkdownToCParser

let logger: Logger

let templateRenderer: MustacheToHTMLRenderer
Expand All @@ -79,6 +83,8 @@ struct SiteRenderer {
self.templatesUrl = templatesUrl
self.overridesUrl = overridesUrl
self.destinationUrl = destinationUrl
self.htmlToCParser = .init(logger: logger)
self.markdownToCParser = .init()
self.logger = logger

let calendar = Calendar(identifier: .gregorian)
Expand Down Expand Up @@ -314,7 +320,7 @@ struct SiteRenderer {
// let run = transformers.array("run", as: [String: Any].self)

let markdown = pageBundle.markdown.dropFrontMatter()
var toc: [ToC]? = nil
var toc: [ToCNode]? = nil
var time: Int? = nil
var contents = ""

Expand Down Expand Up @@ -360,38 +366,7 @@ struct SiteRenderer {
try? fileManager.delete(at: fileURL)

time = readingTime(contents)

do {
let doc: Document = try SwiftSoup.parse(contents)

var tocList: [MarkupToHXVisitor.HX] = []
let headings = try doc.select("h2, h3")
for h in headings {
let n = h.nodeName()
let attr = try h.attr("id")
guard !attr.isEmpty else { continue }
let val = try h.text()

let level = n.hasSuffix("2") ? 2 : 3

tocList.append(
.init(
level: level,
text: val,
fragment: attr
)
)
}

toc = MarkdownRenderer.buildToC(tocList)

}
catch Exception.Error(_, let message) {
logger.error("\(message)")
}
catch {
logger.error("\(error.localizedDescription)")
}
toc = htmlToCParser.parse(from: contents)?.buildToCTree()
}

if renderFallback {
Expand All @@ -400,7 +375,8 @@ struct SiteRenderer {

var context: [String: Any] = [:]
context["readingTime"] = time ?? readingTime(markdown)
context["toc"] = toc ?? renderer.renderToC(markdown: markdown)
context["toc"] =
toc ?? markdownToCParser.parse(from: markdown)?.buildToCTree()
context["contents"] = contents

return context
Expand Down
Loading