Skip to content

Commit

Permalink
Basic SEO checks (#55)
Browse files Browse the repository at this point in the history
- basic seo validator struct
- add `--seo-checks` flag
- SEO checks (length + keyword):
  - title, 
  - description, 
  - h1
- canonical link check
  • Loading branch information
tib authored Dec 10, 2024
1 parent 5e2796d commit e0625d4
Show file tree
Hide file tree
Showing 8 changed files with 254 additions and 44 deletions.
19 changes: 6 additions & 13 deletions Sources/ToucanSDK/Mustache/MustacheToHTMLRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,11 @@ public struct MustacheToHTMLRenderer {
with object: Any,
to destination: URL
) throws {
guard ids.contains(template) else {
throw Error.missingTemplate(template)
}
try library.render(
object,
withTemplate: template
)?
.minifyHTML()
.write(
to: destination,
atomically: true,
encoding: .utf8
)
try render(template: template, with: object)?
.write(
to: destination,
atomically: true,
encoding: .utf8
)
}
}
6 changes: 3 additions & 3 deletions Sources/ToucanSDK/PageBundle/PageBundle.swift
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ struct PageBundle {
}

let prefix = "./\(config.assets.folder)/"

guard path.hasPrefix(prefix) else {
return path
}

let src = String(path.dropFirst(prefix.count))

return [
baseUrl,
config.assets.folder,
Expand Down
3 changes: 2 additions & 1 deletion Sources/ToucanSDK/PageBundle/PageBundleLoader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public struct PageBundleLoader {
metadata: metadata
)

return .init(
let pageBundle = PageBundle(
id: id,
url: dirUrl,
baseUrl: sourceConfig.site.baseUrl,
Expand All @@ -181,6 +181,7 @@ public struct PageBundleLoader {
markdown: markdown,
assets: assets
)
return pageBundle
}
catch {
throw Error.pageBundle(error)
Expand Down
73 changes: 47 additions & 26 deletions Sources/ToucanSDK/Renderers/HTMLRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,19 @@ struct HTMLRenderer {
let currentYear: Int

let contextStore: ContextStore
let seoChecks: Bool

init(
source: Source,
destinationUrl: URL,
templateRenderer: MustacheToHTMLRenderer,
seoChecks: Bool,
logger: Logger
) throws {
self.source = source
self.destinationUrl = destinationUrl
self.templateRenderer = templateRenderer
self.seoChecks = seoChecks
self.logger = logger

let calendar = Calendar(identifier: .gregorian)
Expand Down Expand Up @@ -154,33 +157,51 @@ struct HTMLRenderer {
for: fileUrl
)

try templateRenderer.render(
template: template ?? "pages.default",
with: HTML(
site: .init(
baseUrl: source.sourceConfig.site.baseUrl,
title: source.sourceConfig.site.title,
description: source.sourceConfig.site.description,
language: source.sourceConfig.site.language,
context: globalContext
),
page: contextStore.fullContext(for: pageBundle),
userDefined: pageBundle.config.userDefined
.recursivelyMerged(
with: source.sourceConfig.site.userDefined
)
.sanitized(),
pagination: .init(
links: paginationContext,
data: paginationData.mapValues {
$0.map { contextStore.fullContext(for: $0) }
}
),
year: currentYear
)
.context,
to: fileUrl
let context = HTML(
site: .init(
baseUrl: source.sourceConfig.site.baseUrl,
title: source.sourceConfig.site.title,
description: source.sourceConfig.site.description,
language: source.sourceConfig.site.language,
context: globalContext
),
page: contextStore.fullContext(for: pageBundle),
userDefined: pageBundle.config.userDefined
.recursivelyMerged(
with: source.sourceConfig.site.userDefined
)
.sanitized(),
pagination: .init(
links: paginationContext,
data: paginationData.mapValues {
$0.map { contextStore.fullContext(for: $0) }
}
),
year: currentYear
)
.context

let metadata: Logger.Metadata = [
"type": "\(pageBundle.contentType.id)",
"slug": "\(pageBundle.slug)",
]

guard
let html = try templateRenderer.render(
template: template ?? "pages.default",
with: context
)
else {
logger.error("Missing HTML contents.", metadata: metadata)
return
}

if seoChecks {
let seoValidator = SEOValidator(logger: logger)
seoValidator.validate(html: html, using: pageBundle)
}

try html.write(to: fileUrl, atomically: true, encoding: .utf8)
}

// MARK: - render related methods
Expand Down
183 changes: 183 additions & 0 deletions Sources/ToucanSDK/SEOValidator/SEOValidator.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//
// SEOValidator.swift
// toucan
//
// Created by Tibor Bodecs on 2024. 10. 28..
//

import Foundation
import Logging
import SwiftSoup

extension SwiftSoup.Document {

    /// Runs the CSS `query` against the document and returns the first match, if any.
    ///
    /// - Parameter query: Selector passed straight to `select(_:)`.
    /// - Returns: The result of calling `first()` on the selection.
    /// - Throws: Whatever `select(_:)` throws.
    public func selectFirst(_ query: String) throws -> Element? {
        let matches = try select(query)
        return matches.first()
    }

    /// Text of the first `<title>` element, or `nil` when the document has none.
    func getTitle() throws -> String? {
        guard let titleElement = try selectFirst("title") else {
            return nil
        }
        return try titleElement.text()
    }

    /// `content` attribute of the first `<meta>` element whose `name`
    /// attribute is exactly `description`; `nil` when no such element exists.
    func getDescription() throws -> String? {
        let descriptionMeta = try select("meta")
            .first(where: { try $0.attr("name") == "description" })
        return try descriptionMeta?.attr("content")
    }

    /// `href` attribute of the first `<link>` element whose `rel`
    /// attribute is exactly `canonical`; `nil` when no such element exists.
    func getCanonicalLink() throws -> String? {
        let canonicalLink = try select("link")
            .first(where: { try $0.attr("rel") == "canonical" })
        return try canonicalLink?.attr("href")
    }
}

/// Runs basic SEO sanity checks against rendered HTML and reports the
/// findings through a logger.
///
/// Nothing is thrown to the caller: soft problems (length limits, missing
/// keyword, missing canonical link) are logged as warnings, hard problems
/// (missing title / description, wrong number of H1 tags, parser failures)
/// are logged as errors.
public struct SEOValidator {

    /// Raised internally when a required element is missing; `validate`
    /// catches it and logs the associated message as an error.
    public enum Error: Swift.Error {
        case validation(String)
    }

    let logger: Logger

    public init(logger: Logger) {
        self.logger = logger
    }

    /// Validates the given HTML and logs every finding.
    ///
    /// Checks run in this order: canonical link, title (max 70 characters),
    /// description (50...160 characters), exactly one H1 (max 80 characters),
    /// and finally whether title, description and H1 contain the `keyword`
    /// front matter value (only when `frontMatter.string("keyword")` yields one).
    /// A missing title, a missing description or an invalid H1 count stops
    /// the remaining checks and is logged as an error.
    ///
    /// - Parameters:
    ///   - html: The rendered HTML contents to inspect.
    ///   - pageBundle: The page the HTML belongs to; its content type id and
    ///     slug are attached to every log entry.
    func validate(
        html: String,
        using pageBundle: PageBundle
    ) {
        // Base metadata shared by every log entry. It is never mutated, so
        // details of one finding cannot leak into the log entry of another.
        let metadata: Logger.Metadata = [
            "type": "\(pageBundle.contentType.id)",
            "slug": "\(pageBundle.slug)",
        ]

        /// Base metadata extended with the details of a single finding.
        func details(_ extra: Logger.Metadata) -> Logger.Metadata {
            metadata.merging(extra) { _, new in new }
        }

        do {
            let document: SwiftSoup.Document = try SwiftSoup.parse(html)

            if try document.getCanonicalLink() == nil {
                logger.warning(
                    "Canonical link not present",
                    metadata: metadata
                )
            }

            guard let title = try document.getTitle() else {
                throw Error.validation("Title not found")
            }

            if title.count > 70 {
                logger.warning(
                    "Title is way too long, use maximum 70 characters.",
                    metadata: details([
                        "title": "`\(title)`",
                        "count": "\(title.count)",
                    ])
                )
            }

            guard let description = try document.getDescription() else {
                throw Error.validation("Description not found")
            }

            if description.count < 50 {
                logger.warning(
                    "Description is too short, use minimum 50 characters.",
                    metadata: details([
                        "description": "`\(description)`",
                        "count": "\(description.count)",
                    ])
                )
            }
            if description.count > 160 {
                logger.warning(
                    "Description is too long, use maximum 160 characters.",
                    metadata: details([
                        "description": "`\(description)`",
                        "count": "\(description.count)",
                    ])
                )
            }

            let headings = try document.select("h1")
            guard let h1tag = headings.first, headings.count == 1 else {
                throw Error.validation(
                    "Invalid number of H1 tags (missing or multiple)"
                )
            }
            let h1 = try h1tag.text()
            if h1.count > 80 {
                logger.warning(
                    "Heading 1 should be 80 characters or less.",
                    metadata: details([
                        "h1": "`\(h1)`",
                        "count": "\(h1.count)",
                    ])
                )
            }

            // check keyword
            if let keyword = pageBundle.frontMatter.string("keyword") {
                if !title.contains(keyword) {
                    logger.warning(
                        "Title does not contain keyword: `\(keyword)`.",
                        metadata: details([
                            "title": "`\(title)`",
                            "keyword": "`\(keyword)`",
                        ])
                    )
                }
                if !description.contains(keyword) {
                    logger.warning(
                        "Description does not contain keyword: `\(keyword)`.",
                        metadata: details([
                            "description": "`\(description)`",
                            "keyword": "`\(keyword)`",
                        ])
                    )
                }
                if !h1.contains(keyword) {
                    logger.warning(
                        "H1 does not contain keyword: `\(keyword)`.",
                        metadata: details([
                            "h1": "`\(h1)`",
                            "keyword": "`\(keyword)`",
                        ])
                    )
                }
            }
        }
        catch Error.validation(let message) {
            logger.error(
                "\(message)",
                metadata: metadata
            )
        }
        catch Exception.Error(_, let message) {
            logger.error(
                "\(message)",
                metadata: metadata
            )
        }
        catch {
            logger.error(
                "\(error.localizedDescription)",
                metadata: metadata
            )
        }
    }
}
6 changes: 5 additions & 1 deletion Sources/ToucanSDK/Toucan.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public struct Toucan {
let inputUrl: URL
let outputUrl: URL
let baseUrl: String?
let seoChecks: Bool
let logger: Logger

/// Initialize a new instance.
Expand All @@ -28,6 +29,7 @@ public struct Toucan {
input: String,
output: String,
baseUrl: String?,
seoChecks: Bool,
logger: Logger = .init(label: "toucan")
) {
self.fileManager = FileManager.default
Expand All @@ -44,6 +46,7 @@ public struct Toucan {
self.inputUrl = getSafeUrl(input, home: home)
self.outputUrl = getSafeUrl(output, home: home)
self.baseUrl = baseUrl
self.seoChecks = seoChecks
self.logger = logger
}

Expand Down Expand Up @@ -161,6 +164,7 @@ public struct Toucan {
source: source,
destinationUrl: workDirUrl,
templateRenderer: templateRenderer,
seoChecks: seoChecks,
logger: logger
)

Expand All @@ -172,7 +176,7 @@ public struct Toucan {
logger: logger
)
try apiRenderer.render()

try resetDirectory(at: outputUrl)
try fileManager.copyRecursively(from: workDirUrl, to: outputUrl)

Expand Down
4 changes: 4 additions & 0 deletions Sources/toucan-cli/Commands/Generate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ extension Entrypoint {
@Option(name: .shortAndLong, help: "The log level to use.")
var logLevel: Logger.Level = .info

@Flag(name: .shortAndLong, help: "SEO checks")
var seoChecks = false

func run() async throws {
var logger = Logger(label: "toucan")
logger.logLevel = logLevel
Expand All @@ -27,6 +30,7 @@ extension Entrypoint {
input: input,
output: output,
baseUrl: baseUrl,
seoChecks: seoChecks,
logger: logger
)

Expand Down
Loading

0 comments on commit e0625d4

Please sign in to comment.