diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7a78da8..545b1ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,7 +26,7 @@ jobs: # cache key. - uses: actions/cache@v4 with: - key: worm-scraper-cache-2025-01-02 + key: worm-scraper-cache-2025-01-04 path: ./cache - run: node ./lib/worm-scraper.js --book=worm diff --git a/lib/convert.js b/lib/convert.js index b22fbd8..75efc14 100644 --- a/lib/convert.js +++ b/lib/convert.js @@ -59,6 +59,7 @@ function getChapterData(arcs, manifest, chapterTitleStyle) { chapter.originalTitle = manifestEntry.title; chapter.usedTitle = chooseChapterTitle(chapter, chapterTitleStyle); chapter.datePublished = manifestEntry.datePublished; + chapter.dateModified = manifestEntry.dateModified; } } diff --git a/lib/download.js b/lib/download.js index fea5291..a3b9adf 100644 --- a/lib/download.js +++ b/lib/download.js @@ -70,11 +70,13 @@ async function downloadAllChapters(manifest, bookData, cachePath, manifestPath) const { contents, dom } = await downloadChapter(chapterURL); const title = getChapterTitle(dom.window.document); const datePublished = getChapterDatePublished(dom.window.document); + const dateModified = getChapterDateModified(dom.window.document); dom.window.close(); manifest[manifestIndex].title = title; manifest[manifestIndex].datePublished = datePublished; + manifest[manifestIndex].dateModified = dateModified; manifest[manifestIndex].filename = filename; await fs.writeFile(path.resolve(cachePath, filename), contents); @@ -105,7 +107,11 @@ function getChapterTitle(rawChapterDoc) { } function getChapterDatePublished(rawChapterDoc) { - return rawChapterDoc.querySelector(".entry-date").dateTime; + return rawChapterDoc.querySelector(`meta[property="article:published_time"]`).content; +} + +function getChapterDateModified(rawChapterDoc) { + return rawChapterDoc.querySelector(`meta[property="article:modified_time"]`).content; } async function downloadChapter(url) { diff --git a/lib/scaffold.js b/lib/scaffold.js index 60838f5..87ed173 100644 --- a/lib/scaffold.js +++ b/lib/scaffold.js @@ -1,9 +1,12 @@ "use strict"; const fs = require("fs").promises; const path = require("path"); +const { name: packageName, version: packageVersion } = require("../package.json"); +const BOOK_SERIES = "Parahumans"; const BOOK_PUBLISHER = "Domenic Denicola"; const BOOK_AUTHOR = "Wildbow"; +const BOOK_GENERATOR = `${packageName} v${packageVersion}`; const STYLES_FILENAME = "chapter.css"; const COVER_DOCUMENT_FILENAME = "cover.xhtml"; @@ -17,15 +20,15 @@ module.exports = async ( bookPath, contentPath, chaptersPath, - augmentedChapterDataPath, + chapterDataPath, bookInfo ) => { await Promise.all([ fs.cp(scaffoldingPath, bookPath, { recursive: true, filter: noThumbs }), fs.cp(coverImagePath, path.resolve(bookPath, "OEBPS", COVER_IMAGE_FILENAME)), - getChapterInfo(contentPath, chaptersPath, augmentedChapterDataPath).then(info => { + getChapterInfo(contentPath, chaptersPath, chapterDataPath).then(info => { return Promise.all([ - writeOPF(contentPath, bookInfo, info.manifestAndSpineFiles, info.datePublished), + writeOPF(contentPath, bookInfo, info.manifestAndSpineFiles, info.datePublished, info.dateModified), writeNav(contentPath, info.manifestAndSpineFiles, info.tocHTML), writeArcTitlePages(chaptersPath, info.arcTitlePages) ]); @@ -39,7 +42,7 @@ function noThumbs(filePath) { return path.basename(filePath) !== "Thumbs.db"; } -function writeOPF(contentPath, bookInfo, manifestAndSpineFiles, datePublished) { +function writeOPF(contentPath, bookInfo, manifestAndSpineFiles, datePublished, dateModified) { const manifestItems = manifestAndSpineFiles.map(f => { return ` `; }).join("\n"); @@ -48,8 +51,6 @@ function writeOPF(contentPath, bookInfo, manifestAndSpineFiles, datePublished) { return ` `; }).join("\n"); - const dateWithoutMilliseconds = `${(new Date()).toISOString().split(".")[0]}Z`; - // Note: per the spec at https://www.w3.org/TR/epub-33/#sec-group-position it seems like the collection-type should be // "set", but Calibre only recognizes "series" as of now: // https://github.com/kovidgoyal/calibre/blob/37dd0f5c70ebf8952d7be6dd7c37afd2a4fce9f0/src/calibre/ebooks/metadata/opf3.py#L792 @@ -65,16 +66,20 @@ function writeOPF(contentPath, bookInfo, manifestAndSpineFiles, datePublished) { ${bookInfo.title} main - Parahumans + ${BOOK_SERIES} series ${bookInfo.groupPosition} ${BOOK_AUTHOR} aut + ${BOOK_PUBLISHER} - ${datePublished} - ${dateWithoutMilliseconds} + ${BOOK_GENERATOR} + bkp + + ${reformatDateString(datePublished)} + ${reformatDateString(dateModified)} ${bookInfo.description} @@ -133,7 +138,8 @@ async function getChapterInfo(contentPath, chaptersPath, augmentedChapterDataPat const manifestAndSpineFiles = []; let tocHTML = "
    \n"; let arcIdCounter = 0; - let lastChapter; + let lastChapter, dateModified; + let dateModifiedTimestamp = 0; for (const arc of chapterData) { if (!arc.invisible) { const arcFilename = `arc${arcIdCounter}.xhtml`; @@ -164,6 +170,13 @@ async function getChapterInfo(contentPath, chaptersPath, augmentedChapterDataPat tocHTML += `
  1. ${chapter.usedTitle}
  2. \n`; lastChapter = chapter; + + // The modification date of the book is the latest modification date of any chapter. + const thisDateModifiedTimestamp = (new Date(Date.parse(chapter.dateModified))).getTime(); + if (thisDateModifiedTimestamp > dateModifiedTimestamp) { + dateModifiedTimestamp = thisDateModifiedTimestamp; + dateModified = chapter.dateModified; + } } if (!arc.invisible) { @@ -179,7 +192,7 @@ async function getChapterInfo(contentPath, chaptersPath, augmentedChapterDataPat // We say that the publication date of the book is equal to the publication date of the last chapter. const { datePublished } = lastChapter; - return { arcTitlePages, manifestAndSpineFiles, tocHTML, datePublished }; + return { arcTitlePages, manifestAndSpineFiles, tocHTML, datePublished, dateModified }; } async function writeArcTitlePages(chaptersPath, arcTitlePages) { @@ -207,3 +220,8 @@ async function writeArcTitlePages(chaptersPath, arcTitlePages) { function arcPlaintextTitle(arc) { return `${arc.label}: ${arc.title}`; } + +function reformatDateString(dateString) { + const date = new Date(dateString); + return date.toISOString().replace(".000", ""); +}