-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmdbook.js
166 lines (145 loc) · 5.71 KB
/
mdbook.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
const { chromium, devices } = require('playwright');
/**
 * CSS selectors describing the layout of the book site being scraped.
 * The object is passed into `page.evaluate()` callbacks, so it must stay
 * plain serializable data. It is frozen so no code can change a selector
 * by accident while a run is in progress.
 */
const SITE_CONFIG = Object.freeze({
  // Page wrapper whose CSS `position` is overridden before printing.
  bodySelector: '.page',
  // Sidebar links to every chapter except the one currently open.
  chapterLinksElmSelector: '#sidebar .chapter-item a:not(.active)',
  // Element whose outerHTML is taken as the content of a chapter page.
  bookContentSelector: '#content',
  // Element on the main page that fetched chapters are appended to.
  chapterAppendSelector: '#content',
  // Site chrome removed from the main page before printing.
  headerSelector: '#menu-bar',
  navNextSelector: '.nav-wrapper',
  sideBarSelector: '#sidebar',
  // The generated table of contents is inserted into this container ...
  menuContainerSelector: '#content',
  // ... immediately before this element.
  menuNextElmSelector: '#content main',
});
/**
 * Renders a multi-page book site (selectors come from SITE_CONFIG) into a
 * single PDF using Playwright.
 *
 * The entry page is opened once. Every other chapter linked from the sidebar
 * is loaded in a second tab and its content element is appended to the entry
 * page. Sidebar, header and prev/next navigation are removed, a table of
 * contents is inserted, and the combined page is written with `page.pdf()`.
 */
class GitBookPDFSpider {
  /**
   * @param {object} options
   * @param {object} options.browser - Playwright browser; closed when `run()` finishes.
   * @param {object} options.page - Page in which the book is assembled and printed.
   * @param {object} options.pageConfig - Stored on the instance; no method of this class reads it.
   * @param {string} options.url - Address of the book's entry page.
   * @param {string} options.bookName - Path the PDF is written to.
   */
  constructor({browser, page, pageConfig, url, bookName}) {
    this._browser = browser;
    this._mainPage = page;
    this._pageConfig = pageConfig;
    this._bookUrl = url;
    this._bookName = bookName;
  }

  /**
   * Launches Chromium, opens a blank page and wraps both in a spider.
   *
   * @param {object} options
   * @param {string} options.url - Address of the book's entry page.
   * @param {string} options.bookName - Path the PDF is written to.
   * @param {boolean} [options.isMobile=true] - Accepted for compatibility; device
   *   emulation is not applied, so this flag currently has no effect.
   * @returns {Promise<GitBookPDFSpider>}
   */
  static async create({url, bookName, isMobile = true}) {
    const browser = await chromium.launch();
    try {
      const pageConfig = {};
      const page = await browser.newPage();
      return new GitBookPDFSpider({browser, page, pageConfig, url, bookName});
    } catch (err) {
      // Do not leave a browser process behind when the page cannot be opened.
      await browser.close();
      throw err;
    }
  }

  /**
   * Runs the whole pipeline and writes the PDF to `bookName`.
   * The browser is closed whether or not a step fails; a failure is rethrown.
   *
   * @returns {Promise<void>}
   */
  async run() {
    try {
      await this._openMainPage();
      const chaptersMetaInfo = await this._getChaptersMetaInfo();
      console.log('chaptersMetaInfo', chaptersMetaInfo);
      const chaptersHTMLContent = await this._fetchAllChaptersHTMLContent(chaptersMetaInfo);
      await this._beautifyMainPage(chaptersMetaInfo);
      await this._generateFullHTMLPage(chaptersHTMLContent);
      await this._mainPage.pdf({ path: this._bookName, format: 'A4' });
    } finally {
      await this._browser.close();
    }
  }

  /** Navigates the main page to the book's entry URL. */
  _openMainPage = async () => {
    console.log('open url', this._bookUrl);
    await this._mainPage.goto(this._bookUrl);
    await this._mainPage.waitForLoadState('domcontentloaded');
  };

  /**
   * Strips site chrome from the main page and inserts a table of contents
   * whose entries link to the chapter containers created by
   * `_generateFullHTMLPage`.
   *
   * @param {Array<{title: string, id: string}>} [chaptersMetaInfo]
   * @returns {Promise<void>}
   */
  _beautifyMainPage = async (chaptersMetaInfo = []) => {
    console.log('beautify MainPage')
    await this._mainPage.evaluate(({chaptersMetaInfo = [], config}) => {
      // for gitbook
      const bodyElm = document.querySelector(config.bodySelector);
      if (bodyElm) {
        bodyElm.style.position = 'static';
      }
      // for rustbook
      document.documentElement.style.setProperty('--sidebar-width', '0');

      // Remove each element independently: a site that lacks one of them must
      // not keep the others on the page.
      const chromeSelectors = [
        config.sideBarSelector,
        config.headerSelector,
        config.navNextSelector,
      ];
      for (const selector of chromeSelectors) {
        document.querySelector(selector)?.remove();
      }

      const pdfMenu = document.createElement('div');
      pdfMenu.style.fontSize = '16px';
      pdfMenu.style.padding = '2px 48px';
      // Large bottom margin separates the menu from the first chapter.
      pdfMenu.style.marginBottom = '600px';
      chaptersMetaInfo.forEach(({title, id}, index) => {
        const chapterLink = document.createElement('a');
        chapterLink.textContent = `${index}. ${title}`;
        // Points at the container carrying the same id in the combined page.
        chapterLink.href = `#${id}`;
        const chapterLinkContainer = document.createElement('div');
        chapterLinkContainer.style.margin = '4px 18px';
        chapterLinkContainer.appendChild(chapterLink);
        pdfMenu.appendChild(chapterLinkContainer);
      });

      const bookContainer = document.querySelector(config.menuContainerSelector);
      if (!bookContainer) {
        throw new Error(`menu container not found: ${config.menuContainerSelector}`);
      }
      const bookStartElm = document.querySelector(config.menuNextElmSelector);
      bookContainer.insertBefore(pdfMenu, bookStartElm);
    }, {chaptersMetaInfo, config: SITE_CONFIG});
  };

  /**
   * Reads the chapter links from the main page's sidebar.
   *
   * @returns {Promise<Array<{url: string, title: string, id: string}>>} One entry
   *   per link that has an href; `id` is derived from the link's position among
   *   all matched links.
   */
  _getChaptersMetaInfo = async () => {
    console.log('get Chapters MetaInfo');
    return this._mainPage.evaluate((config) => {
      const links = Array.from(document.querySelectorAll(config.chapterLinksElmSelector));
      return links
        .map((link, index) => ({
          url: link.href,
          title: link.textContent.trim() || 'UnTitled',
          id: `pdfchapter_${index}`,
        }))
        .filter(({url}) => url);
    }, SITE_CONFIG);
  };

  /**
   * Loads every chapter, one after another, in a single extra tab.
   *
   * @param {Array<{url: string, id: string}>} [chaptersMetaInfo]
   * @returns {Promise<Array<{id: string, html: string}>>} Content of each chapter
   *   whose content element was found, in input order.
   */
  _fetchAllChaptersHTMLContent = async (chaptersMetaInfo = []) => {
    const chaptersContents = [];
    if (!chaptersMetaInfo.length) {
      return chaptersContents;
    }
    const chapterPage = await this._browser.newPage();
    try {
      // Sequential on purpose: all chapters are loaded in the same tab.
      for (const {url, id} of chaptersMetaInfo) {
        const html = await this._openURLAndPickHTMLStr(chapterPage, url);
        if (html) {
          chaptersContents.push({id, html});
        }
      }
    } finally {
      // Close the tab even when a navigation fails.
      await chapterPage.close();
    }
    return chaptersContents;
  };

  /**
   * Navigates `page` to `url` and returns the outerHTML of the content element.
   *
   * @param {object} page - Playwright page to navigate.
   * @param {string} url - Chapter address.
   * @returns {Promise<string|null>} `null` when the content element is missing.
   */
  _openURLAndPickHTMLStr = async (page, url) => {
    console.log('open sub page', url);
    await page.goto(url);
    await page.waitForLoadState('domcontentloaded');
    // Done in one round trip so no element handle is left undisposed.
    return page.evaluate((config) => {
      const bodyElm = document.querySelector(config.bodySelector);
      if (bodyElm) {
        bodyElm.style.position = 'relative';
      }
      const bookContent = document.querySelector(config.bookContentSelector);
      return bookContent ? bookContent.outerHTML : null;
    }, SITE_CONFIG);
  };

  /**
   * Appends the fetched chapters to the main page, each in its own container.
   *
   * @param {Array<string|{id: string, html: string}>} [chaptersHTMLContent] -
   *   Chapter markup. Plain strings are accepted and get no anchor id.
   * @returns {Promise<void>}
   */
  _generateFullHTMLPage = async (chaptersHTMLContent = []) => {
    if (!chaptersHTMLContent.length) {
      return;
    }
    await this._mainPage.evaluate(({chapters, config}) => {
      const appendTarget = document.querySelector(config.chapterAppendSelector);
      if (!appendTarget) {
        throw new Error(`chapter container not found: ${config.chapterAppendSelector}`);
      }
      chapters.forEach((chapter) => {
        const {id, html} = typeof chapter === 'string' ? {html: chapter} : chapter;
        const container = document.createElement('div');
        if (id) {
          // Target of the matching table-of-contents link.
          container.id = id;
        }
        container.innerHTML = html;
        container.style.marginTop = '800px';
        container.style.paddingTop = '40px';
        appendTarget.appendChild(container);
      });
    }, {chapters: chaptersHTMLContent, config: SITE_CONFIG});
  };
}
// Entry point: build the PDF for the mdBook user guide.
// The promise returned by run() is chained so that a failure in any step is
// reported and reflected in the process exit code instead of surfacing as an
// unhandled rejection.
GitBookPDFSpider.create({
  url: 'https://rust-lang.github.io/mdBook/',
  bookName: 'mdBook.pdf',
})
  .then((spider) => spider.run())
  .catch((err) => {
    console.error('failed to generate PDF', err);
    process.exitCode = 1;
  });