diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d5cfbbb3..9e777318 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,20 +1,20 @@  diff --git a/arrangement/classify_and_compress.js b/arrangement/classify_and_compress.js index f39e850e..c1b992dd 100644 --- a/arrangement/classify_and_compress.js +++ b/arrangement/classify_and_compress.js @@ -522,7 +522,7 @@ function classify(fso_name, fso_path, fso_status, sub_fso_list) { if ( // "1", "12-13-15", "3211231", "2014" - /^(?:[\d\- ]*|Ongoing|Eng?|English(?:-Uncen)?|korean|kor|Jap|Japanese|RUS|英訳|中国語|更正|GIFs?|CG|mp3|FLAC|APE)$/i + /^(?:[\d\- ]*|Ongoing|Eng?|English(?:-Uncen)?|korean|kor|Jap|Japanese|RUS|英訳|中国語|更正|GIFs?|CG|mp3|FLAC|APE|文庫版?)$/i .test(matched[1])) { // [Pixiv] 60枚 (3322006).zip } else if (matched[1] !== '仮') { diff --git a/comic.cmn-Hans-CN/comic.cmn-Hans-CN.bat b/comic.cmn-Hans-CN/comic.cmn-Hans-CN.bat index 7a33ef49..e1b2e830 100644 --- a/comic.cmn-Hans-CN/comic.cmn-Hans-CN.bat +++ b/comic.cmn-Hans-CN/comic.cmn-Hans-CN.bat @@ -1,10 +1,11 @@ @ECHO OFF -REM This is a batch file to show how to execute every crawler in one click. You must setup .txt title list file first. - +REM parallelly REM chcp 65001 -chcp 936 +REM chcp 936 REM cd/d D:\USB\cgi-bin\program\comic REM CD "%~n0" -FORFILES /M *.js /C "cmd.exe /c IF @isdir==FALSE node @file l=@fname.txt || PAUSE" +REM It's often too slow, so trying to execute parallelly. +FORFILES /M *.js /C "cmd.exe /c IF @isdir==FALSE START node @file l=@fname.txt || PAUSE" + REM CD .. diff --git a/comic.cmn-Hans-CN/dagu.js b/comic.cmn-Hans-CN/dagu.js index cf2e31e9..6044c24d 100644 --- a/comic.cmn-Hans-CN/dagu.js +++ b/comic.cmn-Hans-CN/dagu.js @@ -57,6 +57,7 @@ var crawler = new CeL.work_crawler({ : matched[1].replace('/', '-')); id_data.push(get_label(matched[2])); }, this); + // console.log([ id_list, id_data ]); return [ id_list, id_data ]; }, @@ -107,10 +108,12 @@ var crawler = new CeL.work_crawler({ work_data.chapter_list.push(chapter_data); } work_data.chapter_list.reverse(); + // console.log(work_data.chapter_list); }, pre_parse_chapter_data - // 執行在解析章節資料 process_chapter_data() 之前的作業 (async)。必須自行保證不丟出異常。 + // 執行在解析章節資料 process_chapter_data() 之前的作業 (async)。 + // 必須自行保證執行 callback(),不丟出異常、中斷。 : function(XMLHttp, work_data, callback, chapter_NO) { var chapter_data = work_data.chapter_list[chapter_NO - 1], // @@ -161,11 +164,12 @@ var crawler = new CeL.work_crawler({ chapter_data.image_list = []; extract_image(XMLHttp); - CeL.run_serial(function(run_next, NO, index) { - var image_page_url = url.replace(/(\.[^.]+)$/, '_' + NO + '$1'); + CeL.run_serial(function(run_next, image_NO, index) { + var image_page_url = url.replace(/(\.[^.]+)$/, '_' + image_NO + + '$1'); // console.log('Get #' + index + ': ' + image_page_url); - process.stdout.write('Get image pages of #' + chapter_NO + ': ' - + NO + '/' + image_count + '...\r'); + process.stdout.write('Get image data pages of #' + chapter_NO + + ': ' + image_NO + '/' + image_count + '...\r'); _this.get_URL(image_page_url, function(XMLHttp) { extract_image(XMLHttp); run_next(); diff --git a/comic.cmn-Hans-CN/dm5.js b/comic.cmn-Hans-CN/dm5.js index a37ab60d..719a628f 100644 --- a/comic.cmn-Hans-CN/dm5.js +++ b/comic.cmn-Hans-CN/dm5.js @@ -75,18 +75,28 @@ var crawler = new CeL.work_crawler({ var work_data = { // 必要屬性:須配合網站平台更改。 title : get_label(html.between('

', - '')), + '') + // 土豪漫画 + || html.between('

', '

')), // 選擇性屬性:須配合網站平台更改。 - author : get_label(html.between('

', '

') - .replace(/^.*?[::]/, '')), + // e.g., "

作者:...图:.../文:...

" + author : get_label(html.between('

', '

')), description : get_label(html.between('

') .between('>').replace(/', ''), - part_list : part_list + // 土豪漫画: 每周六更 + next_update : html.between(' class="remind">', '<'), + part_list : part_list, + // reset work_data.chapter_list + chapter_list : [] }; + if (!/[::][^::]+?[::]/.test(work_data.author)) { + work_data.author = work_data.author.replace(/^.*?[::]/, ''); + } + if (matched) { work_data.latest_chapter = matched[1]; } @@ -101,18 +111,19 @@ var crawler = new CeL.work_crawler({ } }); - work_data.status = work_data.状态; + Object.assign(work_data, { + status : work_data.状态, + last_update : work_data.更新时间 + }); return work_data; }, get_chapter_list : function(work_data, html, get_label) { - // 1: 由舊至新 - work_data.inverted_order = / DM5_COMIC_SORT\s*=\s*2/.test(html); + // 1: 由舊至新, 2: 由新至舊 + work_data.inverted_order = !/ DM5_COMIC_SORT\s*=\s*1/.test(html); html = html.between('detail-list-select', '

'); - // reset chapter_list - work_data.chapter_list = []; var PATTERN_chapter = /
  • ([\s\S]+?)<\/li>|