-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
79 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# so-novel | ||
|
||
Java语言开发,可根据书名、作者搜索并下载小说。 | ||
Java 语言开发,可根据书名、作者搜索并下载小说。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
Manifest-Version: 1.1 | ||
Manifest-Version: 1.3 | ||
Main-Class: work.pcdd.sonovel.Application |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,8 @@ | |
import java.util.Scanner; | ||
|
||
/** | ||
* @author [email protected] | ||
* @date 2021/6/10 16:18 | ||
* @author pcdd | ||
* Created at 2021/6/10 16:18 | ||
*/ | ||
public class Application { | ||
|
||
|
@@ -24,7 +24,7 @@ public static void main(String[] args) { | |
while (true) { | ||
printHint(); | ||
String keyword = scanner.nextLine().trim(); | ||
if (keyword.length() == 0) { | ||
if (keyword.isEmpty()) { | ||
Console.log("<== 关键字不能为空"); | ||
continue; | ||
} | ||
|
@@ -59,7 +59,7 @@ public static void main(String[] args) { | |
int end = scanner.nextInt(); | ||
double res = SearchNovelUtils.crawl(results, num, start, end); | ||
|
||
Console.log("\n<== 下载完毕,耗时{}s\n", NumberUtil.round(res, 2)); | ||
Console.log("\n<== 下载完毕,总耗时{}s\n", NumberUtil.round(res, 2)); | ||
} | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,15 +16,13 @@ | |
import java.net.URL; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Properties; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.*; | ||
|
||
/** | ||
* @author [email protected] | ||
* @date 2021/6/10 17:03 | ||
* @author pcdd | ||
* Created at 2021/6/10 17:03 | ||
*/ | ||
public class SearchNovelUtils { | ||
|
||
|
@@ -46,11 +44,10 @@ public class SearchNovelUtils { | |
searchUrl = pro.get("search_url").toString(); | ||
savePath = pro.get("savePath").toString(); | ||
extName = pro.get("extName").toString(); | ||
minTimeInterval = Convert.toLong(pro.get("min"), 50L); | ||
maxTimeInterval = Convert.toLong(pro.get("max"), 100L); | ||
minTimeInterval = Convert.toLong(pro.get("min"), 5000L); | ||
maxTimeInterval = Convert.toLong(pro.get("max"), 6000L); | ||
} catch (IOException e) { | ||
Console.log("初始化参数失败:" + e.getMessage()); | ||
e.printStackTrace(); | ||
Console.error("初始化参数失败:" + e.getMessage()); | ||
} | ||
} | ||
|
||
|
@@ -63,23 +60,13 @@ private SearchNovelUtils() { | |
* @param keyword 关键字 | ||
* @return 匹配的小说列表 | ||
*/ | ||
@SneakyThrows | ||
public static List<SearchResultLine> search(String keyword) { | ||
Console.log("==> 正在搜索..."); | ||
long start = System.currentTimeMillis(); | ||
Connection connect = Jsoup.connect(searchUrl); | ||
Document document = null; | ||
|
||
try { | ||
// 搜索结果页DOM | ||
document = connect.data("searchkey", keyword).post(); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
|
||
if (document == null) { | ||
Console.log("<== 搜索到0条记录"); | ||
return Collections.emptyList(); | ||
} | ||
// 搜索结果页DOM | ||
Document document = connect.data("searchkey", keyword).post(); | ||
|
||
// tr:nth-child(n+2)表示获取第2个tr开始获取 | ||
Elements elements = document.select("#checkform > table > tbody > tr:nth-child(n+2)"); | ||
|
@@ -112,39 +99,43 @@ public static List<SearchResultLine> search(String keyword) { | |
* @param start 从第几章下载 | ||
* @param end 下载到第几章 | ||
*/ | ||
@SneakyThrows | ||
public static double crawl(List<SearchResultLine> list, int num, int start, int end) { | ||
try { | ||
SearchResultLine srl = list.get(num); | ||
String bookName = srl.getBookName(); | ||
String author = srl.getAuthor(); | ||
// 小说详情页url | ||
String url = srl.getLink(); | ||
|
||
// 小说目录名格式:书名(作者) | ||
novelDir = String.format("%s(%s)", bookName, author); | ||
File file = new File(savePath + novelDir); | ||
if (!file.exists()) { | ||
file.mkdirs(); | ||
} | ||
|
||
Document document = Jsoup.parse(new URL(url), 10000); | ||
// 获取小说目录 | ||
Elements elements = document.getElementById("list").getElementsByTag("a"); | ||
Console.log("==> 开始下载:《{}》({})", bookName, author); | ||
|
||
long startTime = System.currentTimeMillis(); | ||
// elements.size()是小说的总章数 | ||
for (int i = start - 1; i < end && i < elements.size(); i++) { | ||
String title = elements.get(i).text(); | ||
String href = indexUrl + elements.get(i).attr("href"); | ||
crawlChapter(i + 1, title, href); | ||
} | ||
return (System.currentTimeMillis() - startTime) / 1000.0; | ||
SearchResultLine srl = list.get(num); | ||
String bookName = srl.getBookName(); | ||
String author = srl.getAuthor(); | ||
// 小说详情页url | ||
String url = srl.getLink(); | ||
|
||
// 小说目录名格式:书名(作者) | ||
novelDir = String.format("%s(%s)", bookName, author); | ||
File file = new File(savePath + novelDir); | ||
if (!file.exists()) { | ||
file.mkdirs(); | ||
} | ||
|
||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
Document document = Jsoup.parse(new URL(url), 10000); | ||
// 获取小说目录 | ||
Elements elements = document.getElementById("list").getElementsByTag("a"); | ||
Console.log("==> 开始下载:《{}》({})", bookName, author); | ||
|
||
ExecutorService executor = Executors.newSingleThreadExecutor(); | ||
CountDownLatch countDownLatch = new CountDownLatch(end); | ||
long startTime = System.currentTimeMillis(); | ||
|
||
// elements.size()是小说的总章数 | ||
for (int i = start - 1; i < end && i < elements.size(); i++) { | ||
int finalI = i; | ||
executor.execute(() -> { | ||
String title = elements.get(finalI).text(); | ||
String href = indexUrl + elements.get(finalI).attr("href"); | ||
crawlChapter(finalI + 1, title, href); | ||
countDownLatch.countDown(); | ||
}); | ||
} | ||
return -1; | ||
countDownLatch.await(); | ||
|
||
return (System.currentTimeMillis() - startTime) / 1000.0; | ||
} | ||
|
||
/** | ||
|
@@ -159,7 +150,7 @@ private static void crawlChapter(int chapterNo, String chapterName, String url) | |
// 设置时间间隔 | ||
long timeInterval = ThreadLocalRandom.current().nextLong(minTimeInterval, maxTimeInterval); | ||
TimeUnit.MILLISECONDS.sleep(timeInterval); | ||
Console.log("正在下载:【{}】", chapterName); | ||
Console.log("正在下载:【{}】 间隔 {} ms", chapterName, timeInterval); | ||
Document document = Jsoup.parse(new URL(url), 10000); | ||
String content = document.getElementById("content").html(); | ||
download(chapterNo, chapterName, content); | ||
|
@@ -177,9 +168,9 @@ private static void download(int chapterNo, String chapterName, String content) | |
String path = savePath + novelDir + File.separator | ||
+ chapterNo + "_" + chapterName | ||
+ "." + extName; | ||
OutputStream fos = new BufferedOutputStream(new FileOutputStream(path)); | ||
fos.write(content.getBytes(StandardCharsets.UTF_8)); | ||
fos.close(); | ||
try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(path))) { | ||
fos.write(content.getBytes(StandardCharsets.UTF_8)); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
# 版本号 | ||
version=1.1 | ||
# 某小说网站主页 | ||
index_url=https://www.ibiquge.la | ||
index_url=https://www.ibiquges.org | ||
# 某小说网站的搜索请求url | ||
search_url=https://www.ibiquge.la/modules/article/waps.php | ||
search_url=https://www.ibiquges.org/modules/article/waps.php | ||
# 下载根路径 | ||
savePath=d:\\so-novel\\ | ||
savePath=/so-novel/ | ||
# 文件扩展名 | ||
extName=html | ||
# 最小时间间隔 ms | ||
min=50 | ||
# 最大时间间隔 ms | ||
max=100 | ||
# 爬取最小间隔 ms | ||
min=5000 | ||
# 爬取最大间隔 ms | ||
max=6000 | ||
# 版本号 | ||
version=@version@ | ||
|