Skip to content

Commit

Permalink
🔖 Release v1.3
Browse files Browse the repository at this point in the history
  • Loading branch information
freeok committed Aug 12, 2023
1 parent 72e4799 commit f890526
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 78 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# so-novel

Java语言开发,可根据书名、作者搜索并下载小说。
Java 语言开发,可根据书名、作者搜索并下载小说。
20 changes: 15 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@

<groupId>work.pcdd</groupId>
<artifactId>so-novel</artifactId>
<version>1.0-SNAPSHOT</version>
<version>1.3</version>

<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
<version>1.15.3</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.10</version>
<version>5.8.21</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
Expand All @@ -31,4 +31,14 @@
</dependency>
</dependencies>

<build>
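<!-- Enable resource filtering so that @...@ placeholders in src/main/resources (e.g. version=@version@ in config.properties) are replaced with values from this pom -->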
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>

</project>
2 changes: 1 addition & 1 deletion src/main/java/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Manifest-Version: 1.1
Manifest-Version: 1.3
Main-Class: work.pcdd.sonovel.Application
8 changes: 4 additions & 4 deletions src/main/java/work/pcdd/sonovel/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import java.util.Scanner;

/**
* @author [email protected]
* @date 2021/6/10 16:18
* @author pcdd
* Created at 2021/6/10 16:18
*/
public class Application {

Expand All @@ -24,7 +24,7 @@ public static void main(String[] args) {
while (true) {
printHint();
String keyword = scanner.nextLine().trim();
if (keyword.length() == 0) {
if (keyword.isEmpty()) {
Console.log("<== 关键字不能为空");
continue;
}
Expand Down Expand Up @@ -59,7 +59,7 @@ public static void main(String[] args) {
int end = scanner.nextInt();
double res = SearchNovelUtils.crawl(results, num, start, end);

Console.log("\n<== 下载完毕,耗时{}s\n", NumberUtil.round(res, 2));
Console.log("\n<== 下载完毕,总耗时{}s\n", NumberUtil.round(res, 2));
}

}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/work/pcdd/sonovel/bean/SearchResultLine.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import lombok.Data;

/**
* @author pc
* @date 2022-05-23 23:28:56
* @author pcdd
* Created at 2022-05-23 23:28:56
*/
@Data
@Builder
Expand Down
103 changes: 47 additions & 56 deletions src/main/java/work/pcdd/sonovel/util/SearchNovelUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.*;

/**
* @author [email protected]
* @date 2021/6/10 17:03
* @author pcdd
* Created at 2021/6/10 17:03
*/
public class SearchNovelUtils {

Expand All @@ -46,11 +44,10 @@ public class SearchNovelUtils {
searchUrl = pro.get("search_url").toString();
savePath = pro.get("savePath").toString();
extName = pro.get("extName").toString();
minTimeInterval = Convert.toLong(pro.get("min"), 50L);
maxTimeInterval = Convert.toLong(pro.get("max"), 100L);
minTimeInterval = Convert.toLong(pro.get("min"), 5000L);
maxTimeInterval = Convert.toLong(pro.get("max"), 6000L);
} catch (IOException e) {
Console.log("初始化参数失败:" + e.getMessage());
e.printStackTrace();
Console.error("初始化参数失败:" + e.getMessage());
}
}

Expand All @@ -63,23 +60,13 @@ private SearchNovelUtils() {
* @param keyword 关键字
* @return 匹配的小说列表
*/
@SneakyThrows
public static List<SearchResultLine> search(String keyword) {
Console.log("==> 正在搜索...");
long start = System.currentTimeMillis();
Connection connect = Jsoup.connect(searchUrl);
Document document = null;

try {
// 搜索结果页DOM
document = connect.data("searchkey", keyword).post();
} catch (IOException e) {
e.printStackTrace();
}

if (document == null) {
Console.log("<== 搜索到0条记录");
return Collections.emptyList();
}
// 搜索结果页DOM
Document document = connect.data("searchkey", keyword).post();

// tr:nth-child(n+2) matches every tr from the 2nd one onward
Elements elements = document.select("#checkform > table > tbody > tr:nth-child(n+2)");
Expand Down Expand Up @@ -112,39 +99,43 @@ public static List<SearchResultLine> search(String keyword) {
* @param start 从第几章下载
* @param end 下载到第几章
*/
@SneakyThrows
public static double crawl(List<SearchResultLine> list, int num, int start, int end) {
try {
SearchResultLine srl = list.get(num);
String bookName = srl.getBookName();
String author = srl.getAuthor();
// 小说详情页url
String url = srl.getLink();

// 小说目录名格式:书名(作者)
novelDir = String.format("%s(%s)", bookName, author);
File file = new File(savePath + novelDir);
if (!file.exists()) {
file.mkdirs();
}

Document document = Jsoup.parse(new URL(url), 10000);
// 获取小说目录
Elements elements = document.getElementById("list").getElementsByTag("a");
Console.log("==> 开始下载:《{}》({})", bookName, author);

long startTime = System.currentTimeMillis();
// elements.size()是小说的总章数
for (int i = start - 1; i < end && i < elements.size(); i++) {
String title = elements.get(i).text();
String href = indexUrl + elements.get(i).attr("href");
crawlChapter(i + 1, title, href);
}
return (System.currentTimeMillis() - startTime) / 1000.0;
SearchResultLine srl = list.get(num);
String bookName = srl.getBookName();
String author = srl.getAuthor();
// 小说详情页url
String url = srl.getLink();

// 小说目录名格式:书名(作者)
novelDir = String.format("%s(%s)", bookName, author);
File file = new File(savePath + novelDir);
if (!file.exists()) {
file.mkdirs();
}

} catch (IOException e) {
e.printStackTrace();
Document document = Jsoup.parse(new URL(url), 10000);
// 获取小说目录
Elements elements = document.getElementById("list").getElementsByTag("a");
Console.log("==> 开始下载:《{}》({})", bookName, author);

ExecutorService executor = Executors.newSingleThreadExecutor();
CountDownLatch countDownLatch = new CountDownLatch(end);
long startTime = System.currentTimeMillis();

// elements.size()是小说的总章数
for (int i = start - 1; i < end && i < elements.size(); i++) {
int finalI = i;
executor.execute(() -> {
String title = elements.get(finalI).text();
String href = indexUrl + elements.get(finalI).attr("href");
crawlChapter(finalI + 1, title, href);
countDownLatch.countDown();
});
}
return -1;
countDownLatch.await();

return (System.currentTimeMillis() - startTime) / 1000.0;
}

/**
Expand All @@ -159,7 +150,7 @@ private static void crawlChapter(int chapterNo, String chapterName, String url)
// 设置时间间隔
long timeInterval = ThreadLocalRandom.current().nextLong(minTimeInterval, maxTimeInterval);
TimeUnit.MILLISECONDS.sleep(timeInterval);
Console.log("正在下载:【{}】", chapterName);
Console.log("正在下载:【{}】 间隔 {} ms", chapterName, timeInterval);
Document document = Jsoup.parse(new URL(url), 10000);
String content = document.getElementById("content").html();
download(chapterNo, chapterName, content);
Expand All @@ -177,9 +168,9 @@ private static void download(int chapterNo, String chapterName, String content)
String path = savePath + novelDir + File.separator
+ chapterNo + "_" + chapterName
+ "." + extName;
OutputStream fos = new BufferedOutputStream(new FileOutputStream(path));
fos.write(content.getBytes(StandardCharsets.UTF_8));
fos.close();
try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(path))) {
fos.write(content.getBytes(StandardCharsets.UTF_8));
}
}

}
18 changes: 9 additions & 9 deletions src/main/resources/config.properties
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# 版本号
version=1.1
# 某小说网站主页
index_url=https://www.ibiquge.la
index_url=https://www.ibiquges.org
# 某小说网站的搜索请求url
search_url=https://www.ibiquge.la/modules/article/waps.php
search_url=https://www.ibiquges.org/modules/article/waps.php
# 下载根路径
savePath=d:\\so-novel\\
savePath=/so-novel/
# 文件扩展名
extName=html
# 最小时间间隔 ms
min=50
# 最大时间间隔 ms
max=100
# 爬取最小间隔 ms
min=5000
# 爬取最大间隔 ms
max=6000
# 版本号
version=@version@

0 comments on commit f890526

Please sign in to comment.