From a11f65ef11496b81f6d30990979cb31cdc8df542 Mon Sep 17 00:00:00 2001
From: Yuukiy <76897913+Yuukiy@users.noreply.github.com>
Date: Mon, 2 Oct 2023 14:31:25 +0800
Subject: [PATCH] =?UTF-8?q?javbus:=20=E5=BA=94=E5=AF=B9=E6=8A=93=E5=8F=96?=
 =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=97=B6=E8=A2=AB=E9=87=8D=E5=AE=9A=E5=90=91?=
 =?UTF-8?q?=E5=88=B0=E7=99=BB=E5=BD=95=E9=A1=B5=E7=9A=84=E6=83=85=E5=BD=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 web/javbus.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/web/javbus.py b/web/javbus.py
index 2f2759cd3..3d64ac4df 100644
--- a/web/javbus.py
+++ b/web/javbus.py
@@ -31,9 +31,17 @@ def parse_data(movie: MovieInfo):
     if resp.status_code == 404:
         raise MovieNotFoundError(__name__, movie.dvdid)
     resp.raise_for_status()
-    html = resp2html(resp)
+    # 疑似JavBus检测到类似爬虫的行为时会要求登录，不过发现目前不需要登录也可以从重定向前的网页中提取信息
+    if resp.history and resp.history[0].status_code == 302:
+        html = resp2html(resp.history[0])
+    else:
+        html = resp2html(resp)
+    # 引入登录验证后状态码不再准确，因此还要额外通过检测标题来确认是否发生了404
+    page_title = html.xpath('/html/head/title/text()')
+    if page_title and page_title[0].startswith('404 Page Not Found!'):
+        raise MovieNotFoundError(__name__, movie.dvdid)
 
-    container = html.xpath("/html/body/div[@class='container']")[0]
+    container = html.xpath("//div[@class='container']")[0]
     title = container.xpath("h3/text()")[0]
     cover = container.xpath("//a[@class='bigImage']/img/@src")[0]
     preview_pics = container.xpath("//div[@id='sample-waterfall']/a/@href")