From 04cbe549af3874d3caa04fd2416f42aeccba63ec Mon Sep 17 00:00:00 2001
From: Relakkes
Date: Tue, 20 Aug 2024 03:09:42 +0800
Subject: [PATCH] fix: fix the Douyin keyword search bug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 media_platform/douyin/client.py | 5 ++++-
 media_platform/douyin/core.py   | 6 ++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/media_platform/douyin/client.py b/media_platform/douyin/client.py
index 7e2a78ed..e760697b 100644
--- a/media_platform/douyin/client.py
+++ b/media_platform/douyin/client.py
@@ -126,7 +126,8 @@ async def search_info_by_keyword(
             offset: int = 0,
             search_channel: SearchChannelType = SearchChannelType.GENERAL,
             sort_type: SearchSortType = SearchSortType.GENERAL,
-            publish_time: PublishTimeType = PublishTimeType.UNLIMITED
+            publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
+            search_id: str = ""
     ):
         """
         DouYin Web Search API
@@ -135,6 +136,7 @@ async def search_info_by_keyword(
         :param search_channel:
         :param sort_type:
         :param publish_time:
+        :param search_id:
         :return:
         """
         query_params = {
@@ -149,6 +151,7 @@ async def search_info_by_keyword(
             'count': '15',
             'need_filter_settings': '1',
             'list_type': 'multi',
+            'search_id': search_id,
         }
         if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
             query_params["filter_selected"] = json.dumps({
diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py
index 19b3ad1a..395d7c0d 100644
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -83,6 +83,7 @@ async def search(self) -> None:
             utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
             aweme_list: List[str] = []
             page = 0
+            dy_search_id = ""
             while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
                 if page < start_page:
                     utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
@@ -92,7 +93,8 @@ async def search(self) -> None:
                     utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
                     posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
                                                                             offset=page * dy_limit_count - dy_limit_count,
-                                                                            publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE)
+                                                                            publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
+                                                                            search_id=dy_search_id
                                                                             )
                 except DataFetchError:
                     utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
@@ -103,7 +105,7 @@ async def search(self) -> None:
                 utils.logger.error(
                     f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
                 break
-
+                dy_search_id = posts_res.get("extra", {}).get("logid", "")
                 for post_item in posts_res.get("data"):
                     try:
                         aweme_info: Dict = post_item.get("aweme_info") or \
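
For reference, below is a minimal sketch of the pagination pattern this patch introduces: the first search request for a keyword goes out with an empty search_id, and every following page passes back the extra.logid returned by the previous response. The fetch_page coroutine here is a hypothetical stand-in for DOUYINClient.search_info_by_keyword (the real client signs the request and sends many more query parameters); only the search_id hand-off mirrors the patched code.

# Illustrative sketch only, not part of the patch.
import asyncio
from typing import Any, Dict, List


async def fetch_page(keyword: str, offset: int, search_id: str = "") -> Dict[str, Any]:
    """Fake search call, shaped like the fields the crawler actually reads."""
    return {
        "data": [{"aweme_info": {"aweme_id": f"{keyword}-{offset + i}"}} for i in range(15)],
        # The real endpoint returns a log id in extra.logid; the crawler feeds it
        # back as search_id on the next page so pagination stays consistent.
        "extra": {"logid": f"logid-for-offset-{offset}"},
    }


async def search_keyword(keyword: str, max_notes: int = 45, page_size: int = 15) -> List[str]:
    aweme_ids: List[str] = []
    search_id = ""  # empty on the first page, then carried forward
    page = 0
    while page * page_size < max_notes:
        res = await fetch_page(keyword, offset=page * page_size, search_id=search_id)
        if "data" not in res:
            break  # e.g. empty result or risk control kicked in
        # Carry the server-issued log id into the next request, as the patch does.
        search_id = res.get("extra", {}).get("logid", "")
        for item in res.get("data", []):
            info = item.get("aweme_info") or {}
            if info.get("aweme_id"):
                aweme_ids.append(info["aweme_id"])
        page += 1
    return aweme_ids


if __name__ == "__main__":
    print(asyncio.run(search_keyword("python")))

In the real crawler the stand-in is replaced by the signed web API call; because search_id defaults to an empty string, the first request for each keyword still goes out without a server-issued id, matching the pre-patch behaviour.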