Skip to content

Commit

Permalink
fix: 修复抖音关键词搜索bug
Browse files: browse the repository at this point in the history
  • Loading branch information
NanmiCoder committed Aug 19, 2024
1 parent 2ab1492 commit 04cbe54
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
5 changes: 4 additions & 1 deletion media_platform/douyin/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ async def search_info_by_keyword(
offset: int = 0,
search_channel: SearchChannelType = SearchChannelType.GENERAL,
sort_type: SearchSortType = SearchSortType.GENERAL,
- publish_time: PublishTimeType = PublishTimeType.UNLIMITED
+ publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
+ search_id: str = ""
):
"""
DouYin Web Search API
Expand All @@ -135,6 +136,7 @@ async def search_info_by_keyword(
:param search_channel:
:param sort_type:
:param publish_time: ·
+ :param search_id: ·
:return:
"""
query_params = {
Expand All @@ -149,6 +151,7 @@ async def search_info_by_keyword(
'count': '15',
'need_filter_settings': '1',
'list_type': 'multi',
+ 'search_id': search_id,
}
if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
query_params["filter_selected"] = json.dumps({
Expand Down
6 changes: 4 additions & 2 deletions media_platform/douyin/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ async def search(self) -> None:
utils.logger.info(f"[DouYinCrawler.search] Current keyword: {keyword}")
aweme_list: List[str] = []
page = 0
+ dy_search_id = ""
while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
if page < start_page:
utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
Expand All @@ -92,7 +93,8 @@ async def search(self) -> None:
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
offset=page * dy_limit_count - dy_limit_count,
- publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE)
+ publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
+ search_id=dy_search_id
)
except DataFetchError:
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
Expand All @@ -103,7 +105,7 @@ async def search(self) -> None:
utils.logger.error(
f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
break
-
+ dy_search_id = posts_res.get("extra", {}).get("logid", "")
for post_item in posts_res.get("data"):
try:
aweme_info: Dict = post_item.get("aweme_info") or \
Expand Down

0 comments on commit 04cbe54

Please sign in to comment.