Skip to content

Commit

Permalink
Merge pull request #345 from ZhouXsh/main
Browse files Browse the repository at this point in the history
新增B站创作者(UP主)信息爬取(合并为单个commit)
  • Loading branch information
NanmiCoder authored Jul 18, 2024
2 parents 548271e + 3b2cc44 commit 3f2f03b
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 1 deletion.
2 changes: 1 addition & 1 deletion base/base_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ async def store_comment(self, comment_item: Dict):

# TODO support all platform
# NOTE(review): store_creator is declared abstract here, so every concrete
# store presumably has to implement it before it can be instantiated -
# confirm that all platform stores do.
@abstractmethod
async def store_creator(self, creator: Dict):
    """
    Store one creator (author / uploader) record.

    Args:
        creator: creator item dict

    Returns:
    """

Expand Down
1 change: 1 addition & 0 deletions media_platform/bilibili/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ async def search(self):
if video_item:
video_id_list.append(video_item.get("View").get("aid"))
await bilibili_store.update_bilibili_video(video_item)
await bilibili_store.update_up_info(video_item)
await self.get_bilibili_video(video_item, semaphore)
page += 1
await self.batch_get_video_comments(video_id_list)
Expand Down
19 changes: 19 additions & 0 deletions schema/tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,25 @@ CREATE TABLE `bilibili_video_comment` (
KEY `idx_bilibili_vi_video_i_f22873` (`video_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站视频评论';

-- ----------------------------
-- Table structure for bilibili_up_info
-- ----------------------------
-- Holds Bilibili uploader (UP) profile rows; the table comment reads
-- "Bilibili UP info". Rows are looked up, inserted and updated by user_id.
-- WARNING: the DROP below discards any existing table and its rows when this
-- script is re-run.
DROP TABLE IF EXISTS `bilibili_up_info`;
CREATE TABLE `bilibili_up_info` (
-- Surrogate auto-increment key; not used for lookups by the store code.
`id` int NOT NULL AUTO_INCREMENT COMMENT '自增ID',
-- Creator id, written as the string form of the creator card's "mid".
`user_id` varchar(64) DEFAULT NULL COMMENT '用户ID',
-- Display name (creator card "name").
`nickname` varchar(64) DEFAULT NULL COMMENT '用户昵称',
-- Avatar URL (creator card "face").
`avatar` varchar(255) DEFAULT NULL COMMENT '用户头像地址',
-- NOT NULL without a default: the DB store sets add_ts itself right before
-- inserting a creator it has not seen before.
`add_ts` bigint NOT NULL COMMENT '记录添加时间戳',
-- Refreshed with the current timestamp on every save.
`last_modify_ts` bigint NOT NULL COMMENT '记录最后修改时间戳',
-- Follower count (creator card "fans").
`total_fans` bigint DEFAULT NULL COMMENT '粉丝数',
-- Total likes received (the "like_num" value next to the creator card).
`total_liked` bigint DEFAULT NULL COMMENT '总获赞数',
-- Account level (creator card "level_info" -> "current_level").
`user_rank` int DEFAULT NULL COMMENT '用户等级',
-- Raw "type" value of the creator card's "official_verify" entry.
`is_official` int DEFAULT NULL COMMENT '是否官号',
PRIMARY KEY (`id`),
-- Non-unique index: duplicate user_id rows are not rejected by the schema,
-- only avoided by the store's query-then-insert logic.
-- NOTE(review): the "123456" suffix looks like a placeholder index name, and a
-- UNIQUE key on user_id may be intended - confirm.
KEY `idx_bilibili_vi_user_123456` (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='B 站UP主信息';

-- ----------------------------
-- Table structure for douyin_aweme
-- ----------------------------
Expand Down
18 changes: 18 additions & 0 deletions store/bilibili/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@ async def update_bilibili_video(video_item: Dict):
await BiliStoreFactory.create_store().store_content(content_item=save_content_item)


async def update_up_info(video_item: Dict):
    """
    Extract the uploader (UP) profile from a video detail item and store it.

    Args:
        video_item: video detail dict; the profile is read from
            video_item["Card"]["card"] and the total like count from
            video_item["Card"]["like_num"]

    Returns:
        None. Nothing is stored when the item carries no creator "mid".
    """
    # Each nesting level may be missing or null in a partial response; fall
    # back to empty dicts so the chained lookups cannot raise AttributeError.
    card_wrapper: Dict = video_item.get("Card") or {}
    card: Dict = card_wrapper.get("card") or {}
    creator_mid = card.get("mid")
    if creator_mid is None:
        # Without this guard str(None) would be saved as the user_id "None".
        utils.logger.warning(
            "[store.bilibili.update_up_info] video item has no creator card, skip saving")
        return

    level_info: Dict = card.get("level_info") or {}
    official_verify: Dict = card.get("official_verify") or {}
    saver_up_info = {
        "user_id": str(creator_mid),
        "nickname": card.get("name"),
        "avatar": card.get("face"),
        "last_modify_ts": utils.get_current_timestamp(),
        "total_fans": card.get("fans"),
        "total_liked": card_wrapper.get("like_num"),
        "user_rank": level_info.get("current_level"),
        "is_official": official_verify.get("type"),
    }
    utils.logger.info(
        f"[store.bilibili.update_up_info] bilibili user_id:{creator_mid}")
    await BiliStoreFactory.create_store().store_creator(creator=saver_up_info)


async def batch_update_bilibili_video_comments(video_id: str, comments: List[Dict]):
if not comments:
return
Expand Down
43 changes: 43 additions & 0 deletions store/bilibili/bilibili_store_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,17 @@ async def store_comment(self, comment_item: Dict):
"""
await self.save_data_to_csv(save_item=comment_item, store_type="comments")

async def store_creator(self, creator: Dict):
    """
    Write one Bilibili creator record through the CSV writer of this store.

    Args:
        creator: creator item dict

    Returns:
    """
    # Creator rows go to their own "creators" store type.
    csv_store_type = "creators"
    await self.save_data_to_csv(save_item=creator, store_type=csv_store_type)


class BiliDbStoreImplement(AbstractStore):
async def store_content(self, content_item: Dict):
Expand Down Expand Up @@ -129,6 +140,27 @@ async def store_comment(self, comment_item: Dict):
else:
await update_comment_by_comment_id(comment_id, comment_item=comment_item)

async def store_creator(self, creator: Dict):
    """
    Save one Bilibili creator to the database: update the row when the
    creator is already known, otherwise insert a new one.

    Args:
        creator: creator item dict; its "user_id" entry is the lookup key

    Returns:
    """
    # Imported inside the method, exactly as the SQL helpers are used only here.
    from .bilibili_store_sql import (add_new_creator,
                                     query_creator_by_creator_id,
                                     update_creator_by_creator_id)

    user_id = creator.get("user_id")
    existing_row: Dict = await query_creator_by_creator_id(creator_id=user_id)
    if existing_row:
        await update_creator_by_creator_id(user_id, creator_item=creator)
        return

    # First sighting of this creator: stamp the creation time, then insert.
    creator["add_ts"] = utils.get_current_timestamp()
    await add_new_creator(creator)


class BiliJsonStoreImplement(AbstractStore):
json_store_path: str = "data/bilibili/json"
Expand Down Expand Up @@ -204,3 +236,14 @@ async def store_comment(self, comment_item: Dict):
"""
await self.save_data_to_json(comment_item, "comments")

async def store_creator(self, creator: Dict):
    """
    Write one Bilibili creator record through the JSON writer of this store.

    Args:
        creator: creator item dict

    Returns:
    """
    # Creator records are saved under the "creators" store type.
    json_store_type = "creators"
    await self.save_data_to_json(creator, json_store_type)
47 changes: 47 additions & 0 deletions store/bilibili/bilibili_store_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,50 @@ async def update_comment_by_comment_id(comment_id: str, comment_item: Dict) -> i
async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
effect_row: int = await async_db_conn.update_table("bilibili_video_comment", comment_item, "comment_id", comment_id)
return effect_row


async def query_creator_by_creator_id(creator_id: str) -> Dict:
    """
    Look up one UP (creator) row by its user id.

    Args:
        creator_id: value matched against bilibili_up_info.user_id

    Returns:
        The first matching row as a dict, or an empty dict when none matches.
    """
    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
    # creator_id originates from scraped API data, so it is bound as a query
    # parameter instead of being interpolated into the SQL text.
    # NOTE(review): relies on AsyncMysqlDB.query forwarding extra positional
    # arguments to the driver as parameters - confirm against its definition.
    sql: str = "select * from bilibili_up_info where user_id = %s"
    rows: List[Dict] = await async_db_conn.query(sql, creator_id)
    return rows[0] if rows else {}


async def add_new_creator(creator_item: Dict) -> int:
    """
    Insert a new UP (creator) row into bilibili_up_info.

    Args:
        creator_item: creator item dict handed to the table insert helper

    Returns:
        The value returned by the insert helper (named last row id in the
        original code).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.item_to_table("bilibili_up_info", creator_item)


async def update_creator_by_creator_id(creator_id: str, creator_item: Dict) -> int:
    """
    Update the UP (creator) row whose user_id equals creator_id.

    Args:
        creator_id: user id selecting the row to update
        creator_item: creator item dict handed to the table update helper

    Returns:
        The value returned by the update helper (named affected row count in
        the original code).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.update_table("bilibili_up_info", creator_item, "user_id", creator_id)

0 comments on commit 3f2f03b

Please sign in to comment.