diff --git a/app/chain/search.py b/app/chain/search.py index a3ee205d..31441e19 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -23,7 +23,7 @@ class SearchChain(ChainBase): """ 站点资源搜索处理链 """ - + __result_temp_file = "__search_result__" def __init__(self): @@ -103,7 +103,8 @@ def process(self, mediainfo: MediaInfo, no_exists: Dict[int, Dict[int, NotExistMediaInfo]] = None, sites: List[int] = None, rule_groups: List[str] = None, - area: str = "title") -> List[Context]: + area: str = "title", + custom_words: List[str] = None) -> List[Context]: """ 根据媒体信息搜索种子资源,精确匹配,应用过滤规则,同时根据no_exists过滤本地已存在的资源 :param mediainfo: 媒体信息 @@ -112,6 +113,7 @@ def process(self, mediainfo: MediaInfo, :param sites: 站点ID列表,为空时搜索所有站点 :param rule_groups: 过滤规则组名称列表 :param area: 搜索范围,title or imdbid + :param custom_words: 自定义识别词列表 """ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: @@ -177,16 +179,36 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: # 开始新进度 self.progress.start(ProgressKey.Search) + # 开始过滤 + self.progress.update(value=0, text=f'开始过滤,总 {len(torrents)} 个资源,请稍候...', + key=ProgressKey.Search) + + # 开始过滤规则过滤 + if rule_groups is None: + # 取搜索过滤规则 + rule_groups: List[str] = self.systemconfig.get(SystemConfigKey.SearchFilterRuleGroups) + if rule_groups: + logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...') + torrents = __do_filter(torrents) + if not torrents: + logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') + return [] + logger.info(f"过滤规则/剧集过滤完成,剩余 {len(torrents)} 个资源") + + # 过滤完成 + self.progress.update(value=50, text=f'过滤完成,剩余 {len(torrents)} 个资源', key=ProgressKey.Search) + # 开始匹配 _match_torrents = [] # 总数 _total = len(torrents) # 已处理数 _count = 0 + if mediainfo: # 英文标题应该在别名/原标题中,不需要再匹配 logger.info(f"开始匹配结果 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}") - self.progress.update(value=0, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search) + self.progress.update(value=51, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search) for torrent in torrents: _count += 1 self.progress.update(value=(_count / _total) * 96, @@ -194,23 +216,24 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: key=ProgressKey.Search) if not torrent.title: continue + # 识别元数据 + torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description, + custom_words=custom_words) + if torrent.title != torrent_meta.org_string: + logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}") # 比对IMDBID if torrent.imdbid \ and mediainfo.imdb_id \ and torrent.imdbid == mediainfo.imdb_id: logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源:{torrent.site_name} - {torrent.title}') - _match_torrents.append(torrent) + _match_torrents.append((torrent, torrent_meta)) continue - # 识别 - torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description) - if torrent.title != torrent_meta.org_string: - logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}") # 比对种子 if self.torrenthelper.match_torrent(mediainfo=mediainfo, torrent_meta=torrent_meta, torrent=torrent): # 匹配成功 - _match_torrents.append(torrent) + _match_torrents.append((torrent, torrent_meta)) continue # 匹配完成 logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源") @@ -218,33 +241,15 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: text=f'匹配完成,共匹配到 {len(_match_torrents)} 个资源', key=ProgressKey.Search) else: - _match_torrents = torrents - - # 开始过滤 - self.progress.update(value=98, text=f'开始过滤,总 {len(_match_torrents)} 个资源,请稍候...', - key=ProgressKey.Search) - - # 开始过滤规则过滤 - if rule_groups is None: - # 取搜索过滤规则 - rule_groups: List[str] = self.systemconfig.get(SystemConfigKey.SearchFilterRuleGroups) - if rule_groups: - logger.info(f'开始过滤规则/剧集过滤,使用规则组:{rule_groups} ...') - _match_torrents = __do_filter(_match_torrents) - if not _match_torrents: - logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源') - return [] - logger.info(f"过滤规则/剧集过滤完成,剩余 {len(_match_torrents)} 个资源") + _match_torrents = [(t, MetaInfo(title=t.title, subtitle=t.description)) for t in torrents] # 去掉mediainfo中多余的数据 mediainfo.clear() # 组装上下文 - contexts = [Context(meta_info=MetaInfo(title=torrent.title, subtitle=torrent.description), + contexts = [Context(torrent_info=t[0], media_info=mediainfo, - torrent_info=torrent) for torrent in _match_torrents] - - self.progress.update(value=99, text=f'过滤完成,剩余 {len(contexts)} 个资源', key=ProgressKey.Search) + meta_info=t[1]) for t in _match_torrents] # 排序 self.progress.update(value=99, @@ -253,10 +258,10 @@ def __do_filter(torrent_list: List[TorrentInfo]) -> List[TorrentInfo]: contexts = self.torrenthelper.sort_torrents(contexts) # 结束进度 + logger.info(f'搜索完成,共 {len(contexts)} 个资源') self.progress.update(value=100, text=f'搜索完成,共 {len(contexts)} 个资源', key=ProgressKey.Search) - logger.info(f'搜索完成,共 {len(contexts)} 个资源') self.progress.end(ProgressKey.Search) # 返回 diff --git a/app/chain/subscribe.py b/app/chain/subscribe.py index 621a8617..92f46e31 100644 --- a/app/chain/subscribe.py +++ b/app/chain/subscribe.py @@ -14,6 +14,7 @@ from app.core.context import TorrentInfo, Context, MediaInfo from app.core.event import eventmanager, Event, EventManager from app.core.meta import MetaBase +from app.core.meta.words import WordsMatcher from app.core.metainfo import MetaInfo from app.db.downloadhistory_oper import DownloadHistoryOper from app.db.models.subscribe import Subscribe @@ -242,6 +243,7 @@ def search(self, sid: int = None, state: str = 'N', manual: bool = False): # 遍历订阅 for subscribe in subscribes: mediakey = subscribe.tmdbid or subscribe.doubanid + custom_word_list = subscribe.custom_words.split("\n") if subscribe.custom_words else None # 校验当前时间减订阅创建时间是否大于1分钟,否则跳过先,留出编辑订阅的时间 if subscribe.date: now = datetime.now() @@ -343,7 +345,8 @@ def search(self, sid: int = None, state: str = 'N', manual: bool = False): no_exists=no_exists, sites=sites, rule_groups=rule_groups, - area="imdbid" if subscribe.search_imdbid else "title") + area="imdbid" if subscribe.search_imdbid else "title", + custom_words=custom_word_list) if not contexts: logger.warn(f'订阅 {subscribe.keyword or subscribe.name} 未搜索到资源') self.finish_subscribe_or_not(subscribe=subscribe, meta=meta, @@ -519,6 +522,10 @@ def match(self, torrents: Dict[str, List[Context]]): if not torrents: logger.warn('没有缓存资源,无法匹配订阅') return + + # 记录重新识别过的种子 + _recognize_cached = [] + # 所有订阅 subscribes = self.subscribeoper.list('R') # 遍历订阅 @@ -538,8 +545,6 @@ def match(self, torrents: Dict[str, List[Context]]): domains = [] if subscribe.sites: domains = self.siteoper.get_domains_by_ids(subscribe.sites) - # 自定义识别词 - custom_words = subscribe.custom_words.split("\n") if subscribe.custom_words else [] # 识别媒体信息 mediainfo: MediaInfo = self.recognize_media(meta=meta, mtype=meta.type, tmdbid=subscribe.tmdbid, @@ -616,51 +621,45 @@ def match(self, torrents: Dict[str, List[Context]]): logger.debug(f"{torrent_info.site_name} - {torrent_info.title} 不符合订阅站点要求") continue - # 匹配订阅参数 - if not self.torrenthelper.filter_torrent(torrent_info=torrent_info, - filter_params=self.get_params(subscribe)): - continue - - # 先判断是否有没识别的种子,有则重新识别;如果订阅有自定义识别词,则不使用预识别的信息 - if not torrent_mediainfo \ - or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id) \ - or subscribe.custom_words: - if not subscribe.custom_words: - logger.info( - f'{torrent_info.site_name} - {torrent_info.title} 订阅缓存为未识别状态,' - f'尝试重新识别媒体信息...') - else: - logger.info( - f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词,' - f'正在重新识别元数据和媒体信息...') + # 有自定义识别词时,需要判断是否需要重新识别 + if subscribe.custom_words: + _, apply_words = WordsMatcher().prepare(torrent_info.title, + custom_words=subscribe.custom_words.split("\n")) + if apply_words: + logger.info(f'{torrent_info.site_name} - {torrent_info.title} 因订阅存在自定义识别词,重新识别元数据...') # 重新识别元数据 torrent_meta = MetaInfo(title=torrent_info.title, subtitle=torrent_info.description, - custom_words=custom_words) - # 重新识别媒体信息 - if subscribe.custom_words: + custom_words=subscribe.custom_word) + # 媒体信息需要重新识别 + torrent_mediainfo = None + + # 先判断是否有没识别的种子,否则重新识别 + if not torrent_mediainfo \ + or (not torrent_mediainfo.tmdb_id and not torrent_mediainfo.douban_id): + # 避免重复处理 + _cache_key = f"{torrent_meta.org_string}_{torrent_info.description}" + if _cache_key not in _recognize_cached: + _recognize_cached.append(_cache_key) + # 重新识别媒体信息 torrent_mediainfo = self.recognize_media(meta=torrent_meta) - else: - # 不使用识别缓存 - torrent_mediainfo = self.recognize_media(meta=torrent_meta, cache=False) if torrent_mediainfo: # 更新种子缓存 context.media_info = torrent_mediainfo - if not torrent_mediainfo: - # 通过标题匹配兜底 - logger.warn( - f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败,尝试通过标题匹配...') - if self.torrenthelper.match_torrent(mediainfo=mediainfo, - torrent_meta=torrent_meta, - torrent=torrent_info): - # 匹配成功 - logger.info( - f'{mediainfo.title_year} 通过标题匹配到可用资源:{torrent_info.site_name} - {torrent_info.title}') - if not subscribe.custom_words: + if not torrent_mediainfo: + # 通过标题匹配兜底 + logger.warn( + f'{torrent_info.site_name} - {torrent_info.title} 重新识别失败,尝试通过标题匹配...') + if self.torrenthelper.match_torrent(mediainfo=mediainfo, + torrent_meta=torrent_meta, + torrent=torrent_info): + # 匹配成功 + logger.info( + f'{mediainfo.title_year} 通过标题匹配到可选资源:{torrent_info.site_name} - {torrent_info.title}') # 更新种子缓存 torrent_mediainfo = mediainfo context.media_info = mediainfo - else: - continue + else: + continue # 直接比对媒体信息 if torrent_mediainfo and (torrent_mediainfo.tmdb_id or torrent_mediainfo.douban_id): @@ -673,7 +672,7 @@ def match(self, torrents: Dict[str, List[Context]]): and torrent_mediainfo.douban_id != mediainfo.douban_id: continue logger.info( - f'{mediainfo.title_year} 通过媒体信ID匹配到可用资源:{torrent_info.site_name} - {torrent_info.title}') + f'{mediainfo.title_year} 通过媒体信ID匹配到可选资源:{torrent_info.site_name} - {torrent_info.title}') else: continue @@ -715,6 +714,11 @@ def match(self, torrents: Dict[str, List[Context]]): logger.debug(f'{subscribe.name} 正在洗版,{torrent_info.title} 不是整季') continue + # 匹配订阅附加参数 + if not self.torrenthelper.filter_torrent(torrent_info=torrent_info, + filter_params=self.get_params(subscribe)): + continue + # 优先级过滤规则 if subscribe.best_version: rule_groups = subscribe.filter_groups \