Skip to content

Commit

Permalink
Fixed: Only parse HTML when descending or showing links
Browse files Browse the repository at this point in the history
  • Loading branch information
mnot committed Dec 26, 2023
1 parent 2946031 commit dcb01fa
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 5 deletions.
4 changes: 1 addition & 3 deletions redbot/formatter/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,7 @@ def format_body_sample(self, resource: HttpResource) -> Markup:
except (TypeError, LookupError):
uni_sample = sample.decode("utf-8", "replace")
safe_sample = escape(uni_sample)
if self.config.getboolean("content_links", False) and hasattr(
resource, "links"
):
if self.config.getboolean("content_links", False):
for _, link_set in list(resource.links.items()):
for link in link_set:
if len(link) > 8000: # avoid processing inline assets through regex
Expand Down
2 changes: 1 addition & 1 deletion redbot/formatter/templates/response_finish.html
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
</div>
{% endif %}

{% if is_resource and resource.link_count > 0 %}
{% if is_resource and resource.descendable() %}
<div class='option'>
{{ descend_link }}
</div>
Expand Down
12 changes: 11 additions & 1 deletion redbot/resource/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def __init__(self, config: SectionProxy, descend: bool = False) -> None:
self._link_parser = link_parse.HTMLLinkParser(
self.response, [self.process_link]
)
self.response_content_processors.append(self._link_parser.feed_bytes)
if self.descend or config.getboolean("content_links", False):
self.response_content_processors.append(self._link_parser.feed_bytes)

def run_active_checks(self) -> None:
"""
Expand All @@ -71,6 +72,15 @@ def run_active_checks(self) -> None:
else:
self.finish_check()

def descendable(self) -> bool:
    """
    Return whether this resource can be descended.

    The decision is based on the response's parsed Content-Type header:
    the first element of the parsed value is looked up in the link
    parser's `link_parseable_types`. A response with no (or an empty)
    parsed Content-Type is not descendable.
    """
    content_type = self.response.headers.parsed.get("content-type")
    # Guard the index: a missing, None or empty parsed value must yield
    # "not descendable" rather than raising TypeError/IndexError.
    media_type = content_type[0] if content_type else None
    return media_type in self._link_parser.link_parseable_types

def add_check(self, *resources: RedFetcher) -> None:
"Remember a subordinate check on one or more HttpResource instance."
# pylint: disable=cell-var-from-loop
Expand Down

0 comments on commit dcb01fa

Please sign in to comment.