Skip to content

Commit

Permalink
add a log after the result of is_homepage()
Browse files: browse the repository at this point in the history
  • Loading branch information
BurnzZ committed May 6, 2024
1 parent 19604aa commit b4ffc30
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions zyte_spider_templates/spiders/ecommerce.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,11 +146,21 @@ def get_start_request(self, url):
meta = {
"crawling_logs": {"page_type": "productNavigation"},
}
if self.args.crawl_strategy == EcommerceCrawlStrategy.full or (
self.args.crawl_strategy == EcommerceCrawlStrategy.automatic
and is_homepage(url)
):
if self.args.crawl_strategy == EcommerceCrawlStrategy.full:
meta["page_params"] = {"full_domain": get_domain(url)}
elif self.args.crawl_strategy == EcommerceCrawlStrategy.automatic:
if is_homepage(url):
meta["page_params"] = {"full_domain": get_domain(url)}
self.logger.info(
f"[Automatic Strategy] The input URL {url} seems to be a homepage. "
f"Heuristics will be used on it to crawl other pages which might have products."
)
else:
self.logger.info(
f"[Automatic Strategy] The input URL {url} doesn't seem to be a homepage. "
f"Heuristics won't be used to crawl other pages which might have products."
)

return Request(
url=url,
callback=self.parse_navigation,
Expand Down

0 comments on commit b4ffc30

Please sign in to comment.