rename crawl_strategy: default → automatic
BurnzZ committed May 2, 2024
1 parent 7c80fde commit dc947d1
Showing 2 changed files with 13 additions and 13 deletions.
16 changes: 8 additions & 8 deletions tests/test_ecommerce.py
@@ -32,9 +32,9 @@ def test_parameters():

     EcommerceSpider(url="https://example.com")
     EcommerceSpider(
-        url="https://example.com", crawl_strategy=EcommerceCrawlStrategy.default
+        url="https://example.com", crawl_strategy=EcommerceCrawlStrategy.automatic
     )
-    EcommerceSpider(url="https://example.com", crawl_strategy="default")
+    EcommerceSpider(url="https://example.com", crawl_strategy="automatic")

     with pytest.raises(ValidationError):
         EcommerceSpider(url="https://example.com", crawl_strategy="unknown")
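
For context on what this rename means for callers, here is a short sketch (not part of the commit) that mirrors the test above. The import path is assumed from the package layout shown later in this diff, and the old string value is expected to fail validation the same way the "unknown" case does:

# Sketch only: exercising the renamed strategy value from outside the test suite.
from pydantic import ValidationError

from zyte_spider_templates.spiders.ecommerce import (  # assumed import path
    EcommerceCrawlStrategy,
    EcommerceSpider,
)

# The renamed value is accepted both as an enum member and as a plain string.
EcommerceSpider(
    url="https://example.com", crawl_strategy=EcommerceCrawlStrategy.automatic
)
EcommerceSpider(url="https://example.com", crawl_strategy="automatic")

# "default" is no longer an EcommerceCrawlStrategy member, so argument
# validation should now reject it, just like the "unknown" case in the test.
try:
    EcommerceSpider(url="https://example.com", crawl_strategy="default")
except ValidationError as exc:
    print(exc)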
@@ -465,16 +465,16 @@ def test_metadata():
                 "enum": ["httpResponseBody", "browserHtml"],
             },
             "crawl_strategy": {
-                "default": "default",
+                "default": "automatic",
                 "description": "Determines how the start URL and follow-up URLs are crawled.",
                 "enumMeta": {
-                    "default": {
+                    "automatic": {
                         "description": (
                             "Follow pagination, subcategories, and product detail pages. "
                             "If starting on a homepage, it would attempt to discover other "
                             "URLs in the page using heuristics."
                         ),
-                        "title": "Default",
+                        "title": "Automatic",
                     },
                     "full": {
                         "description": (
@@ -500,7 +500,7 @@ def test_metadata():
                     },
                 },
                 "title": "Crawl strategy",
-                "enum": ["default", "full", "navigation", "pagination_only"],
+                "enum": ["automatic", "full", "navigation", "pagination_only"],
                 "type": "string",
             },
         },
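
The "default", "enumMeta", and "enum" entries checked above are generated from the Field() definition changed further down in this diff. Below is a minimal sketch of that pattern, assuming pydantic v2; the class names are illustrative, not the real ones:

# Sketch: how a Field() default plus json_schema_extra surfaces in the schema
# that the metadata test inspects. Illustrative names, pydantic v2 assumed.
import json
from enum import Enum

from pydantic import BaseModel, Field


class CrawlStrategy(str, Enum):
    automatic = "automatic"
    full = "full"


class Params(BaseModel):
    crawl_strategy: CrawlStrategy = Field(
        title="Crawl strategy",
        default=CrawlStrategy.automatic,
        json_schema_extra={
            "enumMeta": {
                CrawlStrategy.automatic: {"title": "Automatic"},
                CrawlStrategy.full: {"title": "Full"},
            },
        },
    )


# The dumped schema should show "default": "automatic" and the enumMeta block.
print(json.dumps(Params.model_json_schema(), indent=2, default=str))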
@@ -760,7 +760,7 @@ def assert_meta(has_page_params):
         assert result.meta == meta

     for i, crawl_strategy in enumerate(
-        ["default", "full", "navigation", "pagination_only"]
+        ["automatic", "full", "navigation", "pagination_only"]
     ):
         spider = EcommerceSpider.from_crawler(
             get_crawler(), url=url, crawl_strategy=crawl_strategy
@@ -774,7 +774,7 @@ def assert_meta(has_page_params):
 @pytest.mark.parametrize(
     "crawl_strategy,expected_page_params",
     (
-        ("default", {}),
+        ("automatic", {}),
         ("full", {"full_domain": "example.com"}),
         ("navigation", {}),
         ("pagination_only", {}),
10 changes: 5 additions & 5 deletions zyte_spider_templates/spiders/ecommerce.py
@@ -24,7 +24,7 @@

 @document_enum
 class EcommerceCrawlStrategy(str, Enum):
-    default: str = "default"
+    automatic: str = "automatic"
     """Follow pagination, subcategories, and product detail pages.

     If the starting URL points to a homepage, it would attempt to discover other
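
Because EcommerceCrawlStrategy subclasses str, the renamed member and the plain string "automatic" stay interchangeable, which is why the tests above can pass either form. A small self-contained sketch of that behaviour (the class below only mirrors the members seen in this diff; it is not the library class):

# Standalone sketch of the str-Enum behaviour the spider arguments rely on.
from enum import Enum


class EcommerceCrawlStrategy(str, Enum):  # mirror of the class in this diff
    automatic = "automatic"
    full = "full"
    navigation = "navigation"
    pagination_only = "pagination_only"


assert EcommerceCrawlStrategy.automatic == "automatic"
assert EcommerceCrawlStrategy("automatic") is EcommerceCrawlStrategy.automatic
assert "default" not in [member.value for member in EcommerceCrawlStrategy]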
@@ -51,16 +51,16 @@ class EcommerceCrawlStrategyParam(BaseModel):
     crawl_strategy: EcommerceCrawlStrategy = Field(
         title="Crawl strategy",
         description="Determines how the start URL and follow-up URLs are crawled.",
-        default=EcommerceCrawlStrategy.default,
+        default=EcommerceCrawlStrategy.automatic,
         json_schema_extra={
             "enumMeta": {
-                EcommerceCrawlStrategy.default: {
+                EcommerceCrawlStrategy.automatic: {
                     "description": (
                         "Follow pagination, subcategories, and product detail pages. "
                         "If starting on a homepage, it would attempt to discover other "
                         "URLs in the page using heuristics."
                     ),
-                    "title": "Default",
+                    "title": "Automatic",
                 },
                 EcommerceCrawlStrategy.full: {
                     "title": "Full",
@@ -145,7 +145,7 @@ def get_start_request(self, url):
             "crawling_logs": {"page_type": "productNavigation"},
         }
         if self.args.crawl_strategy == EcommerceCrawlStrategy.full or (
-            self.args.crawl_strategy == EcommerceCrawlStrategy.default
+            self.args.crawl_strategy == EcommerceCrawlStrategy.automatic
             and is_homepage(url)
         ):
             meta["page_params"] = {"full_domain": get_domain(url)}
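
This branch is the behavioural core of the rename: "full" always requests whole-domain crawling, while "automatic" (previously "default") only does so when the start URL looks like a homepage. Here is a self-contained sketch of that decision, with simplified stand-ins for the is_homepage() and get_domain() helpers used by the real spider:

# Sketch of the page_params decision in get_start_request(); the two helpers
# below are simplified stand-ins, not the library implementations.
from urllib.parse import urlsplit


def get_domain(url: str) -> str:
    return urlsplit(url).netloc


def is_homepage(url: str) -> bool:
    parts = urlsplit(url)
    return parts.path in ("", "/") and not parts.query


def start_page_params(crawl_strategy: str, url: str) -> dict:
    if crawl_strategy == "full" or (
        crawl_strategy == "automatic" and is_homepage(url)
    ):
        return {"full_domain": get_domain(url)}
    return {}


assert start_page_params("automatic", "https://example.com/") == {
    "full_domain": "example.com"
}
assert start_page_params("automatic", "https://example.com/category/shoes") == {}
assert start_page_params("full", "https://example.com/category/shoes") == {
    "full_domain": "example.com"
}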
