Skip to content

Commit

Permalink
Do not allow combining crawl_strategy=direct_item with search_queries
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 5, 2024
1 parent 38ffe75 commit 4c91132
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
13 changes: 13 additions & 0 deletions tests/test_ecommerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@ def test_parameters():
with pytest.raises(ValidationError):
EcommerceSpider(url="https://example.com", crawl_strategy="unknown")

EcommerceSpider(
url="https://example.com", crawl_strategy="direct_item", search_queries=""
)
EcommerceSpider(
url="https://example.com", crawl_strategy="automatic", search_queries="foo"
)
with pytest.raises(ValidationError):
EcommerceSpider(
url="https://example.com",
crawl_strategy="direct_item",
search_queries="foo",
)


def test_start_requests():
url = "https://example.com"
Expand Down
14 changes: 13 additions & 1 deletion zyte_spider_templates/spiders/ecommerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Callable, Dict, Iterable, Optional, Union

import scrapy
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator
from scrapy import Request
from scrapy.crawler import Crawler
from scrapy_poet import DummyResponse, DynamicDeps
Expand Down Expand Up @@ -142,6 +142,18 @@ class EcommerceCrawlStrategyParam(BaseModel):
},
)

@model_validator(mode="after")
def validate_direct_item_and_search_queries(self):
    """Reject search queries when the crawl strategy is ``direct_item``.

    Runs as an "after" model validator, i.e. on the already-built model
    instance, so both fields can be inspected together.

    Returns:
        The model instance itself, unchanged, when the combination of
        parameters is acceptable.

    Raises:
        ValueError: If ``search_queries`` is truthy while
            ``crawl_strategy`` equals ``EcommerceCrawlStrategy.direct_item``.
    """
    # An empty/unset search_queries is compatible with every strategy.
    if not self.search_queries:
        return self
    if self.crawl_strategy == EcommerceCrawlStrategy.direct_item:
        raise ValueError(
            "Cannot combine the direct_item value of the crawl_strategy "
            "spider parameter with the search_queries spider parameter."
        )
    return self


class EcommerceSpiderParams(
CustomAttrsMethodParam,
Expand Down

0 comments on commit 4c91132

Please sign in to comment.