zytedata · Gallaecio · Apr 16, 2024 · Apr 16, 2024
diff --git a/tests/test_ecommerce.py b/tests/test_ecommerce.py
@@ -395,17 +395,18 @@ def test_metadata():
                     "title": "URL",
                     "type": "string",
                 },
-                "seed_url": {
+                "urls_file": {
                     "default": "",
                     "description": (
-                        "URL that point to a list of URLs to crawl, e.g. "
+                        "URL that point to a plain-text file with a list of "
+                        "URLs to crawl, e.g. "
                         "https://example.com/url-list.txt. The linked list "
                         "must contain 1 URL per line."
                     ),
                     "exclusiveRequired": True,
                     "group": "inputs",
                     "pattern": r"^https?://[^:/\s]+(:\d{1,5})?(/[^\s]*)*(#[^\s]*)?$",
-                    "title": "Seed URL",
+                    "title": "URLs file",
                     "type": "string",
                 },
                 "geolocation": {
@@ -698,7 +699,7 @@ def test_input_multiple():
         EcommerceSpider.from_crawler(
             crawler,
             url="https://a.example",
-            seed_url="https://b.example",
+            urls_file="https://b.example",
         )
 
 
@@ -708,7 +709,7 @@ def test_url_invalid():
         EcommerceSpider.from_crawler(crawler, url="foo")
 
 
-def test_seed_url():
+def test_urls_file():
     crawler = get_crawler()
     url = "https://example.com"
 
@@ -718,7 +719,7 @@ def test_seed_url():
             b"https://a.example\n \nhttps://b.example\nhttps://c.example\n\n"
         )
         mock_get.return_value = response
-        spider = EcommerceSpider.from_crawler(crawler, seed_url=url)
+        spider = EcommerceSpider.from_crawler(crawler, urls_file=url)
         mock_get.assert_called_with(url)
 
     start_requests = list(spider.start_requests())

diff --git a/zyte_spider_templates/params.py b/zyte_spider_templates/params.py
@@ -78,13 +78,13 @@ class MaxRequestsParam(BaseModel):
     )
 
 
-class SeedUrlParam(BaseModel):
-    seed_url: str = Field(
-        title="Seed URL",
+class UrlsFileParam(BaseModel):
+    urls_file: str = Field(
+        title="URLs file",
         description=(
-            "URL that point to a list of URLs to crawl, e.g. "
-            "https://example.com/url-list.txt. The linked list must contain 1 "
-            "URL per line."
+            "URL that point to a plain-text file with a list of URLs to "
+            "crawl, e.g. https://example.com/url-list.txt. The linked list "
+            "must contain 1 URL per line."
         ),
         pattern=_URL_PATTERN,
         default="",

diff --git a/zyte_spider_templates/spiders/base.py b/zyte_spider_templates/spiders/base.py
@@ -9,21 +9,21 @@
     ExtractFromParam,
     GeolocationParam,
     MaxRequestsParam,
-    SeedUrlParam,
     UrlParam,
+    UrlsFileParam,
 )
 
 # Higher priority than command-line-defined settings (40).
 ARG_SETTING_PRIORITY: int = 50
 
-_INPUT_FIELDS = ("url", "seed_url")
+_INPUT_FIELDS = ("url", "urls_file")
 
 
 class BaseSpiderParams(
     ExtractFromParam,
     MaxRequestsParam,
     GeolocationParam,
-    SeedUrlParam,
+    UrlsFileParam,
     UrlParam,
     BaseModel,
 ):
@@ -47,7 +47,7 @@ def single_input(self):
         """Fields
         :class:`~zyte_spider_templates.spiders.ecommerce.EcommerceSpiderParams.url`
         and
-        :class:`~zyte_spider_templates.spiders.ecommerce.EcommerceSpiderParams.seed_url`
+        :class:`~zyte_spider_templates.spiders.ecommerce.EcommerceSpiderParams.urls_file`
         form a mandatory, mutually-exclusive field group: one of them must be
         defined, the rest must not be defined."""
         input_fields = set(

diff --git a/zyte_spider_templates/spiders/ecommerce.py b/zyte_spider_templates/spiders/ecommerce.py
@@ -99,11 +99,11 @@ def from_crawler(cls, crawler: Crawler, *args, **kwargs) -> scrapy.Spider:
         return spider
 
     def _init_input(self):
-        seed_url = self.args.seed_url
-        if seed_url:
-            response = requests.get(seed_url)
+        urls_file = self.args.urls_file
+        if urls_file:
+            response = requests.get(urls_file)
             urls = load_url_list(response.text)
-            self.logger.info(f"Loaded {len(urls)} initial URLs from {seed_url}.")
+            self.logger.info(f"Loaded {len(urls)} initial URLs from {urls_file}.")
             self.start_urls = urls
         else:
             self.start_urls = [self.args.url]