From dc22d80212e4a5d59f100c601cd9bf07c6d4185d Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 9 Oct 2023 13:19:25 +0400 Subject: [PATCH 1/2] Normalize the spider params schema. --- sh_scrapy/commands/shub_image_info.py | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sh_scrapy/commands/shub_image_info.py b/sh_scrapy/commands/shub_image_info.py index 70bd3db..cc95a17 100644 --- a/sh_scrapy/commands/shub_image_info.py +++ b/sh_scrapy/commands/shub_image_info.py @@ -45,7 +45,7 @@ def run(self, args, opts): result['metadata'] = {} for spider_name in result['spiders']: spider_cls = self.crawler_process.spider_loader.load(spider_name) - metadata_dict = get_spider_metadata(spider_cls) + metadata_dict = get_spider_metadata(spider_cls, normalize=True) try: # make sure it's serializable json.dumps(metadata_dict) diff --git a/tox.ini b/tox.ini index c1b6641..a205e75 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ deps = hubstorage packaging py36-scrapy16: Scrapy==1.6 - scrapy-spider-metadata; python_version >= "3.8" + scrapy-spider-metadata>=0.1.1; python_version >= "3.8" commands = pytest --verbose --cov=sh_scrapy --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: sh_scrapy tests} From 842716a48c98cbf13ba22c0cef71d31e8c23bb5b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 9 Oct 2023 13:52:28 +0400 Subject: [PATCH 2/2] Add tests for retrieving spider params schema. --- tests/test_crawl.py | 92 +++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 1 + 2 files changed, 93 insertions(+) diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 329e4c6..749d844 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -343,3 +343,95 @@ class MySpider(Spider): if not SPIDER_METADATA_AVAILABLE: del expected["metadata"] assert data == expected + + +@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed") +def test_image_info_args(tmp_path): + project_dir = create_project(tmp_path, spider_text=""" +from enum import Enum +from scrapy import Spider +from scrapy_spider_metadata import Args +from pydantic import BaseModel, Field + +class ToolEnum(Enum): + spanner = "spanner" + wrench = "wrench" + +class Parameters(BaseModel): + tool: ToolEnum = ToolEnum.spanner + +class MySpider(Args[Parameters], Spider): + name = "myspider" +""") + out, _ = call_command(project_dir, "shub-image-info") + data = json.loads(out) + expected = { + "project_type": "scrapy", + "spiders": ["myspider"], + "metadata": { + "myspider": { + "param_schema": { + "properties": { + "tool": { + "default": "spanner", + "enum": ["spanner", "wrench"], + "title": "Tool", + "type": "string", + }, + }, + "title": "Parameters", + "type": "object", + }, + }, + }, + } + if not SPIDER_METADATA_AVAILABLE: + del expected["metadata"] + assert data == expected + + +@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed") +def test_image_info_args_metadata(tmp_path): + project_dir = create_project(tmp_path, spider_text=""" +from enum import Enum +from scrapy import Spider +from scrapy_spider_metadata import Args +from pydantic import BaseModel, Field + +class ToolEnum(Enum): + spanner = "spanner" + wrench = "wrench" + +class Parameters(BaseModel): + tool: ToolEnum = ToolEnum.spanner + +class MySpider(Args[Parameters], Spider): + name = "myspider" + metadata = {"foo": 42} +""") + out, _ = call_command(project_dir, "shub-image-info") + data = json.loads(out) + expected = { + "project_type": "scrapy", + "spiders": ["myspider"], + "metadata": { + "myspider": { + "foo": 42, + "param_schema": { + "properties": { + "tool": { + "default": "spanner", + "enum": ["spanner", "wrench"], + "title": "Tool", + "type": "string", + }, + }, + "title": "Parameters", + "type": "object", + }, + }, + }, + } + if not SPIDER_METADATA_AVAILABLE: + del expected["metadata"] + assert data == expected diff --git a/tox.ini b/tox.ini index a205e75..e6b5829 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,7 @@ deps = packaging py36-scrapy16: Scrapy==1.6 scrapy-spider-metadata>=0.1.1; python_version >= "3.8" + pydantic>=2; python_version >= "3.8" commands = pytest --verbose --cov=sh_scrapy --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: sh_scrapy tests}