Skip to content

Commit

Permalink
Merge pull request #76 from scrapinghub/metadata-normalize
Browse files Browse the repository at this point in the history
Normalize the spider params schema.
  • Loading branch information
kmike authored Oct 9, 2023
2 parents 37ec9fa + 842716a commit 2cc3ff9
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 2 deletions.
2 changes: 1 addition & 1 deletion sh_scrapy/commands/shub_image_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def run(self, args, opts):
result['metadata'] = {}
for spider_name in result['spiders']:
spider_cls = self.crawler_process.spider_loader.load(spider_name)
metadata_dict = get_spider_metadata(spider_cls)
metadata_dict = get_spider_metadata(spider_cls, normalize=True)
try:
# make sure it's serializable
json.dumps(metadata_dict)
Expand Down
92 changes: 92 additions & 0 deletions tests/test_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,3 +343,95 @@ class MySpider(Spider):
if not SPIDER_METADATA_AVAILABLE:
del expected["metadata"]
assert data == expected


@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed")
def test_image_info_args(tmp_path):
    """shub-image-info emits a normalized param schema for an Args spider.

    The enum-valued ``tool`` parameter must appear as a flat JSON schema
    (inline ``enum`` + ``type``), i.e. the pydantic ``$ref``/``$defs``
    representation has been normalized away.
    """
    project_dir = create_project(tmp_path, spider_text="""
from enum import Enum
from scrapy import Spider
from scrapy_spider_metadata import Args
from pydantic import BaseModel, Field

class ToolEnum(Enum):
    spanner = "spanner"
    wrench = "wrench"

class Parameters(BaseModel):
    tool: ToolEnum = ToolEnum.spanner

class MySpider(Args[Parameters], Spider):
    name = "myspider"
""")
    out, _ = call_command(project_dir, "shub-image-info")
    data = json.loads(out)
    # The skipif marker guarantees SPIDER_METADATA_AVAILABLE is true here,
    # so "metadata" is always expected (no conditional `del` fallback needed,
    # unlike the unguarded test above).
    expected = {
        "project_type": "scrapy",
        "spiders": ["myspider"],
        "metadata": {
            "myspider": {
                "param_schema": {
                    "properties": {
                        "tool": {
                            "default": "spanner",
                            "enum": ["spanner", "wrench"],
                            "title": "Tool",
                            "type": "string",
                        },
                    },
                    "title": "Parameters",
                    "type": "object",
                },
            },
        },
    }
    assert data == expected


@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed")
def test_image_info_args_metadata(tmp_path):
    """shub-image-info merges a spider-defined ``metadata`` dict with the schema.

    A spider that both subclasses Args and declares its own ``metadata``
    attribute must have the custom keys (``foo``) reported alongside the
    normalized ``param_schema``.
    """
    project_dir = create_project(tmp_path, spider_text="""
from enum import Enum
from scrapy import Spider
from scrapy_spider_metadata import Args
from pydantic import BaseModel, Field

class ToolEnum(Enum):
    spanner = "spanner"
    wrench = "wrench"

class Parameters(BaseModel):
    tool: ToolEnum = ToolEnum.spanner

class MySpider(Args[Parameters], Spider):
    name = "myspider"
    metadata = {"foo": 42}
""")
    out, _ = call_command(project_dir, "shub-image-info")
    data = json.loads(out)
    # The skipif marker guarantees SPIDER_METADATA_AVAILABLE is true here,
    # so "metadata" is always expected (no conditional `del` fallback needed,
    # unlike the unguarded test above).
    expected = {
        "project_type": "scrapy",
        "spiders": ["myspider"],
        "metadata": {
            "myspider": {
                "foo": 42,
                "param_schema": {
                    "properties": {
                        "tool": {
                            "default": "spanner",
                            "enum": ["spanner", "wrench"],
                            "title": "Tool",
                            "type": "string",
                        },
                    },
                    "title": "Parameters",
                    "type": "object",
                },
            },
        },
    }
    assert data == expected
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ deps =
hubstorage
packaging
py36-scrapy16: Scrapy==1.6
scrapy-spider-metadata; python_version >= "3.8"
scrapy-spider-metadata>=0.1.1; python_version >= "3.8"
pydantic>=2; python_version >= "3.8"

commands =
pytest --verbose --cov=sh_scrapy --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: sh_scrapy tests}

0 comments on commit 2cc3ff9

Please sign in to comment.