Merge pull request #85 from scrapinghub/fix-runscript
Fix running scripts with importlib installed.
wRAR authored Jul 8, 2024
2 parents f31e458 + 0657553 commit 190d3ff
Showing 2 changed files with 38 additions and 10 deletions.
38 changes: 33 additions & 5 deletions sh_scrapy/crawl.py
@@ -124,12 +124,17 @@ def _run_pkgscript(argv):
     scriptname = argv[0]
     sys.argv = argv
 
+    try:
+        import importlib.metadata
+        has_importlib = True
+    except ImportError:
+        import pkg_resources
+        has_importlib = False
+
     def get_distribution():
-        try:
-            import importlib.metadata
+        if has_importlib:
             eps = importlib.metadata.entry_points(group='scrapy')
-        except ImportError:
-            import pkg_resources
+        else:
             eps = pkg_resources.WorkingSet().iter_entry_points('scrapy')
 
         for ep in eps:
@@ -139,7 +144,30 @@ def get_distribution():
     d = get_distribution()
     if not d:
         raise ValueError(SCRAPY_SETTINGS_ENTRYPOINT_NOT_FOUND)
-    d.run_script(scriptname, {'__name__': '__main__'})
+    ns = {"__name__": "__main__"}
+    if has_importlib:
+        _run_script(d, scriptname, ns)
+    else:
+        d.run_script(scriptname, ns)
+
+
+def _run_script(dist, script_name, namespace):
+    # An importlib-based replacement for pkg_resources.NullProvider.run_script().
+    # It's possible that this doesn't support all cases that pkg_resources does,
+    # so it may need to be improved when those are discovered.
+    # Using a private attribute (dist._path) seems to be necessary to get the
+    # full file path, but it's only needed for diagnostic messages so it should
+    # be easy to fix this by moving to relative paths if this API is removed.
+    script = "scripts/" + script_name
+    source = dist.read_text(script)
+    if not source:
+        raise ValueError(
+            f"Script {script!r} not found in metadata at {dist._path!r}"
+        )
+    script_filename = dist._path.joinpath(script)
+    code = compile(source, str(script_filename), "exec")
+    exec(code, namespace, namespace)
+
 
 
 def _run_usercode(spider, args, apisettings_func,
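For context on the new code path: importlib.metadata.entry_points(group=...) returns the entry points registered under a group, each entry point carries a dist attribute referencing the owning Distribution, and Distribution.read_text() returns the contents of a file from the distribution's metadata directory, or None when the file is absent (which is what the "if not source" check in _run_script() relies on). Compiling the source with the full script_filename keeps tracebacks from the exec'd script pointing at the real file. Below is a minimal standalone sketch of these calls, not part of the commit; it assumes Python 3.10+, and the script path 'scripts/myscript.py' is hypothetical.

import importlib.metadata

# Locate the distribution that registered a 'scrapy' entry point, as
# get_distribution() above does.
dist = None
for ep in importlib.metadata.entry_points(group='scrapy'):
    if ep.name == 'settings':
        dist = ep.dist  # the Distribution that owns the entry point
        break

if dist is not None:
    # read_text() returns the file's text from the distribution's metadata
    # directory, or None if no such file exists.
    source = dist.read_text('scripts/myscript.py')  # hypothetical script name
    print(source is not None)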
10 changes: 5 additions & 5 deletions tests/test_crawl.py
@@ -182,19 +182,19 @@ def get_entry_points_mock():
     """Helper to configure a fake entry point"""
     ep = mock.Mock()
     ep.name = 'settings'
-    ep.dist.run_script = mock.Mock()
+    ep.dist.run_script = mock.Mock() # only for the pkg_resources code path
     return [ep]
 
 @unittest.skipIf(sys.version_info < (3,8), "Requires Python 3.8 or higher")
+@mock.patch('sh_scrapy.crawl._run_script')
 @mock.patch('importlib.metadata.entry_points')
-def test_run_pkgscript_base_usage_python_3_8_plus(entry_points_mock):
+def test_run_pkgscript_base_usage_python_3_8_plus(entry_points_mock, mocked_run):
     entry_points_mock.return_value = get_entry_points_mock()
     _run_pkgscript(['py:script.py', 'arg1', 'arg2'])
     assert entry_points_mock.called
     assert entry_points_mock.call_args[1] == {'group': 'scrapy'}
     ep = entry_points_mock.return_value[0]
-    assert ep.dist.run_script.called
-    assert ep.dist.run_script.call_args[0] == ('script.py', {'__name__': '__main__'})
+    assert mocked_run.called
+    assert mocked_run.call_args[0][1:] == ('script.py', {'__name__': '__main__'})
     assert sys.argv == ['script.py', 'arg1', 'arg2']


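A note on the reworked test, since stacked mock.patch decorators are easy to misread: decorators apply bottom-up, so the innermost patch (importlib.metadata.entry_points) supplies the first mock argument and the new outer patch (sh_scrapy.crawl._run_script) supplies the second, which is why mocked_run is appended after entry_points_mock in the signature. The call_args[0][1:] slice skips the distribution object that _run_script() now receives as its first positional argument. Below is a standalone illustration of the ordering rule, patching two stdlib functions rather than anything from this repository.

import os
from unittest import mock

@mock.patch('os.listdir')  # outer patch -> injected second
@mock.patch('os.getcwd')   # inner patch -> injected first
def test_patch_order(mocked_getcwd, mocked_listdir):
    mocked_getcwd.return_value = '/tmp'
    assert os.getcwd() == '/tmp'
    assert os.listdir is mocked_listdir  # the outer patch is active too

test_patch_order()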
