diff --git a/ceci/main.py b/ceci/main.py index 9d3ddad..fa8504e 100644 --- a/ceci/main.py +++ b/ceci/main.py @@ -67,6 +67,8 @@ def run(pipeline_config_filename, extra_config=None, dry_run=False): # parsl execution/launcher configuration information launcher_config = pipe_config.get("launcher", {"name": "mini"}) launcher_name = launcher_config["name"] + # Launchers may need to know if this is a dry-run + launcher_config["dry_run"] = dry_run # Python modules in which to search for pipeline stages modules = pipe_config["modules"].split() diff --git a/ceci/sites/__init__.py b/ceci/sites/__init__.py index b3cc1f2..1cc4e2c 100644 --- a/ceci/sites/__init__.py +++ b/ceci/sites/__init__.py @@ -57,10 +57,17 @@ def load(launcher_config, site_configs): sites = [] launcher_name = launcher_config["name"] + dry_run = launcher_config.get("dry_run", False) # Create an object for each site. for site_config in site_configs: site_name = site_config["name"] + # Also tell the sites whether this is a dry-run. + # for example, the cori site checks you're not + # trying to run srun on a login node, but we skip + # that test if we are not actually running the command, + # just printing it. + site_config["dry_run"] = dry_run try: cls = site_classes[site_name] diff --git a/ceci/sites/cori.py b/ceci/sites/cori.py index 1f747b7..25de6b6 100644 --- a/ceci/sites/cori.py +++ b/ceci/sites/cori.py @@ -35,7 +35,11 @@ def command(self, cmd, sec): if sec.nodes: mpi1 += f" --nodes {sec.nodes}" - if (sec.nprocess > 1) and (os.environ.get("SLURM_JOB_ID") is None): + if ( + (sec.nprocess > 1) + and (os.environ.get("SLURM_JOB_ID") is None) + and (not self.config.get("dry_run")) + ): raise ValueError( "You cannot use MPI (by setting nprocess > 1) " "on Cori login nodes, only inside jobs." 
diff --git a/tests/test_site.py b/tests/test_site.py
new file mode 100644
--- /dev/null
+++ b/tests/test_site.py
@@ -0,0 +1,49 @@
+import os
+from unittest import mock
+
+from ceci.sites import load, get_default_site
+from ceci.pipeline import StageExecutionConfig
+from ceci.main import run
+import pytest
+
+
+def test_cori_error():
+    """Check the Cori login-node MPI guard and its dry-run bypass.
+
+    Requesting a multi-process command from the cori-interactive site
+    outside a Slurm job must raise ValueError, unless the launcher
+    configuration sets dry_run, in which case no error is raised.
+    """
+    launcher_config = {
+        "name": "mini",
+        "interval": 1.0,
+    }
+    site_config = {
+        "name": "cori-interactive",
+    }
+
+    stage_config = {
+        "name": "Test",
+        "nprocess": 2,
+    }
+
+    # The guard only fires when SLURM_JOB_ID is unset.  Remove it for the
+    # duration of the test so that running the suite inside a Slurm job
+    # does not suppress the expected ValueError.  patch.dict restores the
+    # original environment on exit.
+    with mock.patch.dict(os.environ):
+        os.environ.pop("SLURM_JOB_ID", None)
+
+        load(launcher_config, [site_config])
+        site = get_default_site()
+        sec = StageExecutionConfig(stage_config)
+
+        # should fail if we don't set dry-run
+        with pytest.raises(ValueError):
+            site.command("xxx", sec)
+
+        # should work if we do set dry-run
+        launcher_config["dry_run"] = True
+        load(launcher_config, [site_config])
+        site = get_default_site()
+        site.command("xxx", sec)