From ada40f9c3c3aff1e06b39171e508d39e5f3193a7 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 3 Nov 2022 20:12:47 -0700 Subject: [PATCH 01/19] add datalad to pydra.tasks --- pydra/tasks/datalad.py | 134 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 pydra/tasks/datalad.py diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py new file mode 100644 index 0000000000..2c67b39f5c --- /dev/null +++ b/pydra/tasks/datalad.py @@ -0,0 +1,134 @@ +"""A :obj:`~nipype.interfaces.utility.base.IdentityInterface` with a grafted Datalad getter.""" + +from msilib.schema import Directory +from pathlib import Path +from pydra.engine.specs import ( + File, + Directory, + SpecInfo, + BaseSpec, + FunctionTask, + ) +from pydra.engine.task import FunctionTask +from pydra.engine.helpers_file import fname_presuffix, split_filename + +import os +import logging +import datalad.api as dl + +logger = logging.getLogger("pydra.tasks.datalad") + +input_fields = [ + ( + "in_file", + str, + { + "help_string": "Path to the data to be downloaded through datalad", + "mandatory": True, + } + + ), + ( + "dataset_path", + Directory, + { + "help_string": "Path to the dataset that will be used to get data", + "mandatory": True, + } + ), + ( + "dataset_url", + str, + { + "help_string": "URL to the dataset that will be used to get data", + } + ) +] + +DataladIdentity_input_spec = SpecInfo( + name="DataladIdentityInputSpec", + fields=input_fields, + bases=(BaseSpec,), +) + +output_fields = [ + ( + "out_file", + File, + { + "help_string": "file downloaded through datalad", + "requires": ["in_file"], + "output_file_template": "{in_file}", + }, + ) +] + +DataladIdentity_output_spec = SpecInfo( + name="DataladIdentityOutputSpec", + fields=output_fields, + bases=(BaseSpec,), +) + +class DataladIdentityInterface(FunctionTask): + """Sneaks a ``datalad get`` in paths, if datalad is available.""" + + input_spec = DataladIdentity_input_spec + output_spec = DataladIdentity_output_spec + + def _run_interface(self, runtime): + import attr + + inputs = self.inputs.get() + dataset_path = inputs.pop("dataset_path") + + if not (Path(dataset_path) / ".datalad").exists(): + logger.info("Datalad interface without dataset path defined.") + try: + dataset_url = inputs.pop("dataset_url") + os.makedirs(dataset_path, exist_ok=True) + dl.install(source=dataset_url, path=dataset_path) + except Exception as e: + logger.error(e) + raise e + + dataset_path = Path(dataset_path) + for field, value in inputs.items(): + if value in [None, attr.NOTHING]: + continue + + _pth = Path(value) + if not _pth.is_absolute(): + _pth = dataset_path / _pth + + _datalad_candidate = _pth.is_symlink() and not _pth.exists() + if not _datalad_candidate: + logger.warning("datalad was required but not found") + return runtime + + if _datalad_candidate: + try: + result = dl.get( + _pth, + dataset=dataset_path + ) + except Exception as exc: + logger.warning(f"datalad get on {_pth} failed.") + ## need to discuss with Dorota, this env was specified in mriqc + ## do we still need it for pydra? + # if ( + # config.environment.exec_env == "docker" + # and ("This repository is not initialized for use by git-annex, " + # "but .git/annex/objects/ exists") in f"{exc}" + # ): + # logger.warning( + # "Execution seems containerirzed with Docker, please make sure " + # "you are not running as root. To do so, please add the argument " + # "``-u $(id -u):$(id -g)`` to your command line." 
+ # ) + # else: + # logger.warning(str(exc)) + else: + if result[0]["status"] == "error": + logger.warning(f"datalad get failed: {result}") + + return runtime From e1566a580b2b8354ada58c18b02f8e4789ca242a Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 3 Nov 2022 20:25:06 -0700 Subject: [PATCH 02/19] fix typo --- pydra/tasks/datalad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py index 2c67b39f5c..e20b66678a 100644 --- a/pydra/tasks/datalad.py +++ b/pydra/tasks/datalad.py @@ -7,10 +7,8 @@ Directory, SpecInfo, BaseSpec, - FunctionTask, ) from pydra.engine.task import FunctionTask -from pydra.engine.helpers_file import fname_presuffix, split_filename import os import logging @@ -92,6 +90,7 @@ def _run_interface(self, runtime): raise e dataset_path = Path(dataset_path) + # check the in_file is in the dataset for field, value in inputs.items(): if value in [None, attr.NOTHING]: continue From cae23b1bbaa65764dfbe352f539c36517d242ef6 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 10 Nov 2022 12:12:55 -0800 Subject: [PATCH 03/19] create datalad interface and test --- pydra/tasks/datalad.py | 33 +++++++++++---------- pydra/tasks/tests/test_datalad.py | 49 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 pydra/tasks/tests/test_datalad.py diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py index e20b66678a..5e85dfe7c7 100644 --- a/pydra/tasks/datalad.py +++ b/pydra/tasks/datalad.py @@ -1,6 +1,5 @@ """A :obj:`~nipype.interfaces.utility.base.IdentityInterface` with a grafted Datalad getter.""" -from msilib.schema import Directory from pathlib import Path from pydra.engine.specs import ( File, @@ -12,7 +11,6 @@ import os import logging -import datalad.api as dl logger = logging.getLogger("pydra.tasks.datalad") @@ -43,8 +41,8 @@ ) ] -DataladIdentity_input_spec = SpecInfo( - name="DataladIdentityInputSpec", +Datalad_input_spec = SpecInfo( + name="DataladInputSpec", fields=input_fields, bases=(BaseSpec,), ) @@ -61,17 +59,16 @@ ) ] -DataladIdentity_output_spec = SpecInfo( - name="DataladIdentityOutputSpec", +Datalad_output_spec = SpecInfo( + name="DataladOutputSpec", fields=output_fields, bases=(BaseSpec,), ) -class DataladIdentityInterface(FunctionTask): - """Sneaks a ``datalad get`` in paths, if datalad is available.""" +class DataladInterface(FunctionTask): - input_spec = DataladIdentity_input_spec - output_spec = DataladIdentity_output_spec + input_spec = Datalad_input_spec + output_spec = Datalad_output_spec def _run_interface(self, runtime): import attr @@ -79,6 +76,13 @@ def _run_interface(self, runtime): inputs = self.inputs.get() dataset_path = inputs.pop("dataset_path") + _dl_found = False + try: + import datalad.api as dl + _dl_found = True + except: + raise ImportError("Datalad is not installed.") + if not (Path(dataset_path) / ".datalad").exists(): logger.info("Datalad interface without dataset path defined.") try: @@ -87,7 +91,6 @@ def _run_interface(self, runtime): dl.install(source=dataset_url, path=dataset_path) except Exception as e: logger.error(e) - raise e dataset_path = Path(dataset_path) # check the in_file is in the dataset @@ -112,7 +115,7 @@ def _run_interface(self, runtime): ) except Exception as exc: logger.warning(f"datalad get on {_pth} failed.") - ## need to discuss with Dorota, this env was specified in mriqc + ## discussed with @djarecka, we keep it commented here for now ## do we still need it for pydra? 
# if ( # config.environment.exec_env == "docker" @@ -126,8 +129,8 @@ def _run_interface(self, runtime): # ) # else: # logger.warning(str(exc)) - else: - if result[0]["status"] == "error": - logger.warning(f"datalad get failed: {result}") + else: + if result[0]["status"] == "error": + logger.warning(f"datalad get failed: {result}") return runtime diff --git a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py new file mode 100644 index 0000000000..c8cabca920 --- /dev/null +++ b/pydra/tasks/tests/test_datalad.py @@ -0,0 +1,49 @@ +import typing as ty +import os, sys +import attr +import pytest + + +""" +Functions to test datalad interface +""" +def test_datalad_interface(tmpdir): + """ + Testing datalad interface + """ + import datalad.api as dl + from pydra.tasks.datalad import DataladInterface + from pydra.engine.core import Workflow + from pydra.engine.submitter import Submitter + from pydra.engine.helpers import hash_value + + # change PosixPath to str + tmpdir = str(tmpdir) + # creating a dataset + ds = dl.create(tmpdir) + ds.save() + ds_path = ds.pathobj + + # creating a file to download + file_path = ds_path / "file.txt" + file_path.write_text("test") + ds.save() + + # creating a workflow + wf = Workflow(name="wf", input_spec=["dataset_path", "dataset_url", "in_file"]) + wf.inputs.dataset_path = ds_path + wf.inputs.dataset_url = "" + wf.inputs.in_file = "file.txt" + + # adding datalad task + wf.add(DataladInterface(name="dl", in_file=wf.lzin.in_file, dataset_path=wf.lzin.dataset_path, dataset_url=wf.lzin.dataset_url)) + + # running the workflow + with Submitter(plugin="cf") as sub: + sub(wf) + + # checking if the file was downloaded + assert (wf.result().output.out_file.exists()) + +# Path: pydra/tasks/tests/test_datalad.py +# Compare this snippet from pydra/tasks/datalad.py: \ No newline at end of file From 45b61b7f16e70900d64b007c5a0ad56eb873aad4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Nov 2022 20:18:48 +0000 Subject: [PATCH 04/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydra/tasks/datalad.py | 22 ++++++++++------------ pydra/tasks/tests/test_datalad.py | 22 ++++++++++++++++------ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py index 5e85dfe7c7..e1b7a18539 100644 --- a/pydra/tasks/datalad.py +++ b/pydra/tasks/datalad.py @@ -4,9 +4,9 @@ from pydra.engine.specs import ( File, Directory, - SpecInfo, + SpecInfo, BaseSpec, - ) +) from pydra.engine.task import FunctionTask import os @@ -21,8 +21,7 @@ { "help_string": "Path to the data to be downloaded through datalad", "mandatory": True, - } - + }, ), ( "dataset_path", @@ -30,15 +29,15 @@ { "help_string": "Path to the dataset that will be used to get data", "mandatory": True, - } + }, ), ( "dataset_url", str, { "help_string": "URL to the dataset that will be used to get data", - } - ) + }, + ), ] Datalad_input_spec = SpecInfo( @@ -65,6 +64,7 @@ bases=(BaseSpec,), ) + class DataladInterface(FunctionTask): input_spec = Datalad_input_spec @@ -79,6 +79,7 @@ def _run_interface(self, runtime): _dl_found = False try: import datalad.api as dl + _dl_found = True except: raise ImportError("Datalad is not installed.") @@ -95,7 +96,7 @@ def _run_interface(self, runtime): dataset_path = Path(dataset_path) # check the in_file is in the dataset for field, value in inputs.items(): - if value in [None, attr.NOTHING]: + if value in 
[None, attr.NOTHING]: continue _pth = Path(value) @@ -109,10 +110,7 @@ def _run_interface(self, runtime): if _datalad_candidate: try: - result = dl.get( - _pth, - dataset=dataset_path - ) + result = dl.get(_pth, dataset=dataset_path) except Exception as exc: logger.warning(f"datalad get on {_pth} failed.") ## discussed with @djarecka, we keep it commented here for now diff --git a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py index c8cabca920..a2743d7029 100644 --- a/pydra/tasks/tests/test_datalad.py +++ b/pydra/tasks/tests/test_datalad.py @@ -7,13 +7,15 @@ """ Functions to test datalad interface """ + + def test_datalad_interface(tmpdir): """ Testing datalad interface """ import datalad.api as dl from pydra.tasks.datalad import DataladInterface - from pydra.engine.core import Workflow + from pydra.engine.core import Workflow from pydra.engine.submitter import Submitter from pydra.engine.helpers import hash_value @@ -34,16 +36,24 @@ def test_datalad_interface(tmpdir): wf.inputs.dataset_path = ds_path wf.inputs.dataset_url = "" wf.inputs.in_file = "file.txt" - + # adding datalad task - wf.add(DataladInterface(name="dl", in_file=wf.lzin.in_file, dataset_path=wf.lzin.dataset_path, dataset_url=wf.lzin.dataset_url)) + wf.add( + DataladInterface( + name="dl", + in_file=wf.lzin.in_file, + dataset_path=wf.lzin.dataset_path, + dataset_url=wf.lzin.dataset_url, + ) + ) # running the workflow with Submitter(plugin="cf") as sub: sub(wf) - + # checking if the file was downloaded - assert (wf.result().output.out_file.exists()) + assert wf.result().output.out_file.exists() + # Path: pydra/tasks/tests/test_datalad.py -# Compare this snippet from pydra/tasks/datalad.py: \ No newline at end of file +# Compare this snippet from pydra/tasks/datalad.py: From 7d669745c96683123a11361a8862a74014e0dec5 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 17 Nov 2022 22:41:41 -0800 Subject: [PATCH 05/19] improve datalad interface and test --- pydra/tasks/datalad.py | 179 +++++++++++++++++++----------- pydra/tasks/tests/test_datalad.py | 25 +++-- setup.cfg | 2 + 3 files changed, 130 insertions(+), 76 deletions(-) diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py index 5e85dfe7c7..c2507822d0 100644 --- a/pydra/tasks/datalad.py +++ b/pydra/tasks/datalad.py @@ -1,16 +1,17 @@ """A :obj:`~nipype.interfaces.utility.base.IdentityInterface` with a grafted Datalad getter.""" - +import os +import logging +import typing as ty from pathlib import Path -from pydra.engine.specs import ( +from ..engine.specs import ( File, Directory, SpecInfo, BaseSpec, ) -from pydra.engine.task import FunctionTask - -import os -import logging +from ..engine.core import TaskBase +from ..engine.helpers import output_from_inputfields +from ..utils.messenger import AuditFlag logger = logging.getLogger("pydra.tasks.datalad") @@ -41,11 +42,6 @@ ) ] -Datalad_input_spec = SpecInfo( - name="DataladInputSpec", - fields=input_fields, - bases=(BaseSpec,), -) output_fields = [ ( @@ -59,22 +55,47 @@ ) ] -Datalad_output_spec = SpecInfo( - name="DataladOutputSpec", - fields=output_fields, - bases=(BaseSpec,), -) - -class DataladInterface(FunctionTask): - - input_spec = Datalad_input_spec - output_spec = Datalad_output_spec - - def _run_interface(self, runtime): - import attr - - inputs = self.inputs.get() - dataset_path = inputs.pop("dataset_path") +# define a TaskBase calss +class DataladInterface(TaskBase): + """A :obj:`~nipype.interfaces.utility.base.IdentityInterface` with a grafted Datalad getter.""" + + def __init__( + 
self, name: str, + audit_flags: AuditFlag = AuditFlag.NONE, + cache_dir=None, + cache_locations=None, + input_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, + output_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, + cont_dim=None, + messenger_args=None, + messengers=None, + rerun=False, + **kwargs, + ): + """Initialize a DataladInterface instance.""" + + self.input_spec = ( + input_spec + or SpecInfo(name="Inputs", fields=input_fields, bases=(BaseSpec,)) + ) + self.output_spec = ( + output_spec + or SpecInfo(name="Output", fields=output_fields, bases=(BaseSpec,)) + ) + self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) + super().__init__( + name=name, + inputs=kwargs, + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + cont_dim=cont_dim, + messenger_args=messenger_args, + messengers=messengers, rerun=rerun) + + def _run_task(self): + in_file = self.inputs.in_file + dataset_path = self.inputs.dataset_path _dl_found = False try: @@ -83,54 +104,78 @@ def _run_interface(self, runtime): except: raise ImportError("Datalad is not installed.") + # checking if the dataset is already downloaded + if not (Path(dataset_path) / ".datalad").exists(): logger.info("Datalad interface without dataset path defined.") try: - dataset_url = inputs.pop("dataset_url") + dataset_url = self.inputs.dataset_url os.makedirs(dataset_path, exist_ok=True) dl.install(source=dataset_url, path=dataset_path) except Exception as e: logger.error(e) + else: + ds = dl.Dataset(self.inputs.dataset_path) + + # getting the file + ds.get(self.inputs.in_file) + + # checking if the file was downloaded + if not Path(dataset_path, in_file).exists(): + raise FileNotFoundError(f"File {in_file} not found in {dataset_path}") - dataset_path = Path(dataset_path) - # check the in_file is in the dataset - for field, value in inputs.items(): - if value in [None, attr.NOTHING]: - continue - - _pth = Path(value) - if not _pth.is_absolute(): - _pth = dataset_path / _pth - - _datalad_candidate = _pth.is_symlink() and not _pth.exists() - if not _datalad_candidate: - logger.warning("datalad was required but not found") - return runtime - - if _datalad_candidate: - try: - result = dl.get( - _pth, - dataset=dataset_path - ) - except Exception as exc: - logger.warning(f"datalad get on {_pth} failed.") - ## discussed with @djarecka, we keep it commented here for now - ## do we still need it for pydra? - # if ( - # config.environment.exec_env == "docker" - # and ("This repository is not initialized for use by git-annex, " - # "but .git/annex/objects/ exists") in f"{exc}" - # ): - # logger.warning( - # "Execution seems containerirzed with Docker, please make sure " - # "you are not running as root. To do so, please add the argument " - # "``-u $(id -u):$(id -g)`` to your command line." - # ) - # else: - # logger.warning(str(exc)) + _pth = Path(in_file) + if not _pth.is_absolute(): + _pth = dataset_path / _pth + + _datalad_candidate = _pth.is_symlink() and not _pth.exists() + if not _datalad_candidate: + logger.warning("datalad was required but not found") + + if _datalad_candidate: + try: + result = dl.get( + _pth, + dataset=dataset_path + ) + except Exception as exc: + logger.warning(f"datalad get on {_pth} failed.") + ## discussed with @djarecka, we keep it commented here for now + ## do we still need it for pydra? 
+ # if ( + # config.environment.exec_env == "docker" + # and ("This repository is not initialized for use by git-annex, " + # "but .git/annex/objects/ exists") in f"{exc}" + # ): + # logger.warning( + # "Execution seems containerirzed with Docker, please make sure " + # "you are not running as root. To do so, please add the argument " + # "``-u $(id -u):$(id -g)`` to your command line." + # ) + # else: + # logger.warning(str(exc)) else: if result[0]["status"] == "error": logger.warning(f"datalad get failed: {result}") - - return runtime + + self.output_ = None + output = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) + output_names = [el[0] for el in self.output_spec.fields] + if output is None: + self.output_ = {nm: None for nm in output_names} + elif len(output_names) == 1: + # if only one element in the fields, everything should be returned together + self.output_ = {output_names[0]: output} + elif isinstance(output, tuple) and len(output_names) == len(output): + self.output_ = dict(zip(output_names, output)) + else: + raise RuntimeError( + f"expected {len(self.output_spec.fields)} elements, " + f"but {output} were returned" + ) + # outputs = self.output_spec().get() + # outputs["out_file"] = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) + def _list_outputs(self): + outputs = self.output_spec().get() + outputs["out_file"] = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) + return outputs \ No newline at end of file diff --git a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py index c8cabca920..b03bef70e4 100644 --- a/pydra/tasks/tests/test_datalad.py +++ b/pydra/tasks/tests/test_datalad.py @@ -4,23 +4,22 @@ import pytest -""" -Functions to test datalad interface -""" +from pydra.tasks.datalad import DataladInterface +from pydra.engine.core import Workflow +from pydra.engine.submitter import Submitter +from pydra.engine.helpers import hash_value + + def test_datalad_interface(tmpdir): """ Testing datalad interface """ import datalad.api as dl - from pydra.tasks.datalad import DataladInterface - from pydra.engine.core import Workflow - from pydra.engine.submitter import Submitter - from pydra.engine.helpers import hash_value # change PosixPath to str tmpdir = str(tmpdir) # creating a dataset - ds = dl.create(tmpdir) + ds = dl.Dataset(tmpdir).create() ds.save() ds_path = ds.pathobj @@ -36,7 +35,15 @@ def test_datalad_interface(tmpdir): wf.inputs.in_file = "file.txt" # adding datalad task - wf.add(DataladInterface(name="dl", in_file=wf.lzin.in_file, dataset_path=wf.lzin.dataset_path, dataset_url=wf.lzin.dataset_url)) + wf.add( + DataladInterface( + name="dl", in_file=wf.lzin.in_file, + dataset_path=wf.lzin.dataset_path, + dataset_url=wf.lzin.dataset_url) + ) + + # setup output + wf.set_output([("out_file", wf.dl.lzout.out_file)]) # running the workflow with Submitter(plugin="cf") as sub: diff --git a/setup.cfg b/setup.cfg index 89dc37dd10..63083b9a84 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ test_requires = pytest-timeout codecov numpy + datalad psutil python-dateutil tornado @@ -72,6 +73,7 @@ test = pytest-timeout codecov numpy + datalad pyld psutil python-dateutil From e707bd6e44722d76eaeaaff1758c2ba74318c435 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Nov 2022 06:46:19 +0000 Subject: [PATCH 06/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- pydra/tasks/datalad.py | 59 ++++++++++++++++--------------- pydra/tasks/tests/test_datalad.py | 10 +++--- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/pydra/tasks/datalad.py b/pydra/tasks/datalad.py index 415222d0b7..1e34672756 100644 --- a/pydra/tasks/datalad.py +++ b/pydra/tasks/datalad.py @@ -8,7 +8,7 @@ Directory, SpecInfo, BaseSpec, - ) +) from ..engine.core import TaskBase from ..engine.helpers import output_from_inputfields from ..utils.messenger import AuditFlag @@ -59,38 +59,39 @@ class DataladInterface(TaskBase): """A :obj:`~nipype.interfaces.utility.base.IdentityInterface` with a grafted Datalad getter.""" def __init__( - self, name: str, + self, + name: str, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cache_locations=None, input_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, output_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, - cont_dim=None, - messenger_args=None, - messengers=None, + cont_dim=None, + messenger_args=None, + messengers=None, rerun=False, **kwargs, - ): + ): """Initialize a DataladInterface instance.""" - self.input_spec = ( - input_spec - or SpecInfo(name="Inputs", fields=input_fields, bases=(BaseSpec,)) + self.input_spec = input_spec or SpecInfo( + name="Inputs", fields=input_fields, bases=(BaseSpec,) ) - self.output_spec = ( - output_spec - or SpecInfo(name="Output", fields=output_fields, bases=(BaseSpec,)) + self.output_spec = output_spec or SpecInfo( + name="Output", fields=output_fields, bases=(BaseSpec,) ) self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) super().__init__( - name=name, + name=name, inputs=kwargs, - audit_flags=audit_flags, - cache_dir=cache_dir, - cache_locations=cache_locations, - cont_dim=cont_dim, - messenger_args=messenger_args, - messengers=messengers, rerun=rerun) + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + cont_dim=cont_dim, + messenger_args=messenger_args, + messengers=messengers, + rerun=rerun, + ) def _run_task(self): in_file = self.inputs.in_file @@ -105,7 +106,7 @@ def _run_task(self): raise ImportError("Datalad is not installed.") # checking if the dataset is already downloaded - + if not (Path(dataset_path) / ".datalad").exists(): logger.info("Datalad interface without dataset path defined.") try: @@ -134,10 +135,7 @@ def _run_task(self): if _datalad_candidate: try: - result = dl.get( - _pth, - dataset=dataset_path - ) + result = dl.get(_pth, dataset=dataset_path) except Exception as exc: logger.warning(f"datalad get on {_pth} failed.") ## discussed with @djarecka, we keep it commented here for now @@ -157,9 +155,11 @@ def _run_task(self): else: if result[0]["status"] == "error": logger.warning(f"datalad get failed: {result}") - + self.output_ = None - output = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) + output = os.path.abspath( + os.path.join(self.inputs.dataset_path, self.inputs.in_file) + ) output_names = [el[0] for el in self.output_spec.fields] if output is None: self.output_ = {nm: None for nm in output_names} @@ -175,7 +175,10 @@ def _run_task(self): ) # outputs = self.output_spec().get() # outputs["out_file"] = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) + def _list_outputs(self): outputs = self.output_spec().get() - outputs["out_file"] = os.path.abspath(os.path.join(self.inputs.dataset_path, self.inputs.in_file)) - return outputs \ No newline at end of file + outputs["out_file"] = os.path.abspath( + 
os.path.join(self.inputs.dataset_path, self.inputs.in_file) + ) + return outputs diff --git a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py index db0a095cc8..62518b6c22 100644 --- a/pydra/tasks/tests/test_datalad.py +++ b/pydra/tasks/tests/test_datalad.py @@ -5,7 +5,7 @@ from pydra.tasks.datalad import DataladInterface -from pydra.engine.core import Workflow +from pydra.engine.core import Workflow from pydra.engine.submitter import Submitter from pydra.engine.helpers import hash_value @@ -37,10 +37,12 @@ def test_datalad_interface(tmpdir): # adding datalad task wf.add( DataladInterface( - name="dl", in_file=wf.lzin.in_file, - dataset_path=wf.lzin.dataset_path, - dataset_url=wf.lzin.dataset_url) + name="dl", + in_file=wf.lzin.in_file, + dataset_path=wf.lzin.dataset_path, + dataset_url=wf.lzin.dataset_url, ) + ) # setup output wf.set_output([("out_file", wf.dl.lzout.out_file)]) From 216314b629db9abd3eacfe9d021e14f325257440 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Sun, 20 Nov 2022 21:23:58 -0800 Subject: [PATCH 07/19] setup git-annex --- .github/workflows/testpydra.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 5c0bb16643..d19cfb581c 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -31,6 +31,20 @@ jobs: python-version: ${{ matrix.python-version }} - name: Update build tools run: python -m pip install --upgrade pip build + - name: Set-up Git-annex + run: | + if [ "${{ runner.os }}" == "Linux" ]; then + sudo apt-get update + sudo apt-get install -y git-annex + elif [ "${{ runner.os }}" == "macOS" ]; then + brew install git-annex + elif [ "${{ runner.os }}" == "Windows" ]; then + choco install git-annex + fi + - name: Set git credentials + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" - name: Build pydra run: python -m build From c77205e73b5f58ea450248f9c355710e5c5b6637 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Sun, 20 Nov 2022 21:38:49 -0800 Subject: [PATCH 08/19] fix git-annex error on linux and windows --- .github/workflows/testpydra.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index d19cfb581c..dc11869fc3 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -35,10 +35,11 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then sudo apt-get update - sudo apt-get install -y git-annex + sudo apt-get install git-annex elif [ "${{ runner.os }}" == "macOS" ]; then brew install git-annex elif [ "${{ runner.os }}" == "Windows" ]; then + choco install git choco install git-annex fi - name: Set git credentials From dcf6b0845344ee601ce73234c4565e530692c31d Mon Sep 17 00:00:00 2001 From: yibeichan Date: Sun, 20 Nov 2022 22:22:06 -0800 Subject: [PATCH 09/19] upgrade git-annex on linux --- .github/workflows/testpydra.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index dc11869fc3..c0fc6a51e7 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -35,7 +35,7 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then sudo apt-get update - sudo apt-get install git-annex + sudo apt-get install git-annex-standalone elif [ "${{ runner.os }}" == "macOS" ]; then brew install git-annex elif [ "${{ runner.os }}" == 
"Windows" ]; then From bd4a9b6b38af05809f08d8dc371d7896bb8bd6ee Mon Sep 17 00:00:00 2001 From: yibeichan Date: Sun, 20 Nov 2022 22:26:51 -0800 Subject: [PATCH 10/19] fix git-annex on linux & windows --- .github/workflows/testpydra.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index c0fc6a51e7..452e0b18c5 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -35,12 +35,12 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then sudo apt-get update - sudo apt-get install git-annex-standalone + sudo apt-get install git-annex >= 8.20200309 elif [ "${{ runner.os }}" == "macOS" ]; then brew install git-annex elif [ "${{ runner.os }}" == "Windows" ]; then - choco install git - choco install git-annex + pip install datalad-installer + datalad-installer git-annex -m datalad/packages fi - name: Set git credentials run: | From a3c20109f64c1daf5481901e4978cb30062e5f34 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Sun, 20 Nov 2022 22:38:16 -0800 Subject: [PATCH 11/19] fix git-annex on linux --- .github/workflows/testpydra.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 452e0b18c5..c7c5d57116 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -34,15 +34,17 @@ jobs: - name: Set-up Git-annex run: | if [ "${{ runner.os }}" == "Linux" ]; then + wget -O- http://neuro.debian.net/lists/jammy.us-ca.libre | sudo tee /etc/apt/sources.list.d/neurodebian.sources.list + sudo apt-key adv --recv-keys --keyserver hkps://keyserver.ubuntu.com 0xA5D32F012649A5A9 sudo apt-get update - sudo apt-get install git-annex >= 8.20200309 + sudo apt-get install git-annex-standalone elif [ "${{ runner.os }}" == "macOS" ]; then brew install git-annex elif [ "${{ runner.os }}" == "Windows" ]; then pip install datalad-installer datalad-installer git-annex -m datalad/packages fi - - name: Set git credentials + - name: Set-up git credentials run: | git config --global user.email "github-actions[bot]@users.noreply.github.com" git config --global user.name "github-actions[bot]" From de348981966b1828ae31995813b03b222af8d4f4 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Mon, 28 Nov 2022 11:49:56 -0800 Subject: [PATCH 12/19] add need_gitannex --- pydra/engine/tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index b2f3b6652d..a3619e1b4b 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -29,6 +29,10 @@ reason="sge not available", ) +need_gitannex = pytest.mark.skipif( + not (shutil.which("git-annex")) or bool(float(sp.check_output(["git-annex", "version", "--raw"])) < 8.20200309), + reason="git-annex is not installed or version is less than 8.20200309", +) def result_no_submitter(shell_task, plugin=None): """helper function to return result when running without submitter""" From d935433f69376f70978bf95f2fc27da9f0186a13 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Mon, 28 Nov 2022 12:54:27 -0800 Subject: [PATCH 13/19] update test --- pydra/tasks/tests/__init__.py | 0 pydra/tasks/tests/test_datalad.py | 51 +++++++++++++------------------ 2 files changed, 21 insertions(+), 30 deletions(-) create mode 100644 pydra/tasks/tests/__init__.py diff --git a/pydra/tasks/tests/__init__.py b/pydra/tasks/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py index 62518b6c22..5db99b24c3 100644 --- a/pydra/tasks/tests/test_datalad.py +++ b/pydra/tasks/tests/test_datalad.py @@ -1,15 +1,17 @@ import typing as ty +from pathlib import Path import os, sys import attr import pytest -from pydra.tasks.datalad import DataladInterface -from pydra.engine.core import Workflow -from pydra.engine.submitter import Submitter -from pydra.engine.helpers import hash_value - +from ...tasks.datalad import DataladInterface +from ...engine.core import Workflow +from ...engine.submitter import Submitter +from ...engine.helpers import hash_value +from ...engine.tests.utils import need_gitannex +@need_gitannex def test_datalad_interface(tmpdir): """ Testing datalad interface @@ -28,31 +30,20 @@ def test_datalad_interface(tmpdir): file_path.write_text("test") ds.save() - # creating a workflow - wf = Workflow(name="wf", input_spec=["dataset_path", "dataset_url", "in_file"]) - wf.inputs.dataset_path = ds_path - wf.inputs.dataset_url = "" - wf.inputs.in_file = "file.txt" - - # adding datalad task - wf.add( - DataladInterface( - name="dl", - in_file=wf.lzin.in_file, - dataset_path=wf.lzin.dataset_path, - dataset_url=wf.lzin.dataset_url, - ) - ) - - # setup output - wf.set_output([("out_file", wf.dl.lzout.out_file)]) - - # running the workflow - with Submitter(plugin="cf") as sub: - sub(wf) - - # checking if the file was downloaded - assert wf.result().output.out_file.exists() + tmpdir = Path(tmpdir) + + # install the dataset to a new location + ds2 = dl.install(source=tmpdir, path=tmpdir / "ds2") + ds2_path = ds2.pathobj + + # use datalad interface to download the file + dl_interface = DataladInterface(name="dl_interface", in_file="file.txt", dataset_path=ds2_path) + # running the task + res = dl_interface() + + assert os.path.exists(res.output.out_file) + assert os.path.basename(res.output.out_file) == "file.txt" + # Path: pydra/tasks/tests/test_datalad.py From 7c8bbaf9b5330ddc5652ae94ef406cd4f7dcbdec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Nov 2022 20:54:51 +0000 Subject: [PATCH 14/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydra/engine/tests/utils.py | 4 +++- pydra/tasks/tests/test_datalad.py | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index a3619e1b4b..33abf92bf4 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -30,10 +30,12 @@ ) need_gitannex = pytest.mark.skipif( - not (shutil.which("git-annex")) or bool(float(sp.check_output(["git-annex", "version", "--raw"])) < 8.20200309), + not (shutil.which("git-annex")) + or bool(float(sp.check_output(["git-annex", "version", "--raw"])) < 8.20200309), reason="git-annex is not installed or version is less than 8.20200309", ) + def result_no_submitter(shell_task, plugin=None): """helper function to return result when running without submitter""" return shell_task() diff --git a/pydra/tasks/tests/test_datalad.py b/pydra/tasks/tests/test_datalad.py index 5db99b24c3..b63dd30fac 100644 --- a/pydra/tasks/tests/test_datalad.py +++ b/pydra/tasks/tests/test_datalad.py @@ -1,5 +1,5 @@ import typing as ty -from pathlib import Path +from pathlib import Path import os, sys import attr import pytest @@ -11,6 +11,7 @@ from ...engine.helpers import hash_value from ...engine.tests.utils import need_gitannex + 
@need_gitannex def test_datalad_interface(tmpdir): """ @@ -37,7 +38,9 @@ def test_datalad_interface(tmpdir): ds2_path = ds2.pathobj # use datalad interface to download the file - dl_interface = DataladInterface(name="dl_interface", in_file="file.txt", dataset_path=ds2_path) + dl_interface = DataladInterface( + name="dl_interface", in_file="file.txt", dataset_path=ds2_path + ) # running the task res = dl_interface() @@ -45,6 +48,5 @@ def test_datalad_interface(tmpdir): assert os.path.basename(res.output.out_file) == "file.txt" - # Path: pydra/tasks/tests/test_datalad.py # Compare this snippet from pydra/tasks/datalad.py: From bced78bd9d58c612e34deb065cf40da655f23eb4 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Tue, 29 Nov 2022 10:34:14 -0800 Subject: [PATCH 15/19] fix need-gitannex --- pydra/engine/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 33abf92bf4..8a4d50f395 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -31,7 +31,7 @@ need_gitannex = pytest.mark.skipif( not (shutil.which("git-annex")) - or bool(float(sp.check_output(["git-annex", "version", "--raw"])) < 8.20200309), + or bool(float(sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True)) < 8.20200309), reason="git-annex is not installed or version is less than 8.20200309", ) From 01a2f4bf6b3f0bdf84310bc7c34cab61abaaec9e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:24:59 +0000 Subject: [PATCH 16/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydra/engine/tests/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 8a4d50f395..866404f67d 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -31,7 +31,12 @@ need_gitannex = pytest.mark.skipif( not (shutil.which("git-annex")) - or bool(float(sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True)) < 8.20200309), + or bool( + float( + sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True) + ) + < 8.20200309 + ), reason="git-annex is not installed or version is less than 8.20200309", ) From 8aad821c0c6e84062e95ad9ee1daef7da23ae44e Mon Sep 17 00:00:00 2001 From: yibeichan Date: Fri, 16 Dec 2022 08:40:16 -0800 Subject: [PATCH 17/19] add datalad --- pyproject.toml | 1 + setup.cfg | 120 ------------------------------------------------- 2 files changed, 1 insertion(+), 120 deletions(-) delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml index ce7eb465dc..29bd9475db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ test = [ "tornado", "boutiques", "pympler", + "datalad", ] # Aliases tests = ["pydra[test]"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 63083b9a84..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,120 +0,0 @@ -[metadata] -url = https://github.com/nipype/pydra -author = Nipype developers -author_email = neuroimaging@python.org -maintainer = Nipype developers -maintainer_email = neuroimaging@python.org -description = Pydra dataflow engine -long_description = file:README.rst -long_description_content_type = text/x-rst; charset=UTF-8 -license = Apache License, 2.0 -provides = - pydra -classifiers = - Development Status :: 3 - Alpha - Environment :: Console - Intended Audience :: 
Science/Research - License :: OSI Approved :: Apache Software License - Operating System :: MacOS :: MacOS X - Operating System :: POSIX :: Linux - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Topic :: Scientific/Engineering - -[options] -python_requires = >= 3.7 -install_requires = - attrs >= 19.1.0 - cloudpickle >= 2.0.0 - filelock >= 3.0.0 - etelemetry >= 0.2.2 - -test_requires = - pytest >= 6.2.5 - pytest-cov - pytest-env - pytest-xdist < 2.0 - pytest-rerunfailures - pytest-timeout - codecov - numpy - datalad - psutil - python-dateutil - tornado - boutiques - pympler -packages = find: -include_package_data = True - -[options.package_data] -pydra = - schema/context.jsonld - -[options.extras_require] -doc = - attrs >= 19.1.0 - cloudpickle - filelock - packaging - sphinx >= 2.1.2 - sphinx_rtd_theme - sphinxcontrib-apidoc ~= 0.3.0 - sphinxcontrib-napoleon - sphinxcontrib-versioning -docs = - %(doc)s -test = - pytest >= 6.2.5 - pytest-cov - pytest-env - pytest-xdist < 2.0 - pytest-rerunfailures - pytest-timeout - codecov - numpy - datalad - pyld - psutil - python-dateutil - tornado - boutiques - pympler -tests = - %(test)s -dev = - %(test)s - black==21.4b2 - pre-commit -dask = - %(test)s - dask - distributed -all = - %(doc)s - %(dev)s - -[versioneer] -VCS = git -style = pep440 -versionfile_source = pydra/_version.py -versionfile_build = pydra/_version.py -tag_prefix = -parentdir_prefix = - -[flake8] -doctests = True -exclude = - **/__init__.py - **/tests/* - *build/ - docs/sphinxext/ - docs/tools/ - docs/conf.py - pydra/_version.py - versioneer.py -max-line-length=105 -ignore = E203,W503,F541 - -[codespell] -ignore-words = .codespell-ignorewords From e41d519e1cb8230c6e64332b92d7ee81f724ef8c Mon Sep 17 00:00:00 2001 From: yibeichan Date: Fri, 16 Dec 2022 08:40:39 -0800 Subject: [PATCH 18/19] change sp string output --- pydra/engine/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 866404f67d..37eb28b67c 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -33,7 +33,7 @@ not (shutil.which("git-annex")) or bool( float( - sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True) + sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True)[:6] ) < 8.20200309 ), From edac2c88debc39d242d4278288e99acb6eb58807 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:41:03 +0000 Subject: [PATCH 19/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydra/engine/tests/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 37eb28b67c..f32343de04 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -33,7 +33,9 @@ not (shutil.which("git-annex")) or bool( float( - sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True)[:6] + sp.check_output(["git-annex", "version", "--raw"], universal_newlines=True)[ + :6 + ] ) < 8.20200309 ),
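
A minimal usage sketch of the DataladInterface introduced in this patch series, mirroring the test added in PATCH 13/19. It assumes datalad and git-annex (>= 8.20200309) are installed and that the dataset has already been installed locally; the dataset path and file name below are placeholders, not values from the series.

    # Sketch: fetch one file from an installed DataLad dataset using the
    # DataladInterface from this series. "/tmp/ds" and "sub-01/anat/T1w.nii.gz"
    # are placeholder values; datalad and git-annex must be available.
    from pydra.tasks.datalad import DataladInterface

    dl_task = DataladInterface(
        name="fetch_t1w",
        in_file="sub-01/anat/T1w.nii.gz",  # path relative to the dataset root
        dataset_path="/tmp/ds",            # an already-installed DataLad dataset
    )
    res = dl_task()                        # runs `datalad get` on the requested file
    print(res.output.out_file)             # absolute path to the fetched file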