diff --git a/docs/developer/testing.rst b/docs/developer/testing.rst index 6eb7e131..8e1f08dc 100644 --- a/docs/developer/testing.rst +++ b/docs/developer/testing.rst @@ -21,22 +21,22 @@ In addition, the fakes are relatively complex and may diverge from the real impl To mitigate these problems, the fake client is tested alongside a real client. But to (mostly) avoid the downsides stated in the beginning, the real client is connected to a local SciCat server. -See ``tests/common/backend.py`` and ``tests/conftest.py`` for the concrete setup. -The backend is launched in a docker container based on the images provided by `scicatlive `_. -Tests in ``tests/client_tests.py`` are then run with both the fake and the real client to ensure that both produce the same results. +See ``src/testing/backend.py`` and ``tests/conftest.py`` for the concrete setup. +The backend is launched in a docker container with the image of the latest release of the SciCat backend. +Tests in ``tests/client`` are then run with both the fake and the real client to ensure that both produce the same results. Use ``--backend-tests`` with ``pytest`` to run these tests. -SSH file transfer ------------------ +SFTP file transfer +------------------ -Testing :class:`scitacean.transfer.ssh.SSHFileTransfer` requires an SSH server. -``tests/common/ssh_server.py`` contains helpers for managing one via Docker. -Tests can use it by depending on the ``ssh_fileserver`` fixture. -See the documentation in ``tests/common/ssh_server.py`` for how it works. +Testing :class:`scitacean.transfer.sftp.SFTPFileTransfer` requires an SFTP server. +``tests/common/sftp_server.py`` contains helpers for managing one via Docker. +Tests can use it by depending on the ``sftp_fileserver`` fixture. +See the documentation in ``tests/common/sftp_server.py`` for how it works. Note that those tests may leave a small directory behind. This is an issue with file ownership and permissions caused by making the Docker volumes writable. It should not be a problem in practice as those files will only be in temporary directories which should get cleaned up at reboot. -Use ``--ssh-tests`` with ``pytest`` to run these tests. +Use ``--sftp-tests`` with ``pytest`` to run these tests. diff --git a/docs/release-notes.rst b/docs/release-notes.rst index 2c3b7403..756ad142 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -43,6 +43,7 @@ Features * File downloads now use the checksum algorithm stored in SciCat when possible. So it is no longer necessary to specify it by hand in many cases. +* Added ``SFTPFileTransfer`` which is similar to ``SSHFileTransfer`` but relies only on SFTP. Breaking changes ~~~~~~~~~~~~~~~~ @@ -58,6 +59,8 @@ Documentation Deprecations ~~~~~~~~~~~~ +* Deprecated ``SSHFileTransfer`` in favor of ``SFTPFileTransfer``. + Stability, Maintainability, and Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/user-guide/downloading.ipynb b/docs/user-guide/downloading.ipynb index 8ff2da21..62b3002f 100644 --- a/docs/user-guide/downloading.ipynb +++ b/docs/user-guide/downloading.ipynb @@ -12,10 +12,10 @@ "\n", "```python\n", "from scitacean import Client\n", - "from scitacean.transfer.ssh import SSHFileTransfer\n", + "from scitacean.transfer.sftp import SFTPFileTransfer\n", "client = Client.from_token(url=\"https://scicat.ess.eu/api/v3\",\n", " token=...,\n", - " file_transfer=SSHFileTransfer(\n", + " file_transfer=SFTPFileTransfer(\n", " host=\"login.esss.dk\"\n", " ))\n", "```\n", @@ -40,13 +40,10 @@ "\n", "\n", "While the client itself is responsible for talking to SciCat, a `file_transfer` object is required to download data files.\n", - "Currently, only `SSHFileTransfer` is implemented.\n", - "It downloads / uploads files via SSH.\n", - "It will almost definitely change in the future!\n", + "Here, we use `SFTPFileTransfer` which downloads / uploads files via SFTP.\n", "\n", "The file transfer needs to authenticate separately from the SciCat connection.\n", - "It will ask for username and password on the command if needed.\n", - "But it is recommended to set up an SSH agent with a key and customize the `host` argument to point to the configured connection.\n", + "By default, it requires an SSH agent to be running an set up for the selected `host`.\n", "\n", "For the purposes of this guide, we don't want to connect to a real SciCat server in order to avoid the complications associated with that.\n", "So we set up a fake client that only pretends to connect to SciCat and file servers.\n", diff --git a/docs/user-guide/testing.ipynb b/docs/user-guide/testing.ipynb index fc4a51c0..a45ef168 100644 --- a/docs/user-guide/testing.ipynb +++ b/docs/user-guide/testing.ipynb @@ -18,7 +18,7 @@ "They can be mixed and matched freely with the real client and file transfers.\n", "But it is generally recommended to combine them.\n", "\n", - "Secondly, SciCat servers and fileservers are managed by the [scicat_backend](../generated/modules/scitacean.testing.backend.fixtures.scicat_backend.rst) and [ssh_fileserver](../generated/modules/scitacean.testing.ssh.fixtures.ssh_fileserver.rst) pytest fixtures.\n", + "Secondly, SciCat servers and fileservers are managed by the [scicat_backend](../generated/modules/scitacean.testing.backend.fixtures.scicat_backend.rst) and [sftp_fileserver](../generated/modules/scitacean.testing.sftp.fixtures.sftp_fileserver.rst) pytest fixtures.\n", "\n", "First, create a test dataset and file." ] @@ -554,15 +554,16 @@ "id": "97930d15-517a-4cea-bf2b-38ed700220b3", "metadata": {}, "source": [ - "## Local SSH fileserver\n", + "## Local SFTP fileserver\n", "\n", - "[scitacean.testing.ssh](../generated/modules/scitacean.testing.ssh.rst) provides tools to set up an SSH server in a Docker container on the local machine.\n", - "It is primarily intended to be used via the [pytest](https://docs.pytest.org/) fixtures in [scitacean.testing.ssh.fixtures](../generated/modules/scitacean.testing.ssh.fixtures.rst).\n", + "[scitacean.testing.sftp](../generated/modules/scitacean.testing.sftp.rst) provides tools to set up an SFTP server in a Docker container on the local machine.\n", + "It is primarily intended to be used via the [pytest](https://docs.pytest.org/) fixtures in [scitacean.testing.sftp.fixtures](../generated/modules/scitacean.testing.sftp.fixtures.rst).\n", "\n", - "The fixtures can configure, spin up, and seed an SSH server in a Docker container.\n", + "The fixtures can configure, spin up, and seed an SFTP server in a Docker container.\n", "They also clean up after the test session by stopping the Docker container.\n", + "(Scritly speaking, the server is an SSH server but all users except root are restricted to SFTP.)\n", "\n", - "Note the caveats in [scitacean.testing.ssh](../generated/modules/scitacean.testing.ssh.rst) about clean up and use of `pytest-xdist`.\n", + "Note the caveats in [scitacean.testing.sftp](../generated/modules/scitacean.testing.sftp.rst) about clean up and use of `pytest-xdist`.\n", "\n", "### Set up\n", "\n", @@ -570,7 +571,7 @@ "Then, configure pytest by\n", "\n", "- registering the fixtures and\n", - "- adding a command line option to enable ssh tests.\n", + "- adding a command line option to enable sftp tests.\n", "\n", "To this end, add the following in your `conftest.py`: (Or merge it into the setup for backend tests from above.)" ] @@ -583,15 +584,15 @@ "outputs": [], "source": [ "import pytest\n", - "from scitacean.testing.ssh import add_pytest_option as add_ssh_option\n", + "from scitacean.testing.sftp import add_pytest_option as add_sftp_option\n", "\n", "\n", "pytest_plugins = (\n", - " \"scitacean.testing.ssh.fixtures\",\n", + " \"scitacean.testing.sftp.fixtures\",\n", ")\n", "\n", "def pytest_addoption(parser: pytest.Parser) -> None:\n", - " add_backend_option(parser)" + " add_sftp_option(parser)" ] }, { @@ -599,10 +600,10 @@ "id": "0d92f3dd-986c-4f2c-acec-00b6e55ae87b", "metadata": {}, "source": [ - "The SSH server will only be launched when the corresponding command line option is given.\n", - "By default, this is `--ssh-tests` but it can be changed via the `option` argument of `add_pytest_option`.\n", + "The SFTP server will only be launched when the corresponding command line option is given.\n", + "By default, this is `--sftp-tests` but it can be changed via the `option` argument of `add_pytest_option`.\n", "\n", - "### Use SSH in tests\n", + "### Use SFTP in tests\n", "\n", "Tests that require the server can now request it as a fixture:" ] @@ -614,7 +615,7 @@ "metadata": {}, "outputs": [], "source": [ - "def test_something_with_ssh(require_ssh_fileserver):\n", + "def test_something_with_sftp(require_sftp_fileserver):\n", " # test something\n", " ..." ] @@ -624,12 +625,12 @@ "id": "3126d0d9-727a-4d55-aff1-6b31fd5df91f", "metadata": {}, "source": [ - "The `require_ssh_fileserver` fixture will ensure that the SSH server is running during the test.\n", - "If SSH tests have not been enabled by the command line option, the test will be skipped.\n", + "The `require_sftp_fileserver` fixture will ensure that the SFTP server is running during the test.\n", + "If SFTP tests have not been enabled by the command line option, the test will be skipped.\n", "\n", "Connecting to the server is not as straight forward as for the SciCat backend.\n", "It requires passing a special `connect` function to the file transfer.\n", - "This can be done by requesting `ssh_connect_with_username_password`.\n", + "This can be done by requesting `sftp_connect_with_username_password`.\n", "For example, the following opens a connection to the server to upload a file:" ] }, @@ -640,23 +641,22 @@ "metadata": {}, "outputs": [], "source": [ - "from scitacean.transfer.ssh import SSHFileTransfer\n", + "from scitacean.transfer.sftp import SFTPFileTransfer\n", "\n", - "def test_ssh_upload(\n", - " ssh_access,\n", - " ssh_connect_with_username_password,\n", - " require_ssh_fileserver,\n", - " ssh_data_dir,\n", + "def test_sftp_upload(\n", + " sftp_access,\n", + " sftp_connect_with_username_password,\n", + " require_sftp_fileserver,\n", + " sftp_data_dir,\n", "):\n", - " ssh = SSHFileTransfer(host=ssh_access.host, port=ssh_access.port)\n", + " sftp = SFTPFileTransfer(host=sftp_access.host,\n", + " port=sftp_access.port,\n", + " connect=sftp_connect_with_username_password)\n", " ds = Dataset(...)\n", - " with ssh.connect_for_upload(\n", - " dataset=ds,\n", - " connect=ssh_connect_with_username_password\n", - " ) as connection:\n", + " with sftp.connect_for_upload(dataset=ds) as connection:\n", " # do upload\n", " ...\n", - " # assert that the file has been copied to ssh_data_dir\n", + " # assert that the file has been copied to sftp_data_dir\n", " ..." ] }, @@ -666,17 +666,17 @@ "metadata": {}, "source": [ "Uploaded files are readable on the host.\n", - "So the test can read from `ssh_data_dir` to check if the upload succeeded.\n", + "So the test can read from `sftp_data_dir` to check if the upload succeeded.\n", "This directory is mounted as `/data` on the server.\n", "\n", - "Using an SSH file transfer with `Client` requires some extra steps.\n", - "An example is given by `test_client_with_ssh` in https://github.com/SciCatProject/scitacean/blob/main/tests/transfer/ssh_test.py.\n", - "It uses a subclass of `SSHFileTransfer` to pass `ssh_connect_with_username_password` to the connection as `Client` cannot do this itself. \n", + "Using an SFTP file transfer with `Client` requires some extra steps.\n", + "An example is given by `test_client_with_sftp` in https://github.com/SciCatProject/scitacean/blob/main/tests/transfer/sftp_test.py.\n", + "It uses a subclass of `SFTPFileTransfer` to pass `sftp_connect_with_username_password` to the connection as `Client` cannot do this itself.\n", "\n", "### Seed data\n", "\n", - "The server's filesystem gets seeded with some files from https://github.com/SciCatProject/scitacean/tree/main/src/scitacean/testing/ssh/ssh_server_seed.\n", - "Those files are copied to `ssh_data_dir` on the host which is mounted to `/data/seed` on the server." + "The server's filesystem gets seeded with some files from https://github.com/SciCatProject/scitacean/tree/main/src/scitacean/testing/sftp/sftp_server_seed.\n", + "Those files are copied to `sftp_data_dir` on the host which is mounted to `/data/seed` on the server." ] }, { diff --git a/docs/user-guide/uploading.ipynb b/docs/user-guide/uploading.ipynb index d5325692..6134ae83 100644 --- a/docs/user-guide/uploading.ipynb +++ b/docs/user-guide/uploading.ipynb @@ -12,10 +12,10 @@ "We connect to SciCat and a file server using a [Client](../generated/classes/scitacean.Client.rst):\n", "```python\n", "from scitacean import Client\n", - "from scitacean.transfer.ssh import SSHFileTransfer\n", + "from scitacean.transfer.sftp import SFTPFileTransfer\n", "client = Client.from_token(url=\"https://scicat.ess.eu/api/v3\",\n", " token=...,\n", - " file_transfer=SSHFileTransfer(\n", + " file_transfer=SFTPFileTransfer(\n", " host=\"login.esss.dk\"\n", " ))\n", "```\n", diff --git a/pyproject.toml b/pyproject.toml index d8a16979..d29eba6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dynamic = ["version"] [project.optional-dependencies] ssh = ["fabric"] +sftp = ["paramiko"] test = ["filelock", "hypothesis", "pyyaml"] [tool.setuptools_scm] @@ -60,6 +61,8 @@ filterwarnings = [ "error", # Many tests don't set a checksum, so File raises this warning. "ignore:Cannot check if local file:UserWarning", + # Internal deprecations. + "ignore:SSHFileTransfer is deprecated:scitacean.VisibleDeprecationWarning", # From fabric / invoke "ignore:_SixMetaPathImporter:ImportWarning", "ignore:the imp module is deprecated in favour of importlib:DeprecationWarning", diff --git a/requirements-pydantic2/base.in b/requirements-pydantic2/base.in index 33ff6e2c..e104e3af 100644 --- a/requirements-pydantic2/base.in +++ b/requirements-pydantic2/base.in @@ -1,5 +1,6 @@ email-validator fabric +paramiko pydantic == 2 python-dateutil requests diff --git a/requirements-pydantic2/base.txt b/requirements-pydantic2/base.txt new file mode 100644 index 00000000..223e7ab8 --- /dev/null +++ b/requirements-pydantic2/base.txt @@ -0,0 +1,63 @@ +# SHA1:2b55aaac9edd959da15c313d2f5f84f4e2cc5f2a +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +annotated-types==0.5.0 + # via pydantic +bcrypt==4.0.1 + # via paramiko +certifi==2023.7.22 + # via requests +cffi==1.15.1 + # via + # cryptography + # pynacl +charset-normalizer==3.2.0 + # via requests +cryptography==41.0.3 + # via paramiko +decorator==5.1.1 + # via fabric +deprecated==1.2.14 + # via fabric +dnspython==2.4.2 + # via email-validator +email-validator==2.0.0.post2 + # via -r requirements-pydantic2/base.in +fabric==3.2.1 + # via -r requirements-pydantic2/base.in +idna==3.4 + # via + # email-validator + # requests +invoke==2.2.0 + # via fabric +paramiko==3.3.1 + # via + # -r requirements-pydantic2/base.in + # fabric +pycparser==2.21 + # via cffi +pydantic==2.0 + # via -r requirements-pydantic2/base.in +pydantic-core==2.0.1 + # via pydantic +pynacl==1.5.0 + # via paramiko +python-dateutil==2.8.2 + # via -r requirements-pydantic2/base.in +requests==2.31.0 + # via -r requirements-pydantic2/base.in +six==1.16.0 + # via python-dateutil +typing-extensions==4.7.1 + # via + # pydantic + # pydantic-core +urllib3==2.0.4 + # via requests +wrapt==1.15.0 + # via deprecated diff --git a/requirements-pydantic2/test.txt b/requirements-pydantic2/test.txt new file mode 100644 index 00000000..51f86f72 --- /dev/null +++ b/requirements-pydantic2/test.txt @@ -0,0 +1,37 @@ +# SHA1:cefe43dc5cb9d9d7430647ec8f8955a6f1576e53 +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +-r base.txt +attrs==23.1.0 + # via hypothesis +execnet==2.0.2 + # via pytest-xdist +filelock==3.12.2 + # via -r requirements-pydantic2/test.in +hypothesis==6.82.6 + # via -r requirements-pydantic2/test.in +iniconfig==2.0.0 + # via pytest +packaging==23.1 + # via pytest +pluggy==1.3.0 + # via pytest +pyfakefs==5.2.4 + # via -r requirements-pydantic2/test.in +pytest==7.4.0 + # via + # -r requirements-pydantic2/test.in + # pytest-randomly + # pytest-xdist +pytest-randomly==3.15.0 + # via -r requirements-pydantic2/test.in +pytest-xdist==3.3.1 + # via -r requirements-pydantic2/test.in +pyyaml==6.0.1 + # via -r requirements-pydantic2/test.in +sortedcontainers==2.4.0 + # via hypothesis diff --git a/requirements/base.in b/requirements/base.in index d7343624..a3e8e358 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,6 @@ email-validator fabric +paramiko pydantic < 2 # autodoc_pydantic is not compatible with pydantic 2 python-dateutil requests diff --git a/requirements/base.txt b/requirements/base.txt index 606cd943..b7101a09 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,4 +1,4 @@ -# SHA1:a579aa51495a128838b22e1692012f3918753a8c +# SHA1:f8c625c62cb057cead0d70b0a458c8aed4f43459 # # This file is autogenerated by pip-compile-multi # To update, run: @@ -34,7 +34,9 @@ idna==3.4 invoke==2.2.0 # via fabric paramiko==3.3.1 - # via fabric + # via + # -r requirements/base.in + # fabric pycparser==2.21 # via cffi pydantic==1.10.12 diff --git a/requirements/ci.txt b/requirements/ci.txt index 9eb1c122..c04f174d 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -25,11 +25,11 @@ platformdirs==3.10.0 # via # tox # virtualenv -pluggy==1.2.0 +pluggy==1.3.0 # via tox -pyproject-api==1.5.3 +pyproject-api==1.5.4 # via tox -tox==4.9.0 +tox==4.10.0 # via -r requirements/ci.in virtualenv==20.24.3 # via tox diff --git a/requirements/dev.txt b/requirements/dev.txt index ce049f20..1c98de92 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -23,7 +23,7 @@ async-lru==2.0.4 # via jupyterlab black==23.7.0 # via -r requirements/dev.in -click==8.1.6 +click==8.1.7 # via # black # pip-compile-multi @@ -45,7 +45,7 @@ jupyter-events==0.7.0 # via jupyter-server jupyter-lsp==2.2.0 # via jupyterlab -jupyter-server==2.7.1 +jupyter-server==2.7.2 # via # jupyter-lsp # jupyterlab @@ -111,9 +111,9 @@ uri-template==1.3.0 # via jsonschema webcolors==1.13 # via jsonschema -websocket-client==1.6.1 +websocket-client==1.6.2 # via jupyter-server -wheel==0.41.1 +wheel==0.41.2 # via pip-tools # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/docs.txt b/requirements/docs.txt index 633a4476..45d46671 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -105,7 +105,7 @@ nbformat==5.9.2 # nbclient # nbconvert # nbsphinx -nbsphinx==0.9.2 +nbsphinx==0.9.3 # via -r requirements/docs.in nest-asyncio==1.5.7 # via ipykernel @@ -183,7 +183,7 @@ sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-qthelp==1.0.6 # via sphinx -sphinxcontrib-serializinghtml==1.1.8 +sphinxcontrib-serializinghtml==1.1.9 # via sphinx stack-data==0.6.2 # via ipython diff --git a/requirements/static.txt b/requirements/static.txt index 8362c2f8..23276f06 100644 --- a/requirements/static.txt +++ b/requirements/static.txt @@ -11,7 +11,7 @@ distlib==0.3.7 # via virtualenv filelock==3.12.2 # via virtualenv -identify==2.5.26 +identify==2.5.27 # via pre-commit nodeenv==1.8.0 # via pre-commit diff --git a/requirements/test.txt b/requirements/test.txt index 1f71a303..7cb6019a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -12,15 +12,15 @@ execnet==2.0.2 # via pytest-xdist filelock==3.12.2 # via -r requirements/test.in -hypothesis==6.82.4 +hypothesis==6.82.6 # via -r requirements/test.in iniconfig==2.0.0 # via pytest packaging==23.1 # via pytest -pluggy==1.2.0 +pluggy==1.3.0 # via pytest -pyfakefs==5.2.3 +pyfakefs==5.2.4 # via -r requirements/test.in pytest==7.4.0 # via diff --git a/src/scitacean/__init__.py b/src/scitacean/__init__.py index 532d3a4b..6fbde532 100644 --- a/src/scitacean/__init__.py +++ b/src/scitacean/__init__.py @@ -16,6 +16,7 @@ from .filesystem import RemotePath from .model import DatasetType from .pid import PID +from .warning import VisibleDeprecationWarning __all__ = ( "Client", @@ -29,4 +30,5 @@ "RemotePath", "ScicatCommError", "ScicatLoginError", + "VisibleDeprecationWarning", ) diff --git a/src/scitacean/filesystem.py b/src/scitacean/filesystem.py index 59223a2d..74cf5185 100644 --- a/src/scitacean/filesystem.py +++ b/src/scitacean/filesystem.py @@ -134,6 +134,15 @@ def suffix(self) -> Optional[str]: return None return "." + parts[1] + @property + def parent(self) -> RemotePath: + """The logical parent of the path.""" + parts = self._path.rstrip("/").rsplit("/", 1) + base = "/" if self._path.startswith("/") else "." + if len(parts) == 1: + return RemotePath(base) + return RemotePath(parts[0] or base) + def truncated(self, max_length: int = 255) -> RemotePath: """Return a new remote path with all path segments truncated. diff --git a/src/scitacean/testing/sftp/Dockerfile-sftp-server b/src/scitacean/testing/sftp/Dockerfile-sftp-server new file mode 100644 index 00000000..9501e3a6 --- /dev/null +++ b/src/scitacean/testing/sftp/Dockerfile-sftp-server @@ -0,0 +1,8 @@ +FROM ghcr.io/linuxserver/openssh-server +RUN echo -e "Match Group *,!root \n\ + ForceCommand internal-sftp \n\ + PasswordAuthentication yes \n\ + PermitTunnel no \n\ + AllowAgentForwarding no \n\ + AllowTcpForwarding no \n\ + X11Forwarding no" >> /etc/ssh/sshd_config diff --git a/src/scitacean/testing/sftp/__init__.py b/src/scitacean/testing/sftp/__init__.py new file mode 100644 index 00000000..2ff3043a --- /dev/null +++ b/src/scitacean/testing/sftp/__init__.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Scitacean contributors (https://github.com/SciCatProject/scitacean) +"""Helpers for running tests with an SFTP server. + +This subpackage is primarily meant for testing +:class:`scitacean.testing.sftp.SFTPFileTransfer`. +But it can also be used to test downstream code that uses the SFTP file transfer. + +The `pytest `_ fixtures in this package manage an SFTP server +running in a docker container on the local machine. +They, therefore, require docker to be installed and running. + +Use the :func:`scitacean.testing.sftp.fixtures.sftp_fileserver` +fixture to manage the server and use +:func:`scitacean.testing.sftp.fixtures.sftp_access` +to get all required access parameters. +See below for examples. + +Attention +--------- +The fixtures support `pytest-xdist `_ +but only if all workers run on the local machine (the default). + +It may still happen that tests fail due to the complexity of synchronizing start up +and shut down of the SFTP server between workers. + +See Also +-------- +`Testing <../../user-guide/testing.ipynb>`_ user guide. + +Examples +-------- +In order to test the SFTP file transfer directly, use the provided fixtures to +open a connection manually. +Here, requesting the ``require_sftp_fileserver`` fixture ensures that the server +is running during the test, or that the test gets skipped if SFTP tests are disabled. +Passing the ``connect`` argument as shown ensures that the file transfer +connects to the test server with the correct parameters. + +.. code-block:: python + + from scitacean.transfer.sftp import SFTPFileTransfer + + def test_sftp_upload( + sftp_access, + sftp_connect_with_username_password, + require_sftp_fileserver, + sftp_data_dir, + ): + sftp = SFTPFileTransfer(host=sftp_access.host, port=sftp_access.port) + ds = Dataset(...) + with sftp.connect_for_upload( + dataset=ds, + connect=sftp_connect_with_username_password + ) as connection: + # do upload + # assert that the file has been copied to sftp_data_dir + +Testing the SFTP transfer together with a client requires some additional setup. +See ``test_client_with_sftp`` in +`sftp_test.py `_ +for an example. + +Implementation notes +-------------------- +When the server fixture is first used, it initializes the server using these steps: + +1. Create a temporary directory with contents:: + + tmpdir + ├ docker-compose.yaml + ├ .env (specifies paths for docker volumes) + ├ counter (number of workers currently using the server) + ├ counter.lock (file lock) + └ data (storage of files) + └ seed (populated from scitacean/testing/sftp/sftp_server_seed) + +2. Start docker. +3. Make data writable by the user in docker. + This changes the ownership of data on the host to root (on some machines). + +The docker container and its volumes are removed at the end of the tests. +The fixture also tries to remove the temporary directory. +This can fail as the owner of its contents (in particular data) +may have been changed to root. +So cleanup can fail and leave the directory behind. + +Use the seed directory (``sftp_data_dir/"seed"``) to test downloads. +Corresponds to ``/data/seed`` on the server. + +Use the base data directory (``sftp_data_dir``) to test uploads. +Corresponds to ``/data`` on the server. + +The counter and counter.lock files are used to synchronize starting and stopping +of the docker container between processes. +This is required when ``pytest-xdist`` is used. +Otherwise, those files will not be present. +""" + +from ._pytest_helpers import add_pytest_option, sftp_enabled, skip_if_not_sftp +from ._sftp import ( + IgnorePolicy, + SFTPAccess, + SFTPUser, + can_connect, + configure, + local_access, + wait_until_sftp_server_is_live, +) + +__all__ = [ + "add_pytest_option", + "can_connect", + "configure", + "local_access", + "sftp_enabled", + "skip_if_not_sftp", + "wait_until_sftp_server_is_live", + "IgnorePolicy", + "SFTPAccess", + "SFTPUser", +] diff --git a/src/scitacean/testing/sftp/_pytest_helpers.py b/src/scitacean/testing/sftp/_pytest_helpers.py new file mode 100644 index 00000000..dfff5061 --- /dev/null +++ b/src/scitacean/testing/sftp/_pytest_helpers.py @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) + +from typing import Optional + +import pytest + +_COMMAND_LINE_OPTION: Optional[str] = None + + +def add_pytest_option(parser: pytest.Parser, option: str = "--sftp-tests") -> None: + """Add a command-line option to pytest to toggle SFTP tests. + + Parameters + ---------- + parser: + Pytest's command-line argument parser. + option: + Name of the command-line option. + """ + parser.addoption( + option, + action="store_true", + default=False, + help="Select whether to run tests with an SFTP fileserver", + ) + global _COMMAND_LINE_OPTION + _COMMAND_LINE_OPTION = option + + +def skip_if_not_sftp(request: pytest.FixtureRequest) -> None: + """Mark the current test to be skipped if SFTP tests are disabled.""" + if not sftp_enabled(request): + pytest.skip( + "Tests against an SFTP file server are disabled, " + f"use {_COMMAND_LINE_OPTION} to enable them" + ) + + +def sftp_enabled(request: pytest.FixtureRequest) -> bool: + """Return True if SFTP tests are enabled.""" + return _COMMAND_LINE_OPTION is not None and bool( + request.config.getoption(_COMMAND_LINE_OPTION) + ) diff --git a/src/scitacean/testing/sftp/_sftp.py b/src/scitacean/testing/sftp/_sftp.py new file mode 100644 index 00000000..da20bbcd --- /dev/null +++ b/src/scitacean/testing/sftp/_sftp.py @@ -0,0 +1,164 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +import importlib.resources +import time +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path +from typing import Any, Dict, Iterable, Tuple, Union + +import paramiko +import yaml + + +@dataclass +class SFTPUser: + username: str + password: str + + +@dataclass +class SFTPAccess: + host: str + port: int + user: SFTPUser + + +def _read_resource_text(filename: str) -> str: + if hasattr(importlib.resources, "files"): + # Use new API added in Python 3.9 + return ( + importlib.resources.files("scitacean.testing.sftp") + .joinpath(filename) + .read_text() + ) + # Old API, deprecated as of Python 3.11 + return importlib.resources.read_text("scitacean.testing.sftp", filename) + + +def _read_resource_yaml(filename: str) -> Any: + return yaml.safe_load(_read_resource_text(filename)) + + +@lru_cache(maxsize=1) +def _docker_compose_file() -> Dict[str, Any]: + return _read_resource_yaml( + "docker-compose-sftp-server.yaml" + ) # type: ignore[no-any-return] + + +@lru_cache(maxsize=1) +def _docker_file() -> str: + return _read_resource_text("Dockerfile-sftp-server") + + +def _seed_files() -> Iterable[Tuple[str, str]]: + if hasattr(importlib.resources, "files"): + # Use new API added in Python 3.9 + yield from ( + (file.name, file.read_text()) + for file in importlib.resources.files("scitacean.testing.sftp") + .joinpath("sftp_server_seed") + .iterdir() + ) + else: + # Old API, deprecated as of Python 3.11 + with importlib.resources.path( + "scitacean.testing.sftp", "sftp_server_seed" + ) as seed_dir: + for path in seed_dir.iterdir(): + yield path.name, path.read_text() + + +def local_access() -> SFTPAccess: + config = _docker_compose_file() + service = config["services"]["scitacean-test-sftp-server"] + env = {k: v for k, v in map(lambda s: s.split("="), service["environment"])} + return SFTPAccess( + host="localhost", + port=service["ports"][0].split(":")[0], + user=SFTPUser( + username=env["USER_NAME"], + password=env["USER_PASSWORD"], + ), + ) + + +def _copy_seed(target_seed_dir: Path) -> None: + for name, content in _seed_files(): + target_seed_dir.joinpath(name).write_text(content) + + +def configure(target_dir: Union[Path, str]) -> Path: + """Generate a config file for docker compose and copy seed data.""" + target_dir = Path(target_dir) + target_seed_dir = target_dir / "data" / "seed" + target_seed_dir.mkdir(parents=True) + _copy_seed(target_seed_dir) + + config_target = target_dir / "docker-compose.yaml" + config_target.write_text(yaml.dump(_docker_compose_file())) + target_dir.joinpath("Dockerfile-sftp-server").write_text(_docker_file()) + + target_dir.joinpath(".env").write_text( + f"""DATA_DIR={target_dir / 'data'} +SEED_DIR={target_seed_dir}""" + ) + + return config_target + + +def can_connect(sftp_access: SFTPAccess) -> bool: + try: + _make_client(sftp_access) + except paramiko.SSHException: + return False + return True + + +def wait_until_sftp_server_is_live( + sftp_access: SFTPAccess, max_time: float, n_tries: int +) -> None: + # The container takes a while to be fully live. + for _ in range(n_tries): + if can_connect(sftp_access): + return + time.sleep(max_time / n_tries) + if not can_connect(sftp_access): + raise RuntimeError("Cannot connect to SFTP server") + + +def cleanup_data_dir( + sftp_access: SFTPAccess, sftp_connect_with_username_password: Any +) -> None: + # Delete all directories created by tests. + # These are owned by root on the host and cannot be deleted by Python's tempfile. + connection = sftp_connect_with_username_password( + host=sftp_access.host, port=sftp_access.port + ) + connection.run( + "find /data -not -path '/data' -not -path '/data/seed' | xargs rm -rf", + hide=True, + in_stream=False, + ) + + +def _make_client(sftp_access: SFTPAccess) -> paramiko.SSHClient: + client = paramiko.SSHClient() + client.set_missing_host_key_policy(IgnorePolicy()) + client.connect( + hostname=sftp_access.host, + port=sftp_access.port, + username=sftp_access.user.username, + password=sftp_access.user.password, + allow_agent=False, + look_for_keys=False, + ) + return client + + +# Every time we create a container, it gets a new host key. +# So simply accept any host keys. +class IgnorePolicy(paramiko.MissingHostKeyPolicy): + def missing_host_key(self, client: Any, hostname: Any, key: Any) -> None: + return diff --git a/src/scitacean/testing/sftp/docker-compose-sftp-server.yaml b/src/scitacean/testing/sftp/docker-compose-sftp-server.yaml new file mode 100644 index 00000000..4d2f6103 --- /dev/null +++ b/src/scitacean/testing/sftp/docker-compose-sftp-server.yaml @@ -0,0 +1,26 @@ +version: "2.1" +services: + scitacean-test-sftp-server: + build: + context: . + dockerfile: Dockerfile-sftp-server + container_name: scitacean-test-sftp + hostname: scitacean-test-sftp-server + environment: + - PUID=1000 + - PGID=1000 + - TZ=CET # Not UTC on purpose to test timezone detection +# - PUBLIC_KEY=yourpublickey #optional +# - PUBLIC_KEY_FILE=/path/to/file #optional +# - PUBLIC_KEY_DIR=/path/to/directory/containing/_only_/pubkeys #optional +# - PUBLIC_KEY_URL=https://github.com/username.keys #optional + - SUDO_ACCESS=false + - PASSWORD_ACCESS=true + - USER_NAME=the-scitacean + - USER_PASSWORD=sup3r-str0ng + volumes: # configured in Python + - ${DATA_DIR}:/data + - ${SEED_DIR}:/data/seed + ports: + - "2222:2222" + restart: unless-stopped diff --git a/src/scitacean/testing/sftp/fixtures.py b/src/scitacean/testing/sftp/fixtures.py new file mode 100644 index 00000000..8eeb84ca --- /dev/null +++ b/src/scitacean/testing/sftp/fixtures.py @@ -0,0 +1,205 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +# mypy: disable-error-code="no-untyped-def" +"""Pytest fixtures to manage and access a local SFTP server.""" + +import logging +from pathlib import Path +from typing import Callable, Generator, Optional + +import pytest +from paramiko import SFTPClient, SSHClient + +from ..._internal import docker +from .._pytest_helpers import init_work_dir, root_tmp_dir +from ._pytest_helpers import sftp_enabled, skip_if_not_sftp +from ._sftp import ( + IgnorePolicy, + SFTPAccess, + configure, + local_access, + wait_until_sftp_server_is_live, +) + + +@pytest.fixture(scope="session") +def sftp_access(request: pytest.FixtureRequest) -> SFTPAccess: + """Fixture that returns SFTP access parameters. + + Returns + ------- + : + A URL and user to connect to the testing SFTP server. + The user has access to all initial files registered in the + database and permissions to create new files. + """ + skip_if_not_sftp(request) + return local_access() + + +@pytest.fixture(scope="session") +def sftp_base_dir( + request: pytest.FixtureRequest, tmp_path_factory: pytest.TempPathFactory +) -> Optional[Path]: + """Fixture that returns the base working directory for the SFTP server setup. + + Returns + ------- + : + A path to a directory on the host machine. + The directory gets populated by the + :func:`scitacean.testing.sftp.fixtures.sftp_fileserver` fixture. + It contains the docker configuration and volumes. + + Returns ``None`` if SFTP tests are disabled + """ + if not sftp_enabled(request): + return None + return root_tmp_dir(request, tmp_path_factory) / "scitacean-sftp" + + +@pytest.fixture(scope="session") +def sftp_data_dir(sftp_base_dir: Optional[Path]) -> Optional[Path]: + """Fixture that returns the data directory for the SFTP server setup. + + Returns + ------- + : + A path to a directory on the host machine. + The directory is mounted as ``/data`` on the server. + + Returns ``None`` if SFTP tests are disabled + """ + if sftp_base_dir is None: + return None + return sftp_base_dir / "data" + + +@pytest.fixture() +def require_sftp_fileserver(request, sftp_fileserver) -> None: + """Fixture to declare that a test needs a local SFTP server. + + Like :func:`scitacean.testing.sftp.sftp_fileserver` + but this skips the test if SFTP tests are disabled. + """ + skip_if_not_sftp(request) + + +@pytest.fixture(scope="session") +def sftp_fileserver( + request: pytest.FixtureRequest, + sftp_access: SFTPAccess, + sftp_base_dir: Optional[Path], + sftp_data_dir: Optional[Path], + sftp_connect_with_username_password, +) -> Generator[bool, None, None]: + """Fixture to declare that a test needs a local SFTP server. + + If SFTP tests are enabled, this fixture configures and starts an SFTP server + in a docker container the first time a test requests it. + The server and container will be stopped and removed at the end of the test session. + + Does nothing if the SFTP tests are disabled. + + Returns + ------- + : + True if SFTP tests are enabled and False otherwise. + """ + if sftp_base_dir is None: + yield False + return + + target_dir, counter = init_work_dir(request, sftp_base_dir, name=None) + + try: + with counter.increment() as count: + if count == 1: + _sftp_docker_up(target_dir, sftp_access) + elif not _sftp_server_is_running(): + raise RuntimeError("Expected SFTP server to be running") + yield True + finally: + with counter.decrement() as count: + if count == 0: + _sftp_docker_down(target_dir) + + +@pytest.fixture(scope="session") +def sftp_connect_with_username_password( + sftp_access: SFTPAccess, +) -> Callable[[str, int], SFTPClient]: + """Fixture that returns a function to connect to the testing SFTP server. + + Uses username+password from the test config. + + Returns + ------- + : + A function to pass as the ``connect`` argument when constructing a + :class:`scitacean.transfer.sftp.SFTPFileTransfer`. + + Examples + -------- + Explicitly connect to the test server: + + .. code-block:: python + + def test_sftp(sftp_access, + sftp_connect_with_username_password, + sftp_fileserver): + sftp = SFTPFileTransfer(host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password) + with sftp.connect_for_download() as connection: + # use connection + """ + + def connect(host: str, port: int) -> SFTPClient: + client = SSHClient() + client.set_missing_host_key_policy(IgnorePolicy()) + client.connect( + hostname=host, + port=port, + username=sftp_access.user.username, + password=sftp_access.user.password, + allow_agent=False, + look_for_keys=False, + ) + return client.open_sftp() + + return connect + + +def _sftp_docker_up(target_dir: Path, sftp_access: SFTPAccess) -> None: + if _sftp_server_is_running(): + raise RuntimeError("SFTP docker container is already running") + docker_compose_file = target_dir / "docker-compose.yaml" + log = logging.getLogger("scitacean.testing") + log.info("Starting docker container with SFTP server from %s", docker_compose_file) + configure(target_dir) + docker.docker_compose_up(docker_compose_file) + log.info("Waiting for SFTP docker to become accessible") + wait_until_sftp_server_is_live(sftp_access=sftp_access, max_time=20, n_tries=20) + log.info("Successfully connected to SFTP server") + # Give the user write access. + docker.docker_compose_run( + docker_compose_file, "scitacean-test-sftp-server", "chown", "1000:1000", "/data" + ) + + +def _sftp_docker_down(target_dir: Path) -> None: + # Check if container is running because the fixture can call this function + # if there was an exception in _sftp_docker_up. + # In that case, there is nothing to tear down. + if _sftp_server_is_running(): + docker_compose_file = target_dir / "docker-compose.yaml" + log = logging.getLogger("scitacean.testing") + log.info( + "Stopping docker container with SFTP server from %s", docker_compose_file + ) + docker.docker_compose_down(docker_compose_file) + + +def _sftp_server_is_running() -> bool: + return docker.container_is_running("scitacean-test-sftp") diff --git a/src/scitacean/testing/sftp/sftp_server_seed/table.csv b/src/scitacean/testing/sftp/sftp_server_seed/table.csv new file mode 100644 index 00000000..76891270 --- /dev/null +++ b/src/scitacean/testing/sftp/sftp_server_seed/table.csv @@ -0,0 +1,2 @@ +7,2 +5,2 diff --git a/src/scitacean/testing/sftp/sftp_server_seed/text.txt b/src/scitacean/testing/sftp/sftp_server_seed/text.txt new file mode 100644 index 00000000..499ea0ea --- /dev/null +++ b/src/scitacean/testing/sftp/sftp_server_seed/text.txt @@ -0,0 +1 @@ +This is some text for testing. diff --git a/src/scitacean/transfer/sftp.py b/src/scitacean/transfer/sftp.py new file mode 100644 index 00000000..fa59bf45 --- /dev/null +++ b/src/scitacean/transfer/sftp.py @@ -0,0 +1,331 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) + +import os +from contextlib import contextmanager +from datetime import datetime, timezone +from pathlib import Path +from typing import Callable, Iterator, List, Optional, Union + +from invoke.exceptions import UnexpectedExit +from paramiko import SFTPAttributes, SFTPClient, SSHClient + +from ..dataset import Dataset +from ..error import FileUploadError +from ..file import File +from ..filesystem import RemotePath +from ..logging import get_logger +from .util import source_folder_for + + +class SFTPDownloadConnection: + def __init__(self, *, sftp_client: SFTPClient, host: str) -> None: + self._sftp_client = sftp_client + self._host = host + + def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None: + """Download files from the given remote path.""" + for r, l in zip(remote, local): + self.download_file(remote=r, local=l) + + def download_file(self, *, remote: RemotePath, local: Path) -> None: + get_logger().info( + "Downloading file %s from host %s to %s", + remote, + self._host, + local, + ) + self._sftp_client.get(remotepath=remote.posix, localpath=os.fspath(local)) + + +class SFTPUploadConnection: + def __init__( + self, *, sftp_client: SFTPClient, source_folder: RemotePath, host: str + ) -> None: + self._sftp_client = sftp_client + self._source_folder = source_folder + self._host = host + + @property + def source_folder(self) -> RemotePath: + return self._source_folder + + def remote_path(self, filename: Union[str, RemotePath]) -> RemotePath: + return self.source_folder / filename + + def _make_source_folder(self) -> None: + try: + _mkdir_remote(self._sftp_client, self.source_folder) + except OSError as exc: + raise FileUploadError( + f"Failed to create source folder {self.source_folder}: {exc.args}" + ) from None + + def upload_files(self, *files: File) -> List[File]: + """Upload files to the remote folder.""" + self._make_source_folder() + uploaded = [] + try: + uploaded.extend(self._upload_file(file) for file in files) + except Exception: + self.revert_upload(*uploaded) + raise + return uploaded + + def _upload_file(self, file: File) -> File: + if file.local_path is None: + raise ValueError( + f"Cannot upload file to {file.remote_path}, " + "the file has no local path" + ) + remote_path = self.remote_path(file.remote_path) + get_logger().info( + "Uploading file %s to %s on host %s", + file.local_path, + remote_path, + self._host, + ) + st = self._sftp_client.put( + remotepath=remote_path.posix, localpath=os.fspath(file.local_path) + ) + return file.uploaded( + remote_gid=str(st.st_gid), + remote_uid=str(st.st_uid), + remote_creation_time=datetime.now().astimezone(timezone.utc), + remote_perm=str(st.st_mode), + remote_size=st.st_size, + ) + + def revert_upload(self, *files: File) -> None: + """Remove uploaded files from the remote folder.""" + for file in files: + self._revert_upload_single(remote=file.remote_path, local=file.local_path) + + if _remote_folder_is_empty(self._sftp_client, self.source_folder): + try: + get_logger().info( + "Removing empty remote directory %s on host %s", + self.source_folder, + self._host, + ) + self._sftp_client.rmdir(self.source_folder.posix) + except UnexpectedExit as exc: + get_logger().warning( + "Failed to remove empty remote directory %s on host:\n%s", + self.source_folder, + self._host, + exc.result, + ) + + def _revert_upload_single( + self, *, remote: RemotePath, local: Optional[Path] + ) -> None: + remote_path = self.remote_path(remote) + get_logger().info( + "Reverting upload of file %s to %s on host %s", + local, + remote_path, + self._host, + ) + + try: + self._sftp_client.remove(remote_path.posix) + except UnexpectedExit as exc: + get_logger().warning( + "Error reverting file %s:\n%s", remote_path, exc.result + ) + return + + +class SFTPFileTransfer: + """Upload / download files using SFTP. + + Configuration & Authentication + ------------------------------ + The file transfer connects to the server at the address given + as the ``host`` constructor argument. + This may be + + - a full url such as ``some.fileserver.edu``, + - an IP address like ``127.0.0.1``, + - or a host defined in the user's openSFTP config file. + + The file transfer can authenticate using username+password. + It will ask for those on the command line. + However, it is **highly recommended to set up a key and use an SFTP agent!** + This increases security as Scitacean no longer has to handle credentials itself. + And it is required for automated programs where a user cannot enter credentials + on a command line. + + Upload folder + ------------- + The file transfer can take an optional ``source_folder`` as a constructor argument. + If it is given, ``SFTPFileTransfer`` uploads all files to it and ignores the + source folder set in the dataset. + If it is not given, ``SFTPFileTransfer`` uses the dataset's source folder. + + The source folder argument to ``SFTPFileTransfer`` may be a Python format string. + In that case, all format fields are replaced by the corresponding fields + of the dataset. + All non-ASCII characters and most special ASCII characters are replaced. + This should avoid broken paths from essentially random contents in datasets. + + Examples + -------- + Given + + .. code-block:: python + + dset = Dataset(type="raw", + name="my-dataset", + source_folder="/dataset/source", + ) + + This uploads to ``/dataset/source``: + + .. code-block:: python + + file_transfer = SFTPFileTransfer(host="fileserver") + + This uploads to ``/transfer/folder``: + + .. code-block:: python + + file_transfer = SFTPFileTransfer(host="fileserver", + source_folder="transfer/folder") + + This uploads to ``/transfer/my-dataset``: + (Note that ``{name}`` is replaced by ``dset.name``.) + + .. code-block:: python + + file_transfer = SFTPFileTransfer(host="fileserver", + source_folder="transfer/{name}") + + A useful approach is to include a unique ID in the source folder, for example + ``"/some/base/folder/{uid}"``, to avoid clashes between different datasets. + Scitacean will fill in the ``"{uid}"`` placeholder with a new UUID4. + """ + + def __init__( + self, + *, + host: str, + port: int = 22, + source_folder: Optional[Union[str, RemotePath]] = None, + connect: Optional[Callable[[str, Optional[int]], SFTPClient]] = None, + ) -> None: + """Construct a new SFTP file transfer. + + Parameters + ---------- + host: + URL or name of the server to connect to. + port: + Port of the server. + source_folder: + Upload files to this folder if set. + Otherwise, upload to the dataset's source_folder. + Ignored when downloading files. + connect: + If this argument is set, it will be called to create a client + for the server instead of the builtin method. + The function arguments are ``host`` and ``port`` as determined by the + arguments to ``__init__`` shown above. + """ + self._host = host + self._port = port + self._source_folder_pattern = ( + RemotePath(source_folder) + if isinstance(source_folder, str) + else source_folder + ) + self._connect = connect + + def source_folder_for(self, dataset: Dataset) -> RemotePath: + """Return the source folder used for the given dataset.""" + return source_folder_for(dataset, self._source_folder_pattern) + + @contextmanager + def connect_for_download(self) -> Iterator[SFTPDownloadConnection]: + """Create a connection for downloads, use as a context manager.""" + sftp_client = _connect(self._host, self._port, connect=self._connect) + try: + yield SFTPDownloadConnection(sftp_client=sftp_client, host=self._host) + finally: + sftp_client.close() + + @contextmanager + def connect_for_upload(self, dataset: Dataset) -> Iterator[SFTPUploadConnection]: + """Create a connection for uploads, use as a context manager. + + Parameters + ---------- + dataset: + The connection will be used to upload files of this dataset. + Used to determine the target folder. + """ + source_folder = self.source_folder_for(dataset) + sftp_client = _connect(self._host, self._port, connect=self._connect) + try: + yield SFTPUploadConnection( + sftp_client=sftp_client, source_folder=source_folder, host=self._host + ) + finally: + sftp_client.close() + + +def _default_connect(host: str, port: int) -> SFTPClient: + client = SSHClient() + client.load_system_host_keys() + client.connect(hostname=host, port=port) + return client.open_sftp() + + +def _connect( + host: str, + port: int, + connect: Optional[Callable[[str, Optional[int]], SFTPClient]], +) -> SFTPClient: + try: + if connect is None: + connect = _default_connect + return connect(host, port) + except Exception as exc: + # We pass secrets as arguments to functions called in this block, and those + # can be leaked through exception handlers. So catch all exceptions + # and strip the backtrace up to this point to hide those secrets. + raise type(exc)(exc.args) from None + + +def _remote_folder_is_empty(sftp: SFTPClient, path: RemotePath) -> bool: + return not sftp.listdir(path.posix) + + +def _mkdir_remote(sftp: SFTPClient, path: RemotePath) -> None: + if (parent := path.parent) not in (".", "/"): + _mkdir_remote(sftp, parent) + + st_stat = _try_remote_stat(sftp, path) + if st_stat is None: + sftp.mkdir(path.posix) + elif not _is_remote_dir(st_stat): + raise FileExistsError( + f"Cannot make directory because path points to a file: {path}" + ) + + +def _try_remote_stat(sftp: SFTPClient, path: RemotePath) -> Optional[SFTPAttributes]: + try: + return sftp.stat(path.posix) + except FileNotFoundError: + return None + + +def _is_remote_dir(st_stat: SFTPAttributes) -> bool: + if st_stat.st_mode is None: + return True # Assume it is a dir and let downstream code fail if it isn't. + return st_stat.st_mode & 0o040000 == 0o040000 + + +__all__ = ["SFTPFileTransfer", "SFTPUploadConnection", "SFTPDownloadConnection"] diff --git a/src/scitacean/transfer/ssh.py b/src/scitacean/transfer/ssh.py index 419a8e52..20c28921 100644 --- a/src/scitacean/transfer/ssh.py +++ b/src/scitacean/transfer/ssh.py @@ -2,6 +2,7 @@ # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) import os +import warnings from contextlib import contextmanager from datetime import datetime, timedelta from getpass import getpass @@ -21,10 +22,15 @@ from ..file import File from ..filesystem import RemotePath from ..logging import get_logger +from ..warning import VisibleDeprecationWarning from .util import source_folder_for -# TODO pass pid in put/revert? -# downloading does not need a pid, so it should not be required in the constructor/ +warnings.warn( + "SSHFileTransfer is deprecated and scheduled for removal in Scitacean v23.11.0." + "Use SFTPFileTransfer instead.", + VisibleDeprecationWarning, + stacklevel=0, +) class SSHDownloadConnection: @@ -278,6 +284,8 @@ class SSHFileTransfer: A useful approach is to include a unique ID in the source folder, for example ``"/some/base/folder/{uid}"``, to avoid clashes between different datasets. Scitacean will fill in the ``"{uid}"`` placeholder with a new UUID4. + + .. deprecated:: 23.08.0 """ def __init__( diff --git a/src/scitacean/warning.py b/src/scitacean/warning.py new file mode 100644 index 00000000..ef2d76c5 --- /dev/null +++ b/src/scitacean/warning.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) + +"""Custom warning classes.""" + + +class VisibleDeprecationWarning(UserWarning): + """Visible deprecation warning. + + By default, Python and in particular Jupyter will not show deprecation + warnings, so this class can be used when a very visible warning is helpful. + """ + + +VisibleDeprecationWarning.__module__ = "scitacean" diff --git a/tests/conftest.py b/tests/conftest.py index 89893bbc..7841902f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,10 +5,12 @@ import pytest from scitacean.testing.backend import add_pytest_option as add_backend_option +from scitacean.testing.sftp import add_pytest_option as add_sftp_option from scitacean.testing.ssh import add_pytest_option as add_ssh_option pytest_plugins = ( "scitacean.testing.backend.fixtures", + "scitacean.testing.sftp.fixtures", "scitacean.testing.ssh.fixtures", ) @@ -27,4 +29,5 @@ def pytest_addoption(parser: pytest.Parser) -> None: add_backend_option(parser) + add_sftp_option(parser) add_ssh_option(parser) diff --git a/tests/filesystem_test.py b/tests/filesystem_test.py index 084c6de3..cb424d4d 100644 --- a/tests/filesystem_test.py +++ b/tests/filesystem_test.py @@ -143,6 +143,18 @@ def test_remote_path_suffix(): assert RemotePath("source/file").suffix is None +def test_remote_path_parent(): + assert RemotePath("/").parent == RemotePath("/") + assert RemotePath("/folder").parent == RemotePath("/") + assert RemotePath("/folder/").parent == RemotePath("/") + assert RemotePath("/folder/sub").parent == RemotePath("/folder") + + assert RemotePath("").parent == RemotePath(".") + assert RemotePath(".").parent == RemotePath(".") + assert RemotePath("relative").parent == RemotePath(".") + assert RemotePath("relative/sub").parent == RemotePath("relative") + + def test_remote_path_truncated(): assert RemotePath("something-long.txt").truncated(10) == "someth.txt" assert RemotePath("longlonglong/short").truncated(5) == "longl/short" diff --git a/tests/transfer/sftp_test.py b/tests/transfer/sftp_test.py new file mode 100644 index 00000000..c61b22e0 --- /dev/null +++ b/tests/transfer/sftp_test.py @@ -0,0 +1,418 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Scitacean contributors (https://github.com/SciCatProject/scitacean) +# mypy: disable-error-code="no-untyped-def, return-value, arg-type, union-attr" + +import dataclasses +import tempfile +from contextlib import contextmanager +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Iterator + +import paramiko +import pytest + +from scitacean import Dataset, File, RemotePath +from scitacean.testing.client import FakeClient +from scitacean.testing.sftp import IgnorePolicy, skip_if_not_sftp +from scitacean.transfer.sftp import ( + SFTPDownloadConnection, + SFTPFileTransfer, + SFTPUploadConnection, +) + + +@pytest.fixture(scope="session", autouse=True) +def server(request, sftp_fileserver): + skip_if_not_sftp(request) + + +def test_download_one_file(sftp_access, sftp_connect_with_username_password, tmp_path): + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_download() as con: + con.download_files( + remote=[RemotePath("/data/seed/text.txt")], local=[tmp_path / "text.txt"] + ) + assert ( + tmp_path.joinpath("text.txt").read_text() == "This is some text for testing.\n" + ) + + +def test_download_two_files(sftp_access, sftp_connect_with_username_password, tmp_path): + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_download() as con: + con.download_files( + remote=[ + RemotePath("/data/seed/table.csv"), + RemotePath("/data/seed/text.txt"), + ], + local=[tmp_path / "local-table.csv", tmp_path / "text.txt"], + ) + assert tmp_path.joinpath("local-table.csv").read_text() == "7,2\n5,2\n" + assert ( + tmp_path.joinpath("text.txt").read_text() == "This is some text for testing.\n" + ) + + +def test_upload_one_file_source_folder_in_dataset( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/upload")) + tmp_path.joinpath("file0.txt").write_text("File to test upload123") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + assert con.source_folder == RemotePath("/data/upload") + con.upload_files( + File.from_local(path=tmp_path / "file0.txt", remote_path="upload_0.txt") + ) + + assert ( + sftp_data_dir.joinpath("upload", "upload_0.txt").read_text() + == "File to test upload123" + ) + + +def test_upload_one_file_source_folder_in_transfer( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", owner="librarian") + tmp_path.joinpath("file1.txt").write_text("File no. 2") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + source_folder="/data/upload/{owner}", + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + assert con.source_folder == RemotePath("/data/upload/librarian") + con.upload_files( + File.from_local( + path=tmp_path / "file1.txt", remote_path=RemotePath("upload_1.txt") + ) + ) + + assert ( + sftp_data_dir.joinpath("upload", "librarian", "upload_1.txt").read_text() + == "File no. 2" + ) + + +def test_upload_two_files( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/upload2")) + tmp_path.joinpath("file2.1.md").write_text("First part of file 2") + tmp_path.joinpath("file2.2.md").write_text("Second part of file 2") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + assert con.source_folder == RemotePath("/data/upload2") + con.upload_files( + File.from_local(path=tmp_path / "file2.1.md", base_path=tmp_path), + File.from_local(path=tmp_path / "file2.2.md", base_path=tmp_path), + ) + + assert ( + sftp_data_dir.joinpath("upload2", "file2.1.md").read_text() + == "First part of file 2" + ) + assert ( + sftp_data_dir.joinpath("upload2", "file2.2.md").read_text() + == "Second part of file 2" + ) + + +def test_upload_one_file_existing_source_folder( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/upload-multiple")) + tmp_path.joinpath("file3.1.md").write_text("First part of file 3") + tmp_path.joinpath("file3.2.md").write_text("Second part of file 3") + + # First upload to ensure the folder exists. + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + assert con.source_folder == RemotePath("/data/upload-multiple") + con.upload_files( + File.from_local(path=tmp_path / "file3.1.md", base_path=tmp_path), + ) + + # Second upload to test uploading to existing folder. + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + assert con.source_folder == RemotePath("/data/upload-multiple") + con.upload_files( + File.from_local(path=tmp_path / "file3.2.md", base_path=tmp_path), + ) + + assert ( + sftp_data_dir.joinpath("upload-multiple", "file3.1.md").read_text() + == "First part of file 3" + ) + assert ( + sftp_data_dir.joinpath("upload-multiple", "file3.2.md").read_text() + == "Second part of file 3" + ) + + +def test_revert_all_uploaded_files_single( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/revert-all-test-1")) + tmp_path.joinpath("file3.txt").write_text("File that should get reverted") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + file = File.from_local(path=tmp_path / "file3.txt", base_path=tmp_path) + con.upload_files(file) + con.revert_upload(file) + + assert "revert-all-test-1" not in (p.name for p in sftp_data_dir.iterdir()) + + +def test_revert_all_uploaded_files_two( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/revert-all-test-2")) + tmp_path.joinpath("file3.1.txt").write_text("File that should get reverted 1") + tmp_path.joinpath("file3.2.txt").write_text("File that should get reverted 2") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + file1 = File.from_local(path=tmp_path / "file3.1.txt", base_path=tmp_path) + file2 = File.from_local(path=tmp_path / "file3.2.txt", base_path=tmp_path) + con.upload_files(file1, file2) + con.revert_upload(file1, file2) + + assert "revert-all-test-2" not in (p.name for p in sftp_data_dir.iterdir()) + + +def test_revert_one_uploaded_file( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/revert-test")) + tmp_path.joinpath("file4.txt").write_text("File that should get reverted") + tmp_path.joinpath("file5.txt").write_text("File that should be kept") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + file4 = File.from_local(path=tmp_path / "file4.txt", base_path=tmp_path) + file5 = File.from_local(path=tmp_path / "file5.txt", base_path=tmp_path) + con.upload_files(file4, file5) + con.revert_upload(file4) + + assert "file4.txt" not in ( + p.name for p in (sftp_data_dir / "revert-test").iterdir() + ) + assert ( + sftp_data_dir.joinpath("revert-test", "file5.txt").read_text() + == "File that should be kept" + ) + + +def test_stat_uploaded_file( + sftp_access, sftp_connect_with_username_password, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/upload6")) + tmp_path.joinpath("file6.txt").write_text("File to test upload no 6") + + sftp = SFTPFileTransfer( + host=sftp_access.host, + port=sftp_access.port, + connect=sftp_connect_with_username_password, + ) + with sftp.connect_for_upload(dataset=ds) as con: + [uploaded] = con.upload_files( + File.from_local(path=tmp_path / "file6.txt", remote_path="upload_6.txt") + ) + + st = (sftp_data_dir / "upload6" / "upload_6.txt").stat() + assert uploaded.size == st.st_size + + # Set in docker-compose + assert uploaded.remote_uid == "1000" + assert uploaded.remote_gid == "1000" + + uploaded = dataclasses.replace(uploaded, local_path=None) + assert datetime.now(tz=timezone.utc) - uploaded.creation_time < timedelta(seconds=5) + + +class CorruptingSFTP(paramiko.SFTPClient): + """Appends bytes to uploaded files to simulate a broken transfer.""" + + def put(self, localpath, remotepath, callback=None, confirm=True): + with open(localpath, "r") as f: + content = f.read() + with tempfile.TemporaryDirectory() as tempdir: + corrupted_path = Path(tempdir) / "corrupted" + with open(corrupted_path, "w") as f: + f.write(content + "\nevil bytes") + super().put(str(corrupted_path), remotepath, callback, confirm) + + +class CorruptingTransfer(paramiko.Transport): + """Uses CorruptingSFTP to simulate a broken connection.""" + + def open_sftp_client(self) -> paramiko.SFTPClient: + return CorruptingSFTP.from_transport(self) + + +@pytest.fixture() +def sftp_corrupting_connect(sftp_access, sftp_connection_config): + def connect(host: str, port: int) -> paramiko.SFTPClient: + client = paramiko.SSHClient() + client.set_missing_host_key_policy(IgnorePolicy()) + client.connect( + hostname=host, + port=port, + username=sftp_access.user.username, + password=sftp_access.user.password, + allow_agent=False, + look_for_keys=False, + transport_factory=CorruptingTransfer, + ) + return client.open_sftp() + + return connect + + +class RaisingSFTP(paramiko.SFTPClient): + def put(self, localpath, remotepath, callback=None, confirm=True): + raise RuntimeError("Upload disabled") + + +class RaisingTransfer(paramiko.Transport): + def open_sftp_client(self) -> paramiko.SFTPClient: + return RaisingSFTP.from_transport(self) + + +@pytest.fixture() +def sftp_raising_connect(sftp_access): + def connect(host: str, port: int) -> paramiko.SFTPClient: + client = paramiko.SSHClient() + client.set_missing_host_key_policy(IgnorePolicy()) + client.connect( + hostname=host, + port=port, + username=sftp_access.user.username, + password=sftp_access.user.password, + allow_agent=False, + look_for_keys=False, + transport_factory=RaisingTransfer, + ) + return client.open_sftp() + + return connect + + +def test_upload_file_reverts_if_upload_fails( + sftp_access, sftp_raising_connect, tmp_path, sftp_data_dir +): + ds = Dataset(type="raw", source_folder=RemotePath("/data/upload8")) + tmp_path.joinpath("file8.txt").write_text("File to test upload no 8") + + sftp = SFTPFileTransfer( + host=sftp_access.host, port=sftp_access.port, connect=sftp_raising_connect + ) + with sftp.connect_for_upload(dataset=ds) as con: + with pytest.raises(RuntimeError): + con.upload_files( + File.from_local( + path=tmp_path / "file8.txt", + remote_path=RemotePath("upload_8.txt"), + ) + ) + + assert "upload8" not in (p.name for p in sftp_data_dir.iterdir()) + + +class SFTPTestFileTransfer(SFTPFileTransfer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @contextmanager + def connect_for_download(self) -> Iterator[SFTPDownloadConnection]: + with super().connect_for_download() as connection: + yield connection + + @contextmanager + def connect_for_upload(self, dataset: Dataset) -> Iterator[SFTPUploadConnection]: + with super().connect_for_upload(dataset=dataset) as connection: + yield connection + + +# This test is referenced in the docs. +def test_client_with_sftp( + require_sftp_fileserver, + sftp_access, + sftp_connect_with_username_password, + sftp_data_dir, + tmp_path, +): + tmp_path.joinpath("file1.txt").write_text("File contents") + + client = FakeClient.without_login( + url="", + file_transfer=SFTPTestFileTransfer( + connect=sftp_connect_with_username_password, + host=sftp_access.host, + port=sftp_access.port, + ), + ) + ds = Dataset( + access_groups=["group1"], + contact_email="p.stibbons@uu.am", + creation_location="UU", + creation_time=datetime(2023, 6, 23, 10, 0, 0), + owner="PonderStibbons", + owner_group="uu", + principal_investigator="MustrumRidcully", + source_folder="/data", + type="raw", + ) + ds.add_local_files(tmp_path / "file1.txt", base_path=tmp_path) + finalized = client.upload_new_dataset_now(ds) + + downloaded = client.get_dataset(finalized.pid) + downloaded = client.download_files(downloaded, target=tmp_path / "download") + + assert sftp_data_dir.joinpath("file1.txt").read_text() == "File contents" + assert downloaded.files[0].local_path.read_text() == "File contents" diff --git a/tox.ini b/tox.ini index a8274fa7..46bb49d5 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ isolated_build = true [testenv] deps = -r requirements/test.txt commands = - full: python -m pytest --backend-tests --ssh-tests + full: python -m pytest --backend-tests --sftp-tests !full: python -m pytest [testenv:pydantic2]