From e79cd6cf6374cc3fae59a772c0a3ff81876e4ff7 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 25 Sep 2023 14:47:28 +0200 Subject: [PATCH 1/4] Fix sftp module name in docs --- docs/reference/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 41fabce9..bac4c045 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -28,7 +28,7 @@ File transfer :template: scitacean-class-template.rst :recursive: - transfer.ssh.SFTPFileTransfer + transfer.sftp.SFTPFileTransfer transfer.ssh.SSHFileTransfer Auxiliary classes From 79f88bcbb43436580fb441917806c570da21f068 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 25 Sep 2023 14:48:56 +0200 Subject: [PATCH 2/4] Fix docs for SFTP --- src/scitacean/transfer/sftp.py | 42 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/scitacean/transfer/sftp.py b/src/scitacean/transfer/sftp.py index fa59bf45..d8058051 100644 --- a/src/scitacean/transfer/sftp.py +++ b/src/scitacean/transfer/sftp.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +"""SFTP file transfer.""" import os from contextlib import contextmanager @@ -147,15 +148,13 @@ class SFTPFileTransfer: This may be - a full url such as ``some.fileserver.edu``, - - an IP address like ``127.0.0.1``, - - or a host defined in the user's openSFTP config file. + - or an IP address like ``127.0.0.1``. - The file transfer can authenticate using username+password. - It will ask for those on the command line. - However, it is **highly recommended to set up a key and use an SFTP agent!** - This increases security as Scitacean no longer has to handle credentials itself. - And it is required for automated programs where a user cannot enter credentials - on a command line. + The file transfer can currently only authenticate through an SSH agent. + The agent must be set up for the chosen host and hold a valid key. + If this is not the case, it is possible to inject a custom ``connect`` function + that authenticates in a different way. + See the examples below. Upload folder ------------- @@ -192,7 +191,7 @@ class SFTPFileTransfer: .. code-block:: python file_transfer = SFTPFileTransfer(host="fileserver", - source_folder="transfer/folder") + source_folder="transfer/folder") This uploads to ``/transfer/my-dataset``: (Note that ``{name}`` is replaced by ``dset.name``.) @@ -202,9 +201,32 @@ class SFTPFileTransfer: file_transfer = SFTPFileTransfer(host="fileserver", source_folder="transfer/{name}") - A useful approach is to include a unique ID in the source folder, for example + A useful approach is to include a unique ID in the source folder, for example, ``"/some/base/folder/{uid}"``, to avoid clashes between different datasets. Scitacean will fill in the ``"{uid}"`` placeholder with a new UUID4. + + The connection and authentication method can be customized + using the ``connect`` argument. + For example, to use a specific username + SSH key file, use the following: + + .. code-block:: python + + def connect(host, port): + from paramiko import SSHClient + + client = SSHClient() + client.load_system_host_keys() + client.connect( + hostname=host, + port=port, + username="", + key_filename="", + ) + return client.open_sftp() + + file_transfer = SFTPFileTransfer(host="fileserver", connect=connect) + + The :class:`paramiko.client.SSHClient` can be configured as needed in this function. """ def __init__( From b4a298f91212bd91f381d59e0ba7e3b508e378d9 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 25 Sep 2023 14:54:26 +0200 Subject: [PATCH 3/4] Add missing docstrings --- src/scitacean/__init__.py | 2 ++ src/scitacean/dataset.py | 2 ++ src/scitacean/pid.py | 4 +++- src/scitacean/testing/client.py | 14 +++++++++++++- src/scitacean/testing/docs.py | 1 + src/scitacean/testing/transfer.py | 7 +++++++ src/scitacean/transfer/__init__.py | 5 +++++ src/scitacean/transfer/sftp.py | 15 +++++++++++++++ src/scitacean/transfer/ssh.py | 1 + src/scitacean/util/__init__.py | 1 + src/scitacean/util/credentials.py | 1 + src/scitacean/util/formatter.py | 3 +++ 12 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/scitacean/__init__.py b/src/scitacean/__init__.py index ec35694d..6dd85f7a 100644 --- a/src/scitacean/__init__.py +++ b/src/scitacean/__init__.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +"""High-level interface for SciCat.""" + import importlib.metadata try: diff --git a/src/scitacean/dataset.py b/src/scitacean/dataset.py index 8a9805a4..8fe2ad5c 100644 --- a/src/scitacean/dataset.py +++ b/src/scitacean/dataset.py @@ -40,6 +40,8 @@ class Dataset(DatasetBase): + """Metadata and linked data files for a measurement, simulation, or analysis.""" + @classmethod def from_download_models( cls, diff --git a/src/scitacean/pid.py b/src/scitacean/pid.py index 1609690c..72a7b15b 100644 --- a/src/scitacean/pid.py +++ b/src/scitacean/pid.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +"""Helper type for handling persistent identifiers.""" + from __future__ import annotations import uuid @@ -30,7 +32,7 @@ def _get_pid_core_schema( class PID: - """Stores the ID of database item. + """Stores the persistent identifier of a database item. The ID is split into a prefix and the main identifier. The prefix identifies an instance of SciCat and the main identifier a dataset. diff --git a/src/scitacean/testing/client.py b/src/scitacean/testing/client.py index 2b78cdb6..00d3863e 100644 --- a/src/scitacean/testing/client.py +++ b/src/scitacean/testing/client.py @@ -178,6 +178,7 @@ def __init__(self, main_client: FakeClient) -> None: def get_dataset_model( self, pid: PID, strict_validation: bool = False ) -> model.DownloadDataset: + """Fetch a dataset from SciCat.""" _ = strict_validation # unused by fake try: return self.main.datasets[pid] @@ -187,7 +188,8 @@ def get_dataset_model( @_conditionally_disabled def get_orig_datablocks( self, pid: PID, strict_validation: bool = False - ) -> List[model.DownloadOrigDatablock]: # TODO here and rest of classes + ) -> List[model.DownloadOrigDatablock]: + """Fetch an orig datablock from SciCat.""" _ = strict_validation # unused by fake try: return self.main.orig_datablocks[pid] @@ -200,6 +202,7 @@ def get_orig_datablocks( def get_attachments_for_dataset( self, pid: PID, strict_validation: bool = False ) -> List[model.DownloadAttachment]: + """Fetch all attachments from SciCat for a given dataset.""" _ = strict_validation # unused by fake return self.main.attachments.get(pid) or [] @@ -207,6 +210,7 @@ def get_attachments_for_dataset( def create_dataset_model( self, dset: Union[model.UploadDerivedDataset, model.UploadRawDataset] ) -> model.DownloadDataset: + """Create a new dataset in SciCat.""" ingested = _process_dataset(dset) pid: PID = ingested.pid # type: ignore[assignment] self.main.datasets[pid] = ingested @@ -216,6 +220,7 @@ def create_dataset_model( def create_orig_datablock( self, dblock: model.UploadOrigDatablock ) -> model.DownloadOrigDatablock: + """Create a new orig datablock in SciCat.""" ingested = _process_orig_datablock(dblock) dataset_id = ingested.datasetId if dataset_id not in self.main.datasets: @@ -229,6 +234,7 @@ def create_attachment_for_dataset( attachment: model.UploadAttachment, dataset_id: Optional[PID] = None, ) -> model.DownloadAttachment: + """Create a new attachment for a dataset in SciCat.""" if dataset_id is None: dataset_id = attachment.datasetId if dataset_id is None: @@ -348,6 +354,12 @@ def process_uploaded_dataset( Optional[List[model.DownloadOrigDatablock]], Optional[List[model.DownloadAttachment]], ]: + """Process a dataset as if it was uploaded to SciCat. + + This function attempts to mimic how SciCat converts uploaded dataset + (and associated) models to the in-database (and download) models. + It is not completely faithful to the real SciCat but only an approximation. + """ dblocks = ( list(map(_process_orig_datablock, orig_datablocks)) if orig_datablocks is not None diff --git a/src/scitacean/testing/docs.py b/src/scitacean/testing/docs.py index df7bf917..0904b973 100644 --- a/src/scitacean/testing/docs.py +++ b/src/scitacean/testing/docs.py @@ -79,6 +79,7 @@ def _create_raw_dataset(client: FakeClient) -> None: def setup_fake_client() -> FakeClient: + """Create a fake client for use in the user guide.""" client = FakeClient.from_token( url="fake-url.sci/api/v3", token="fake-token", # noqa: S106 diff --git a/src/scitacean/testing/transfer.py b/src/scitacean/testing/transfer.py index f83945c5..c788c0b4 100644 --- a/src/scitacean/testing/transfer.py +++ b/src/scitacean/testing/transfer.py @@ -27,6 +27,7 @@ def __init__(self, fs: Optional[FakeFilesystem], files: Dict[RemotePath, bytes]) self.fs = fs def download_file(self, *, remote: RemotePath, local: Path) -> None: + """Download a single file.""" if self.fs is not None: self.fs.create_file(local, contents=self.files[remote]) else: @@ -34,6 +35,7 @@ def download_file(self, *, remote: RemotePath, local: Path) -> None: f.write(self.files[remote]) def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None: + """Download multiple files.""" for r, l in zip(remote, local): self.download_file(remote=r, local=l) @@ -63,11 +65,13 @@ def _upload_file(self, *, remote: RemotePath, local: Optional[Path]) -> RemotePa return remote def upload_files(self, *files: File) -> List[File]: + """Upload files.""" for file in files: self._upload_file(remote=file.remote_path, local=file.local_path) return list(files) def revert_upload(self, *files: File) -> None: + """Remove uploaded files.""" for file in files: remote = self._remote_path(file.remote_path) self.reverted[remote] = self.files.pop(remote) @@ -134,14 +138,17 @@ def __init__( self._source_folder_pattern = source_folder def source_folder_for(self, dataset: Dataset) -> RemotePath: + """Return the source folder for a given dataset.""" return source_folder_for(dataset, self._source_folder_pattern) @contextmanager def connect_for_download(self) -> Iterator[FakeDownloadConnection]: + """Open a connection for downloads.""" yield FakeDownloadConnection(fs=self.fs, files=self.files) @contextmanager def connect_for_upload(self, dataset: Dataset) -> Iterator[FakeUploadConnection]: + """Open a connection for uploads.""" yield FakeUploadConnection( files=self.files, reverted=self.reverted, diff --git a/src/scitacean/transfer/__init__.py b/src/scitacean/transfer/__init__.py index 2cbaa3e9..151586b2 100644 --- a/src/scitacean/transfer/__init__.py +++ b/src/scitacean/transfer/__init__.py @@ -1,2 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +"""File transfers to upload and download files to / from a file server. + +All file transfers are designed to work with :class:`scitacean.Client`. +Refer to the submodules for concrete setup and usage instructions. +""" diff --git a/src/scitacean/transfer/sftp.py b/src/scitacean/transfer/sftp.py index d8058051..ac1a42f2 100644 --- a/src/scitacean/transfer/sftp.py +++ b/src/scitacean/transfer/sftp.py @@ -20,6 +20,12 @@ class SFTPDownloadConnection: + """Connection for downloading files with SFTP. + + Should be created using + :meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_download`. + """ + def __init__(self, *, sftp_client: SFTPClient, host: str) -> None: self._sftp_client = sftp_client self._host = host @@ -30,6 +36,7 @@ def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None self.download_file(remote=r, local=l) def download_file(self, *, remote: RemotePath, local: Path) -> None: + """Download a file from the given remote path.""" get_logger().info( "Downloading file %s from host %s to %s", remote, @@ -40,6 +47,12 @@ def download_file(self, *, remote: RemotePath, local: Path) -> None: class SFTPUploadConnection: + """Connection for uploading files with SFTP. + + Should be created using + :meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_upload`. + """ + def __init__( self, *, sftp_client: SFTPClient, source_folder: RemotePath, host: str ) -> None: @@ -49,9 +62,11 @@ def __init__( @property def source_folder(self) -> RemotePath: + """The source folder this connection uploads to.""" return self._source_folder def remote_path(self, filename: Union[str, RemotePath]) -> RemotePath: + """Return the complete remote path for a given path.""" return self.source_folder / filename def _make_source_folder(self) -> None: diff --git a/src/scitacean/transfer/ssh.py b/src/scitacean/transfer/ssh.py index 20c28921..26d2835e 100644 --- a/src/scitacean/transfer/ssh.py +++ b/src/scitacean/transfer/ssh.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean) +# ruff: noqa: D100, D101, D102, D103 import os import warnings diff --git a/src/scitacean/util/__init__.py b/src/scitacean/util/__init__.py index e69de29b..9b637c35 100644 --- a/src/scitacean/util/__init__.py +++ b/src/scitacean/util/__init__.py @@ -0,0 +1 @@ +"""Generic utilities.""" diff --git a/src/scitacean/util/credentials.py b/src/scitacean/util/credentials.py index f40501ea..20720acd 100644 --- a/src/scitacean/util/credentials.py +++ b/src/scitacean/util/credentials.py @@ -112,6 +112,7 @@ def __init__( self._expires_at = expires_at - tolerance def get_str(self) -> str: + """Return the stored plain str object.""" if self._is_expired(): raise RuntimeError("Login has expired") return super().get_str() diff --git a/src/scitacean/util/formatter.py b/src/scitacean/util/formatter.py index edf9a8ac..073ff464 100644 --- a/src/scitacean/util/formatter.py +++ b/src/scitacean/util/formatter.py @@ -59,6 +59,8 @@ def parse( Optional[StrOrLiteralStr], ] ]: + """Parse a format string.""" + def add0(field_name: Optional[str]) -> Optional[str]: if field_name == "uid": return field_name @@ -69,6 +71,7 @@ def add0(field_name: Optional[str]) -> Optional[str]: return ((t[0], add0(t[1]), t[2], t[3]) for t in super().parse(format_string)) def format_field(self, value: Any, format_spec: str) -> str: + """Format a field with the given spec.""" if value is None: raise ValueError("Cannot format path, dataset field is None") formatted: str = super().format_field(value, format_spec) From fa14b459a616b738c0a53e41f323cdf5321f4368 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 25 Sep 2023 14:54:35 +0200 Subject: [PATCH 4/4] Require docstrings --- pyproject.toml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d29eba6e..e2e8b4e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,13 +99,19 @@ ignore = [ "B905", # `zip()` without an explicit `strict=` parameter "S324", # insecure hsh function; we don't use hashing for security "E741", "E742", "E743", # do not use names ‘l’, ‘O’, or ‘I’; they are not a problem with a proper font - "D100", "D101", "D102", "D103", "D104", "D105", # TODO remove D10* once everything has docstrings + "D105", ] extend-exclude = [".*", "__pycache__", "build", "dist", "venv"] fixable = ["I001"] [tool.ruff.per-file-ignores] -"tests/*" = ["S101"] # asserts are fine in tests +"tests/*" = [ + "S101", # asserts are fine in tests + "D10", # no docstrings required in tests +] +"tools/*" = ["D10"] +"docs/conf.py" = ["D10"] +"src/scitacean/model.py" = ["D10"] "src/scitacean/testing/strategies.py" = ["D401"] [tool.ruff.isort]