Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing docstrings and require docstrings in static analysis #143

Merged
merged 4 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ File transfer
:template: scitacean-class-template.rst
:recursive:

transfer.ssh.SFTPFileTransfer
transfer.sftp.SFTPFileTransfer
transfer.ssh.SSHFileTransfer

Auxiliary classes
Expand Down
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,19 @@ ignore = [
"B905", # `zip()` without an explicit `strict=` parameter
"S324", # insecure hsh function; we don't use hashing for security
"E741", "E742", "E743", # do not use names ‘l’, ‘O’, or ‘I’; they are not a problem with a proper font
"D100", "D101", "D102", "D103", "D104", "D105", # TODO remove D10* once everything has docstrings
"D105",
]
extend-exclude = [".*", "__pycache__", "build", "dist", "venv"]
fixable = ["I001"]

[tool.ruff.per-file-ignores]
"tests/*" = ["S101"] # asserts are fine in tests
"tests/*" = [
"S101", # asserts are fine in tests
"D10", # no docstrings required in tests
]
"tools/*" = ["D10"]
"docs/conf.py" = ["D10"]
"src/scitacean/model.py" = ["D10"]
"src/scitacean/testing/strategies.py" = ["D401"]

[tool.ruff.isort]
Expand Down
2 changes: 2 additions & 0 deletions src/scitacean/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)

"""High-level interface for SciCat."""

import importlib.metadata

try:
Expand Down
2 changes: 2 additions & 0 deletions src/scitacean/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@


class Dataset(DatasetBase):
"""Metadata and linked data files for a measurement, simulation, or analysis."""

@classmethod
def from_download_models(
cls,
Expand Down
4 changes: 3 additions & 1 deletion src/scitacean/pid.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""Helper type for handling persistent identifiers."""

from __future__ import annotations

import uuid
Expand Down Expand Up @@ -30,7 +32,7 @@ def _get_pid_core_schema(


class PID:
"""Stores the ID of database item.
"""Stores the persistent identifier of a database item.

The ID is split into a prefix and the main identifier.
The prefix identifies an instance of SciCat and the main identifier a dataset.
Expand Down
14 changes: 13 additions & 1 deletion src/scitacean/testing/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def __init__(self, main_client: FakeClient) -> None:
def get_dataset_model(
self, pid: PID, strict_validation: bool = False
) -> model.DownloadDataset:
"""Fetch a dataset from SciCat."""
_ = strict_validation # unused by fake
try:
return self.main.datasets[pid]
Expand All @@ -187,7 +188,8 @@ def get_dataset_model(
@_conditionally_disabled
def get_orig_datablocks(
self, pid: PID, strict_validation: bool = False
) -> List[model.DownloadOrigDatablock]: # TODO here and rest of classes
) -> List[model.DownloadOrigDatablock]:
"""Fetch an orig datablock from SciCat."""
_ = strict_validation # unused by fake
try:
return self.main.orig_datablocks[pid]
Expand All @@ -200,13 +202,15 @@ def get_orig_datablocks(
def get_attachments_for_dataset(
self, pid: PID, strict_validation: bool = False
) -> List[model.DownloadAttachment]:
"""Fetch all attachments from SciCat for a given dataset."""
_ = strict_validation # unused by fake
return self.main.attachments.get(pid) or []

@_conditionally_disabled
def create_dataset_model(
self, dset: Union[model.UploadDerivedDataset, model.UploadRawDataset]
) -> model.DownloadDataset:
"""Create a new dataset in SciCat."""
ingested = _process_dataset(dset)
pid: PID = ingested.pid # type: ignore[assignment]
self.main.datasets[pid] = ingested
Expand All @@ -216,6 +220,7 @@ def create_dataset_model(
def create_orig_datablock(
self, dblock: model.UploadOrigDatablock
) -> model.DownloadOrigDatablock:
"""Create a new orig datablock in SciCat."""
ingested = _process_orig_datablock(dblock)
dataset_id = ingested.datasetId
if dataset_id not in self.main.datasets:
Expand All @@ -229,6 +234,7 @@ def create_attachment_for_dataset(
attachment: model.UploadAttachment,
dataset_id: Optional[PID] = None,
) -> model.DownloadAttachment:
"""Create a new attachment for a dataset in SciCat."""
if dataset_id is None:
dataset_id = attachment.datasetId
if dataset_id is None:
Expand Down Expand Up @@ -348,6 +354,12 @@ def process_uploaded_dataset(
Optional[List[model.DownloadOrigDatablock]],
Optional[List[model.DownloadAttachment]],
]:
"""Process a dataset as if it was uploaded to SciCat.

This function attempts to mimic how SciCat converts uploaded dataset
(and associated) models to the in-database (and download) models.
It is not completely faithful to the real SciCat but only an approximation.
"""
dblocks = (
list(map(_process_orig_datablock, orig_datablocks))
if orig_datablocks is not None
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/testing/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def _create_raw_dataset(client: FakeClient) -> None:


def setup_fake_client() -> FakeClient:
"""Create a fake client for use in the user guide."""
client = FakeClient.from_token(
url="fake-url.sci/api/v3",
token="fake-token", # noqa: S106
Expand Down
7 changes: 7 additions & 0 deletions src/scitacean/testing/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ def __init__(self, fs: Optional[FakeFilesystem], files: Dict[RemotePath, bytes])
self.fs = fs

def download_file(self, *, remote: RemotePath, local: Path) -> None:
"""Download a single file."""
if self.fs is not None:
self.fs.create_file(local, contents=self.files[remote])
else:
with open(local, "wb") as f:
f.write(self.files[remote])

def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None:
"""Download multiple files."""
for r, l in zip(remote, local):
self.download_file(remote=r, local=l)

Expand Down Expand Up @@ -63,11 +65,13 @@ def _upload_file(self, *, remote: RemotePath, local: Optional[Path]) -> RemotePa
return remote

def upload_files(self, *files: File) -> List[File]:
"""Upload files."""
for file in files:
self._upload_file(remote=file.remote_path, local=file.local_path)
return list(files)

def revert_upload(self, *files: File) -> None:
"""Remove uploaded files."""
for file in files:
remote = self._remote_path(file.remote_path)
self.reverted[remote] = self.files.pop(remote)
Expand Down Expand Up @@ -134,14 +138,17 @@ def __init__(
self._source_folder_pattern = source_folder

def source_folder_for(self, dataset: Dataset) -> RemotePath:
"""Return the source folder for a given dataset."""
return source_folder_for(dataset, self._source_folder_pattern)

@contextmanager
def connect_for_download(self) -> Iterator[FakeDownloadConnection]:
"""Open a connection for downloads."""
yield FakeDownloadConnection(fs=self.fs, files=self.files)

@contextmanager
def connect_for_upload(self, dataset: Dataset) -> Iterator[FakeUploadConnection]:
"""Open a connection for uploads."""
yield FakeUploadConnection(
files=self.files,
reverted=self.reverted,
Expand Down
5 changes: 5 additions & 0 deletions src/scitacean/transfer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""File transfers to upload and download files to / from a file server.

All file transfers are designed to work with :class:`scitacean.Client`.
Refer to the submodules for concrete setup and usage instructions.
"""
57 changes: 47 additions & 10 deletions src/scitacean/transfer/sftp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""SFTP file transfer."""

import os
from contextlib import contextmanager
Expand All @@ -19,6 +20,12 @@


class SFTPDownloadConnection:
"""Connection for downloading files with SFTP.

Should be created using
:meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_download`.
"""

def __init__(self, *, sftp_client: SFTPClient, host: str) -> None:
self._sftp_client = sftp_client
self._host = host
Expand All @@ -29,6 +36,7 @@ def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None
self.download_file(remote=r, local=l)

def download_file(self, *, remote: RemotePath, local: Path) -> None:
"""Download a file from the given remote path."""
get_logger().info(
"Downloading file %s from host %s to %s",
remote,
Expand All @@ -39,6 +47,12 @@ def download_file(self, *, remote: RemotePath, local: Path) -> None:


class SFTPUploadConnection:
"""Connection for uploading files with SFTP.

Should be created using
:meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_upload`.
"""

def __init__(
self, *, sftp_client: SFTPClient, source_folder: RemotePath, host: str
) -> None:
Expand All @@ -48,9 +62,11 @@ def __init__(

@property
def source_folder(self) -> RemotePath:
"""The source folder this connection uploads to."""
return self._source_folder

def remote_path(self, filename: Union[str, RemotePath]) -> RemotePath:
"""Return the complete remote path for a given path."""
return self.source_folder / filename

def _make_source_folder(self) -> None:
Expand Down Expand Up @@ -147,15 +163,13 @@ class SFTPFileTransfer:
This may be

- a full url such as ``some.fileserver.edu``,
- an IP address like ``127.0.0.1``,
- or a host defined in the user's openSFTP config file.
- or an IP address like ``127.0.0.1``.

The file transfer can authenticate using username+password.
It will ask for those on the command line.
However, it is **highly recommended to set up a key and use an SFTP agent!**
This increases security as Scitacean no longer has to handle credentials itself.
And it is required for automated programs where a user cannot enter credentials
on a command line.
The file transfer can currently only authenticate through an SSH agent.
The agent must be set up for the chosen host and hold a valid key.
If this is not the case, it is possible to inject a custom ``connect`` function
that authenticates in a different way.
See the examples below.

Upload folder
-------------
Expand Down Expand Up @@ -192,7 +206,7 @@ class SFTPFileTransfer:
.. code-block:: python

file_transfer = SFTPFileTransfer(host="fileserver",
source_folder="transfer/folder")
source_folder="transfer/folder")

This uploads to ``/transfer/my-dataset``:
(Note that ``{name}`` is replaced by ``dset.name``.)
Expand All @@ -202,9 +216,32 @@ class SFTPFileTransfer:
file_transfer = SFTPFileTransfer(host="fileserver",
source_folder="transfer/{name}")

A useful approach is to include a unique ID in the source folder, for example
A useful approach is to include a unique ID in the source folder, for example,
``"/some/base/folder/{uid}"``, to avoid clashes between different datasets.
Scitacean will fill in the ``"{uid}"`` placeholder with a new UUID4.

The connection and authentication method can be customized
using the ``connect`` argument.
For example, to use a specific username + SSH key file, use the following:

.. code-block:: python

def connect(host, port):
from paramiko import SSHClient

client = SSHClient()
client.load_system_host_keys()
client.connect(
hostname=host,
port=port,
username="<username>",
key_filename="<key-file-name>",
)
return client.open_sftp()

file_transfer = SFTPFileTransfer(host="fileserver", connect=connect)

The :class:`paramiko.client.SSHClient` can be configured as needed in this function.
"""

def __init__(
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/transfer/ssh.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
# ruff: noqa: D100, D101, D102, D103

import os
import warnings
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/util/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Generic utilities."""
1 change: 1 addition & 0 deletions src/scitacean/util/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def __init__(
self._expires_at = expires_at - tolerance

def get_str(self) -> str:
"""Return the stored plain str object."""
if self._is_expired():
raise RuntimeError("Login has expired")
return super().get_str()
Expand Down
3 changes: 3 additions & 0 deletions src/scitacean/util/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def parse(
Optional[StrOrLiteralStr],
]
]:
"""Parse a format string."""

def add0(field_name: Optional[str]) -> Optional[str]:
if field_name == "uid":
return field_name
Expand All @@ -69,6 +71,7 @@ def add0(field_name: Optional[str]) -> Optional[str]:
return ((t[0], add0(t[1]), t[2], t[3]) for t in super().parse(format_string))

def format_field(self, value: Any, format_spec: str) -> str:
"""Format a field with the given spec."""
if value is None:
raise ValueError("Cannot format path, dataset field is None")
formatted: str = super().format_field(value, format_spec)
Expand Down