Skip to content

Commit

Permalink
Merge pull request #143 from SciCatProject/missing-docstrings
Browse files Browse the repository at this point in the history
Add missing docstrings and require docstrings in static analysis
  • Loading branch information
jl-wynen authored Sep 25, 2023
2 parents 07a721b + fa14b45 commit 6a16125
Show file tree
Hide file tree
Showing 14 changed files with 95 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ File transfer
:template: scitacean-class-template.rst
:recursive:

transfer.ssh.SFTPFileTransfer
transfer.sftp.SFTPFileTransfer
transfer.ssh.SSHFileTransfer

Auxiliary classes
Expand Down
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,19 @@ ignore = [
"B905", # `zip()` without an explicit `strict=` parameter
"S324", # insecure hsh function; we don't use hashing for security
"E741", "E742", "E743", # do not use names ‘l’, ‘O’, or ‘I’; they are not a problem with a proper font
"D100", "D101", "D102", "D103", "D104", "D105", # TODO remove D10* once everything has docstrings
"D105",
]
extend-exclude = [".*", "__pycache__", "build", "dist", "venv"]
fixable = ["I001"]

[tool.ruff.per-file-ignores]
"tests/*" = ["S101"] # asserts are fine in tests
"tests/*" = [
"S101", # asserts are fine in tests
"D10", # no docstrings required in tests
]
"tools/*" = ["D10"]
"docs/conf.py" = ["D10"]
"src/scitacean/model.py" = ["D10"]
"src/scitacean/testing/strategies.py" = ["D401"]

[tool.ruff.isort]
Expand Down
2 changes: 2 additions & 0 deletions src/scitacean/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)

"""High-level interface for SciCat."""

import importlib.metadata

try:
Expand Down
2 changes: 2 additions & 0 deletions src/scitacean/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@


class Dataset(DatasetBase):
"""Metadata and linked data files for a measurement, simulation, or analysis."""

@classmethod
def from_download_models(
cls,
Expand Down
4 changes: 3 additions & 1 deletion src/scitacean/pid.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""Helper type for handling persistent identifiers."""

from __future__ import annotations

import uuid
Expand Down Expand Up @@ -30,7 +32,7 @@ def _get_pid_core_schema(


class PID:
"""Stores the ID of database item.
"""Stores the persistent identifier of a database item.
The ID is split into a prefix and the main identifier.
The prefix identifies an instance of SciCat and the main identifier a dataset.
Expand Down
14 changes: 13 additions & 1 deletion src/scitacean/testing/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def __init__(self, main_client: FakeClient) -> None:
def get_dataset_model(
self, pid: PID, strict_validation: bool = False
) -> model.DownloadDataset:
"""Fetch a dataset from SciCat."""
_ = strict_validation # unused by fake
try:
return self.main.datasets[pid]
Expand All @@ -187,7 +188,8 @@ def get_dataset_model(
@_conditionally_disabled
def get_orig_datablocks(
self, pid: PID, strict_validation: bool = False
) -> List[model.DownloadOrigDatablock]: # TODO here and rest of classes
) -> List[model.DownloadOrigDatablock]:
"""Fetch an orig datablock from SciCat."""
_ = strict_validation # unused by fake
try:
return self.main.orig_datablocks[pid]
Expand All @@ -200,13 +202,15 @@ def get_orig_datablocks(
def get_attachments_for_dataset(
self, pid: PID, strict_validation: bool = False
) -> List[model.DownloadAttachment]:
"""Fetch all attachments from SciCat for a given dataset."""
_ = strict_validation # unused by fake
return self.main.attachments.get(pid) or []

@_conditionally_disabled
def create_dataset_model(
self, dset: Union[model.UploadDerivedDataset, model.UploadRawDataset]
) -> model.DownloadDataset:
"""Create a new dataset in SciCat."""
ingested = _process_dataset(dset)
pid: PID = ingested.pid # type: ignore[assignment]
self.main.datasets[pid] = ingested
Expand All @@ -216,6 +220,7 @@ def create_dataset_model(
def create_orig_datablock(
self, dblock: model.UploadOrigDatablock
) -> model.DownloadOrigDatablock:
"""Create a new orig datablock in SciCat."""
ingested = _process_orig_datablock(dblock)
dataset_id = ingested.datasetId
if dataset_id not in self.main.datasets:
Expand All @@ -229,6 +234,7 @@ def create_attachment_for_dataset(
attachment: model.UploadAttachment,
dataset_id: Optional[PID] = None,
) -> model.DownloadAttachment:
"""Create a new attachment for a dataset in SciCat."""
if dataset_id is None:
dataset_id = attachment.datasetId
if dataset_id is None:
Expand Down Expand Up @@ -348,6 +354,12 @@ def process_uploaded_dataset(
Optional[List[model.DownloadOrigDatablock]],
Optional[List[model.DownloadAttachment]],
]:
"""Process a dataset as if it was uploaded to SciCat.
This function attempts to mimic how SciCat converts uploaded dataset
(and associated) models to the in-database (and download) models.
It is not completely faithful to the real SciCat but only an approximation.
"""
dblocks = (
list(map(_process_orig_datablock, orig_datablocks))
if orig_datablocks is not None
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/testing/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def _create_raw_dataset(client: FakeClient) -> None:


def setup_fake_client() -> FakeClient:
"""Create a fake client for use in the user guide."""
client = FakeClient.from_token(
url="fake-url.sci/api/v3",
token="fake-token", # noqa: S106
Expand Down
7 changes: 7 additions & 0 deletions src/scitacean/testing/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ def __init__(self, fs: Optional[FakeFilesystem], files: Dict[RemotePath, bytes])
self.fs = fs

def download_file(self, *, remote: RemotePath, local: Path) -> None:
"""Download a single file."""
if self.fs is not None:
self.fs.create_file(local, contents=self.files[remote])
else:
with open(local, "wb") as f:
f.write(self.files[remote])

def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None:
"""Download multiple files."""
for r, l in zip(remote, local):
self.download_file(remote=r, local=l)

Expand Down Expand Up @@ -63,11 +65,13 @@ def _upload_file(self, *, remote: RemotePath, local: Optional[Path]) -> RemotePa
return remote

def upload_files(self, *files: File) -> List[File]:
"""Upload files."""
for file in files:
self._upload_file(remote=file.remote_path, local=file.local_path)
return list(files)

def revert_upload(self, *files: File) -> None:
"""Remove uploaded files."""
for file in files:
remote = self._remote_path(file.remote_path)
self.reverted[remote] = self.files.pop(remote)
Expand Down Expand Up @@ -134,14 +138,17 @@ def __init__(
self._source_folder_pattern = source_folder

def source_folder_for(self, dataset: Dataset) -> RemotePath:
"""Return the source folder for a given dataset."""
return source_folder_for(dataset, self._source_folder_pattern)

@contextmanager
def connect_for_download(self) -> Iterator[FakeDownloadConnection]:
"""Open a connection for downloads."""
yield FakeDownloadConnection(fs=self.fs, files=self.files)

@contextmanager
def connect_for_upload(self, dataset: Dataset) -> Iterator[FakeUploadConnection]:
"""Open a connection for uploads."""
yield FakeUploadConnection(
files=self.files,
reverted=self.reverted,
Expand Down
5 changes: 5 additions & 0 deletions src/scitacean/transfer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""File transfers to upload and download files to / from a file server.
All file transfers are designed to work with :class:`scitacean.Client`.
Refer to the submodules for concrete setup and usage instructions.
"""
57 changes: 47 additions & 10 deletions src/scitacean/transfer/sftp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
"""SFTP file transfer."""

import os
from contextlib import contextmanager
Expand All @@ -19,6 +20,12 @@


class SFTPDownloadConnection:
"""Connection for downloading files with SFTP.
Should be created using
:meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_download`.
"""

def __init__(self, *, sftp_client: SFTPClient, host: str) -> None:
self._sftp_client = sftp_client
self._host = host
Expand All @@ -29,6 +36,7 @@ def download_files(self, *, remote: List[RemotePath], local: List[Path]) -> None
self.download_file(remote=r, local=l)

def download_file(self, *, remote: RemotePath, local: Path) -> None:
"""Download a file from the given remote path."""
get_logger().info(
"Downloading file %s from host %s to %s",
remote,
Expand All @@ -39,6 +47,12 @@ def download_file(self, *, remote: RemotePath, local: Path) -> None:


class SFTPUploadConnection:
"""Connection for uploading files with SFTP.
Should be created using
:meth:`scitacean.transfer.sftp.SFTPFileTransfer.connect_for_upload`.
"""

def __init__(
self, *, sftp_client: SFTPClient, source_folder: RemotePath, host: str
) -> None:
Expand All @@ -48,9 +62,11 @@ def __init__(

@property
def source_folder(self) -> RemotePath:
"""The source folder this connection uploads to."""
return self._source_folder

def remote_path(self, filename: Union[str, RemotePath]) -> RemotePath:
"""Return the complete remote path for a given path."""
return self.source_folder / filename

def _make_source_folder(self) -> None:
Expand Down Expand Up @@ -147,15 +163,13 @@ class SFTPFileTransfer:
This may be
- a full url such as ``some.fileserver.edu``,
- an IP address like ``127.0.0.1``,
- or a host defined in the user's openSFTP config file.
- or an IP address like ``127.0.0.1``.
The file transfer can authenticate using username+password.
It will ask for those on the command line.
However, it is **highly recommended to set up a key and use an SFTP agent!**
This increases security as Scitacean no longer has to handle credentials itself.
And it is required for automated programs where a user cannot enter credentials
on a command line.
The file transfer can currently only authenticate through an SSH agent.
The agent must be set up for the chosen host and hold a valid key.
If this is not the case, it is possible to inject a custom ``connect`` function
that authenticates in a different way.
See the examples below.
Upload folder
-------------
Expand Down Expand Up @@ -192,7 +206,7 @@ class SFTPFileTransfer:
.. code-block:: python
file_transfer = SFTPFileTransfer(host="fileserver",
source_folder="transfer/folder")
source_folder="transfer/folder")
This uploads to ``/transfer/my-dataset``:
(Note that ``{name}`` is replaced by ``dset.name``.)
Expand All @@ -202,9 +216,32 @@ class SFTPFileTransfer:
file_transfer = SFTPFileTransfer(host="fileserver",
source_folder="transfer/{name}")
A useful approach is to include a unique ID in the source folder, for example
A useful approach is to include a unique ID in the source folder, for example,
``"/some/base/folder/{uid}"``, to avoid clashes between different datasets.
Scitacean will fill in the ``"{uid}"`` placeholder with a new UUID4.
The connection and authentication method can be customized
using the ``connect`` argument.
For example, to use a specific username + SSH key file, use the following:
.. code-block:: python
def connect(host, port):
from paramiko import SSHClient
client = SSHClient()
client.load_system_host_keys()
client.connect(
hostname=host,
port=port,
username="<username>",
key_filename="<key-file-name>",
)
return client.open_sftp()
file_transfer = SFTPFileTransfer(host="fileserver", connect=connect)
The :class:`paramiko.client.SSHClient` can be configured as needed in this function.
"""

def __init__(
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/transfer/ssh.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2023 SciCat Project (https://github.com/SciCatProject/scitacean)
# ruff: noqa: D100, D101, D102, D103

import os
import warnings
Expand Down
1 change: 1 addition & 0 deletions src/scitacean/util/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Generic utilities."""
1 change: 1 addition & 0 deletions src/scitacean/util/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def __init__(
self._expires_at = expires_at - tolerance

def get_str(self) -> str:
"""Return the stored plain str object."""
if self._is_expired():
raise RuntimeError("Login has expired")
return super().get_str()
Expand Down
3 changes: 3 additions & 0 deletions src/scitacean/util/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def parse(
Optional[StrOrLiteralStr],
]
]:
"""Parse a format string."""

def add0(field_name: Optional[str]) -> Optional[str]:
if field_name == "uid":
return field_name
Expand All @@ -69,6 +71,7 @@ def add0(field_name: Optional[str]) -> Optional[str]:
return ((t[0], add0(t[1]), t[2], t[3]) for t in super().parse(format_string))

def format_field(self, value: Any, format_spec: str) -> str:
"""Format a field with the given spec."""
if value is None:
raise ValueError("Cannot format path, dataset field is None")
formatted: str = super().format_field(value, format_spec)
Expand Down

0 comments on commit 6a16125

Please sign in to comment.