Skip to content

Commit

Permalink
Merge pull request #130 from SciCatProject/use-checkalg
Browse files Browse the repository at this point in the history
Use the checksum algorithm of datablocks
  • Loading branch information
jl-wynen authored Aug 15, 2023
2 parents dcaa10f + 5358a2f commit 7f9e0cb
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 12 deletions.
27 changes: 27 additions & 0 deletions docs/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,33 @@ Release notes
Stability, Maintainability, and Testing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vYY.0M.MICRO (Unreleased)
-------------------------

Security
~~~~~~~~

Features
~~~~~~~~

* File downloads now use the checksum algorithm stored in SciCat when possible.
So it is no longer necessary to specify it by hand in many cases.

Breaking changes
~~~~~~~~~~~~~~~~

Bugfixes
~~~~~~~~

Documentation
~~~~~~~~~~~~~

Deprecations
~~~~~~~~~~~~

Stability, Maintainability, and Testing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

v23.07.0 (2023-07-07)
---------------------

Expand Down
17 changes: 10 additions & 7 deletions src/scitacean/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,13 @@ def download_files(
- A **Callable[File]**: if this callable returns ``True`` for ``f``
checksum_algorithm:
Select an algorithm for computing file checksums.
This argument will be removed when the next SciCat version
has been released.
The data block normally determines the algorithm, but this argument can
override the stored value if the data block is broken
or no algorithm has been set.
force:
If ``True``, download files regardless of whether they already exist
locally.
This bypasses the chekcsum computation of pre-existing local files.
This bypasses the checksum computation of pre-existing local files.
Returns
-------
Expand Down Expand Up @@ -861,13 +862,15 @@ def _select_files(select: FileSelector, dataset: Dataset) -> List[File]:
def _remove_up_to_date_local_files(
files: List[File], checksum_algorithm: Optional[str]
) -> List[File]:
def is_up_to_date(file: File) -> bool:
if checksum_algorithm is not None:
file = dataclasses.replace(file, checksum_algorithm=checksum_algorithm)
return file.local_is_up_to_date()

return [
file
for file in files
if not (
file.local_path.exists() # type: ignore[union-attr]
and dataclasses.replace(
file, checksum_algorithm=checksum_algorithm
).local_is_up_to_date()
file.local_path.exists() and is_up_to_date(file) # type: ignore[union-attr]
)
]
6 changes: 2 additions & 4 deletions src/scitacean/datablock.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def from_download_model(
"""
dblock = orig_datablock_model
return OrigDatablock(
checksum_algorithm=None,
checksum_algorithm=dblock.chkAlg,
owner_group=dblock.ownerGroup,
access_groups=dblock.accessGroups,
instrument_group=dblock.instrumentGroup,
Expand All @@ -75,9 +75,7 @@ def from_download_model(
_updated_at=dblock.updatedAt,
_updated_by=dblock.updatedBy,
init_files=[
File.from_download_model(
file, checksum_algorithm=orig_datablock_model.chkAlg
)
File.from_download_model(file, checksum_algorithm=dblock.chkAlg)
for file in dblock.dataFileList or ()
],
)
Expand Down
30 changes: 29 additions & 1 deletion tests/download_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ def test_download_files_detects_bad_size(fs, dataset_and_files, caplog):
assert "89412" in caplog.text


@pytest.mark.skip("Checksum algorithm not yet supported by datablocks")
def test_download_does_not_download_up_to_date_file(fs, dataset_and_files):
# Ensure the file exists locally
dataset, contents = dataset_and_files
Expand Down Expand Up @@ -328,6 +327,35 @@ def connect_for_download(self):
assert all(file.local_path is not None for file in downloaded.files)


def test_override_datablock_checksum(fs, dataset_and_files):
# Ensure the file exists locally
dataset, contents = dataset_and_files
client = Client.without_login(
url="/", file_transfer=FakeFileTransfer(fs=fs, files=contents)
)
client.download_files(dataset, target="./download", select=True)

# Downloads the same file again when the checksum algorithm is wrong.
class RaisingDownloader(FakeFileTransfer):
source_dir = "/"

@contextmanager
def connect_for_download(self):
raise RuntimeError("Download disabled")

client = Client.without_login(
url="/",
file_transfer=RaisingDownloader(fs=fs),
)
with pytest.raises(RuntimeError, match="Download disabled"):
client.download_files(
dataset,
target="./download",
select=True,
checksum_algorithm="sha256", # The datablock uses md5
)


def test_force_file_download(fs, dataset_and_files):
# Ensure the file exists locally
dataset, contents = dataset_and_files
Expand Down

0 comments on commit 7f9e0cb

Please sign in to comment.