Skip to content

Commit

Permalink
fix compute_checksums for literal files
Browse files: browse the repository at this point in the history
  • Loading branch information
mr-c committed Dec 6, 2023
1 parent d2059c7 commit 08a9cc2
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cwltool/command_line_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def calc_checksum(location: str) -> Optional[str]:
and "checksum" in e
and e["checksum"] != "sha1$hash"
):
return cast(Optional[str], e["checksum"])
return cast(str, e["checksum"])
return None

def remove_prefix(s: str, prefix: str) -> str:
Expand Down
15 changes: 10 additions & 5 deletions cwltool/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,10 +1342,15 @@ def compute_checksums(fs_access: StdFsAccess, fileobj: CWLObjectType) -> None:
if "checksum" not in fileobj:
checksum = hashlib.sha1() # nosec
location = cast(str, fileobj["location"])
with fs_access.open(location, "rb") as f:
contents = f.read(1024 * 1024)
while contents != b"":
checksum.update(contents)
if "contents" in fileobj:
contents = cast(str, fileobj["contents"]).encode("utf-8")
checksum.update(contents)
fileobj["size"] = len(contents)
else:
with fs_access.open(location, "rb") as f:
contents = f.read(1024 * 1024)
while contents != b"":
checksum.update(contents)
contents = f.read(1024 * 1024)
fileobj["size"] = fs_access.size(location)
fileobj["checksum"] = "sha1$%s" % checksum.hexdigest()
fileobj["size"] = fs_access.size(location)
22 changes: 22 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,28 @@ def test_cache_relative_paths(tmp_path: Path, factor: str) -> None:
assert (tmp_path / "cwltool_cache" / "27903451fc1ee10c148a0bdeb845b2cf").exists()


@pytest.mark.parametrize("factor", test_factors)
def test_cache_default_literal_file(tmp_path: Path, factor: str) -> None:
    """Confirm a cached run of a CLT with a default literal file succeeds.

    The command line is the whitespace-split ``factor`` arguments followed by
    ``--out`` and ``--cachedir`` directories under ``tmp_path`` and the path
    of the CWL document under test.
    """
    out_dir = tmp_path / "out"
    cache_dir = tmp_path / "cwltool_cache"
    argv = [
        *factor.split(),
        "--out",
        str(out_dir),
        "--cachedir",
        str(cache_dir),
        get_data("tests/wf/extract_region_specs.cwl"),
    ]
    exit_status, _, raw_stderr = get_main_output(argv)

    # Collapse every run of two or more whitespace characters into one space
    # before searching the captured stderr for the success marker.
    normalized = re.sub(r"\s\s+", " ", raw_stderr)
    assert "completed success" in normalized
    assert exit_status == 0


def test_write_summary(tmp_path: Path) -> None:
"""Test --write-summary."""
commands = [
Expand Down
21 changes: 21 additions & 0 deletions tests/wf/extract_region_specs.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"cwlVersion": "v1.0",
"class": "CommandLineTool",
"inputs": [
{
"type": "File",
"default": {
"class": "File",
"basename": "extract_regions.py",
"contents": "#!/usr/bin/env python3\n\nfrom __future__ import print_function, division\nimport sys\n\ninput_filename = sys.argv[1]\nif len(sys.argv) == 3:\n fuzz = int(sys.argv[2])\nelse:\n fuzz = 0\ninput_file = open(input_filename)\n\ncount = 0\nfor line in input_file:\n if not line.startswith(\">\"):\n continue\n count += 1\n contig_regions_file = open(\"contig_regions{}.txt\".format(count), \"w\")\n proteins_list_file = open(\"proteins{}.txt\".format(count), \"w\")\n fields = line.split(\"|\")\n protein_id = fields[0][1:]\n contig_id = fields[1]\n r_start = int(fields[6])\n if r_start > fuzz:\n r_start = r_start - fuzz\n r_end = int(fields[7]) + fuzz\n print(\"{}:{}-{}\".format(contig_id, r_start, r_end), file=contig_regions_file)\n print(protein_id, file=proteins_list_file)\n contig_regions_file.close()\n proteins_list_file.close()\n"
},
"inputBinding": {
"position": 1
},
"id": "scripts"
}
],
"outputs": [
],
"baseCommand": "cat"
}

0 comments on commit 08a9cc2

Please sign in to comment.