Skip to content

Commit

Permalink
Add tests, add --hard-link option, fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
jcushman committed Dec 2, 2024
1 parent 2c9a77c commit 98f8d0f
Show file tree
Hide file tree
Showing 32 changed files with 1,319 additions and 113 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/temp
__pycache__
*.pyc
.coverage
33 changes: 29 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,18 +128,20 @@ Options:
-u, --url TEXT URL to archive (can be repeated)
-p, --path PATH File or directory to archive (can be
repeated)
--hard-link Use hard links when copying files (when
possible)
-i, --info TEXT bag-info.txt metadata in key:value format
(can be repeated)
--signed-metadata FILE JSON file to be copied to data/signed-
metadata.json
--unsigned-metadata FILE JSON file to be copied to unsigned-
metadata.json
-s, --sign <key_file>:<cert_chain>
Sign using private key and certificate chain
-s, --sign <cert_chain>:<key_file>
Sign using certificate chain and private key
files (can be repeated)
-t, --timestamp <tsa_keyword> | <url>:<cert_chain>
-t, --timestamp <tsa_keyword> | <cert_chain>:<url>
Timestamp using either a TSA keyword or a
URL and cert chain (can be repeated)
cert chain path and URL (can be repeated)
--help Show this message and exit.
```

Expand Down Expand Up @@ -311,6 +313,29 @@ but the provided filenames are encouraged to ensure that users will understand t
`bag-nabit` does not currently specify anything regarding the
contents of the metadata files.
Development
-----------
We use [uv](https://docs.astral.sh/uv/) to manage development dependencies. After cloning the repository, to run from source:
```
uv run nabit
```
This will automatically install dependencies and run the command.
To run tests:
```
uv run pytest
```
Some tests use the [inline-snapshot](https://github.com/15r10nk/inline-snapshot/) library. If the tool output changes
intentionally, you may need to run `uv run pytest --inline-snapshot=review` to review the changes and apply them
to test files.
After making changes to the command line interface, run `uv run scripts/update_docs.py` to update README.md.
Limitations and Caveats
-----------------------
Expand Down
19 changes: 18 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies = [
"warcio>=1.7.4",
"requests>=2.32.3",
"bagit>=1.8.1",
"setuptools>=75.6.0",
"setuptools>=75.6.0", # required by bagit
]

[project.scripts]
Expand All @@ -18,3 +18,20 @@ nabit = "nabit.bin.cli:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv]
dev-dependencies = [
"pytest>=8.3.3",
"inline-snapshot>=0.14.0",
"pytest-cov>=6.0.0",
"pytest-httpserver>=1.1.0",
]

[tool.pytest.ini_options]
addopts = "--cov=nabit --cov-report=term-missing"
testpaths = ["tests"]

[tool.coverage.run]
source = ["nabit"]
branch = true

15 changes: 10 additions & 5 deletions scripts/update_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import re
# Add the project root to Python path so we can import the CLI
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

from nabit.bin.cli import main, archive, validate
from nabit.bin.cli import main

readme_path = project_root / 'README.md'

def update_readme():
"""Update README.md with latest command help"""

def get_new_readme_text():
ctx = click.Context(main)

# Get help text for the main command
Expand All @@ -33,7 +33,12 @@ def update_readme():
readme_content,
flags=re.DOTALL
)
readme_path.write_text(readme_content)

return readme_content

def update_readme():
"""Update README.md with latest command help"""
readme_path.write_text(get_new_readme_text())
print("README.md updated")

if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion src/nabit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

try:
__version__ = version("nabit")
except PackageNotFoundError:
except PackageNotFoundError: # pragma: no cover
# package is not installed
__version__ = "0.0.0.dev0"
2 changes: 0 additions & 2 deletions src/nabit/bin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
def hello() -> str:
return "Hello from bagit-sign!"
51 changes: 30 additions & 21 deletions src/nabit/bin/cli.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from collections import defaultdict
import click
import sys
import json
from pathlib import Path

from .utils import assert_file_exists, assert_url, cli_validate
from .utils import assert_file_exists, assert_url, cli_validate, CaptureCommand
from ..lib.archive import package, validate_package
from ..lib.sign import KNOWN_TSAS

Expand All @@ -12,34 +12,42 @@ def main():
"""BagIt package signing tool"""
pass

@main.command()

@main.command(cls=CaptureCommand)
@click.argument('bag_path', type=click.Path(path_type=Path))
@click.option('--amend', '-a', is_flag=True, help='Update an existing archive. May add OR OVERWRITE existing data.')
@click.option('--url', '-u', multiple=True, help='URL to archive (can be repeated)')
@click.option('--path', '-p', multiple=True, type=click.Path(exists=True, path_type=Path), help='File or directory to archive (can be repeated)')
@click.option('--url', '-u', 'urls', multiple=True, help='URL to archive (can be repeated)')
@click.option('--path', '-p', 'paths', multiple=True, type=click.Path(exists=True, path_type=Path), help='File or directory to archive (can be repeated)')
@click.option('--hard-link', is_flag=True, help='Use hard links when copying files (when possible)')
@click.option('--info', '-i', multiple=True, help='bag-info.txt metadata in key:value format (can be repeated)')
@click.option('--signed-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False),
help='JSON file to be copied to data/signed-metadata.json')
@click.option('--unsigned-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False),
help='JSON file to be copied to unsigned-metadata.json')
@click.option('--sign', '-s', 'signature_args', multiple=True,
help='Sign using private key and certificate chain files (can be repeated)',
metavar='<key_file>:<cert_chain>',
help='Sign using certificate chain and private key files (can be repeated)',
metavar='<cert_chain>:<key_file>',
)
@click.option('--timestamp', '-t', 'signature_args', multiple=True,
help='Timestamp using either a TSA keyword or a URL and cert chain (can be repeated)',
metavar='<tsa_keyword> | <url>:<cert_chain>',
help='Timestamp using either a TSA keyword or a cert chain path and URL (can be repeated)',
metavar='<tsa_keyword> | <cert_chain>:<url>',
)
@click.pass_context
def archive(ctx, bag_path, amend, url, path, info, signed_metadata, unsigned_metadata, signature_args):
def archive(ctx, bag_path, amend, urls, paths, hard_link, info, signed_metadata, unsigned_metadata, signature_args):
"""
Archive files and URLs into a BagIt package.
bag_path is the destination directory for the package.
"""
# Validate JSON files if provided
for metadata_path in (signed_metadata, unsigned_metadata):
if metadata_path and not metadata_path.suffix.lower() == '.json':
if not metadata_path:
continue
if not metadata_path.suffix.lower() == '.json':
raise click.BadParameter(f'Metadata file must be a .json file, got "{metadata_path}"')
try:
json.loads(metadata_path.read_text())
except json.JSONDecodeError as e:
raise click.BadParameter(f'Metadata file must be valid JSON, got "{metadata_path}": {e}')

# Check if output directory exists and is not empty
if bag_path.exists() and any(bag_path.iterdir()):
Expand All @@ -63,19 +71,22 @@ def archive(ctx, bag_path, amend, url, path, info, signed_metadata, unsigned_met
raise click.BadParameter(f'Metadata must be in "key:value" format, got "{item}"')
bag_info[key.strip()].append(value.strip())

# validate URLs
for url in urls:
assert_url(url)

## handle --sign and --timestamp options
# order matters, so get ordered list of signature flags from the raw command args (ctx.raw_args)
command_index = sys.argv.index(ctx.info_name)
signature_flags = [arg for arg in sys.argv[command_index + 1:] if arg in ['-s', '--sign', '-t', '--timestamp']]
signature_flags = [arg for arg in ctx.raw_args if arg in ['-s', '--sign', '-t', '--timestamp']]
# process each signature flag
signatures = []
for kind, value in zip(signature_flags, signature_args):
if kind in ['-s', '--sign']:
# Convert sign list of "<cert_chain>:<key_file>" strings into a list of signature operations
try:
key, cert_chain = value.split(':', 1)
cert_chain, key = value.split(':', 1)
except ValueError:
raise click.BadParameter(f'Sign must be in "key:cert_chain" format, got "{value}"')
raise click.BadParameter(f'Sign must be in "cert_chain:key_file" format, got "{value}"')
assert_file_exists(key)
assert_file_exists(cert_chain)
signatures.append({
Expand All @@ -88,7 +99,7 @@ def archive(ctx, bag_path, amend, url, path, info, signed_metadata, unsigned_met
params = KNOWN_TSAS[value]
else:
try:
url, cert_chain = value.split(':', 1)
cert_chain, url = value.split(':', 1)
except ValueError:
all_tsas = ', '.join(f'"{key}"' for key in KNOWN_TSAS.keys())
raise click.BadParameter(f'Timestamp must be in "cert_chain:url" format, or one of {all_tsas}. Got "{value}".')
Expand All @@ -101,13 +112,14 @@ def archive(ctx, bag_path, amend, url, path, info, signed_metadata, unsigned_met

package(
output_path=bag_path,
paths=path,
urls=url,
paths=paths,
urls=urls,
bag_info=bag_info,
signatures=signatures,
signed_metadata=signed_metadata,
unsigned_metadata=unsigned_metadata,
amend=amend,
use_hard_links=hard_link,
)

cli_validate(bag_path)
Expand All @@ -122,6 +134,3 @@ def validate(bag_path):
bag_path is the path to the package directory to validate.
"""
cli_validate(bag_path)

if __name__ == '__main__':
main()
21 changes: 15 additions & 6 deletions src/nabit/bin/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from urllib.parse import urlparse
import click
import requests
from ..lib.archive import validate_package


def assert_file_exists(path):
    """Check ``path`` against click's "existing file" parameter type.

    The value is run through ``click.Path(exists=True, dir_okay=False)``
    purely for its validation; the converted result is discarded and
    nothing is returned.
    """
    existing_file = click.Path(exists=True, path_type=str, dir_okay=False)
    existing_file(path)

def assert_url(url):
parsed = urlparse(url)
if parsed.scheme not in ['http', 'https']:
raise click.BadParameter(f'Expected a URL with http or https scheme, got "{url}"')
try:
requests.Request('GET', url).prepare()
except requests.RequestException as e:
raise click.BadParameter(str(e))

def cli_validate(bag_path):
"""
Expand All @@ -18,6 +20,7 @@ def cli_validate(bag_path):
click.echo(f"Validating package at {bag_path} ...")
has_errors = False
def error(message: str, metadata: dict | None = None) -> None:
nonlocal has_errors
click.secho("ERROR:", fg='red', bold=True, nl=False)
click.echo(f" {message}")
has_errors = True
Expand All @@ -33,7 +36,13 @@ def success(message: str, metadata: dict | None = None) -> None:
validate_package(bag_path, error, warn, success)

if has_errors:
click.echo("Errors found in package")
click.exit(1)
raise click.ClickException("Errors found in package")

click.echo("Package is valid")
click.echo("Package is valid")


class CaptureCommand(click.Command):
    """Click command that keeps a copy of its unparsed arguments.

    Before normal parsing runs, the argument list is copied onto the
    context as ``ctx.raw_args`` so the command callback can still see the
    arguments exactly as they were passed in.
    """

    def parse_args(self, ctx: click.Context, args: list[str]) -> list[str]:
        # Take the copy before delegating, so it is independent of ``args``.
        ctx.raw_args = [*args]
        remaining = super().parse_args(ctx, args)
        return remaining
2 changes: 0 additions & 2 deletions src/nabit/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
def hello() -> str:
return "Hello from bagit-sign!"
Loading

0 comments on commit 98f8d0f

Please sign in to comment.