From a8aba97a634ee88feeac8d0cf150952d3a515674 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Fri, 18 Oct 2024 22:26:43 -0400 Subject: [PATCH] `juq papermill {clean,run}`, update readme --- README.md | 70 +++++++++++++++++++----- juq/cli.py | 91 ++++++++++++++++++++++---------- setup.py | 4 +- tests/test_merge_cell_outputs.py | 4 +- tests/test_papermill_clean.py | 4 +- 5 files changed, 125 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 3a98bab..a40e051 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,25 @@ # `juq` -CLI for viewing/slicing Jupyter notebooks (name is inspired by "`jq` for Jupyter") +Query, run, and clean Jupyter notebooks (name is inspired by "`jq` for Jupyter") [![PyPI version](https://badge.fury.io/py/juq.py.svg)](https://badge.fury.io/py/juq.py) -## Installation + +- [Installation](#installation) +- [Usage](#usage) + - [`juq cells`](#juq-cells) + - [`juq merge-outputs`](#juq-merge-outputs) + - [`juq papermill clean`](#juq-papermill-clean) + - [`juq papermill run`](#juq-papermill-run) + + +## Installation ```bash pip install juq.py ``` -## Usage +## Usage -### `juq cells` +### `juq cells` Slice/Filter cells: ```bash juq cells --help @@ -36,8 +45,10 @@ juq cells --help # --help Show this message and exit. ``` -### `juq merge-outputs` +### `juq merge-outputs` Merge consecutive "stream" outputs (e.g. stderr): + + ```bash juq merge-outputs --help # Usage: juq merge-outputs [OPTIONS] [NB_PATH] @@ -45,14 +56,20 @@ juq merge-outputs --help # Merge consecutive "stream" outputs (e.g. stderr). # # Options: -# -i, --in-place Modify [NB_PATH] in-place -# -o, --out-path TEXT Write to this file instead of stdout -# --help Show this message and exit. +# -i, --in-place Modify [NB_PATH] in-place +# -n, --indent INTEGER Indentation level for the output notebook +# JSON (default: infer from input) +# -o, --out-path TEXT Write to this file instead of stdout +# -t, --trailing-newline / -T, --no-trailing-newline +# Enforce presence or absence of a trailing +# newline (default: match input) +# --help Show this message and exit. ``` e.g.: ```bash juq merge-outputs -i notebook.ipynb ``` + Useful for situations like: - [jupyter-book#973](https://github.com/executablebooks/jupyter-book/issues/973) - [nbval#138](https://github.com/computationalmodelling/nbval/issues/138#issuecomment-1869177219) @@ -63,10 +80,11 @@ As of [nbconvert#2089](https://github.com/jupyter/nbconvert/pull/2089), this sho jupyter nbconvert --coalesce-streams --inplace notebook.ipynb ``` -### `juq papermill-clean` +### `juq papermill clean` + ```bash -juq papermill-clean --help -# Usage: juq papermill-clean [OPTIONS] [NB_PATH] +juq papermill clean --help +# Usage: juq papermill clean [OPTIONS] [NB_PATH] # # Remove Papermill metadata from a notebook. # @@ -74,7 +92,31 @@ juq papermill-clean --help # `.cells[*].metadata.{papermill,execution,widgets}`. # # Options: -# -i, --in-place Modify [NB_PATH] in-place -# -o, --out-path TEXT Write to this file instead of stdout -# --help Show this message and exit. +# -i, --in-place Modify [NB_PATH] in-place +# -n, --indent INTEGER Indentation level for the output notebook +# JSON (default: infer from input) +# -o, --out-path TEXT Write to this file instead of stdout +# -t, --trailing-newline / -T, --no-trailing-newline +# Enforce presence or absence of a trailing +# newline (default: match input) +# --help Show this message and exit. +``` + +### `juq papermill run` + +```bash +juq papermill run --help +# Usage: juq papermill run [OPTIONS] [NB_PATH] +# +# Run a notebook using Papermill, clean nondeterministic metadata. +# +# Options: +# -i, --in-place Modify [NB_PATH] in-place +# -n, --indent INTEGER Indentation level for the output notebook +# JSON (default: infer from input) +# -o, --out-path TEXT Write to this file instead of stdout +# -t, --trailing-newline / -T, --no-trailing-newline +# Enforce presence or absence of a trailing +# newline (default: match input) +# --help Show this message and exit. ``` diff --git a/juq/cli.py b/juq/cli.py index 3307d09..a444deb 100644 --- a/juq/cli.py +++ b/juq/cli.py @@ -1,35 +1,48 @@ import json -from contextlib import contextmanager +from contextlib import nullcontext from functools import wraps from inspect import getfullargspec +from subprocess import check_output from sys import stdin, stdout, stderr -import click +from click import argument, group, option -@click.group() +@group() def cli(): pass -@contextmanager -def identity(obj): - yield obj - - def with_nb_input(func): spec = getfullargspec(func) @wraps(func) - @click.argument('nb_path', required=False) + @argument('nb_path', required=False) def wrapper(nb_path, *args, **kwargs): - ctx = identity(stdin) if nb_path == '-' or nb_path is None else open(nb_path, 'r') + ctx = nullcontext(stdin) if nb_path == '-' or nb_path is None else open(nb_path, 'r') with ctx as f: - # nb = nbformat.read(f, as_version=4) - nb = json.load(f) + nb_str = f.read() + indent = kwargs.pop('indent', None) + if indent is None: + if nb_str.startswith('{'): + if nb_str[1] == "\n": + idx = 2 + indent = 0 + while nb_str[idx] == ' ': + idx += 1 + indent += 1 + else: + indent = None + else: + raise ValueError(f"Cannot infer `indent` from non-JSON input beginning with {nb_str[:30]}") + + trailing_newline = kwargs.pop('trailing_newline', None) + if trailing_newline is None: + trailing_newline = nb_str.endswith('\n') + nb = json.loads(nb_str) if 'nb_path' in spec.args or 'nb_path' in spec.kwonlyargs: kwargs['nb_path'] = nb_path - func(*args, nb=nb, **kwargs) + return func(*args, nb=nb, indent=indent, trailing_newline=trailing_newline, **kwargs) return wrapper @@ -37,12 +50,13 @@ def with_nb(func): spec = getfullargspec(func) @wraps(func) - @click.option('-i', '--in-place', is_flag=True, help='Modify [NB_PATH] in-place') - @click.option('-o', '--out-path', help='Write to this file instead of stdout') + @option('-i', '--in-place', is_flag=True, help='Modify [NB_PATH] in-place') + @option('-n', '--indent', type=int, help='Indentation level for the output notebook JSON (default: infer from input)') + @option('-o', '--out-path', help='Write to this file instead of stdout') + @option('-t/-T', '--trailing-newline/--no-trailing-newline', default=None, help='Enforce presence or absence of a trailing newline (default: match input)') @with_nb_input - def wrapper(*args, nb_path, **kwargs): + def wrapper(*args, nb_path, indent, trailing_newline, **kwargs): """Merge consecutive "stream" outputs (e.g. stderr).""" - nb = kwargs['nb'] in_place = kwargs.get('in_place') out_path = kwargs.get('out_path') if in_place: @@ -60,9 +74,11 @@ def wrapper(*args, nb_path, **kwargs): } nb = func(*args, **kwargs) - out_ctx = identity(stdout) if out_path == '-' or out_path is None else open(out_path, 'w') + out_ctx = nullcontext(stdout) if out_path == '-' or out_path is None else open(out_path, 'w') with out_ctx as f: - json.dump(nb, f, indent=2) + json.dump(nb, f, indent=indent) + if trailing_newline: + f.write('\n') return wrapper @@ -76,11 +92,11 @@ def wrapper(*args, nb_path, **kwargs): @cli.command() -@click.option('-m/-M', '--metadata/--no-metadata', default=None, help='Explicitly include or exclude each cell\'s "metadata" key. If only `-m` is passed, only the "metadata" value of each cell is printed') -@click.option('-o/-O', '--outputs/--no-outputs', default=None, help='Explicitly include or exclude each cell\'s "outputs" key. If only `-o` is passed, only the "outputs" value of each cell is printed') -@click.option('-s/-S', '--source/--no-source', default=None, help='Explicitly include or exclude each cell\'s "source" key. If only `-s` is passed, the source is printed directly (not as JSON)') -@click.option('-t', '--cell-type', help='Only print cells of this type. Recognizes abbreviations: "c" for "code", {"m","md"} for "markdown", "r" for "raw"') -@click.argument('cells_slice') +@option('-m/-M', '--metadata/--no-metadata', default=None, help='Explicitly include or exclude each cell\'s "metadata" key. If only `-m` is passed, only the "metadata" value of each cell is printed') +@option('-o/-O', '--outputs/--no-outputs', default=None, help='Explicitly include or exclude each cell\'s "outputs" key. If only `-o` is passed, only the "outputs" value of each cell is printed') +@option('-s/-S', '--source/--no-source', default=None, help='Explicitly include or exclude each cell\'s "source" key. If only `-s` is passed, the source is printed directly (not as JSON)') +@option('-t', '--cell-type', help='Only print cells of this type. Recognizes abbreviations: "c" for "code", {"m","md"} for "markdown", "r" for "raw"') +@argument('cells_slice') @with_nb_input def cells(cell_type, cells_slice, nb, **flags): """Slice/Filter cells.""" @@ -169,8 +185,6 @@ def merge_cell_outputs(cell): return cell -@cli.command('merge-outputs') -@with_nb def merge_outputs(nb): """Merge consecutive "stream" outputs (e.g. stderr).""" nb['cells'] = [ @@ -180,6 +194,15 @@ def merge_outputs(nb): return nb +merge_outputs_cmd = cli.command('merge-outputs')(with_nb(merge_outputs)) + + +@cli.group +def papermill(): + """Wrapper for Papermill commands (`clean`, `run`).""" + pass + + def papermill_clean_cell(cell): if 'id' in cell: del cell['id'] @@ -191,8 +214,6 @@ def papermill_clean_cell(cell): return cell -@cli.command('papermill-clean') -@with_nb def papermill_clean(nb): """Remove Papermill metadata from a notebook. @@ -208,5 +229,19 @@ def papermill_clean(nb): return nb +papermill_clean_cmd = papermill.command('clean')(with_nb(papermill_clean)) + + +@papermill.command('run') +@with_nb +def papermill_run(nb): + """Run a notebook using Papermill, clean nondeterministic metadata, normalize output streams.""" + output = check_output(['papermill'], input=json.dumps(nb).encode()) + nb = json.loads(output) + nb = papermill_clean(nb) + nb = merge_outputs(nb) + return nb + + if __name__ == '__main__': cli() diff --git a/setup.py b/setup.py index b9cecec..4be07f1 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='juq.py', - version='0.1.1', + version='0.2.0', packages=find_packages(), install_requires=open('requirements.txt').read(), extras_require={ @@ -13,7 +13,7 @@ author_email="ryan@runsascoded.com", author_url="https://github.com/ryan-williams", url="https://github.com/runsascoded/juq", - description='CLI for viewing/slicing Jupyter notebooks (name is inspired by "`jq` for Jupyter")', + description='Query, run, and clean Jupyter notebooks (name is inspired by "`jq` for Jupyter")', long_description=open("README.md").read(), long_description_content_type="text/markdown", entry_points={ diff --git a/tests/test_merge_cell_outputs.py b/tests/test_merge_cell_outputs.py index e056435..26e64c7 100644 --- a/tests/test_merge_cell_outputs.py +++ b/tests/test_merge_cell_outputs.py @@ -3,7 +3,7 @@ from os.path import dirname from tempfile import TemporaryDirectory -from juq.cli import merge_outputs +from juq.cli import merge_outputs_cmd TEST_DIR = path.join(dirname(__file__), "files", "merge-outputs") @@ -14,7 +14,7 @@ def test_merge_cell_outputs(): expected_path = path.join(TEST_DIR, out_name) with TemporaryDirectory() as tmpdir: actual_path = path.join(tmpdir, out_name) - merge_outputs.callback(nb_path=nb_path, out_path=actual_path) + merge_outputs_cmd.callback(nb_path=nb_path, out_path=actual_path) with open(expected_path, 'r') as f: expected_nb = json.load(f) with open(actual_path, 'r') as f: diff --git a/tests/test_papermill_clean.py b/tests/test_papermill_clean.py index 48b3cd1..787a1a0 100644 --- a/tests/test_papermill_clean.py +++ b/tests/test_papermill_clean.py @@ -3,7 +3,7 @@ from os.path import dirname from tempfile import TemporaryDirectory from papermill import execute_notebook -from juq.cli import papermill_clean +from juq.cli import papermill_clean_cmd TEST_DIR = path.join(dirname(__file__), "files", "merge-outputs") @@ -19,7 +19,7 @@ def test_papermill_clean(): assert 'papermill' in post_papermill['metadata'] cleaned_path = path.join(tmpdir, 'cleaned.ipynb') - papermill_clean.callback(nb_path=post_papermill_path, out_path=cleaned_path) + papermill_clean_cmd.callback(nb_path=post_papermill_path, out_path=cleaned_path) with open(cleaned_path, 'r') as f: cleaned_nb = json.load(f) split_outputs_nb_path = path.join(TEST_DIR, "split-outputs.ipynb")