juq papermill {clean,run}, update readme

runsascoded · Oct 19, 2024 · a8aba97
1 parent a827d01
commit a8aba97
Show file tree

Hide file tree

Showing 5 changed files with 125 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -1,16 +1,25 @@
 # `juq`
-CLI for viewing/slicing Jupyter notebooks (name is inspired by "`jq` for Jupyter")
+Query, run, and clean Jupyter notebooks (name is inspired by "`jq` for Jupyter")
 
 [![PyPI version](https://badge.fury.io/py/juq.py.svg)](https://badge.fury.io/py/juq.py)
 
-## Installation
+<!-- toc -->
+- [Installation](#installation)
+- [Usage](#usage)
+    - [`juq cells`](#juq-cells)
+    - [`juq merge-outputs`](#juq-merge-outputs)
+    - [`juq papermill clean`](#juq-papermill-clean)
+    - [`juq papermill run`](#juq-papermill-run)
+<!-- /toc -->
+
+## Installation <a id="installation"></a>
 ```bash
 pip install juq.py
 ```
 
-## Usage
+## Usage <a id="usage"></a>
 
-### `juq cells`
+### `juq cells` <a id="juq-cells"></a>
 Slice/Filter cells:
 ```bash
 juq cells --help
@@ -36,23 +45,31 @@ juq cells --help
 #   --help                          Show this message and exit.
 ```
 
-### `juq merge-outputs`
+### `juq merge-outputs` <a id="juq-merge-outputs"></a>
 Merge consecutive "stream" outputs (e.g. stderr):
+
+<!-- `bmdf -- juq merge-outputs --help` -->
 ```bash
 juq merge-outputs --help
 # Usage: juq merge-outputs [OPTIONS] [NB_PATH]
 #
 #   Merge consecutive "stream" outputs (e.g. stderr).
 #
 # Options:
-#   -i, --in-place       Modify [NB_PATH] in-place
-#   -o, --out-path TEXT  Write to this file instead of stdout
-#   --help               Show this message and exit.
+#   -i, --in-place                  Modify [NB_PATH] in-place
+#   -n, --indent INTEGER            Indentation level for the output notebook
+#                                   JSON (default: infer from input)
+#   -o, --out-path TEXT             Write to this file instead of stdout
+#   -t, --trailing-newline / -T, --no-trailing-newline
+#                                   Enforce presence or absence of a trailing
+#                                   newline (default: match input)
+#   --help                          Show this message and exit.
 ```
 e.g.:
 ```bash
 juq merge-outputs -i notebook.ipynb
 ```
+
 Useful for situations like:
 - [jupyter-book#973](https://github.com/executablebooks/jupyter-book/issues/973)
 - [nbval#138](https://github.com/computationalmodelling/nbval/issues/138#issuecomment-1869177219)
@@ -63,18 +80,43 @@ As of [nbconvert#2089](https://github.com/jupyter/nbconvert/pull/2089), this sho
 jupyter nbconvert --coalesce-streams --inplace notebook.ipynb
 ```
 
-### `juq papermill-clean`
+### `juq papermill clean` <a id="juq-papermill-clean"></a>
+<!-- `bmdf -- juq papermill clean --help` -->
 ```bash
-juq papermill-clean --help
-# Usage: juq papermill-clean [OPTIONS] [NB_PATH]
+juq papermill clean --help
+# Usage: juq papermill clean [OPTIONS] [NB_PATH]
 #
 #   Remove Papermill metadata from a notebook.
 #
 #   Removes `.metadata.papermill` and
 #   `.cells[*].metadata.{papermill,execution,widgets}`.
 #
 # Options:
-#   -i, --in-place       Modify [NB_PATH] in-place
-#   -o, --out-path TEXT  Write to this file instead of stdout
-#   --help               Show this message and exit.
+#   -i, --in-place                  Modify [NB_PATH] in-place
+#   -n, --indent INTEGER            Indentation level for the output notebook
+#                                   JSON (default: infer from input)
+#   -o, --out-path TEXT             Write to this file instead of stdout
+#   -t, --trailing-newline / -T, --no-trailing-newline
+#                                   Enforce presence or absence of a trailing
+#                                   newline (default: match input)
+#   --help                          Show this message and exit.
+```
+
+### `juq papermill run` <a id="juq-papermill-run"></a>
+<!-- `bmdf -- juq papermill run --help` -->
+```bash
+juq papermill run --help
+# Usage: juq papermill run [OPTIONS] [NB_PATH]
+#
+#   Run a notebook using Papermill, clean nondeterministic metadata, normalize
+#   output streams.
+#
+# Options:
+#   -i, --in-place                  Modify [NB_PATH] in-place
+#   -n, --indent INTEGER            Indentation level for the output notebook
+#                                   JSON (default: infer from input)
+#   -o, --out-path TEXT             Write to this file instead of stdout
+#   -t, --trailing-newline / -T, --no-trailing-newline
+#                                   Enforce presence or absence of a trailing
+#                                   newline (default: match input)
+#   --help                          Show this message and exit.
 ```
diff --git a/juq/cli.py b/juq/cli.py
@@ -1,48 +1,62 @@
 import json
-from contextlib import contextmanager
+from contextlib import nullcontext
 from functools import wraps
 from inspect import getfullargspec
+from subprocess import check_output
 from sys import stdin, stdout, stderr
 
-import click
+from click import argument, group, option
 
 
-@click.group()
+@group()
 def cli():
     pass
 
 
-@contextmanager
-def identity(obj):
-    yield obj
-
-
 def with_nb_input(func):
     spec = getfullargspec(func)
 
     @wraps(func)
-    @click.argument('nb_path', required=False)
+    @argument('nb_path', required=False)
     def wrapper(nb_path, *args, **kwargs):
-        ctx = identity(stdin) if nb_path == '-' or nb_path is None else open(nb_path, 'r')
+        ctx = nullcontext(stdin) if nb_path == '-' or nb_path is None else open(nb_path, 'r')
         with ctx as f:
-            # nb = nbformat.read(f, as_version=4)
-            nb = json.load(f)
+            nb_str = f.read()
+            indent = kwargs.pop('indent', None)
+            if indent is None:
+                if nb_str.startswith('{'):
+                    if nb_str[1] == "\n":
+                        idx = 2
+                        indent = 0
+                        while nb_str[idx] == ' ':
+                            idx += 1
+                            indent += 1
+                    else:
+                        indent = None
+                else:
+                    raise ValueError(f"Cannot infer `indent` from non-JSON input beginning with {nb_str[:30]}")
+
+            trailing_newline = kwargs.pop('trailing_newline', None)
+            if trailing_newline is None:
+                trailing_newline = nb_str.endswith('\n')
+            nb = json.loads(nb_str)
         if 'nb_path' in spec.args or 'nb_path' in spec.kwonlyargs:
             kwargs['nb_path'] = nb_path
-        func(*args, nb=nb, **kwargs)
+        return func(*args, nb=nb, indent=indent, trailing_newline=trailing_newline, **kwargs)
     return wrapper
 
 
 def with_nb(func):
     spec = getfullargspec(func)
 
     @wraps(func)
-    @click.option('-i', '--in-place', is_flag=True, help='Modify [NB_PATH] in-place')
-    @click.option('-o', '--out-path', help='Write to this file instead of stdout')
+    @option('-i', '--in-place', is_flag=True, help='Modify [NB_PATH] in-place')
+    @option('-n', '--indent', type=int, help='Indentation level for the output notebook JSON (default: infer from input)')
+    @option('-o', '--out-path', help='Write to this file instead of stdout')
+    @option('-t/-T', '--trailing-newline/--no-trailing-newline', default=None, help='Enforce presence or absence of a trailing newline (default: match input)')
     @with_nb_input
-    def wrapper(*args, nb_path, **kwargs):
+    def wrapper(*args, nb_path, indent, trailing_newline, **kwargs):
         """Merge consecutive "stream" outputs (e.g. stderr)."""
-        nb = kwargs['nb']
         in_place = kwargs.get('in_place')
         out_path = kwargs.get('out_path')
         if in_place:
@@ -60,9 +74,11 @@ def wrapper(*args, nb_path, **kwargs):
         }
         nb = func(*args, **kwargs)
 
-        out_ctx = identity(stdout) if out_path == '-' or out_path is None else open(out_path, 'w')
+        out_ctx = nullcontext(stdout) if out_path == '-' or out_path is None else open(out_path, 'w')
         with out_ctx as f:
-            json.dump(nb, f, indent=2)
+            json.dump(nb, f, indent=indent)
+            if trailing_newline:
+                f.write('\n')
 
     return wrapper
 
@@ -76,11 +92,11 @@ def wrapper(*args, nb_path, **kwargs):
 
 
 @cli.command()
-@click.option('-m/-M', '--metadata/--no-metadata', default=None, help='Explicitly include or exclude each cell\'s "metadata" key. If only `-m` is passed, only the "metadata" value of each cell is printed')
-@click.option('-o/-O', '--outputs/--no-outputs', default=None, help='Explicitly include or exclude each cell\'s "outputs" key. If only `-o` is passed, only the "outputs" value of each cell is printed')
-@click.option('-s/-S', '--source/--no-source', default=None, help='Explicitly include or exclude each cell\'s "source" key. If only `-s` is passed, the source is printed directly (not as JSON)')
-@click.option('-t', '--cell-type', help='Only print cells of this type. Recognizes abbreviations: "c" for "code", {"m","md"} for "markdown", "r" for "raw"')
-@click.argument('cells_slice')
+@option('-m/-M', '--metadata/--no-metadata', default=None, help='Explicitly include or exclude each cell\'s "metadata" key. If only `-m` is passed, only the "metadata" value of each cell is printed')
+@option('-o/-O', '--outputs/--no-outputs', default=None, help='Explicitly include or exclude each cell\'s "outputs" key. If only `-o` is passed, only the "outputs" value of each cell is printed')
+@option('-s/-S', '--source/--no-source', default=None, help='Explicitly include or exclude each cell\'s "source" key. If only `-s` is passed, the source is printed directly (not as JSON)')
+@option('-t', '--cell-type', help='Only print cells of this type. Recognizes abbreviations: "c" for "code", {"m","md"} for "markdown", "r" for "raw"')
+@argument('cells_slice')
 @with_nb_input
 def cells(cell_type, cells_slice, nb, **flags):
     """Slice/Filter cells."""
@@ -169,8 +185,6 @@ def merge_cell_outputs(cell):
     return cell
 
 
-@cli.command('merge-outputs')
-@with_nb
 def merge_outputs(nb):
     """Merge consecutive "stream" outputs (e.g. stderr)."""
     nb['cells'] = [
@@ -180,6 +194,15 @@ def merge_outputs(nb):
     return nb
 
 
+merge_outputs_cmd = cli.command('merge-outputs')(with_nb(merge_outputs))
+
+
+@cli.group
+def papermill():
+    """Wrapper for Papermill commands (`clean`, `run`)."""
+    pass
+
+
 def papermill_clean_cell(cell):
     if 'id' in cell:
         del cell['id']
@@ -191,8 +214,6 @@ def papermill_clean_cell(cell):
     return cell
 
 
-@cli.command('papermill-clean')
-@with_nb
 def papermill_clean(nb):
     """Remove Papermill metadata from a notebook.
 
@@ -208,5 +229,19 @@ def papermill_clean(nb):
     return nb
 
 
+papermill_clean_cmd = papermill.command('clean')(with_nb(papermill_clean))
+
+
+@papermill.command('run')
+@with_nb
+def papermill_run(nb):
+    """Run a notebook using Papermill, clean nondeterministic metadata, normalize output streams."""
+    output = check_output(['papermill'], input=json.dumps(nb).encode())
+    nb = json.loads(output)
+    nb = papermill_clean(nb)
+    nb = merge_outputs(nb)
+    return nb
+
+
 if __name__ == '__main__':
     cli()
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='juq.py',
-    version='0.1.1',
+    version='0.2.0',
     packages=find_packages(),
     install_requires=open('requirements.txt').read(),
     extras_require={
@@ -13,7 +13,7 @@
     author_email="[email protected]",
     author_url="https://github.com/ryan-williams",
     url="https://github.com/runsascoded/juq",
-    description='CLI for viewing/slicing Jupyter notebooks (name is inspired by "`jq` for Jupyter")',
+    description='Query, run, and clean Jupyter notebooks (name is inspired by "`jq` for Jupyter")',
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
     entry_points={

diff --git a/tests/test_merge_cell_outputs.py b/tests/test_merge_cell_outputs.py
@@ -3,7 +3,7 @@
 from os.path import dirname
 from tempfile import TemporaryDirectory
 
-from juq.cli import merge_outputs
+from juq.cli import merge_outputs_cmd
 
 TEST_DIR = path.join(dirname(__file__), "files", "merge-outputs")
 
@@ -14,7 +14,7 @@ def test_merge_cell_outputs():
     expected_path = path.join(TEST_DIR, out_name)
     with TemporaryDirectory() as tmpdir:
         actual_path = path.join(tmpdir, out_name)
-        merge_outputs.callback(nb_path=nb_path, out_path=actual_path)
+        merge_outputs_cmd.callback(nb_path=nb_path, out_path=actual_path)
         with open(expected_path, 'r') as f:
             expected_nb = json.load(f)
         with open(actual_path, 'r') as f:

diff --git a/tests/test_papermill_clean.py b/tests/test_papermill_clean.py
@@ -3,7 +3,7 @@
 from os.path import dirname
 from tempfile import TemporaryDirectory
 from papermill import execute_notebook
-from juq.cli import papermill_clean
+from juq.cli import papermill_clean_cmd
 
 TEST_DIR = path.join(dirname(__file__), "files", "merge-outputs")
 
@@ -19,7 +19,7 @@ def test_papermill_clean():
         assert 'papermill' in post_papermill['metadata']
 
         cleaned_path = path.join(tmpdir, 'cleaned.ipynb')
-        papermill_clean.callback(nb_path=post_papermill_path, out_path=cleaned_path)
+        papermill_clean_cmd.callback(nb_path=post_papermill_path, out_path=cleaned_path)
         with open(cleaned_path, 'r') as f:
             cleaned_nb = json.load(f)
         split_outputs_nb_path = path.join(TEST_DIR, "split-outputs.ipynb")