diff --git a/HISTORY.rst b/HISTORY.rst index 8badb4f92..8abc52bfb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,26 @@ Release History dev +++ +0.5.0 (2018-08-21) ++++++++++++++++++++ + +**Improvements** + +- Jupyter magic commands escaped when exported (#29) +- 'endofcell' option for explicit (optional) end-of-cell marker (#31) +- 'active' cell option now supported for .py and .R export (#30) +- Raw cells now preserved when exported to .py or .R (#32) +- Extensions can be prefixed, like `.nb.py`, (mwouts/nbsrc#5) +- When a file with an extension not associated to 'ipynb' is opened and saved, +no 'ipynb' file is created (mwouts/nbsrc#5) +- Extensions can now be a sequence of groups. For instance, +`nbrmd_formats="ipynb,nb.py;script.ipynb,py"` will create an `ipynb` file +when a `nb.py` is opened (and conversely), and a `script.ipynb` file when a +`py` file is opened (mwouts/nbsrc#5) +- `nbsrc` script was moved to the `nbrmd` package. The `nbsrc` package now only +contains the documentation (mwouts/nbsrc#3) + + 0.4.6 (2018-07-26) +++++++++++++++++++ diff --git a/README.md b/README.md index 839700a91..19078f7e5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Jupyter notebooks from/to R markdown +# Jupyter notebooks as R markdown, Python or R scripts [![Pypi](https://img.shields.io/pypi/v/nbrmd.svg)](https://pypi.python.org/pypi/nbrmd) [![Pypi](https://img.shields.io/pypi/l/nbrmd.svg)](https://pypi.python.org/pypi/nbrmd) @@ -8,30 +8,27 @@ [![pyversions](https://img.shields.io/pypi/pyversions/nbrmd.svg)](https://pypi.python.org/pypi/nbrmd) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/mwouts/nbrmd/master?filepath=demo) -This package is an implementation of the standard -[R markdown](https://rmarkdown.rstudio.com/) notebook format for Jupyter. -R markdown notebooks are source only notebooks, and they -are great companion files for the standard `.ipynb` notebooks. 
+This package offers a representation of Jupyter notebooks as Python scripts, R scripts, or [R markdown](https://rmarkdown.rstudio.com/) notebooks. -Use the `nbrmd` package if -- you prefer to have simple text files under version control -- if you want to use RStudio's advanced rendering of notebooks to PDF, HTML or [HTML slides](https://rmarkdown.rstudio.com/ioslides_presentation_format.html) -- or, you have a collection of markdown or R markdown notebooks and you want to open them in Jupyter. +These alternative representations allow you to +- edit notebooks in both Jupyter and your favorite IDE, and refactor them +- extract executable scripts from your notebooks +- get meaningful diffs for notebooks under version control, and easily merge contributions +- edit the same notebook in both Jupyter and RStudio, and use RStudio's advanced rendering of notebooks to PDF, HTML or [HTML slides](https://rmarkdown.rstudio.com/ioslides_presentation_format.html). -Only the source of your notebook is represented in R markdown. -When a pair of `.Rmd`, `.ipynb` notebooks with identical names are opened -in Jupyter, inputs are taken from the `.Rmd` file, and outputs, when they match the input, -are taken from the `.ipynb` file. This allows you to edit the R markdown -version in your favorite text editor, and reload the notebook in Jupyter with the -convenience of preserving outputs when possible. +Scripts and R markdown notebooks only store the source of the notebook, and work in pairs with the original `.ipynb` file. Jupyter saves notebooks to both the classical `.ipynb` form, and to the text-only representation. When a text-only notebook is loaded in Jupyter, inputs are taken there, and outputs are taken from the `.ipynb` file, if present. ## Can I have a demo? -Sure. Try our package on [binder](https://mybinder.org/v2/gh/mwouts/nbrmd/master?filepath=demo) -and open our python-oriented R markdown notebook! 
-As you will see there, the package also offers opening and saving -notebooks as python or R scripts. Go to -[nbsrc](https://github.com/mwouts/nbsrc) for a specific documentation on this. +Sure. Try our package on [binder](https://mybinder.org/v2/gh/mwouts/nbrmd/master?filepath=demo). Notice that every `.py`, `.R` and `.Rmd` file now opens as a Jupyter notebook. I suggest you open the matplotlib demo there: `filled_step.py`, run it and save it, close notebook and reopen, to observe persistence of outputs. + +The other examples demo how to *edit* the script and reload the notebook (preserving the kernel variables), and how to edit in Jupyter an interactive ioslide presentation. + +## How does the Python script look like? + +Python [notebook](https://mybinder.org/v2/gh/mwouts/nbrmd/master?filepath=tests/python_notebook_sample.py) in Jupyter | Python [script](https://github.com/mwouts/nbrmd/blob/master/tests/python_notebook_sample.py) +:--------------------------:|:-----------------------: +![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/python_notebook.png) | ![](https://raw.githubusercontent.com/mwouts/nbrmd/master/img/python_source.png) ## How does R markdown look like? @@ -41,23 +38,25 @@ Rmd notebook in jupyter | Rmd notebook as text ## Have you tested round-trip conversion? -Round trip conversion is safe! And backed by hundreds of tests. -- R markdown to Jupyter notebook, to R markdown again is identity. If you +Round trip conversion is safe! A few hundreds of tests help to guarantee this. +- Script to Jupyter notebook, to script again is identity. If you associate a Jupyter kernel to your notebook, that information will go to -the yaml header of your notebook. +a yaml header at the top of your script. +- R markdown to Jupyter notebook, to R markdown again is identity. +- Jupyter to script, and Jupyter again preserves source and metadata. - Jupyter to R markdown, and Jupyter again preserves source and metadata. 
-In some occasions (consecutive blank lines in -markdown cells), cells may be splitted into smaller ones. +On some occasions (consecutive blank lines in markdown cells), markdown cells may +be split into smaller ones. -## How do I activate R markdown notebooks in Jupyter? +## How do I activate the companion scripts or R markdown notebooks in Jupyter? The `nbrmd` package offers a `ContentsManager` for Jupyter that recognizes -`.Rmd` files as notebooks. To use it, +`.py`, `.R` and `.Rmd` files as notebooks. To use it, - generate a jupyter config, if you don't have one yet, with `jupyter notebook --generate-config` - edit the config and include the below: ```python -c.NotebookApp.contents_manager_class = 'nbrmd.RmdFileContentsManager' -c.ContentsManager.default_nbrmd_formats = 'ipynb,Rmd' +c.NotebookApp.contents_manager_class = "nbrmd.RmdFileContentsManager" +c.ContentsManager.default_nbrmd_formats = "ipynb,py" # or "ipynb,nb.py" # or "ipynb,Rmd" ``` Then, make sure you have the `nbrmd` package up-to-date, and re-start jupyter, i.e. run @@ -66,21 +65,12 @@ pip install nbrmd --upgrade jupyter notebook ``` -With the above configuration, every Jupyter notebook will have a companion `.Rmd` notebook. -And every `.Rmd` notebook will have a companion `.ipynb` notebook. +With the above configuration, every Jupyter notebook will have a companion `.py` (`.nb.py`, or `.Rmd`) notebook. And every `.py` (`.nb.py`, or `.Rmd`) notebook will have a companion `.ipynb` notebook. -If you prefer the `.ipynb` notebook not to be created by Jupyter when a `.Rmd` -notebook is edited, set -``` -c.ContentsManager.default_nbrmd_formats = '' -``` -(as the default value is `ipynb`). Outputs for R markdown notebooks, however, -will not be saved any more. 
## Per notebook configuration -If you prefer that the companion R markdown notebook be generated only for - selected notebooks, +If you prefer that the companion notebook be generated only for selected notebooks, remove the `c.ContentsManager.default_nbrmd_formats` line from Jupyter's configuration, and instead edit the notebook metadata as follows: ``` @@ -92,30 +82,48 @@ configuration, and instead edit the notebook metadata as follows: "language_info": { (...) }, - "nbrmd_formats": "ipynb,Rmd" + "nbrmd_formats": "ipynb,py" } ``` -Accepted formats are: `ipynb`, `Rmd`, `py` and `R`. +Accepted formats should have these extensions: `ipynb`, `Rmd`, `py` and `R`. In case you want both `py` and `Rmd`, please note that the order matters: the first non-`ipynb` extension is the one used as the reference source for notebook inputs when you open the `ipynb` file. +Learn more on the possible values for `nbrmd_formats` [here](https://github.com/mwouts/nbsrc/issues/5#issuecomment-414093471). + ## Command line conversion -The package also provides a `nbrmd` script that converts Jupyter notebooks to R markdown notebooks, and vice-versa. +The package provides two `nbrmd` and `nbsrc` scripts that convert Jupyter notebooks to R markdown notebooks and scripts, and vice-versa. 
-Use it as: +Use them as: ```bash nbrmd jupyter.ipynb # this prints the Rmarkdown alternative nbrmd jupyter.ipynb -i # this creates a jupyter.Rmd file nbrmd jupyter.Rmd -i # and this, a jupyter.ipynb file nbrmd jupyter.Rmd -i -p # update the jupyter.ipynb file and preserve outputs that correspond to unchanged inputs + +nbsrc jupyter.ipynb # this prints the `.py` or `.R` alternative +nbsrc jupyter.ipynb -i # this creates a jupyter.py or jupyter.R file +nbsrc jupyter.py -i # and this, a jupyter.ipynb file +nbsrc jupyter.py -i -p # update the jupyter.ipynb file and preserve outputs that correspond to unchanged inputs ``` -Alternatively, the `nbrmd` package provides a `nbconvert` rmarkdown exporter that you can use with +Alternatively, the `nbrmd` package provides a few `nbconvert` exporters: ```bash nbconvert jupyter.ipynb --to rmarkdown +nbconvert jupyter.ipynb --to pynotebook +nbconvert jupyter.ipynb --to rnotebook ``` +## Useful cell metadata + +- Set `"active": "ipynb,py"` if you want that cell to be active only in the Jupyter notebook, and the Python script representation. Use `"active": "ipynb"` if you want that cell to be active only in Jupyter. +- Code cells that contain two consecutive blank lines use an explicit end-of-cell marker `"endofcell"` in the script representation. +- R markdown's cell options `echo` and `include` are mapped to the opposite of Jupyter cell metadata `hide_input` and `hide_output`. + +## Jupyter magics + +Jupyter magics are escaped in the script and R markdown representations so that scripts can actually be executed. Comment a magic with `#noescape` on the same line to avoid escaping. Non-standard magics can be escaped with `#escape`. 
diff --git a/binder/requirements.txt b/binder/requirements.txt index 4f50c7755..8ba68987b 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -1,4 +1,4 @@ -nbrmd>=0.4.5 +nbrmd>=0.5.0 plotly matplotlib pandas diff --git a/demo/Sample notebook with python representation.py b/demo/Sample notebook with python representation.py index aba12bcc6..1159465f6 100644 --- a/demo/Sample notebook with python representation.py +++ b/demo/Sample notebook with python representation.py @@ -33,7 +33,7 @@ # 4. Now, modify the python file, save, and refresh the Jupyter notebook. Observe how inputs were updated, outputs preserved when possible, and kernel was left unchanged. # 5. Browse the github repo using Jupyter in binder, and open arbitrary python files as notebooks. Run some of them when applicable (test `filled_step.py` for instance). -%matplotlib inline +# %matplotlib inline import matplotlib.pyplot as plt diff --git a/img/python_notebook.png b/img/python_notebook.png new file mode 100644 index 000000000..72e81d3e3 Binary files /dev/null and b/img/python_notebook.png differ diff --git a/img/python_source.png b/img/python_source.png new file mode 100644 index 000000000..e5f6220bc Binary files /dev/null and b/img/python_source.png differ diff --git a/nbrmd/__init__.py b/nbrmd/__init__.py index f07ee2ce4..b2c8d5d4a 100644 --- a/nbrmd/__init__.py +++ b/nbrmd/__init__.py @@ -13,6 +13,8 @@ try: from .rmarkdownexporter import RMarkdownExporter + from .srcexporter import PyNotebookExporter + from .srcexporter import RNotebookExporter except ImportError as err: RMarkdownExporter = str(err) diff --git a/nbrmd/cell_metadata.py b/nbrmd/cell_metadata.py index a9d9c114f..7a15c0158 100644 --- a/nbrmd/cell_metadata.py +++ b/nbrmd/cell_metadata.py @@ -48,7 +48,7 @@ def metadata_to_rmd_options(language, metadata): :param metadata: :return: """ - options = language.lower() + options = (language or 'R').lower() metadata = copy(metadata) if 'name' in metadata: options += ' ' + 
metadata['name'] + ',' @@ -74,6 +74,8 @@ def metadata_to_rmd_options(language, metadata): ', '.join(['"{}"'.format(str(v)) for v in opt_value]))) else: options += ' {}={},'.format(opt_name, str(opt_value)) + if not language: + options = options[2:] return options.strip(',').strip() @@ -255,7 +257,7 @@ def json_options_to_metadata(options): :return: """ try: - return json.loads(options) + return json.loads('{' + options + '}') except ValueError: return {} diff --git a/nbrmd/cells.py b/nbrmd/cells.py index 8aa2e74f6..de9d6c14a 100644 --- a/nbrmd/cells.py +++ b/nbrmd/cells.py @@ -8,6 +8,8 @@ from .languages import cell_language, is_code from .cell_metadata import metadata_to_rmd_options, rmd_options_to_metadata, \ json_options_to_metadata, metadata_to_json_options +from .magics import is_magic, escape_magic, unescape_magic +from .stringparser import StringParser def code_to_rmd(source, metadata, default_language): @@ -20,7 +22,7 @@ def code_to_rmd(source, metadata, default_language): """ lines = [] language = cell_language(source) or default_language - if 'Rmd' not in re.split('\\.|,', metadata.get('active', 'Rmd')): + if not is_active('Rmd', metadata): metadata['eval'] = False options = metadata_to_rmd_options(language, metadata) lines.append(u'```{{{}}}'.format(options)) @@ -43,32 +45,69 @@ def code_to_text(self, :param next_cell_is_code: :return: """ + + # Escape jupyter magics + language = cell_language(source) or default_language + active = is_active(self.ext, metadata) + if self.ext in ['.R', '.py']: + if language != ('R' if self.ext == '.R' else 'python') and active: + active = False + metadata['active'] = 'ipynb' + + if active: + source = escape_magic(source, language) + if self.ext == '.Rmd': return code_to_rmd(source, metadata, default_language) else: lines = [] - language = cell_language(source) or default_language - if language == default_language: - if self.ext == '.R': - options = metadata_to_rmd_options(language, metadata)[2:] + if self.ext == '.R': + 
if not active: + metadata['eval'] = False + options = metadata_to_rmd_options(None, metadata) + if language != 'R': + options = 'language="{}" {}'.format(language, options) + source = ['#+ ' + options] + ["#' " + s for s in source] + else: + options = metadata_to_rmd_options(None, metadata) if options != '': lines.append('#+ ' + options) - else: - options = metadata_to_json_options(metadata) - if options != '{}': - lines.append('# + ' + options) - lines.extend(source) - else: - lines.extend(self.markdown_escape( - code_to_rmd(source, metadata, default_language))) + else: # py + # end of cell marker + if not active or need_end_cell_marker(self, source): + endofcell = '-' + while True: + endofcell_re = re.compile(r'^#( )' + endofcell + r'\s*$') + if list(filter(endofcell_re.match, source)): + endofcell = endofcell + '-' + else: + break + metadata['endofcell'] = endofcell + + options = metadata_to_json_options(metadata) + if options != '{}': + lines.append('# + ' + options) + if not active: + source = ['# ' + line for line in source] + lines.extend(source) + if 'endofcell' in metadata: + lines.append('# ' + metadata['endofcell']) # Two blank lines before next code cell - if next_cell_is_code: + if next_cell_is_code and 'endofcell' not in metadata: lines.append('') return lines +def need_end_cell_marker(self, source): + """Issue #31: does the cell ends with a blank line? +Do we find two blank lines in the cell? 
In that case +we add an end-of-cell marker""" + return ((source and _BLANK_LINE.match(source[-1])) or + code_to_cell(self, source, False)[1] != len(source)) + + def cell_to_text(self, cell, next_cell=None, @@ -83,6 +122,8 @@ def cell_to_text(self, :return: """ source = cell.get('source').splitlines() + if cell.get('source').endswith('\n'): + source.append('') metadata = cell.get('metadata', {}) skipline = True if 'noskipline' in metadata: @@ -91,8 +132,11 @@ def cell_to_text(self, lines = [] if is_code(cell): - lines.extend(code_to_text(self, source, metadata, default_language, - next_cell and is_code(next_cell))) + lines.extend(code_to_text( + self, source, metadata, default_language, + next_cell and is_code(next_cell) and + (not need_end_cell_marker( + self, next_cell.get('source').splitlines())))) else: if source == []: source = [''] @@ -111,7 +155,8 @@ def cell_to_text(self, _START_CODE_RMD = re.compile(r"^```\{(.*)\}\s*$") _END_CODE_MD = re.compile(r"^```\s*$") -_CODE_OPTION_RPY = re.compile(r"^(#|# )\+(.*)$") +_CODE_OPTION_R = re.compile(r"^#\+(.*)\s*$") +_CODE_OPTION_PY = re.compile(r"^(#|# )\+(\s*)\{(.*)\}\s*$") _BLANK_LINE = re.compile(r"^\s*$") @@ -124,13 +169,35 @@ def start_code_rmd(line): return _START_CODE_RMD.match(line) -def start_code_rpy(line): +def start_code_r(line): + """ + A code cell starts here, in a R file + :param line: + :return: + """ + return _CODE_OPTION_R.match(line) + + +def start_code_py(line): + """ + A code cell starts here, in a R file + :param line: + :return: + """ + return _CODE_OPTION_PY.match(line) + + +def start_code_rpy(line, ext): """ A code cell starts here, in a py or R file :param line: :return: """ - return _CODE_OPTION_RPY.match(line) + if ext == '.R': + return start_code_r(line) + if ext == '.py': + return start_code_py(line) + raise ValueError('Unexpected extension {}'.format(ext)) def next_uncommented_is_code(lines): @@ -158,7 +225,8 @@ def text_to_cell(self, lines): return self.code_to_cell(lines, parse_opt=True) 
elif self.prefix != '' and not lines[0].startswith(self.prefix): return self.code_to_cell(lines, parse_opt=False) - elif self.ext == '.py' and next_uncommented_is_code(lines): + elif self.ext == '.py' and (next_uncommented_is_code(lines) + or is_magic(lines[0])): return self.code_to_cell(lines, parse_opt=False) return self.markdown_to_cell(lines) @@ -174,10 +242,15 @@ def parse_code_options(line, ext): if ext == '.Rmd': return rmd_options_to_metadata(_START_CODE_RMD.findall(line)[0]) elif ext == '.R': - return rmd_options_to_metadata(_CODE_OPTION_RPY.match(line).group(2)) + language, metadata = \ + rmd_options_to_metadata(_CODE_OPTION_R.findall(line)[0]) + if 'language' in metadata: + language = metadata['language'] + del metadata['language'] + return language, metadata return 'python', json_options_to_metadata( - _CODE_OPTION_RPY.match(line).group(2)) + _CODE_OPTION_PY.match(line).group(3)) def next_code_is_indented(lines): @@ -211,9 +284,12 @@ def no_code_before_next_blank_line(lines): def code_or_raw_cell(source, metadata): - if 'ipynb' not in re.split('\\.|,', metadata.get('active', 'ipynb')): - return new_raw_cell(source=source, metadata=metadata) - return new_code_cell(source=source, metadata=metadata) + """Return a code, or raw cell from given source and metadata""" + if not is_active('ipynb', metadata): + if metadata.get('active') == '': + del metadata['active'] + return new_raw_cell(source='\n'.join(source), metadata=metadata) + return new_code_cell(source='\n'.join(source), metadata=metadata) def code_to_cell(self, lines, parse_opt): @@ -227,67 +303,68 @@ def code_to_cell(self, lines, parse_opt): # Parse options if parse_opt: language, metadata = parse_code_options(lines[0], self.ext) - if self.ext == '.Rmd': + if self.ext == '.Rmd' and metadata.get('active') != '': metadata['language'] = language else: + language = 'python' if self.ext == '.py' else 'R' metadata = {} - # Find end of cell and return if self.ext == '.Rmd': + end_cell_re = _END_CODE_MD + 
elif self.ext == '.py' and 'endofcell' in metadata: + end_cell_re = re.compile(r'^#( )' + metadata['endofcell'] + r'\s*$') + del metadata['endofcell'] + elif self.ext == '.R' and not is_active('.R', metadata): + end_cell_re = _BLANK_LINE + else: + end_cell_re = None + + # Find end of cell and return + if end_cell_re: for pos, line in enumerate(lines): - if pos > 0 and _END_CODE_MD.match(line): + if pos > 0 and end_cell_re.match(line): next_line_blank = pos + 1 == len(lines) or \ _BLANK_LINE.match(lines[pos + 1]) + source = lines[1:pos] + if is_active(self.ext, metadata): + source = unescape_magic(source, language) + elif self.ext in ['.py', '.R']: + source = [self.markdown_unescape(s) for s in source] + if end_cell_re == _BLANK_LINE: + return code_or_raw_cell( + source=source, metadata=metadata), pos + 1 if next_line_blank and pos + 2 != len(lines): return code_or_raw_cell( - source='\n'.join(lines[1:pos]), metadata=metadata), \ - pos + 2 - cell = code_or_raw_cell( - source='\n'.join(lines[1:pos]), - metadata=metadata) + source=source, metadata=metadata), pos + 2 + cell = code_or_raw_cell(source=source, metadata=metadata) cell.metadata['noskipline'] = True return cell, pos + 1 else: prev_blank = False - triple = None + parser = StringParser(language) for pos, line in enumerate(lines): if parse_opt and pos == 0: continue - if self.ext == '.R' and line.startswith(self.prefix): + if parser.is_quoted(): + parser.read_line(line) + continue + + parser.read_line(line) + + if start_code_rpy(line, self.ext) or ( + self.ext == '.R' and line.startswith(self.prefix)): + source = lines[parse_opt:pos] + if is_active(self.ext, metadata): + source = unescape_magic(source, language) if prev_blank: return code_or_raw_cell( - source='\n'.join(lines[parse_opt:(pos - 1)]), - metadata=metadata), pos + source=source[:-1], metadata=metadata), pos cell = code_or_raw_cell( - source='\n'.join(lines[parse_opt:pos]), - metadata=metadata) + source=source, metadata=metadata) 
cell.metadata['noskipline'] = True return cell, pos - if self.ext == '.py': - single = None - for i, char in enumerate(line): - if char not in ['"', "'"]: - continue - - if single == char: - single = None - continue - if single is not None: - continue - if triple == char: - if line[i - 2:i + 1] == 3 * char: - triple = None - continue - if triple is not None: - continue - if line[i - 2:i + 1] == 3 * char: - triple = char - - if triple: - continue - if prev_blank: if _BLANK_LINE.match(line): # Two blank lines => end of cell @@ -298,25 +375,42 @@ def code_to_cell(self, lines, parse_opt): if next_code_is_indented(lines[pos:]): continue + source = lines[parse_opt:(pos - 1)] + if is_active(self.ext, metadata): + source = unescape_magic(source, language) + return code_or_raw_cell( - source='\n'.join(lines[parse_opt:(pos - 1)]), - metadata=metadata), min(pos + 1, - len(lines) - 1) + source=source, + metadata=metadata), min(pos + 1, len(lines) - 1) # are all the lines from here to next blank # escaped with the prefix? if self.ext == '.py': if no_code_before_next_blank_line(lines[pos:]): + + source = lines[parse_opt:(pos - 1)] + if is_active(self.ext, metadata): + source = unescape_magic(source, language) + return code_or_raw_cell( - source='\n'.join(lines[parse_opt:(pos - 1)]), - metadata=metadata), pos + source=source, metadata=metadata), pos prev_blank = _BLANK_LINE.match(line) # Unterminated cell? 
- return code_or_raw_cell( - source='\n'.join(lines[parse_opt:]), - metadata=metadata), len(lines) + source = lines[parse_opt:] + if is_active(self.ext, metadata): + source = unescape_magic(source, language) + elif self.ext in ['.py', '.R']: + source = [self.markdown_unescape(s) for s in source] + + if len(lines) >= 2 and _BLANK_LINE.match(lines[-1]) \ + and not _BLANK_LINE.match(lines[-2]): + cell = code_or_raw_cell(source=source[:-1], metadata=metadata) + cell.metadata['noskipline'] = True + return cell, len(lines) - 1 + + return code_or_raw_cell(source=source, metadata=metadata), len(lines) def markdown_to_cell(self, lines): @@ -366,3 +460,10 @@ def markdown_to_cell_rmd(lines): # Unterminated cell? return new_markdown_cell(source='\n'.join(lines)), len(lines) + + +def is_active(ext, metadata): + """Is the cell active for the given file extension?""" + if 'active' not in metadata: + return True + return ext.replace('.', '') in re.split('\\.|,', metadata['active']) diff --git a/nbrmd/cli.py b/nbrmd/cli.py index d522a29e5..032872753 100644 --- a/nbrmd/cli.py +++ b/nbrmd/cli.py @@ -1,4 +1,4 @@ -"""Command line conversion tool `nbrmd` +"""Command line conversion tools `nbrmd` and `nbsrc` """ import os @@ -6,11 +6,12 @@ from nbformat import writes as ipynb_writes from nbformat.reader import NotJSONError from nbrmd import readf, writef -from nbrmd import writes as rmd_writes +from nbrmd import writes +from .languages import get_default_language from .combine import combine_inputs_with_outputs -def convert(nb_files, in_place=True, combine=True): +def convert_nbrmd(nb_files, in_place=True, combine=True): """ Export R markdown notebooks, or Jupyter notebooks, to the opposite format :param nb_files: one or more notebooks @@ -50,12 +51,12 @@ def convert(nb_files, in_place=True, combine=True): writef(nb, nb_dest) else: if ext == '.ipynb': - print(rmd_writes(nb)) + print(writes(nb)) else: print(ipynb_writes(nb)) -def cli(args=None): +def cli_nbrmd(args=None): """ Command line 
parser :param args: @@ -76,10 +77,87 @@ def cli(args=None): return parser.parse_args(args) -def main(): +def nbrmd(): """ Entry point for the nbrmd script :return: """ - args = cli() - convert(args.notebooks, args.in_place, args.preserve_outputs) + args = cli_nbrmd() + convert_nbrmd(args.notebooks, args.in_place, args.preserve_outputs) + + +def convert_nbsrc(nb_files, in_place=True, combine=True): + """ + Export python or R scripts, or Jupyter notebooks, to the opposite format + :param nb_files: one or more notebooks + :param in_place: should result of conversion be stored in file + with opposite extension? + :param combine: should the current outputs of .ipynb file be preserved, + when a cell with corresponding input is found in .py or .R file? + :return: + """ + for nb_file in nb_files: + file, ext = os.path.splitext(nb_file) + if ext not in ['.ipynb', '.py', '.R']: + raise TypeError( + 'File {} is neither a Jupyter (.ipynb) nor a ' + 'python script (.py), nor a R script (.R)'.format(nb_file)) + + nb = readf(nb_file) + main_language = get_default_language(nb) + ext_dest = '.R' if main_language == 'R' else '.py' + + if in_place: + if ext == '.ipynb': + nb_dest = file + ext_dest + print('Jupyter notebook {} being converted to ' + 'source {}'.format(nb_file, nb_dest)) + else: + msg = '' + nb_dest = file + '.ipynb' + if combine and os.path.isfile(nb_dest): + try: + nb_outputs = readf(nb_dest) + combine_inputs_with_outputs(nb, nb_outputs) + msg = '(outputs were preserved)' + except (IOError, NotJSONError) as error: + msg = '(outputs were not preserved: {})'.format(error) + print('R Markdown {} being converted to ' + 'Jupyter notebook {} {}' + .format(nb_file, nb_dest, msg)) + writef(nb, nb_dest) + else: + if ext == '.ipynb': + print(writes(nb, ext_dest)) + else: + print(ipynb_writes(nb)) + + +def cli_nbsrc(args=None): + """ + Command line parser + :param args: + :return: + """ + parser = argparse.ArgumentParser(description='Jupyter notebook ' + 'from/to R or Python 
script') + parser.add_argument('notebooks', + help='One or more .ipynb or .R or .py script(s) ' + 'to be converted to the alternate form', + nargs='+') + parser.add_argument('-i', '--in-place', action='store_true', + help='Store the result of conversion ' + 'to file with opposite extension') + parser.add_argument('-p', '--preserve_outputs', action='store_true', + help='Preserve outputs of .ipynb ' + 'notebook when file exists and inputs match') + return parser.parse_args(args) + + +def nbsrc(): + """ + Entry point for the nbsrc script + :return: + """ + args = cli_nbsrc() + convert_nbsrc(args.notebooks, args.in_place, args.preserve_outputs) diff --git a/nbrmd/contentsmanager.py b/nbrmd/contentsmanager.py index 45fd6f622..f0b23cc2a 100644 --- a/nbrmd/contentsmanager.py +++ b/nbrmd/contentsmanager.py @@ -3,6 +3,7 @@ import os import nbformat import mock +import six try: import notebook.transutils # noqa @@ -33,21 +34,62 @@ def _reads(text, as_version, **kwargs): def check_formats(formats): """ Parse, validate and return notebooks extensions - :param formats: a list of notebook extensions, or a comma separated string - :return: list of extensions + :param formats: a list of lists of notebook extensions, + or a colon separated string of extension groups, comma separated + :return: list of lists (groups) of notebook extensions """ - if not isinstance(formats, list): - formats = formats.split(',') - formats = [fmt if fmt.startswith('.') else '.' + fmt - for fmt in formats if fmt != ''] + # Parse formats represented as strings + if not isinstance(formats, list): + formats = [group.split(',') for group in formats.split(';')] + + expected_format = ("Notebook metadata 'nbrmd_formats' should " + "be a list of extension groups, like 'ipynb,Rmd'. 
" + "Groups can be separated with colon, for instance: " + "'ipynb,nb.py;script.ipynb,py'") + + validated_formats = [] + for group in formats: + if not isinstance(group, list): + # In early versions (0.4 and below), formats could be a list of + # extensions. We understand this as a single group + return check_formats([formats]) + validated_group = [] + for fmt in group: + if not isinstance(fmt, six.string_types): + raise ValueError('Extensions should be strings, not {}.\n{}' + .format(str(fmt), + str(nbrmd.NOTEBOOK_EXTENSIONS), + expected_format)) + if fmt == '': + continue + if not fmt.startswith('.'): + fmt = '.' + fmt + if not any([fmt.endswith(ext) + for ext in nbrmd.NOTEBOOK_EXTENSIONS]): + raise ValueError('Group extension {} contains {}, ' + 'which does not end with either {}.\n{}' + .format(str(group), fmt, + str(nbrmd.NOTEBOOK_EXTENSIONS), + expected_format)) + validated_group.append(fmt) + + if validated_group: + validated_formats.append(validated_group) + + return validated_formats + + +def file_fmt_ext(path): + """ + Return file name, format (possibly .nb.py) and extension (.py) + """ + file, ext = os.path.splitext(path) + for fmt in ['.nb.py', '.nb.R']: + if path.endswith(fmt): + return path[:-len(fmt)], fmt, ext - allowed = nbrmd.NOTEBOOK_EXTENSIONS - if not isinstance(formats, list) or not set(formats).issubset(allowed): - raise TypeError("Notebook metadata 'nbrmd_formats' " - "should be subset of {}, but was {}" - "".format(str(allowed), str(formats))) - return formats + return file, ext, ext class RmdFileContentsManager(FileContentsManager, Configurable): @@ -70,13 +112,30 @@ def all_nb_extensions(self): default_nbrmd_formats = Unicode( u'ipynb', help='Save notebooks to these file extensions. 
' - 'Can be any of ipynb,Rmd,py,R, comma separated', + 'Can be any of ipynb,Rmd,py,R,nb.py,nb.R comma separated', config=True) + def format_group(self, fmt, nb=None): + """Return the group of extensions that contains 'fmt'""" + nbrmd_formats = ((nb.metadata.get('nbrmd_formats') if nb else None) + or self.default_nbrmd_formats) + + nbrmd_formats = check_formats(nbrmd_formats) + + # Find group that contains the current format + for group in nbrmd_formats: + if fmt in group: + return group + + if ['.ipynb'] in nbrmd_formats: + return [fmt, '.ipynb'] + + return [fmt] + def _read_notebook(self, os_path, as_version=4, load_alternative_format=True): """Read a notebook from an os path.""" - file, ext = os.path.splitext(os_path) + file, fmt, ext = file_fmt_ext(os_path) if ext in self.nb_extensions: with mock.patch('nbformat.reads', _nbrmd_reads(ext)): nb = super(RmdFileContentsManager, self) \ @@ -88,49 +147,44 @@ def _read_notebook(self, os_path, as_version=4, if not load_alternative_format: return nb - # Notebook formats: default, notebook metadata, or current extension - nbrmd_formats = (nb.metadata.get('nbrmd_formats') or - self.default_nbrmd_formats) - - nbrmd_formats = check_formats(nbrmd_formats) - - if ext not in nbrmd_formats: - nbrmd_formats.append(ext) + fmt_group = self.format_group(fmt, nb) - nbrmd_formats = check_formats(nbrmd_formats) + source_format = fmt + outputs_format = fmt - # Source format is current ext, or is first non .ipynb format - # that is found on disk - source_format = None - if ext != '.ipynb': - source_format = ext + # Source format is first non ipynb format found on disk + if fmt.endswith('.ipynb'): + for alt_fmt in fmt_group: + if not alt_fmt.endswith('.ipynb') and \ + os.path.isfile(file + alt_fmt): + source_format = alt_fmt + break + # Outputs taken from ipynb if in group, if file exists else: - for fmt in nbrmd_formats: - if fmt != '.ipynb' and os.path.isfile(file + fmt): - source_format = fmt + for alt_fmt in fmt_group: + if 
alt_fmt.endswith('.ipynb') and \ + os.path.isfile(file + alt_fmt): + outputs_format = alt_fmt break - nb_outputs = None - if source_format is not None and ext != source_format: + if source_format != fmt: self.log.info('Reading SOURCE from {}' .format(os.path.basename(file + source_format))) - self.log.info('Reading OUTPUTS from {}' - .format(os.path.basename(os_path))) nb_outputs = nb nb = self._read_notebook(file + source_format, as_version=as_version, load_alternative_format=False) - elif ext != '.ipynb' and '.ipynb' in nbrmd_formats \ - and os.path.isfile(file + '.ipynb'): - self.log.info('Reading SOURCE from {}' - .format(os.path.basename(os_path))) + elif outputs_format != fmt: self.log.info('Reading OUTPUTS from {}' - .format(os.path.basename(file + '.ipynb'))) - nb_outputs = self._read_notebook(file + '.ipynb', - as_version=as_version, - load_alternative_format=False) + .format(os.path.basename(file + outputs_format))) + if outputs_format != fmt: + nb_outputs = self._read_notebook(file + outputs_format, + as_version=as_version, + load_alternative_format=False) + else: + nb_outputs = None - if nb_outputs is not None: + if nb_outputs: combine.combine_inputs_with_outputs(nb, nb_outputs) if self.notary.check_signature(nb_outputs): self.notary.sign(nb) @@ -139,28 +193,18 @@ def _read_notebook(self, os_path, as_version=4, def _save_notebook(self, os_path, nb): """Save a notebook to an os_path.""" - os_file, org_ext = os.path.splitext(os_path) - - formats = (nb.get('metadata', {}).get('nbrmd_formats') or - self.default_nbrmd_formats) - - formats = check_formats(formats) - - if org_ext not in formats: - formats.append(org_ext) - - formats = check_formats(formats) - - for ext in formats: - os_path_ext = os_file + ext - self.log.info("Saving %s", os.path.basename(os_path_ext)) - if ext in self.nb_extensions: - with mock.patch('nbformat.writes', _nbrmd_writes(ext)): + os_file, fmt, _ = file_fmt_ext(os_path) + for alt_fmt in self.format_group(fmt, nb): + os_path_fmt = 
os_file + alt_fmt + self.log.info("Saving %s", os.path.basename(os_path_fmt)) + alt_ext = '.' + alt_fmt.split('.')[-1] + if alt_ext in self.nb_extensions: + with mock.patch('nbformat.writes', _nbrmd_writes(alt_ext)): super(RmdFileContentsManager, self) \ - ._save_notebook(os_path_ext, nb) + ._save_notebook(os_path_fmt, nb) else: super(RmdFileContentsManager, self) \ - ._save_notebook(os_path_ext, nb) + ._save_notebook(os_path_fmt, nb) def get(self, path, content=True, type=None, format=None): """ Takes a path for an entity and returns its model""" @@ -178,18 +222,22 @@ def get(self, path, content=True, type=None, format=None): def trust_notebook(self, path): """Trust the current notebook""" - file, _ = os.path.splitext(path) - super(RmdFileContentsManager, self).trust_notebook(file + '.ipynb') + file, fmt, _ = file_fmt_ext(path) + for alt_fmt in self.format_group(fmt): + if alt_fmt.endswith('.ipynb'): + super(RmdFileContentsManager, self).trust_notebook(file + + alt_fmt) def rename_file(self, old_path, new_path): """Rename the current notebook, as well as its alternative representations""" - old_file, org_ext = os.path.splitext(old_path) - new_file, new_ext = os.path.splitext(new_path) - if org_ext in self.all_nb_extensions() and org_ext == new_ext: - for ext in self.all_nb_extensions(): - if self.file_exists(old_file + ext): + old_file, org_fmt, _ = file_fmt_ext(old_path) + new_file, new_fmt, _ = file_fmt_ext(new_path) + + if org_fmt == new_fmt: + for alt_fmt in self.format_group(org_fmt): + if self.file_exists(old_file + alt_fmt): super(RmdFileContentsManager, self) \ - .rename_file(old_file + ext, new_file + ext) + .rename_file(old_file + alt_fmt, new_file + alt_fmt) else: super(RmdFileContentsManager, self).rename_file(old_path, new_path) diff --git a/nbrmd/languages.py b/nbrmd/languages.py index 54b0e04e3..e7f414cfe 100644 --- a/nbrmd/languages.py +++ b/nbrmd/languages.py @@ -4,7 +4,7 @@ _JUPYTER_LANGUAGES = ['R', 'bash', 'sh', 'python', 'python2', 'python3', 
'javascript', 'js', 'perl'] -_JUPYTER_LANGUAGES_RE = [re.compile(r"^%%{}\s*".format(lang)) +_JUPYTER_LANGUAGES_RE = [re.compile(r"^# %%{}\s*".format(lang)) for lang in _JUPYTER_LANGUAGES] diff --git a/nbrmd/magics.py b/nbrmd/magics.py new file mode 100644 index 000000000..1d7510474 --- /dev/null +++ b/nbrmd/magics.py @@ -0,0 +1,62 @@ +"""Escape Jupyter magics when converting to other formats""" + +import re +from .stringparser import StringParser + +# Line magics retrieved manually (Aug 18) with %lsmagic +_LINE_MAGICS = '%alias %alias_magic %autocall %automagic %autosave ' \ + '%bookmark %cd %clear %cls %colors %config ' \ + '%connect_info %copy %ddir %debug %dhist %dirs ' \ + '%doctest_mode %echo %ed %edit %env %gui %hist ' \ + '%history %killbgscripts %ldir %less %load %load_ext ' \ + '%loadpy %logoff %logon %logstart %logstate %logstop ' \ + '%ls %lsmagic %macro %magic %matplotlib %mkdir %more ' \ + '%notebook %page %pastebin %pdb %pdef %pdoc %pfile ' \ + '%pinfo %pinfo2 %popd %pprint %precision %profile ' \ + '%prun %psearch %psource %pushd %pwd %pycat %pylab ' \ + '%qtconsole %quickref %recall %rehashx %reload_ext ' \ + '%ren %rep %rerun %reset %reset_selective %rmdir %run ' \ + '%save %sc %set_env %store %sx %system %tb %time ' \ + '%timeit %unalias %unload_ext %who %who_ls %whos ' \ + '%xdel %xmode'.split(' ') + +# A magic expression is a line or cell magic escaped zero, or multiple times +_PERCENT_RE = re.compile(r"^(# |#)*%") +_FORCE_ESC_RE = re.compile(r"^(# |#)*%(.*)(#| )escape") +_FORCE_NOT_ESC_RE = re.compile(r"^(# |#)*%(.*)noescape") +_MAGIC_RE = re.compile(r"^(# |#)*(%%|{})".format('|'.join(_LINE_MAGICS))) + + +def is_magic(line): + """Is the current line a (possibly escaped) Jupyter magic?""" + return (_FORCE_ESC_RE.match(line) or (not _FORCE_NOT_ESC_RE.match(line) + and _MAGIC_RE.match(line))) + + +def escape_magic(source, language='python'): + """Escape Jupyter magics with '# '""" + parser = StringParser(language) + for pos, line in 
enumerate(source): + if not parser.is_quoted() and is_magic(line): + source[pos] = '# ' + line + parser.read_line(line) + return source + + +def unesc(line): + """Uncomment once a commented line""" + if line.startswith('# '): + return line[2:] + if line.startswith('#'): + return line[1:] + return line + + +def unescape_magic(source, language='python'): + """Unescape Jupyter magics""" + parser = StringParser(language) + for pos, line in enumerate(source): + if not parser.is_quoted() and is_magic(line): + source[pos] = unesc(line) + parser.read_line(line) + return source diff --git a/nbrmd/nbrmd.py b/nbrmd/nbrmd.py index 50caac627..9757c0e8f 100644 --- a/nbrmd/nbrmd.py +++ b/nbrmd/nbrmd.py @@ -15,7 +15,7 @@ import os import io -from copy import copy +from copy import deepcopy from nbformat.v4.rwbase import NotebookReader, NotebookWriter from nbformat.v4.nbbase import new_notebook import nbformat @@ -23,7 +23,8 @@ from .header import header_to_metadata_and_cell, metadata_and_cell_to_header, \ encoding_and_executable from .languages import get_default_language, find_main_language -from .cells import start_code_rmd, start_code_rpy, cell_to_text, text_to_cell +from .cells import start_code_rmd, start_code_r, start_code_py +from .cells import cell_to_text, text_to_cell from .cells import markdown_to_cell_rmd, markdown_to_cell, code_to_cell # ----------------------------------------------------------------------------- @@ -45,7 +46,8 @@ class TextNotebookReader(NotebookReader): def __init__(self, ext): self.ext = ext self.prefix = markdown_comment(ext) - self.start_code = start_code_rmd if ext == '.Rmd' else start_code_rpy + self.start_code = start_code_rmd if ext == '.Rmd' else \ + start_code_py if ext == '.py' else start_code_r if ext == '.Rmd': self.markdown_to_cell = markdown_to_cell_rmd @@ -131,11 +133,15 @@ def markdown_escape(self, lines): def writes(self, nb, **kwargs): """Write the text representation of a notebook to a string""" - nb = copy(nb) + nb = 
deepcopy(nb) if self.ext == '.py': - default_language = 'python' + default_language = (nb.metadata.get('main_language') or + nb.metadata.get('language_info', {}) + .get('name', 'python')) elif self.ext == '.R': - default_language = 'R' + default_language = (nb.metadata.get('main_language') or + nb.metadata.get('language_info', {}) + .get('name', 'R')) else: default_language = get_default_language(nb) @@ -145,6 +151,9 @@ def writes(self, nb, **kwargs): for i in range(len(nb.cells)): cell = nb.cells[i] next_cell = nb.cells[i + 1] if i + 1 < len(nb.cells) else None + if cell.cell_type == 'raw' and 'active' not in cell.metadata: + cell.metadata['active'] = '' + lines.extend(self.cell_to_text(cell, next_cell, default_language=default_language)) diff --git a/nbrmd/srcexporter.py b/nbrmd/srcexporter.py new file mode 100644 index 000000000..6853aeafe --- /dev/null +++ b/nbrmd/srcexporter.py @@ -0,0 +1,37 @@ +""" +R and Py notebook exporters for nbconvert +""" + +from traitlets import default +from nbconvert.exporters import Exporter +import nbrmd + + +class PyNotebookExporter(Exporter): + """ + Exports to a python notebook (.py) + """ + + @default('file_extension') + def _file_extension_default(self): + return '.py' + + def from_notebook_node(self, nb, resources=None, **kw): + resources = resources or {} + resources['output_extension'] = self.file_extension + return nbrmd.writes(nb, ext='.py'), resources + + +class RNotebookExporter(Exporter): + """ + Exports to a R notebook (.R) + """ + + @default('file_extension') + def _file_extension_default(self): + return '.R' + + def from_notebook_node(self, nb, resources=None, **kw): + resources = resources or {} + resources['output_extension'] = self.file_extension + return nbrmd.writes(nb, ext='.R'), resources diff --git a/nbrmd/stringparser.py b/nbrmd/stringparser.py new file mode 100644 index 000000000..7ed7a043b --- /dev/null +++ b/nbrmd/stringparser.py @@ -0,0 +1,49 @@ +"""A simple file parser that can tell whether the first 
character of a line +is quoted or not""" + + +class StringParser: + """A simple file parser that can tell whether the first character of a line + is quoted or not""" + python = True + single = None + triple = None + + def __init__(self, language): + self.python = language != 'R' + + def is_quoted(self): + """Is the next line quoted?""" + return self.single or self.triple + + def read_line(self, line): + """Read a new line""" + for i, char in enumerate(line): + if char not in ['"', "'"]: + continue + # Is the char escaped? + if line[i - 1:i] == '\\': + continue + + if self.single == char: + self.single = None + continue + if self.single is not None: + continue + + if not self.python: + continue + + if self.triple == char: + if line[i - 2:i + 1] == 3 * char: + self.triple = None + continue + if self.triple is not None: + continue + if line[i - 2:i + 1] == 3 * char: + self.triple = char + continue + + # Line ended + if self.python: + self.single = None diff --git a/requirements.txt b/requirements.txt index 435242de5..f53c695f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ nbformat>=4.0.0 pyyaml mock +six diff --git a/setup.py b/setup.py index f31c8fb74..62d6c7fd2 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='nbrmd', - version='0.4.6', + version='0.5.0', author='Marc Wouts', author_email='marc.wouts@gmail.com', description='Jupyter from/to R markdown notebooks', @@ -15,11 +15,14 @@ long_description_content_type='text/markdown', url='https://github.com/mwouts/nbrmd', packages=find_packages(), - entry_points={'console_scripts': ['nbrmd = nbrmd.cli:main'], + entry_points={'console_scripts': ['nbrmd = nbrmd.cli:nbrmd', + 'nbsrc = nbrmd.cli:nbsrc'], 'nbconvert.exporters': - ['rmarkdown = nbrmd:RMarkdownExporter']}, + ['rmarkdown = nbrmd:RMarkdownExporter', + 'pynotebook = nbrmd:PyNotebookExporter', + 'rnotebook = nbrmd:RNotebookExporter']}, tests_require=['pytest', 'testfixtures'], - install_requires=['nbformat>=4.0.0', 'mock', 
'pyyaml'], + install_requires=['nbformat>=4.0.0', 'mock', 'pyyaml', 'six'], license='MIT', classifiers=('Development Status :: 4 - Beta', 'Environment :: Console', diff --git a/tests/chunk_options.Rmd b/tests/chunk_options.Rmd index 2121e6df3..c0458560f 100644 --- a/tests/chunk_options.Rmd +++ b/tests/chunk_options.Rmd @@ -17,7 +17,7 @@ import pandas as pd x = pd.Series({'A':1, 'B':3, 'C':2}) ``` -```{python bar_plot, echo=FALSE, fig.width=8, fig.height=5} +```{python bar_plot, echo=FALSE, fig.height=5, fig.width=8} x.plot(kind='bar', title='Sample plot') ``` diff --git a/tests/ioslides.Rmd b/tests/ioslides.Rmd index 68429ca97..13da6baeb 100644 --- a/tests/ioslides.Rmd +++ b/tests/ioslides.Rmd @@ -37,7 +37,7 @@ For this plot I chose not to display the source code.
-```{python echo=FALSE, fig.width=8, fig.height=5} +```{python echo=FALSE, fig.height=5, fig.width=8} x.plot(kind='bar', title='Sample plot') ```
diff --git a/tests/jupyter_with_raw_cell_in_body.ipynb b/tests/jupyter_with_raw_cell_in_body.ipynb new file mode 100644 index 000000000..6b380fb3a --- /dev/null +++ b/tests/jupyter_with_raw_cell_in_body.ipynb @@ -0,0 +1,60 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1+2+3" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "This is a raw cell" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a markdown cell" + ] + } + ], + "metadata": { + "celltoolbar": "Edit Metadata", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/mirror/R_sample.ipynb b/tests/mirror/R_sample.ipynb new file mode 100644 index 000000000..2186eba9b --- /dev/null +++ b/tests/mirror/R_sample.ipynb @@ -0,0 +1,48 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: \"R Notebook\"\n", + "output: html_notebook\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. \n", + "\n", + "Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot(cars)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.\n", + "\n", + "When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).\n", + "\n", + "The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed." + ] + } + ], + "metadata": { + "main_language": "R" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/mirror/chunk_options.ipynb b/tests/mirror/chunk_options.ipynb new file mode 100644 index 000000000..c13cb7075 --- /dev/null +++ b/tests/mirror/chunk_options.ipynb @@ -0,0 +1,74 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: \"Test chunk options in Rmd/Jupyter conversion\"\n", + "author: \"Marc Wouts\"\n", + "date: \"June 16, 2018\"\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hide_output": true, + "name": "knitr_setup" + }, + "outputs": [], + "source": [ + "%%R\n", + "knitr::opts_chunk$set(echo = FALSE, fig.width = 10, fig.height = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "noskipline": true + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hide_input": false + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "x = pd.Series({'A':1, 'B':3, 'C':2})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "fig.height": 5, + "fig.width": 8, + "hide_input": true, + "name": "bar_plot", + "noskipline": true + }, + "outputs": [], 
+ "source": [ + "x.plot(kind='bar', title='Sample plot')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/mirror/ioslides.ipynb b/tests/mirror/ioslides.ipynb new file mode 100644 index 000000000..ded305a89 --- /dev/null +++ b/tests/mirror/ioslides.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: \"Quick ioslides\"\n", + "subtitle: \"Slides generated using R, python and ioslides\"\n", + "author: \"Marc Wouts\"\n", + "date: \"June 15, 2018\"\n", + "output:\n", + " ioslides_presentation:\n", + " widescreen: true\n", + " smaller: true\n", + "editor_options:\n", + " chunk_output_type: console\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is ioslides?\n", + "\n", + "This is the default format in rstudio for building interactive HTML presentations.\n", + "\n", + "Enjoy the [manual](https://rmarkdown.rstudio.com/ioslides_presentation_format.html)!\n", + "\n", + "These slides can be turned to a single HTML file with either a click on 'knitr' in rstudio, or, command line:\n", + "```bash\n", + "R -e 'rmarkdown::render(\"ioslides.Rmd\")'\n", + "```\n", + "\n", + "## A sample plot\n", + "\n", + "
\n", + "Here we create a sample data set for plotting." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hide_input": false + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "x = pd.Series({'A':1, 'B':3, 'C':2})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "noskipline": true + }, + "source": [ + "Then, in another column we plot. The R notebook code chunks have many [options](https://yihui.name/knitr/options/).\n", + "For this plot I chose not to display the source code.\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "fig.height": 5, + "fig.width": 8, + "hide_input": true, + "noskipline": true + }, + "outputs": [], + "source": [ + "x.plot(kind='bar', title='Sample plot')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
" + ] + } + ], + "metadata": { + "main_language": "python" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/mirror/ir_notebook.R b/tests/mirror/ir_notebook.R new file mode 100644 index 000000000..b8926f984 --- /dev/null +++ b/tests/mirror/ir_notebook.R @@ -0,0 +1,23 @@ +#' --- +#' jupyter: +#' kernelspec: +#' display_name: R +#' language: R +#' name: ir +#' language_info: +#' codemirror_mode: r +#' file_extension: .r +#' mimetype: text/x-r-source +#' name: R +#' pygments_lexer: r +#' version: 3.5.0 +#' --- + +#' This is a jupyter notebook that uses the IR kernel. + +sum(1:10) + + +plot(cars) + + diff --git a/tests/mirror/jupyter.py b/tests/mirror/jupyter.py new file mode 100644 index 000000000..fe5502f74 --- /dev/null +++ b/tests/mirror/jupyter.py @@ -0,0 +1,34 @@ +# --- +# jupyter: +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# language_info: +# codemirror_mode: +# name: ipython +# version: 3 +# file_extension: .py +# mimetype: text/x-python +# name: python +# nbconvert_exporter: python +# pygments_lexer: ipython3 +# version: 3.6.4 +# --- + +# # Jupyter notebook +# +# This notebook is a simple jupyter notebook. It only has markdown and code cells. And it does not contain consecutive markdown cells. 
We start with an addition: + +a = 1 +b = 2 +a + b + +# Now we return a few tuples + +a, b + + +a, b, a+b + +# And this is already the end of the notebook diff --git a/tests/mirror/jupyter_again.py b/tests/mirror/jupyter_again.py new file mode 100644 index 000000000..316570d85 --- /dev/null +++ b/tests/mirror/jupyter_again.py @@ -0,0 +1,34 @@ +# --- +# jupyter: +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# language_info: +# codemirror_mode: +# name: ipython +# version: 3 +# file_extension: .py +# mimetype: text/x-python +# name: python +# nbconvert_exporter: python +# pygments_lexer: ipython3 +# version: 3.6.5 +# --- + +c = ''' +title: "Quick test" +output: + ioslides_presentation: + widescreen: true + smaller: true +editor_options: + chunk_output_type console +''' + + +import yaml +print(yaml.dump(yaml.load(c))) + + +?next diff --git a/tests/mirror/jupyter_with_raw_cell_in_body.py b/tests/mirror/jupyter_with_raw_cell_in_body.py new file mode 100644 index 000000000..de82af561 --- /dev/null +++ b/tests/mirror/jupyter_with_raw_cell_in_body.py @@ -0,0 +1,26 @@ +# --- +# jupyter: +# celltoolbar: Edit Metadata +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# language_info: +# codemirror_mode: +# name: ipython +# version: 3 +# file_extension: .py +# mimetype: text/x-python +# name: python +# nbconvert_exporter: python +# pygments_lexer: ipython3 +# version: 3.6.6 +# --- + +1+2+3 + +# + {"active": "", "endofcell": "-"} +# This is a raw cell +# - + +# This is a markdown cell diff --git a/tests/mirror/jupyter_with_raw_cell_on_top.py b/tests/mirror/jupyter_with_raw_cell_on_top.py new file mode 100644 index 000000000..31c89a80f --- /dev/null +++ b/tests/mirror/jupyter_with_raw_cell_on_top.py @@ -0,0 +1,28 @@ +# --- +# title: "Quick test" +# output: +# ioslides_presentation: +# widescreen: true +# smaller: true +# editor_options: +# chunk_output_type console +# jupyter: +# kernelspec: +# display_name: Python 3 
+# language: python +# name: python3 +# language_info: +# codemirror_mode: +# name: ipython +# version: 3 +# file_extension: .py +# mimetype: text/x-python +# name: python +# nbconvert_exporter: python +# pygments_lexer: ipython3 +# version: 3.6.5 +# --- + +1+2+3 + + diff --git a/tests/mirror/markdown.ipynb b/tests/mirror/markdown.ipynb new file mode 100644 index 000000000..9f4d6daf7 --- /dev/null +++ b/tests/mirror/markdown.ipynb @@ -0,0 +1,22 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This is a plain markdown file\n", + "\n", + "```python\n", + "1+1\n", + "```\n", + "\n", + "And more comments" + ] + } + ], + "metadata": { + "main_language": "python" + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/mirror/python_notebook_sample.ipynb b/tests/mirror/python_notebook_sample.ipynb new file mode 100644 index 000000000..4d3fe44fe --- /dev/null +++ b/tests/mirror/python_notebook_sample.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Specifications for Jupyter notebooks as python scripts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Markdown (and raw) cells" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Markdown cells are escaped with a single quote. Two consecutive\n", + "cells are separated with a blank line. Raw cells are not\n", + "distinguished from markdown." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code cells" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Code cells are separated by one blank line from markdown cells.\n", + "If a code cells follows a comment, then that comment belong to the\n", + "code cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For instance, this is a code cell that starts with a\n", + "# code comment, split on multiple lines\n", + "1 + 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Code cells are terminated with either\n", + "- end of file\n", + "- two blank lines if followed by an other code cell\n", + "- one blank line if followed by a markdown cell" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Code cells can have blank lines, but no two consecutive blank lines (unless\n", + "a specific cell end is specified in the cell options, see below).\n", + "Now we have a cell with multiple instructions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = 3\n", + "\n", + "a + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And a cell with an arbitrary count of blank lines. 
This last example\n", + "is also an instance of a cell with metadata information in json format,\n", + "escaped with '#+' or '# +'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def f(x):\n", + " return x + 1\n", + "\n", + "\n", + "def g(x):\n", + " return x + 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cells after a cell with and explicit end-of-cell are just separated by one\n", + "# blank line\n", + "a + 2" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/python_notebook_sample.py b/tests/python_notebook_sample.py index 6bce6cefc..c26bc8a8d 100755 --- a/tests/python_notebook_sample.py +++ b/tests/python_notebook_sample.py @@ -21,17 +21,27 @@ # - two blank lines if followed by an other code cell # - one blank line if followed by a markdown cell -# Code cells can have blank lines, but no two consecutive blank lines (that's -# a cell break!). Below we have a cell with multiple instructions: +# Code cells can have blank lines, but no two consecutive blank lines (unless +# a specific cell end is specified in the cell options, see below). +# Now we have a cell with multiple instructions a = 3 a + 1 -# ## Metadata in code cells +# And a cell with an arbitrary count of blank lines. 
This last example +# is also an instance of a cell with metadata information in json format, +# escaped with '#+' or '# +' -# In case a code cell has metadata information, it -# is represented in json format, escaped with '#+' or '# +' +# + {"endofcell": "-"} +def f(x): + return x + 1 -# + {"scrolled": true} + +def g(x): + return x + 2 +# - + +# Cells after a cell with and explicit end-of-cell are just separated by one +# blank line a + 2 diff --git a/tests/test_active_cells.py b/tests/test_active_cells.py index b7f57bd95..bbbe88a1c 100644 --- a/tests/test_active_cells.py +++ b/tests/test_active_cells.py @@ -28,6 +28,39 @@ def test_active_all(ext): compare(ACTIVE_ALL[ext], nbrmd.writes(nb, ext=ext)) +ACTIVE_IPYNB = {'.py': """# + {"active": "ipynb", "endofcell": "-"} +# # This cell is active only in ipynb +# %matplotlib inline +# - +""", + '.Rmd': """```{python active="ipynb", eval=FALSE} +# This cell is active only in ipynb +%matplotlib inline +``` +""", + '.R': """#+ language="python", active="ipynb", eval=FALSE +#' # This cell is active only in ipynb +#' %matplotlib inline +""", + '.ipynb': {'cell_type': 'code', + 'source': '# This cell is active only in ipynb\n' + '%matplotlib inline', + 'metadata': {'active': 'ipynb'}, + 'execution_count': None, + 'outputs': []}} + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('ext', ['.Rmd', '.py', '.R']) +def test_active_ipynb(ext): + nb = nbrmd.reads(ACTIVE_IPYNB[ext], ext=ext) + assert len(nb.cells) == 1 + compare(nb.cells[0], ACTIVE_IPYNB['.ipynb']) + if ext != '.R': + compare(ACTIVE_IPYNB[ext], nbrmd.writes(nb, ext=ext)) + + ACTIVE_PY_IPYNB = {'.py': """# + {"active": "ipynb,py"} # This cell is active in py and ipynb extensions """, @@ -35,9 +68,8 @@ def test_active_all(ext): # This cell is active in py and ipynb extensions ``` """, - '.R': """#' ```{python active="ipynb,py", eval=FALSE} + '.R': """#+ language="python", 
active="ipynb,py", eval=FALSE #' # This cell is active in py and ipynb extensions -#' ``` """, '.ipynb': {'cell_type': 'code', 'source': '# This cell is active in py and ' @@ -49,36 +81,40 @@ def test_active_all(ext): @pytest.mark.skipif(sys.version_info < (3, 6), reason="unordered dict result in changes in chunk options") -@pytest.mark.parametrize('ext', ['.Rmd', '.py']) # TODO: add R +@pytest.mark.parametrize('ext', ['.Rmd', '.py', '.R']) def test_active_py_ipynb(ext): nb = nbrmd.reads(ACTIVE_PY_IPYNB[ext], ext=ext) assert len(nb.cells) == 1 compare(nb.cells[0], ACTIVE_PY_IPYNB['.ipynb']) - compare(ACTIVE_PY_IPYNB[ext], nbrmd.writes(nb, ext=ext)) + if ext != '.R': + compare(ACTIVE_PY_IPYNB[ext], nbrmd.writes(nb, ext=ext)) -ACTIVE_RMD = {'.py': """# # + {"active": "Rmd"} +ACTIVE_RMD = {'.py': """# + {"active": "Rmd", "endofcell": "-"} # # This cell is active in Rmd only +# - """, '.Rmd': """```{python active="Rmd"} # This cell is active in Rmd only ``` """, - '.R': """#' ```{python active="Rmd", eval=FALSE} + '.R': """#+ language="python", active="Rmd", eval=FALSE #' # This cell is active in Rmd only -#' ``` """, '.ipynb': {'cell_type': 'raw', 'source': '# This cell is active in Rmd only', 'metadata': {'active': 'Rmd'}}} -@pytest.mark.parametrize('ext', ['.Rmd']) # TODO: add R and py +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('ext', ['.Rmd', '.py', '.R']) def test_active_rmd(ext): nb = nbrmd.reads(ACTIVE_RMD[ext], ext=ext) assert len(nb.cells) == 1 compare(nb.cells[0], ACTIVE_RMD['.ipynb']) - compare(ACTIVE_RMD[ext], nbrmd.writes(nb, ext=ext)) + if ext != '.R': + compare(ACTIVE_RMD[ext], nbrmd.writes(nb, ext=ext)) ACTIVE_NOT_INCLUDE_RMD = {'.py': """# # + {"hide_output": true, "active": "Rmd"} diff --git a/tests/test_cli.py b/tests/test_cli.py index cf6d8b06c..4100e517a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,7 +2,7 @@ from shutil import copyfile 
import pytest import nbrmd -from nbrmd.cli import convert, cli +from nbrmd.cli import convert_nbrmd as convert, cli_nbrmd as cli from .utils import list_all_notebooks, remove_outputs diff --git a/tests/test_cli_nbsrc.py b/tests/test_cli_nbsrc.py new file mode 100644 index 000000000..d4b71295e --- /dev/null +++ b/tests/test_cli_nbsrc.py @@ -0,0 +1,75 @@ +import os +from shutil import copyfile +import pytest +import nbrmd +from nbrmd.cli import convert_nbsrc as convert, cli_nbsrc as cli +from .utils import list_all_notebooks, remove_outputs + + +@pytest.mark.parametrize('nb_file', + list_all_notebooks('.ipynb') + + list_all_notebooks('.py')) +def test_cli_single_file(nb_file): + assert cli([nb_file]).notebooks == [nb_file] + + +@pytest.mark.parametrize('nb_files', [list_all_notebooks('.ipynb') + + list_all_notebooks('.py')]) +def test_cli_multiple_files(nb_files): + assert cli(nb_files).notebooks == nb_files + + +@pytest.mark.parametrize('nb_file', + list_all_notebooks('.ipynb') + + list_all_notebooks('.py')) +def test_convert_single_file_in_place(nb_file, tmpdir): + nb_org = str(tmpdir.join(os.path.basename(nb_file))) + base, ext = os.path.splitext(nb_org) + nb_other = base + '.ipynb' if ext == '.py' else base + '.py' + + copyfile(nb_file, nb_org) + convert([nb_org]) + + nb1 = nbrmd.readf(nb_org) + nb2 = nbrmd.readf(nb_other) + + remove_outputs(nb1) == remove_outputs(nb2) + + +@pytest.mark.parametrize('nb_file', + list_all_notebooks('.ipynb') + + list_all_notebooks('.py')) +def test_convert_single_file(nb_file, capsys): + convert([nb_file], False) + + out, err = capsys.readouterr() + assert out != '' + assert err == '' + + +@pytest.mark.parametrize('nb_files', + [list_all_notebooks('.ipynb') + + list_all_notebooks('.py')]) +def test_convert_multiple_file(nb_files, tmpdir): + nb_orgs = [] + nb_others = [] + + for nb_file in nb_files: + nb_org = str(tmpdir.join(os.path.basename(nb_file))) + base, ext = os.path.splitext(nb_org) + nb_other = base + '.ipynb' if ext == 
'.py' else base + '.py' + copyfile(nb_file, nb_org) + nb_orgs.append(nb_org) + nb_others.append(nb_other) + + convert(nb_orgs) + + for nb_org, nb_other in zip(nb_orgs, nb_others): + nb1 = nbrmd.readf(nb_org) + nb2 = nbrmd.readf(nb_other) + remove_outputs(nb1) == remove_outputs(nb2) + + +def test_error_not_notebook(nb_file='notebook.ext'): + with pytest.raises(TypeError): + convert([nb_file]) diff --git a/tests/test_contentsmanager.py b/tests/test_contentsmanager.py index f54ba3d4d..cfdb5ef17 100644 --- a/tests/test_contentsmanager.py +++ b/tests/test_contentsmanager.py @@ -21,6 +21,7 @@ def test_load_save_rename(nb_file, tmpdir): tmp_rmd = 'notebook.Rmd' cm = RmdFileContentsManager() + cm.default_nbrmd_formats = 'ipynb,Rmd' cm.root_dir = str(tmpdir) # open ipynb, save Rmd, reopen @@ -39,3 +40,77 @@ def test_load_save_rename(nb_file, tmpdir): assert os.path.isfile(str(tmpdir.join('new.ipynb'))) assert os.path.isfile(str(tmpdir.join('new.Rmd'))) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.skipif(isinstance(RmdFileContentsManager, str), + reason=RmdFileContentsManager) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_load_save_rename_nbpy(nb_file, tmpdir): + tmp_ipynb = 'notebook.ipynb' + tmp_nbpy = 'notebook.nb.py' + + cm = RmdFileContentsManager() + cm.default_nbrmd_formats = 'ipynb,nb.py' + cm.root_dir = str(tmpdir) + + # open ipynb, save nb.py, reopen + nb = readf(nb_file) + cm.save(model=dict(type='notebook', content=nb), path=tmp_nbpy) + nbpy = cm.get(tmp_nbpy) + assert remove_outputs(nb) == remove_outputs(nbpy['content']) + + # save ipynb + cm.save(model=dict(type='notebook', content=nb), path=tmp_ipynb) + + # rename nbpy + cm.rename(tmp_nbpy, 'new.nb.py') + assert not os.path.isfile(str(tmpdir.join(tmp_ipynb))) + assert not os.path.isfile(str(tmpdir.join(tmp_nbpy))) + + assert os.path.isfile(str(tmpdir.join('new.ipynb'))) + assert 
os.path.isfile(str(tmpdir.join('new.nb.py'))) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.skipif(isinstance(RmdFileContentsManager, str), + reason=RmdFileContentsManager) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_load_save_rename_nbpy_default_config(nb_file, tmpdir): + tmp_ipynb = 'notebook.ipynb' + tmp_nbpy = 'notebook.nb.py' + + cm = RmdFileContentsManager() + cm.default_nbrmd_formats = 'ipynb' + cm.root_dir = str(tmpdir) + + # open ipynb, save nb.py, reopen + nb = readf(nb_file) + cm.save(model=dict(type='notebook', content=nb), path=tmp_nbpy) + nbpy = cm.get(tmp_nbpy) + assert remove_outputs(nb) == remove_outputs(nbpy['content']) + + # open ipynb + nbipynb = cm.get(tmp_ipynb) + assert remove_outputs(nb) == remove_outputs(nbipynb['content']) + + # save ipynb + cm.save(model=dict(type='notebook', content=nb), path=tmp_ipynb) + + # rename nbpy + cm.rename(tmp_nbpy, 'new.nb.py') + assert not os.path.isfile(str(tmpdir.join(tmp_ipynb))) + assert not os.path.isfile(str(tmpdir.join(tmp_nbpy))) + + assert os.path.isfile(str(tmpdir.join('new.ipynb'))) + assert os.path.isfile(str(tmpdir.join('new.nb.py'))) + + # rename ipynb + cm.rename('new.ipynb', tmp_ipynb) + assert os.path.isfile(str(tmpdir.join(tmp_ipynb))) + assert not os.path.isfile(str(tmpdir.join(tmp_nbpy))) + + assert not os.path.isfile(str(tmpdir.join('new.ipynb'))) + assert os.path.isfile(str(tmpdir.join('new.nb.py'))) diff --git a/tests/test_escape_magics.py b/tests/test_escape_magics.py new file mode 100644 index 000000000..86ecff794 --- /dev/null +++ b/tests/test_escape_magics.py @@ -0,0 +1,24 @@ +import pytest +from nbrmd.magics import escape_magic, unescape_magic + + +@pytest.mark.parametrize('line', ['%matplotlib inline', '#%matplotlib inline', + '##%matplotlib inline', '%%HTML']) +def test_escape(line): + assert escape_magic([line]) == ['# ' + line] + assert 
unescape_magic(escape_magic([line])) == [line] + + +@pytest.mark.parametrize('line', ['%pytest.fixture']) +def test_escape_magic_only(line): + assert escape_magic([line]) == [line] + + +@pytest.mark.parametrize('line', ['%matplotlib inline #noescape']) +def test_force_noescape(line): + assert escape_magic([line]) == [line] + + +@pytest.mark.parametrize('line', ['%pytest.fixture #escape']) +def test_force_escape(line): + assert escape_magic([line]) == ['# ' + line] diff --git a/tests/test_ipynb_to_R.py b/tests/test_ipynb_to_R.py index 46520544a..ff059c93e 100644 --- a/tests/test_ipynb_to_R.py +++ b/tests/test_ipynb_to_R.py @@ -2,10 +2,10 @@ import pytest from testfixtures import compare import nbrmd -from .utils import list_all_notebooks, remove_outputs +from .utils import list_r_notebooks, remove_outputs -@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +@pytest.mark.parametrize('nb_file', list_r_notebooks('.ipynb')) def test_identity_source_write_read(nb_file): """ Test that writing the notebook with R, and read again, diff --git a/tests/test_mirror.py b/tests/test_mirror.py new file mode 100644 index 000000000..5284dd919 --- /dev/null +++ b/tests/test_mirror.py @@ -0,0 +1,90 @@ +"""Here we generate mirror representation of py, Rmd and ipynb files +as py or ipynb, and make sure that these representations minimally +change on new releases. 
+""" + +import os +import sys +import pytest +from testfixtures import compare +import nbrmd +from .utils import list_all_notebooks, list_r_notebooks + + +def mirror_file(nb_file): + dir, file = os.path.split(nb_file) + if nb_file.endswith('.py'): + return os.path.join(dir, 'mirror', file.replace('.py', '.ipynb')) + if nb_file.endswith('.Rmd'): + return os.path.join(dir, 'mirror', file.replace('.Rmd', '.ipynb')) + return os.path.join(dir, 'mirror', file.replace('.ipynb', '.py')) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('py_file', + [py_file for py_file in list_all_notebooks('.py') + if py_file.find('notebook_sample') > 0]) +def test_py_unchanged_py(py_file): + with open(py_file, encoding='utf-8') as fp: + py = fp.read() + + ipynb_file = mirror_file(py_file) + + if not os.path.isfile(ipynb_file): + nb = nbrmd.readf(py_file) + nbrmd.writef(nb, ipynb_file) + + py_ref = nbrmd.writes(nbrmd.readf(ipynb_file), ext='.py') + compare(py, py_ref) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('rmd_file', list_all_notebooks('.Rmd')) +def test_rmd_unchanged(rmd_file): + with open(rmd_file, encoding='utf-8') as fp: + rmd = fp.read() + + ipynb_file = mirror_file(rmd_file) + + if not os.path.isfile(ipynb_file): + nb = nbrmd.readf(rmd_file) + nbrmd.writef(nb, ipynb_file) + + rmd_ref = nbrmd.writes(nbrmd.readf(ipynb_file), ext='.Rmd') + compare(rmd, rmd_ref) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_py_unchanged_ipynb(nb_file): + py_file = mirror_file(nb_file) + + if not os.path.isfile(py_file): + nb = nbrmd.readf(nb_file) + nbrmd.writef(nb, py_file) + + with open(py_file, encoding='utf-8') as fp: + py_ref = fp.read() + + py = 
nbrmd.writes(nbrmd.readf(nb_file), ext='.py') + compare(py, py_ref) + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.parametrize('nb_file', list_r_notebooks('.ipynb')) +def test_R_unchanged_ipynb(nb_file): + r_file = mirror_file(nb_file).replace('.py', '.R') + + if not os.path.isfile(r_file): + nb = nbrmd.readf(nb_file) + nbrmd.writef(nb, r_file) + + with open(r_file, encoding='utf-8') as fp: + r_ref = fp.read() + + r = nbrmd.writes(nbrmd.readf(nb_file), ext='.R') + compare(r, r_ref) diff --git a/tests/test_nbconvert.py b/tests/test_nbconvert.py index e7894a56a..d8afb6964 100644 --- a/tests/test_nbconvert.py +++ b/tests/test_nbconvert.py @@ -1,10 +1,13 @@ import os +import sys import subprocess import pytest import nbrmd from .utils import list_all_notebooks +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") @pytest.mark.skipif(isinstance(nbrmd.RMarkdownExporter, str), reason=nbrmd.RMarkdownExporter) @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -25,6 +28,8 @@ def test_nbconvert_and_read(nb_file): pytest.importorskip('jupyter') +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") @pytest.mark.skipif(isinstance(nbrmd.RMarkdownExporter, str), reason=nbrmd.RMarkdownExporter) @pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) @@ -42,3 +47,82 @@ def test_nbconvert_cmd_line(nb_file, tmpdir): rmd2 = fp.read() assert rmd1 == rmd2 + + +@pytest.mark.skipif(isinstance(nbrmd.PyNotebookExporter, str), + reason=nbrmd.PyNotebookExporter) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_nbconvert_and_read_py(nb_file): + # Load notebook + nb = nbrmd.readf(nb_file) + + # Export to py using nbrmd package + py1 = nbrmd.writes(nb, ext='.py') + + # Export to py using nbconvert exporter + py_exporter = nbrmd.PyNotebookExporter() + 
(py2, resources) = py_exporter.from_notebook_node(nb) + + assert py1 == py2 + + +@pytest.mark.skipif(isinstance(nbrmd.PyNotebookExporter, str), + reason=nbrmd.PyNotebookExporter) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_nbconvert_and_read_r(nb_file): + # Load notebook + nb = nbrmd.readf(nb_file) + + # Export to py using nbrmd package + r1 = nbrmd.writes(nb, ext='.R') + + # Export to py using nbconvert exporter + r_exporter = nbrmd.RNotebookExporter() + (r2, resources) = r_exporter.from_notebook_node(nb) + + assert r1 == r2 + + +pytest.importorskip('jupyter') + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.skipif(isinstance(nbrmd.PyNotebookExporter, str), + reason=nbrmd.PyNotebookExporter) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_nbconvert_cmd_line_py(nb_file, tmpdir): + py_file = str(tmpdir.join('notebook.py')) + + subprocess.call(['jupyter', 'nbconvert', '--to', 'pynotebook', + nb_file, '--output', py_file]) + + assert os.path.isfile(py_file) + + nb = nbrmd.readf(nb_file) + py1 = nbrmd.writes(nb, ext='.py') + with open(py_file) as fp: + py2 = fp.read() + + assert py1 == py2 + + +@pytest.mark.skipif(sys.version_info < (3, 6), + reason="unordered dict result in changes in chunk options") +@pytest.mark.skipif(isinstance(nbrmd.RNotebookExporter, str), + reason=nbrmd.RNotebookExporter) +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def test_nbconvert_cmd_line_R(nb_file, tmpdir): + r_file = str(tmpdir.join('notebook.R')) + + subprocess.call(['jupyter', 'nbconvert', '--to', 'rnotebook', + nb_file, '--output', r_file]) + + assert os.path.isfile(r_file) + + nb = nbrmd.readf(nb_file) + r = nbrmd.writes(nb, ext='.R') + with open(r_file) as fp: + r2 = fp.read() + + assert r == r2 diff --git a/tests/test_read_simple_python.py b/tests/test_read_simple_python.py index a527018d8..edbf141a6 100644 --- 
a/tests/test_read_simple_python.py +++ b/tests/test_read_simple_python.py @@ -2,6 +2,12 @@ import nbrmd from testfixtures import compare +from .python_notebook_sample import f, g + + +def test_python_notebook_sample(): + assert f(1) == 2 + assert g(2) == 4 def test_read_simple_file(pynb="""# --- @@ -104,6 +110,130 @@ def f(x): compare(pynb, pynb2) +def test_read_cell_two_blank_lines(pynb="""# --- +# title: cell with two consecutive blank lines +# --- + +# + {"endofcell": "-"} +a = 1 + + +a + 2 +# - +"""): + nb = nbrmd.reads(pynb, ext='.py') + + assert len(nb.cells) == 2 + assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].source == '---\ntitle: cell with two ' \ + 'consecutive blank lines\n---' + assert nb.cells[1].cell_type == 'code' + assert nb.cells[1].source == 'a = 1\n\n\na + 2' + + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_read_cell_explicit_start_end(pynb=''' +import pandas as pd +# + {"endofcell": "-"} +def data(): + return pd.DataFrame({'A': [0, 1]}) + + +data() +# - +'''): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_read_prev_function(pynb="""def test_read_cell_explicit_start_end(pynb=''' +import pandas as pd +# + {"endofcell": "-"} +def data(): + return pd.DataFrame({'A': [0, 1]}) + + +data() +# - +'''): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) +"""): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_read_cell_with_one_blank_line_end(pynb="""import pandas + +"""): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_file_with_two_blank_line_end(pynb="""import pandas + + +"""): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_one_blank_line_after_endofcell(pynb="""# + {"endofcell": "-"} +# This is a cell with explicit 
end of cell + + +# - + +# This cell is a cell with implicit start/end +1 + 1 +"""): + nb = nbrmd.reads(pynb, ext='.py') + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + +def test_isolated_cell_with_magic(pynb="""# --- +# title: cell with isolated jupyter magic +# --- + +# A magic command included in a markdown +# paragraph is not code, like the one below: +# +# %matplotlib inline + +# However, a code block may start with +# a magic command, like this one: + +# %matplotlib inline + + +1 + 1 +"""): + nb = nbrmd.reads(pynb, ext='.py') + + assert len(nb.cells) == 5 + assert nb.cells[0].cell_type == 'raw' + assert nb.cells[0].source == '---\ntitle: cell with isolated jupyter ' \ + 'magic\n---' + assert nb.cells[1].cell_type == 'markdown' + assert nb.cells[2].cell_type == 'markdown' + assert nb.cells[3].cell_type == 'code' + assert nb.cells[3].source == '%matplotlib inline' + + assert nb.cells[4].cell_type == 'code' + assert nb.cells[4].source == '1 + 1' + + pynb2 = nbrmd.writes(nb, ext='.py') + compare(pynb, pynb2) + + def test_read_multiline_comment(pynb="""'''This is a multiline comment with "quotes", 'single quotes' # and comments diff --git a/tests/test_save_multiple.py b/tests/test_save_multiple.py index 5ceffc3dd..6f56c4664 100644 --- a/tests/test_save_multiple.py +++ b/tests/test_save_multiple.py @@ -15,7 +15,7 @@ def test_rmd_is_ok(nb_file, tmpdir): tmp_ipynb = 'notebook.ipynb' tmp_rmd = 'notebook.Rmd' - nb.metadata['nbrmd_formats'] = ['.Rmd'] + nb.metadata['nbrmd_formats'] = 'ipynb,Rmd' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -53,7 +53,7 @@ def test_all_files_created(nb_file, tmpdir): tmp_ipynb = 'notebook.ipynb' tmp_rmd = 'notebook.Rmd' tmp_py = 'notebook.py' - nb.metadata['nbrmd_formats'] = ['.ipynb', '.Rmd', '.py'] + nb.metadata['nbrmd_formats'] = 'ipynb,Rmd,py' cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) @@ -109,7 +109,7 @@ def test_no_rmd_on_not_notebook(tmpdir): cm = RmdFileContentsManager() cm.root_dir = 
str(tmpdir) - cm.default_nbrmd_formats = '.Rmd' + cm.default_nbrmd_formats = 'ipynb,Rmd' with pytest.raises(HTTPError): cm.save(model=dict(type='not notebook', @@ -124,7 +124,7 @@ def test_no_rmd_on_not_v4(tmpdir): cm = RmdFileContentsManager() cm.root_dir = str(tmpdir) - cm.default_nbrmd_formats = '.Rmd' + cm.default_nbrmd_formats = 'ipynb,Rmd' with pytest.raises(NotebookValidationError): cm.save(model=dict(type='notebook', diff --git a/tests/test_stringparser.py b/tests/test_stringparser.py new file mode 100644 index 000000000..c301dadcf --- /dev/null +++ b/tests/test_stringparser.py @@ -0,0 +1,22 @@ +from nbrmd.stringparser import StringParser + + +def test_long_string(text="""'''This is a multiline +comment with "quotes", 'single quotes' +# and comments +and line breaks + + +and it ends here''' + + +1 + 1 +"""): + quoted = [] + sp = StringParser('python') + for i, line in enumerate(text.splitlines()): + if sp.is_quoted(): + quoted.append(i) + sp.read_line(line) + + assert quoted == [1, 2, 3, 4, 5, 6] diff --git a/tests/test_trust_notebook.py b/tests/test_trust_notebook.py new file mode 100644 index 000000000..412516d62 --- /dev/null +++ b/tests/test_trust_notebook.py @@ -0,0 +1,53 @@ +import os +import shutil +import pytest +from nbrmd.contentsmanager import RmdFileContentsManager +from .utils import list_all_notebooks + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.py')) +def test_py_notebooks_are_trusted(nb_file): + cm = RmdFileContentsManager() + root, file = os.path.split(nb_file) + cm.root_dir = root + nb = cm.get(file) + assert cm.notary.check_cells(nb['content']) + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.Rmd')) +def test_rmd_notebooks_are_trusted(nb_file): + cm = RmdFileContentsManager() + root, file = os.path.split(nb_file) + cm.root_dir = root + nb = cm.get(file) + assert cm.notary.check_cells(nb['content']) + + +@pytest.mark.parametrize('nb_file', list_all_notebooks('.ipynb')) +def 
test_ipynb_notebooks_can_be_trusted(nb_file, tmpdir): + cm = RmdFileContentsManager() + root, file = os.path.split(nb_file) + tmp_ipynb = str(tmpdir.join(file)) + py_file = file.replace('.ipynb', '.py') + tmp_py = str(tmpdir.join(py_file)) + shutil.copy(nb_file, tmp_ipynb) + + cm.default_nbrmd_formats = 'ipynb,py' + cm.root_dir = str(tmpdir) + nb = cm.get(file) + + # Sign notebook explicitely (save it, and reload without + # validating to remove 'trusted' metadata in cells) + cm.save(nb, py_file) + nb = cm._read_notebook(tmp_py) + cm.notary.sign(nb) + + nb = cm.get(file) + assert cm.notary.check_cells(nb['content']) + + # Remove py file, content should be the same + os.remove(tmp_py) + nb2 = cm.get(file) + assert cm.notary.check_cells(nb2['content']) + + assert nb['content'] == nb2['content'] diff --git a/tests/utils.py b/tests/utils.py index 6a3ed34a7..b5ed33dd2 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -14,7 +14,28 @@ def list_all_notebooks(ext, path=None): notebooks = [] for nb_file in os.listdir(nb_path): _, nb_ext = os.path.splitext(nb_file) - if nb_ext.lower() == ext.lower(): + if nb_ext.lower() == ext.lower() and \ + (not nb_file.startswith('ir_notebook') + or nb_file.startswith('R_sample')): + notebooks.append(os.path.join(nb_path, nb_file)) + return notebooks + + +def list_r_notebooks(ext, path=None): + """ + :ext: desired extension + :return: all notebooks in the directory of this script, + with the desired extension + """ + nb_path = os.path.dirname(os.path.abspath(__file__)) + if path: + nb_path = os.path.join(nb_path, path) + notebooks = [] + for nb_file in os.listdir(nb_path): + _, nb_ext = os.path.splitext(nb_file) + if nb_ext.lower() == ext.lower() and \ + (nb_file.startswith('ir_notebook') + or nb_file.startswith('R_sample')): notebooks.append(os.path.join(nb_path, nb_file)) return notebooks