From 246a4065f3ce301ddbba2e310bd1fc52652109a3 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 2 Feb 2022 14:00:26 -0600 Subject: [PATCH] book updates --- book/_build.sh | 3 +- book/_config.yml | 27 ++++++++ book/_toc.yml | 2 + book/api/dataframes.rst | 4 +- book/api/dataset.rst | 50 ++++++++++++++ book/api/datatree.rst | 8 +++ book/example/101_swissmetro_mnl.ipynb | 12 +++- book/example/102-swissmetro-weighted.ipynb | 18 ++++- book/example/109-swissmetro-nl.ipynb | 24 +++++-- book/example/examples.rst | 25 ------- book/example/legacy.md | 1 + book/intro.md | 13 ++-- larch/dataset.py | 78 +++++++++++++++++++++- 13 files changed, 223 insertions(+), 42 deletions(-) create mode 100644 book/api/dataset.rst create mode 100644 book/api/datatree.rst delete mode 100644 book/example/examples.rst diff --git a/book/_build.sh b/book/_build.sh index 672962dd..4ac71fa7 100644 --- a/book/_build.sh +++ b/book/_build.sh @@ -1,3 +1,4 @@ -#!/bin/zsh +#!/bin/bash +conda info python _scripts/hide_test_cells.py jb build . diff --git a/book/_config.yml b/book/_config.yml index cec0257b..e5130a63 100644 --- a/book/_config.yml +++ b/book/_config.yml @@ -47,6 +47,7 @@ sphinx: - 'sphinx.ext.intersphinx' - 'sphinx.ext.doctest' - 'sphinx.ext.graphviz' + - 'sphinx.ext.viewcode' mathjax_config: TeX: Macros: @@ -73,5 +74,31 @@ sphinx: conda: - "https://docs.conda.io/projects/conda/en/latest/" - null + xarray: + - "https://xarray.pydata.org/en/stable/" + - null config: bibtex_reference_style: author_year + autosummary_generate: True + autodoc_default_options: + show-inheritance: True + add_module_names: False + html_theme_options: + home_page_in_toc: false + search_bar_text: Search these docs... 
+ repository_url: https://github.com/jpn--/larch + use_repository_button: true + use_issues_button: true + repository_branch: master + path_to_docs: docs + extra_navbar: + favicons: + - rel: icon + sizes: 32x32 + href: img/larch_favicon.png + switcher: + json_url: "https://larch.newman.me/_static/switcher.json" + url_template: "https://larch.newman.me/v{version}/" + version_match: "5.5.10" + navbar_end: + - version-switcher diff --git a/book/_toc.yml b/book/_toc.yml index ca5a66c3..4fbbd4ee 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -17,6 +17,8 @@ parts: chapters: - file: api/~data sections: + - file: api/dataset + - file: api/datatree - file: api/dataframes - file: api/linear - file: api/~models diff --git a/book/api/dataframes.rst b/book/api/dataframes.rst index baf13ea2..8dc1090d 100644 --- a/book/api/dataframes.rst +++ b/book/api/dataframes.rst @@ -2,8 +2,10 @@ DataFrames ========== +The :ref:`DataFrames` interface for larch remains available for use, but future +development on this package will be on the :ref:`Dataset`/:ref:`DataTree` interface. + .. autosummary:: :toctree: generated/ larch.DataFrames - diff --git a/book/api/dataset.rst b/book/api/dataset.rst new file mode 100644 index 00000000..a814f952 --- /dev/null +++ b/book/api/dataset.rst @@ -0,0 +1,50 @@ +.. currentmodule:: larch + +======= +Dataset +======= + +Constructors +------------ + +.. autosummary:: + :toctree: generated/ + + Dataset + Dataset.from_idca + Dataset.from_idco + Dataset.construct + +Attributes +---------- + +.. autosummary:: + :toctree: generated/ + + Dataset.n_cases + Dataset.n_alts + Dataset.CASEID + Dataset.ALTID + Dataset.dims + Dataset.sizes + Dataset.data_vars + Dataset.coords + Dataset.attrs + Dataset.encoding + Dataset.indexes + Dataset.chunks + Dataset.chunksizes + Dataset.nbytes + +Methods +------- + +.. 
autosummary:: + :toctree: generated/ + + Dataset.caseids + Dataset.dissolve_zero_variance + Dataset.query_cases + Dataset.set_altnames + Dataset.set_dtypes + Dataset.setup_flow diff --git a/book/api/datatree.rst b/book/api/datatree.rst new file mode 100644 index 00000000..dc493880 --- /dev/null +++ b/book/api/datatree.rst @@ -0,0 +1,8 @@ +============= +DataTree Home +============= + +.. autosummary:: + :toctree: generated/ + + larch.DataTree diff --git a/book/example/101_swissmetro_mnl.ipynb b/book/example/101_swissmetro_mnl.ipynb index 3c4cbca3..88bc1608 100644 --- a/book/example/101_swissmetro_mnl.ipynb +++ b/book/example/101_swissmetro_mnl.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "09dd4536", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -292,7 +296,11 @@ "cell_type": "code", "execution_count": null, "id": "fda7d9c1", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/102-swissmetro-weighted.ipynb b/book/example/102-swissmetro-weighted.ipynb index b13c555d..01360784 100644 --- a/book/example/102-swissmetro-weighted.ipynb +++ b/book/example/102-swissmetro-weighted.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "faf59f97", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -190,7 +194,11 @@ "cell_type": "code", "execution_count": null, "id": "64f60ffb", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -221,7 +229,11 @@ "cell_type": "code", "execution_count": null, "id": "baf04617", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/109-swissmetro-nl.ipynb b/book/example/109-swissmetro-nl.ipynb index d5b3a4a8..3bf1dd9b 100644 --- 
a/book/example/109-swissmetro-nl.ipynb +++ b/book/example/109-swissmetro-nl.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "2c734ca6", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -191,7 +195,11 @@ "cell_type": "code", "execution_count": null, "id": "7e8649b5", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -222,7 +230,11 @@ "cell_type": "code", "execution_count": null, "id": "025ba5f9", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -272,7 +284,11 @@ "cell_type": "code", "execution_count": null, "id": "7133fda4", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/examples.rst b/book/example/examples.rst deleted file mode 100644 index 3da1b9df..00000000 --- a/book/example/examples.rst +++ /dev/null @@ -1,25 +0,0 @@ - -.. currentmodule:: larch.examples - -.. _examples: - -======== -Examples -======== - -Here we provide some illustrative examples. -You can reproduce all of these examples in your own install of Larch, -as all of the example data is included with the standard distribution. -To work with any objects you find in the examples, a handy function -is provided to extract that object directly into your workspace: - -.. autofunction:: larch.example - - -.. 
toctree:: - - mtc - swissmetro - exampville - itinerary - diff --git a/book/example/legacy.md b/book/example/legacy.md index 34bc5b89..bf0222f1 100644 --- a/book/example/legacy.md +++ b/book/example/legacy.md @@ -1,3 +1,4 @@ +(deprecated-examples)= # Deprecated Example Models Future development of Larch will be on version that sits on numba, xarray, and sharrow, diff --git a/book/intro.md b/book/intro.md index 0b299a4b..87ec729a 100644 --- a/book/intro.md +++ b/book/intro.md @@ -4,21 +4,26 @@ [![conda-forge](https://img.shields.io/conda/dn/conda-forge/larch)](https://anaconda.org/conda-forge/larch) [![conda-forge](https://img.shields.io/azure-devops/build/wire-paladin/larch/jpn--.larch/master)](https://dev.azure.com/wire-paladin/larch/_build?definitionId=1&_a=summary&repositoryFilter=1&branchFilter=5%2C5%2C5%2C5%2C5%2C5) -🏆︁ Winner of the [AGIFORS 56th Annual Symposium Best Innovation award](http://agifors.org/Symposium>). +🏆︁ Winner of the [AGIFORS 56th Annual Symposium Best Innovation award](http://agifors.org/Symposium). This documentation is for the Python interface for Larch. If this is your first go with Larch, or the first go on a new computer, you might want to start with [installation](installation). Larch is undergoing a transformation, with a new computational architecture that can significantly improve performance when working with large datasets. -The new code relies on [numba](https://numba.pydata.org/), +The old version of Larch used a carefully customized `DataFrames` object to +organize several different aspects of discrete choice data. +The new code uses a more standardized (although still enhanced) `xarray.Dataset` +interface for data, and relies on [numba](https://numba.pydata.org/), [xarray](https://xarray.pydata.org/en/stable/), and [sharrow](https://activitysim.github.io/sharrow) to enable super-fast estimation of choice models. Many (but not yet all) of the core features of Larch have been moved over to this new platform. 
-You can still use the old version of Larch as normal, but to try out the new version -just import `larch.numba` instead of larch itself. +*You can still use the old version of Larch as normal.* If you want to try out the new version, +just import `larch.numba` instead of larch itself. All of the compatible examples in this +documentation are being migrated over to the new platform, but the old examples remain +available for now under the [Deprecated Examples](deprecated-examples) section. :::{note} This project is very much under development. There are plenty of undocumented functions diff --git a/larch/dataset.py b/larch/dataset.py index b1f48f22..99593af2 100644 --- a/larch/dataset.py +++ b/larch/dataset.py @@ -262,6 +262,28 @@ def __initialize_for_larch(cls, obj, caseid=None, alts=None): @classmethod def construct(cls, source, caseid=None, alts=None): + """ + A generic constructor for creating Datasets from various similar objects. + + Parameters + ---------- + source : pandas.DataFrame, pyarrow.Table, xarray.Dataset, or Sequence[str] + The source from which to create a Dataset. DataFrames and Tables + are converted to Datasets that have one dimension (the rows) and + separate variables for each of the columns. A list of strings + creates a dataset with those named empty variables. + caseid : str, optional + The name of a dimension referencing cases. + alts : Mapping or str or array-like, optional + If given as a mapping, links alternative codes to names. + A string names a dimension that defines the alternatives. + An array or list of integers gives codes for the alternatives, + which are otherwise unnamed. 
+ + Returns + ------- + Dataset + """ if isinstance(source, pd.DataFrame): source = cls.from_dataframe(source) else: @@ -376,8 +398,44 @@ def validate_format(self): msgs.extend(warn_msgs) return msgs - def query_cases(self, query): - return self.query({self.CASEID: query}) + def query_cases(self, query, parser="pandas", engine=None): + """ + Return a new dataset with each array indexed along the CASEID dimension. + + The indexers are given as strings containing Python expressions to be + evaluated against the data variables in the dataset. + + Parameters + ---------- + query : str + Python expressions to be evaluated against the data variables + in the dataset. The expressions will be evaluated using the pandas + eval() function, and can contain any valid Python expressions but cannot + contain any Python statements. + parser : {"pandas", "python"}, default: "pandas" + The parser to use to construct the syntax tree from the expression. + The default of 'pandas' parses code slightly different than standard + Python. Alternatively, you can parse an expression using the 'python' + parser to retain strict Python semantics. + engine : {"python", "numexpr", None}, default: None + The engine used to evaluate the expression. Supported engines are: + + - None: tries to use numexpr, falls back to python + - "numexpr": evaluates expressions using numexpr + - "python": performs operations as if you had eval’d in top level python + + Returns + ------- + obj : Dataset + A new Dataset with the same contents as this dataset, except each + array is indexed by the results of the query on the CASEID dimension. 
+ + See Also + -------- + Dataset.isel + pandas.eval + """ + return self.query({self.CASEID: query}, parser=parser, engine=engine) def dissolve_coords(self, dim, others=None): d = self.reset_index(dim) @@ -408,6 +466,22 @@ def dissolve_zero_variance(self, dim='', inplace=False): return obj def set_dtypes(self, dtypes, inplace=False, on_error='warn'): + """ + Set the dtypes for the variables in this Dataset. + Parameters + ---------- + dtypes : Mapping or DataFrame + Mapping of names to dtypes, or a DataFrame to infer such a + mapping. + inplace : bool, default False + Whether to convert dtypes inplace. + on_error : {'warn', 'raise', 'ignore'} + What to do when a type conversion triggers an error. + + Returns + ------- + Dataset + """ if isinstance(dtypes, pd.DataFrame): dtypes = dtypes.dtypes if inplace: