From 246a4065f3ce301ddbba2e310bd1fc52652109a3 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 2 Feb 2022 14:00:26 -0600 Subject: [PATCH] book updates --- book/_build.sh | 3 +- book/_config.yml | 27 ++++++++ book/_toc.yml | 2 + book/api/dataframes.rst | 4 +- book/api/dataset.rst | 50 ++++++++++++++ book/api/datatree.rst | 8 +++ book/example/101_swissmetro_mnl.ipynb | 12 +++- book/example/102-swissmetro-weighted.ipynb | 18 ++++- book/example/109-swissmetro-nl.ipynb | 24 +++++-- book/example/examples.rst | 25 ------- book/example/legacy.md | 1 + book/intro.md | 13 ++-- larch/dataset.py | 78 +++++++++++++++++++++- 13 files changed, 223 insertions(+), 42 deletions(-) create mode 100644 book/api/dataset.rst create mode 100644 book/api/datatree.rst delete mode 100644 book/example/examples.rst diff --git a/book/_build.sh b/book/_build.sh index 672962dd..4ac71fa7 100644 --- a/book/_build.sh +++ b/book/_build.sh @@ -1,3 +1,4 @@ -#!/bin/zsh +#!/bin/bash +conda info python _scripts/hide_test_cells.py jb build . diff --git a/book/_config.yml b/book/_config.yml index cec0257b..e5130a63 100644 --- a/book/_config.yml +++ b/book/_config.yml @@ -47,6 +47,7 @@ sphinx: - 'sphinx.ext.intersphinx' - 'sphinx.ext.doctest' - 'sphinx.ext.graphviz' + - 'sphinx.ext.viewcode' mathjax_config: TeX: Macros: @@ -73,5 +74,31 @@ sphinx: conda: - "https://docs.conda.io/projects/conda/en/latest/" - null + xarray: + - "https://xarray.pydata.org/en/stable/" + - null config: bibtex_reference_style: author_year + autosummary_generate: True + autodoc_default_options: + show-inheritance: True + add_module_names: False + html_theme_options: + home_page_in_toc: false + search_bar_text: Search these docs... 
+ repository_url: https://github.com/jpn--/larch + use_repository_button: true + use_issues_button: true + repository_branch: master + path_to_docs: docs + extra_navbar: + favicons: + - rel: icon + sizes: 32x32 + href: img/larch_favicon.png + switcher: + json_url: "https://larch.newman.me/_static/switcher.json" + url_template: "https://larch.newman.me/v{version}/" + version_match: "5.5.10" + navbar_end: + - version-switcher diff --git a/book/_toc.yml b/book/_toc.yml index ca5a66c3..4fbbd4ee 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -17,6 +17,8 @@ parts: chapters: - file: api/~data sections: + - file: api/dataset + - file: api/datatree - file: api/dataframes - file: api/linear - file: api/~models diff --git a/book/api/dataframes.rst b/book/api/dataframes.rst index baf13ea2..8dc1090d 100644 --- a/book/api/dataframes.rst +++ b/book/api/dataframes.rst @@ -2,8 +2,10 @@ DataFrames ========== +The :ref:`DataFrames` interface for larch remains available for use, but future +development on this package will be on the :ref:`Dataset`/:ref:`DataTree` interface. + .. autosummary:: :toctree: generated/ larch.DataFrames - diff --git a/book/api/dataset.rst b/book/api/dataset.rst new file mode 100644 index 00000000..a814f952 --- /dev/null +++ b/book/api/dataset.rst @@ -0,0 +1,50 @@ +.. currentmodule:: larch + +======= +Dataset +======= + +Constructors +------------ + +.. autosummary:: + :toctree: generated/ + + Dataset + Dataset.from_idca + Dataset.from_idco + Dataset.construct + +Attributes +---------- + +.. autosummary:: + :toctree: generated/ + + Dataset.n_cases + Dataset.n_alts + Dataset.CASEID + Dataset.ALTID + Dataset.dims + Dataset.sizes + Dataset.data_vars + Dataset.coords + Dataset.attrs + Dataset.encoding + Dataset.indexes + Dataset.chunks + Dataset.chunksizes + Dataset.nbytes + +Methods +------- + +.. 
autosummary:: + :toctree: generated/ + + Dataset.caseids + Dataset.dissolve_zero_variance + Dataset.query_cases + Dataset.set_altnames + Dataset.set_dtypes + Dataset.setup_flow diff --git a/book/api/datatree.rst b/book/api/datatree.rst new file mode 100644 index 00000000..dc493880 --- /dev/null +++ b/book/api/datatree.rst @@ -0,0 +1,8 @@ +============= +DataTree Home +============= + +.. autosummary:: + :toctree: generated/ + + larch.DataTree diff --git a/book/example/101_swissmetro_mnl.ipynb b/book/example/101_swissmetro_mnl.ipynb index 3c4cbca3..88bc1608 100644 --- a/book/example/101_swissmetro_mnl.ipynb +++ b/book/example/101_swissmetro_mnl.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "09dd4536", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -292,7 +296,11 @@ "cell_type": "code", "execution_count": null, "id": "fda7d9c1", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/102-swissmetro-weighted.ipynb b/book/example/102-swissmetro-weighted.ipynb index b13c555d..01360784 100644 --- a/book/example/102-swissmetro-weighted.ipynb +++ b/book/example/102-swissmetro-weighted.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "faf59f97", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -190,7 +194,11 @@ "cell_type": "code", "execution_count": null, "id": "64f60ffb", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -221,7 +229,11 @@ "cell_type": "code", "execution_count": null, "id": "baf04617", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/109-swissmetro-nl.ipynb b/book/example/109-swissmetro-nl.ipynb index d5b3a4a8..3bf1dd9b 100644 --- 
a/book/example/109-swissmetro-nl.ipynb +++ b/book/example/109-swissmetro-nl.ipynb @@ -12,7 +12,11 @@ "cell_type": "code", "execution_count": null, "id": "2c734ca6", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -191,7 +195,11 @@ "cell_type": "code", "execution_count": null, "id": "7e8649b5", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -222,7 +230,11 @@ "cell_type": "code", "execution_count": null, "id": "025ba5f9", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", @@ -272,7 +284,11 @@ "cell_type": "code", "execution_count": null, "id": "7133fda4", - "metadata": {}, + "metadata": { + "tags": [ + "remove_cell" + ] + }, "outputs": [], "source": [ "# TEST\n", diff --git a/book/example/examples.rst b/book/example/examples.rst deleted file mode 100644 index 3da1b9df..00000000 --- a/book/example/examples.rst +++ /dev/null @@ -1,25 +0,0 @@ - -.. currentmodule:: larch.examples - -.. _examples: - -======== -Examples -======== - -Here we provide some illustrative examples. -You can reproduce all of these examples in your own install of Larch, -as all of the example data is included with the standard distribution. -To work with any objects you find in the examples, a handy function -is provided to extract that object directly into your workspace: - -.. autofunction:: larch.example - - -.. 
toctree:: - - mtc - swissmetro - exampville - itinerary - diff --git a/book/example/legacy.md b/book/example/legacy.md index 34bc5b89..bf0222f1 100644 --- a/book/example/legacy.md +++ b/book/example/legacy.md @@ -1,3 +1,4 @@ +(deprecated-examples)= # Deprecated Example Models Future development of Larch will be on version that sits on numba, xarray, and sharrow, diff --git a/book/intro.md b/book/intro.md index 0b299a4b..87ec729a 100644 --- a/book/intro.md +++ b/book/intro.md @@ -4,21 +4,26 @@ [![conda-forge](https://img.shields.io/conda/dn/conda-forge/larch)](https://anaconda.org/conda-forge/larch) [![conda-forge](https://img.shields.io/azure-devops/build/wire-paladin/larch/jpn--.larch/master)](https://dev.azure.com/wire-paladin/larch/_build?definitionId=1&_a=summary&repositoryFilter=1&branchFilter=5%2C5%2C5%2C5%2C5%2C5) -🏆︁ Winner of the [AGIFORS 56th Annual Symposium Best Innovation award](http://agifors.org/Symposium>). +🏆︁ Winner of the [AGIFORS 56th Annual Symposium Best Innovation award](http://agifors.org/Symposium). This documentation is for the Python interface for Larch. If this is your first go with Larch, or the first go on a new computer, you might want to start with [installation](installation). Larch is undergoing a transformation, with a new computational architecture that can significantly improve performance when working with large datasets. -The new code relies on [numba](https://numba.pydata.org/), +The old version of Larch used a carefully customized `DataFrames` object to +organize several different aspects of discrete choice data. +The new code uses a more standardized (although still enhanced) `xarray.Dataset` +interface for data, and relies on [numba](https://numba.pydata.org/), [xarray](https://xarray.pydata.org/en/stable/), and [sharrow](https://activitysim.github.io/sharrow) to enable super-fast estimation of choice models. Many (but not yet all) of the core features of Larch have been moved over to this new platform. 
-You can still use the old version of Larch as normal, but to try out the new version -just import `larch.numba` instead of larch itself. +*You can still use the old version of Larch as normal.* If you want to try out the new version, +just import `larch.numba` instead of larch itself. All of the compatible examples in this +documentation are being migrated over to the new platform, but the old examples remain +available for now under the [Deprecated Examples](deprecated-examples) section. :::{note} This project is very much under development. There are plenty of undocumented functions diff --git a/larch/dataset.py b/larch/dataset.py index b1f48f22..99593af2 100644 --- a/larch/dataset.py +++ b/larch/dataset.py @@ -262,6 +262,28 @@ def __initialize_for_larch(cls, obj, caseid=None, alts=None): @classmethod def construct(cls, source, caseid=None, alts=None): + """ + A generic constructor for creating Datasets from various similar objects. + + Parameters + ---------- + source : pandas.DataFrame, pyarrow.Table, xarray.Dataset, or Sequence[str] + The source from which to create a Dataset. DataFrames and Tables + are converted to Datasets that have one dimension (the rows) and + separate variables for each of the columns. A list of strings + creates a dataset with those named empty variables. + caseid : str, optional + The name of a dimension referencing cases. + alts : Mapping or str or array-like, optional + If given as a mapping, links alternative codes to names. + A string names a dimension that defines the alternatives. + An array or list of integers gives codes for the alternatives, + which are otherwise unnamed. 
+ + Returns + ------- + Dataset + """ if isinstance(source, pd.DataFrame): source = cls.from_dataframe(source) else: @@ -376,8 +398,44 @@ def validate_format(self): msgs.extend(warn_msgs) return msgs - def query_cases(self, query): - return self.query({self.CASEID: query}) + def query_cases(self, query, parser="pandas", engine=None): + """ + Return a new dataset with each array indexed along the CASEID dimension. + + The indexers are given as strings containing Python expressions to be + evaluated against the data variables in the dataset. + + Parameters + ---------- + query : str + Python expressions to be evaluated against the data variables + in the dataset. The expressions will be evaluated using the pandas + eval() function, and can contain any valid Python expressions but cannot + contain any Python statements. + parser : {"pandas", "python"}, default: "pandas" + The parser to use to construct the syntax tree from the expression. + The default of 'pandas' parses code slightly different than standard + Python. Alternatively, you can parse an expression using the 'python' + parser to retain strict Python semantics. + engine : {"python", "numexpr", None}, default: None + The engine used to evaluate the expression. Supported engines are: + + - None: tries to use numexpr, falls back to python + - "numexpr": evaluates expressions using numexpr + - "python": performs operations as if you had eval’d in top level python + + Returns + ------- + obj : Dataset + A new Dataset with the same contents as this dataset, except each + array is indexed by the results of the query on the CASEID dimension. 
+ + See Also + -------- + Dataset.isel + pandas.eval + """ + return self.query({self.CASEID: query}, parser=parser, engine=engine) def dissolve_coords(self, dim, others=None): d = self.reset_index(dim) @@ -408,6 +466,22 @@ def dissolve_zero_variance(self, dim='', inplace=False): return obj def set_dtypes(self, dtypes, inplace=False, on_error='warn'): + """ + Set the dtypes for the variables in this Dataset. + Parameters + ---------- + dtypes : Mapping or DataFrame + Mapping of names to dtypes, or a DataFrame to infer such a + mapping. + inplace : bool, default False + Whether to convert dtypes inplace. + on_error : {'warn', 'raise', 'ignore'} + What to do when a type conversion triggers an error. + + Returns + ------- + Dataset + """ if isinstance(dtypes, pd.DataFrame): dtypes = dtypes.dtypes if inplace: