diff --git a/book/_toc.yml b/book/_toc.yml index 2e2eddff..d05f484d 100644 --- a/book/_toc.yml +++ b/book/_toc.yml @@ -24,10 +24,11 @@ parts: - file: api/dataset - file: api/datatree - file: api/dataframes - - file: api/linear - file: api/~models sections: - file: api/model + - file: api/linear + - file: api/nesting - caption: Examples chapters: diff --git a/book/api/nesting.rst b/book/api/nesting.rst new file mode 100644 index 00000000..c64a9964 --- /dev/null +++ b/book/api/nesting.rst @@ -0,0 +1,23 @@ +=========== +NestingTree +=========== + +.. currentmodule:: larch.model.tree + + +.. autosummary:: + :toctree: generated/ + + NestingTree + +Methods +======= + +.. autosummary:: + :toctree: generated/ + + NestingTree.new_node + NestingTree.add_node + NestingTree.remove_node + NestingTree.add_edge + NestingTree.remove_edge diff --git a/book/example/001_mnl.ipynb b/book/example/001_mnl.ipynb index 44956e9e..303072e8 100644 --- a/book/example/001_mnl.ipynb +++ b/book/example/001_mnl.ipynb @@ -46,12 +46,7 @@ { "cell_type": "markdown", "id": "f58c8dfd", - "metadata": { - "ExecuteTime": { - "end_time": "2021-11-26T20:04:55.484918Z", - "start_time": "2021-11-26T20:04:55.481729Z" - } - }, + "metadata": {}, "source": [ "This example is a mode choice model built using the MTC example dataset.\n", "First we create the Dataset and Model objects:" @@ -166,13 +161,55 @@ }, { "cell_type": "markdown", - "id": "414b6db9", + "id": "1023ba6d", + "metadata": {}, + "source": [ + "We can view a summary of the choices and alternative \n", + "availabilities to make sure the model is set up \n", + "correctly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1ae2a3a", + "metadata": {}, + "outputs": [], + "source": [ + "m.choice_avail_summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12391385", "metadata": { - "ExecuteTime": { - "end_time": "2021-11-26T20:10:59.990767Z", - "start_time": "2021-11-26T20:10:59.987376Z" - } + "tags": [ + "remove_cell" + ] }, + "outputs": [], + "source": [ + "# TEST\n", + "s = ''' name chosen available\n", + "altid \n", + "1 DA 3637 4755\n", + "2 SR2 517 5029\n", + "3 SR3+ 161 5029\n", + "4 Transit 498 4003\n", + "5 Bike 50 1738\n", + "6 Walk 166 1479\n", + "< Total All Alternatives > 5029 \n", + "'''\n", + "import re\n", + "mash = lambda x: re.sub('\\s+', ' ', x).strip()\n", + "assert mash(s) == mash(str(m.choice_avail_summary()))" + ] + }, + { + "cell_type": "markdown", + "id": "414b6db9", + "metadata": {}, "source": [ "Having created this model, we can then estimate it:" ] @@ -374,7 +411,6 @@ } ], "metadata": { - "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", diff --git a/book/example/201_exville_mode_choice.ipynb b/book/example/201_exville_mode_choice.ipynb index 78c4098b..ffb2c66e 100644 --- a/book/example/201_exville_mode_choice.ipynb +++ b/book/example/201_exville_mode_choice.ipynb @@ -355,15 +355,15 @@ "assert (summary.to_markdown()) == '''\n", "| | name | chosen | available | availability condition |\n", "|:---------------------------|:---------|---------:|:------------|:-------------------------|\n", - "| 1 | DA | 810 | 7564 | AGE >= 16 |\n", - "| 2 | SR | 196 | 4179 | 1 |\n", - "| 3 | Walk | 72 | 7564 | WALK_TIME < 60 |\n", - "| 4 | Bike | 434 | 4199 | BIKE_TIME < 60 |\n", - "| 5 | Transit | 6862 | 7564 | TRANSIT_FARE>0 |\n", - "| 6 | Car | 268 | 7564 | |\n", - "| 7 | NonMotor | 7296 | 7564 | |\n", - "| 8 | Motor | 7564 | 7564 | |\n", - "| < Total All Alternatives > | | 6052 | | |\n", + "| 1 | DA | 6052 | 7564 | AGE >= 16 
|\n", + "| 2 | SR | 810 | 7564 | 1 |\n", + "| 3 | Walk | 196 | 4179 | WALK_TIME < 60 |\n", + "| 4 | Bike | 72 | 7564 | BIKE_TIME < 60 |\n", + "| 5 | Transit | 434 | 4199 | TRANSIT_FARE>0 |\n", + "| 6 | Car | 6862 | 7564 | |\n", + "| 7 | NonMotor | 268 | 7564 | |\n", + "| 8 | Motor | 7296 | 7564 | |\n", + "| < Total All Alternatives > | | 7564 | | |\n", "'''[1:-1]" ] }, diff --git a/book/example/202_exville_mc_logsums.ipynb b/book/example/202_exville_mc_logsums.ipynb index 4818a9be..f117b569 100644 --- a/book/example/202_exville_mc_logsums.ipynb +++ b/book/example/202_exville_mc_logsums.ipynb @@ -111,7 +111,6 @@ }, "outputs": [], "source": [ - "\n", "Mode = Dict(\n", " DA = 1,\n", " SR = 2,\n", @@ -197,15 +196,12 @@ "outputs": [], "source": [ "for i,dtaz in enumerate(logsums.TAZ_ID):\n", - " \n", " m.datatree = dt.replace_datasets(\n", " tour=dt.root_dataset.assign(\n", " DTAZ=xr.full_like(dt['DTAZ'], dtaz)\n", " ),\n", " )\n", - " logsums.loc[dict(TAZ_ID=dtaz)] = m.logsums()\n", - "\n", - " " + " logsums.loc[dict(TAZ_ID=dtaz)] = m.logsums()" ] }, { diff --git a/book/user-guide/choice-models.ipynb b/book/user-guide/choice-models.ipynb index a326f0cb..2f855b78 100644 --- a/book/user-guide/choice-models.ipynb +++ b/book/user-guide/choice-models.ipynb @@ -483,8 +483,9 @@ "\n", "[`m.datatree`](larch.numba.Model.datatree) \n", ": A `DataTree` that holds the raw data used for the model. This can\n", - "consist of just a single `Dataset`, or multiple related datasets \n", - "linked together using the `sharrow` library.\n", + "consist of just a single `Dataset`, (which is automatically converted\n", + "into a one-node tree when you assign it to this attribute) or multiple \n", + "related datasets linked together using the `sharrow` library. 
\n", "\n", "[`m.dataset`](larch.numba.Model.dataset) \n", ": The assembled arrays actually used in calculation, stored\n", @@ -519,6 +520,170 @@ "m.dataset" ] }, + { + "cell_type": "markdown", + "id": "8568735b", + "metadata": {}, + "source": [ + "## Nesting Structures\n", + "\n", + "By default, a model in Larch is assumed to be a simple multinomial \n", + "logit model, unless a nesting structure is defined. That structure\n", + "is defined in a model's [`graph`](larch.numba.Model.graph)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ec7446", + "metadata": {}, + "outputs": [], + "source": [ + "m.graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad6b2350", + "metadata": { + "tags": [ + "remove_cell" + ] + }, + "outputs": [], + "source": [ + "# TEST\n", + "assert sorted(m.graph.nodes) == [0, 1, 2, 3]\n", + "assert sorted(m.graph.edges) == [(0, 1), (0, 2), (0, 3)]\n", + "assert m.graph.standard_sort_names == ['Car', 'Bus', 'Walk', '_root_']\n", + "assert m.graph.standard_sort == (1, 2, 3, 0)" + ] + }, + { + "cell_type": "markdown", + "id": "0ce31f75", + "metadata": {}, + "source": [ + "Adding a nest can be accomplished using the [`new_node`](larch.model.tree.NestingTree.new_node) method,\n", + "which allows you to give a nesting node's child codes, a name, and attach a logsum parameter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "836fc0d3", + "metadata": {}, + "outputs": [], + "source": [ + "z = m.graph.new_node(parameter='Mu_Motorized', children=[1,2], name='Motorized')\n", + "m.graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bf7f1b2", + "metadata": { + "tags": [ + "remove_cell" + ] + }, + "outputs": [], + "source": [ + "# TEST\n", + "assert sorted(m.graph.nodes) == [0, 1, 2, 3, 4]\n", + "assert sorted(m.graph.edges) == [(0, 3), (0, 4), (4, 1), (4, 2)]\n", + "assert m.graph.standard_sort_names == ['Car', 'Bus', 'Walk', 'Motorized', '_root_']\n", + "assert m.graph.standard_sort == (1, 2, 3, 4, 0)" + ] + }, + { + "cell_type": "markdown", + "id": "f94b23ad", + "metadata": {}, + "source": [ + "The return value of [`new_node`](larch.model.tree.NestingTree.new_node)\n", + "is the code number of the new nest. This is assigned automatically so \n", + "as to not overlap with any other alternatives or nests. We can use this\n", + "to develop multi-level nesting structures, by putting that new code \n", + "number as the child for yet another new nest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "637439d3", + "metadata": {}, + "outputs": [], + "source": [ + "m.graph.new_node(parameter='Mu_Omni', children=[z, 3], name='Omni')\n", + "m.graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90a41d3c", + "metadata": { + "tags": [ + "remove_cell" + ] + }, + "outputs": [], + "source": [ + "# TEST\n", + "assert sorted(m.graph.nodes) == [0, 1, 2, 3, 4, 5]\n", + "assert sorted(m.graph.edges) == [(0, 5), (4, 1), (4, 2), (5, 3), (5, 4)]\n", + "assert m.graph.standard_sort_names == [\n", + " 'Car', 'Bus', 'Walk', 'Motorized', 'Omni', '_root_'\n", + "]\n", + "assert m.graph.standard_sort == (1, 2, 3, 4, 5, 0)" + ] + }, + { + "cell_type": "markdown", + "id": "9824be61", + "metadata": {}, + "source": [ + "Nothing in Larch prevents you from overloading the nesting structure with\n", + "degenerate nests, as shown above. You may have difficulty with estimating\n", + "parameters if you are not careful with such complex structures. If you\n", + "need to [`remove_node`](larch.model.tree.NestingTree.remove_node), you \n", + "can do so by giving its code--but you'll likely find you'll be much better off\n", + "just fixing your code and starting over, as node removal can have some odd\n", + "side effects for complex structures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "435b99b3", + "metadata": {}, + "outputs": [], + "source": [ + "m.graph.remove_node(5)\n", + "m.graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac114d76", + "metadata": { + "tags": [ + "remove_cell" + ] + }, + "outputs": [], + "source": [ + "# TEST\n", + "assert sorted(m.graph.nodes) == [0, 1, 2, 3, 4]\n", + "assert sorted(m.graph.edges) == [(0, 3), (0, 4), (4, 1), (4, 2)]\n", + "assert m.graph.standard_sort_names == ['Car', 'Bus', 'Walk', 'Motorized', '_root_']\n", + "assert m.graph.standard_sort == (1, 2, 3, 4, 0)" + ] + }, { "cell_type": "markdown", "id": "1910c935", @@ -737,7 +902,12 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "299.2px" + }, "toc_section_display": true, "toc_window_display": true } diff --git a/book/user-guide/machine-learning.ipynb b/book/user-guide/machine-learning.ipynb index 6e323f5d..13cd1a32 100644 --- a/book/user-guide/machine-learning.ipynb +++ b/book/user-guide/machine-learning.ipynb @@ -4,7 +4,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(machine-learning)=\n", "# Machine Learning" ] }, diff --git a/larch/dataset.py b/larch/dataset.py index 80544f45..91741341 100644 --- a/larch/dataset.py +++ b/larch/dataset.py @@ -186,6 +186,21 @@ def __repr__(self): r = r.replace("xarray.DataArray", "larch.DataArray") return r + def value_counts(self, index_name='index'): + """ + Count the number of times each unique value appears in the array. + + Parameters + ---------- + index_name : str, default 'index' + Name of index dimension in result. 
+ + Returns + ------- + DataArray + """ + values, freqs = np.unique(self, return_counts=True) + return self.__class__(freqs, dims=index_name, coords={index_name:values}) class Dataset(_sharrow_Dataset): """ @@ -1379,10 +1394,10 @@ def choice_avail_summary(dataset, graph=None, availability_co_vars=None): """ if graph is None: if 'ch' in dataset: - ch_ = dataset['ch'].copy() + ch_ = np.asarray(dataset['ch'].copy()) else: ch_ = None - av_ = dataset.get('av') + av_ = np.asarray(dataset.get('av')) else: from .numba.cascading import array_av_cascade, array_ch_cascade @@ -1431,24 +1446,27 @@ def choice_avail_summary(dataset, graph=None, availability_co_vars=None): od = OrderedDict() if graph is not None: - od['name'] = pd.Series(graph.standard_sort_names, index=graph.standard_sort) + idx = graph.standard_sort + od['name'] = pd.Series(graph.standard_sort_names, index=idx) else: - od['name'] = dataset.get('alt_names') + idx = dataset.altids() + if dataset.get('alt_names') is not None: + od['name'] = pd.Series(dataset.get('alt_names'), index=idx) if show_wt: - od['chosen weighted'] = ch_w - od['chosen unweighted'] = ch - od['available weighted'] = av_w - od['available unweighted'] = av + od['chosen weighted'] = pd.Series(ch_w, index=idx) + od['chosen unweighted'] = pd.Series(ch, index=idx) + od['available weighted'] = pd.Series(av_w, index=idx) + od['available unweighted'] = pd.Series(av, index=idx) else: - od['chosen'] = ch - od['available'] = av + od['chosen'] = pd.Series(ch, index=idx) + od['available'] = pd.Series(av, index=idx) if ch_but_not_av is not None: if show_wt: - od['chosen but not available weighted'] = ch_but_not_av_w - od['chosen but not available unweighted'] = ch_but_not_av + od['chosen but not available weighted'] = pd.Series(ch_but_not_av_w, index=idx) + od['chosen but not available unweighted'] = pd.Series(ch_but_not_av, index=idx) else: - od['chosen but not available'] = ch_but_not_av + od['chosen but not available'] = pd.Series(ch_but_not_av, 
index=idx) if availability_co_vars is not None: od['availability condition'] = pd.Series( diff --git a/larch/model/model.py b/larch/model/model.py index d7569f56..eec2c40c 100644 --- a/larch/model/model.py +++ b/larch/model/model.py @@ -755,7 +755,7 @@ def copy(self): import pickle result = pickle.loads(self.dumps()) result.dataservice = self.dataservice - result.graph.set_touch_callback(result.mangle) + result._graph = result.graph return result def remove_unused_parameters(self, verbose=True): diff --git a/larch/model/tree.py b/larch/model/tree.py index 0e3fef02..97eddb48 100644 --- a/larch/model/tree.py +++ b/larch/model/tree.py @@ -5,11 +5,68 @@ from ..util.touch_notifier import TouchNotify from ..util.lazy import lazy -class NestingTree(TouchNotify, nx.DiGraph): +class NestingTree(nx.DiGraph): node_dict_factory = OrderedDict adjlist_dict_factory = OrderedDict + def __get__(self, instance, owner): + # self : SubkeyStore + # instance : instance of parent class that has `self` as a member, or None + # owner : class of `instance` + if instance is None: + pass # print("GRR: no instance") + return self + newself = getattr(instance, self.private_name, None) + if newself is None: + pass # print(f"GRR No Current: {instance=} {owner=}") + try: + instance.initialize_graph() + except ValueError: + pass + newself = getattr(instance, self.private_name, None) + if newself is not None: + newself._instance = instance + pass # print(f"GRR: get {instance=} {newself=}") + return newself + + def __set__(self, instance, value): + # self : NestingTree object + # instance : instance of parent class that has `self` as a member + # value : the new value that is trying to be assigned + assert isinstance(value, NestingTree) + t = value.copy() + t._instance = instance + setattr(instance, self.private_name, t) + try: + t._instance.mangle() + except AttributeError as err: + pass # print(f"GRR: {err}") + else: + pass # print(f"GRR Mangle: {instance}") + + def __delete__(self, instance): + 
setattr(instance, self.private_name, None) + try: + instance.mangle() + except AttributeError as err: + pass # print(f"GRR: {err}") + else: + pass # print(f"GRR Mangle: {instance}") + + def __set_name__(self, owner, name): + self.name = name + self.private_name = "_private_"+name + + def touch(self): + try: + self._instance.mangle() + except AttributeError: + pass # print("GRR: mangle failure") + else: + pass # print("GRR: mangle ok") + + def __init__(self, *arg, root_id=0, suggested_elemental_order=(), **kwarg): if len(arg) and isinstance(arg[0], NestingTree): super().__init__(*arg, **kwarg) @@ -39,6 +96,7 @@ def suggest_elemental_order(self, order): @property def root_id(self): + """int : The code for the root node.""" return self._root_id @root_id.setter @@ -67,6 +125,28 @@ def _clear_caches(self): self.touch() def add_edge(self, u, v, implied=False, _clear_caches=True, **kwarg): + """ + Add an edge between u and v. + + The nodes u and v will be automatically added if they are + not already in the graph. + + Edge attributes can be specified with keywords. + + Parameters + ---------- + u, v : int + Nodes should be integer codes. The upstream node `u` is + a nest or the root node. Downstream node `v` can be + a nest or elemental alternative. + implied : bool, default False + Implied edges are for connection of otherwise unconnected + nests to the root node. + _clear_caches : bool, default True + kwarg : keyword arguments, optional + Edge data (or labels or objects) can be assigned using + keyword arguments. + """ if not implied: drops = [] for u_,v_,imp_ in self.in_edges(nbunch=[v], data='implied'): @@ -83,6 +163,19 @@ def _remove_edge_no_implied(self, u, v, *arg, **kwarg): return result def remove_edge(self, u, v, *arg, **kwarg): + """ + Remove the edge between u and v. + + Parameters + ---------- + u, v : int + Remove the edge between nodes u and v. + + Raises + ------ + NetworkXError + If there is not an edge between u and v. 
+ """ result = super().remove_edge(u, v) if self.in_degree(v)==0 and v!=self._root_id: self.add_edge(self._root_id, v, implied=True, _clear_caches=False) @@ -195,6 +288,30 @@ def add_nodes(self, codes, *arg, parent=None, **kwarg): for code in codes: self.add_node(code, *arg, parent=parent, **kwarg) + def remove_node(self, n): + """ + Remove node n. + + Removes the node n, reconnecting all outedges to the head node + of all inedges. Attempting to remove a non-existent node will + raise an exception. + + Parameters + ---------- + n : int + A node in the graph + """ + replace_edges = { + k: self.edges[k].copy() + for k in self.edges(n) + } + replace_heads = [k for k, _ in self.in_edges(n)] + super(NestingTree, self).remove_node(n) + for k, attrs in replace_edges.items(): + for h in replace_heads: + super().add_edge(h, k[1], **attrs) + self._clear_caches() + @lazy def topological_sorted(self): return list(reverse_lexicographical_topological_sort(self)) @@ -367,7 +484,7 @@ def __getstate__(self): '_successor_slots', '_touch', 'node_dict_factory', - + '_instance', ) for k,v in self.__dict__.items(): if k not in no_pickle: @@ -895,4 +1012,4 @@ def create_tuple(node): if zero_outdegree: msg = "Graph contains a cycle or graph changed during iteration" - raise nx.NetworkXUnfeasible(msg) \ No newline at end of file + raise nx.NetworkXUnfeasible(msg) diff --git a/larch/numba/model.py b/larch/numba/model.py index bd213a7d..8a9649f9 100755 --- a/larch/numba/model.py +++ b/larch/numba/model.py @@ -889,6 +889,35 @@ def mangle(self, *args, **kwargs): self._array_av_cascade = None self._constraint_funcs = None + @property + def graph(self): + if self._graph is None: + try: + self.initialize_graph() + except ValueError: + import warnings + warnings.warn('cannot initialize graph, must define alternatives somehow') + raise RuntimeError('cannot initialize graph, must define alternatives somehow') + return self._graph + + @graph.setter + def graph(self, x): + self._graph = x + + def 
is_mnl(self): + """ + Check if this model is a MNL model + + Returns + ------- + bool + """ + if self._graph is None: + return True + if len(self._graph) - len(self._graph.elementals) == 1: + return True + return False + def initialize_graph(self, dataframes=None, alternative_codes=None, alternative_names=None, root_id=0): """ Write a nesting tree graph for a MNL model. @@ -1093,6 +1122,39 @@ def unmangle(self, force=False): if self._constraint_funcs is None: self._constraint_funcs = [c.as_soft_penalty() for c in self.constraints] + def _scan_logsums_ensure_names(self): + nameset = set() + try: + g = self._graph + except ValueError: + pass + else: + if g is not None: + for nodecode in g.topological_sorted_no_elementals: + if nodecode != g._root_id: + param_name = str(g.nodes[nodecode]['parameter']) + nameset.add(str(param_name)) + if self.quantity_ca is not None and len(self.quantity_ca) > 0: + if self.quantity_scale is not None: + nameset.add(str(self.quantity_scale)) + if self.logsum_parameter is not None: + nameset.add(str(self.logsum_parameter)) + self._ensure_names(nameset, nullvalue=1, initvalue=1, min=0.001, max=1) + + def _ensure_names(self, names, **kwargs): + from ..model.parameter_frame import _empty_parameter_frame + existing_names = set(self._frame.index) + nameset = set(names) + missing_names = nameset - existing_names + if missing_names: + self._frame = pd.concat( + [ + self._frame, + _empty_parameter_frame([n for n in names if (n in missing_names)], **kwargs), + ], + verify_integrity=True, + ) + def __prepare_for_compute( self, x=None, @@ -1862,6 +1924,7 @@ def __getstate__(self): constraint_intensity=self.constraint_intensity, constraint_sharpness=self.constraint_sharpness, _constraint_funcs=self._constraint_funcs, + _private__graph=self._private__graph, ) return super().__getstate__(), state @@ -1870,6 +1933,7 @@ def __setstate__(self, state): self.constraint_intensity = state[1]['constraint_intensity'] self.constraint_sharpness = 
state[1]['constraint_sharpness'] self._constraint_funcs = state[1]['_constraint_funcs'] + self._private__graph = state[1]["_private__graph"] super().__setstate__(state[0]) @property