diff --git a/.gitignore b/.gitignore index 56e88f5e..5827448c 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,5 @@ book/_build book/api/generated book/example/_sharrow_cache_ latent-class-example-report.html +doc/build +sandbox.py diff --git a/.idea/Larch.iml b/.idea/Larch.iml index 584c1168..1016f726 100644 --- a/.idea/Larch.iml +++ b/.idea/Larch.iml @@ -6,8 +6,13 @@ + + + + + - + @@ -20,4 +25,4 @@ - \ No newline at end of file + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..ed00ffaa --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,29 @@ + + + + diff --git a/.idea/misc.xml b/.idea/misc.xml index 66d33019..042785c5 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,8 +3,8 @@ - + - \ No newline at end of file + diff --git a/.idea/runConfigurations/PyTest.xml b/.idea/runConfigurations/PyTest.xml index a0bbb34e..a8a89c36 100644 --- a/.idea/runConfigurations/PyTest.xml +++ b/.idea/runConfigurations/PyTest.xml @@ -4,17 +4,18 @@ \ No newline at end of file + diff --git a/book/user-guide/choice-models.ipynb b/book/user-guide/choice-models.ipynb index b5cd2b9a..a326f0cb 100644 --- a/book/user-guide/choice-models.ipynb +++ b/book/user-guide/choice-models.ipynb @@ -71,7 +71,7 @@ "cats = df_ca['altid'].astype(pd.CategoricalDtype(['Car', 'Bus', 'Walk'])).cat\n", "df_ca['altnum'] = cats.codes + 1\n", "df_ca = df_ca.set_index(['caseid', 'altnum'])\n", - "data = lx.Dataset.from_idca(df_ca.sort_index(), fill_unavail=0)\n", + "data = lx.Dataset.from_idca(df_ca.sort_index(), fill_missing=0)\n", "data = data.drop_vars(\"_avail_\")\n", "data['ChosenCode'] = (data['Chosen'] * data['Chosen'].altnum).sum('altnum')\n", "data.coords['alt_names'] = lx.DataArray(cats.categories, dims=('altnum'), coords={'altnum': data.altnum})\n", diff --git a/larch/dataset.py b/larch/dataset.py index 30bae5d4..80544f45 100644 --- a/larch/dataset.py +++ b/larch/dataset.py @@ 
-794,9 +794,11 @@ def set_altnames(self, altnames, inplace=False): return obj @classmethod - def from_idca(cls, df, crack=True, altnames=None, avail='_avail_', fill_unavail=None): + def from_idca(cls, df, crack=True, altnames=None, avail='_avail_', fill_missing=None): """ - Construct a Dataset from an idco-format DataFrame. + Construct a Dataset from an idca-format DataFrame. + + This method loads the data as dense arrays. Parameters ---------- @@ -814,7 +816,7 @@ def from_idca(cls, df, crack=True, altnames=None, avail='_avail_', fill_unavail= avail : str, default '_avail_' When the imported data is in idce format (i.e. sparse) then an availability indicator is computed and given this name. - fill_unavail : scalar or Mapping, optional + fill_missing : scalar or Mapping, optional Fill values to use for missing values when imported data is in idce format (i.e. sparse). Give a single value to use globally, or a mapping of {variable: value} or {dtype: value}. @@ -822,6 +824,11 @@ def from_idca(cls, df, crack=True, altnames=None, avail='_avail_', fill_unavail= Returns ------- Dataset + + See Also + -------- + Dataset.from_idce : Construct a Dataset from a sparse idca-format DataFrame. 
+ """ if df.index.nlevels != 2: raise ValueError("source idca dataframe must have a two " @@ -846,20 +853,20 @@ def from_idca(cls, df, crack=True, altnames=None, avail='_avail_', fill_unavail= if avail not in ds and len(df) < ds.n_cases * ds.n_alts: av = DataArray.from_series(pd.Series(1, index=df.index)).fillna(0).astype(np.int8) ds[avail] = av - if fill_unavail is not None: - if isinstance(fill_unavail, Mapping): + if fill_missing is not None: + if isinstance(fill_missing, Mapping): for k, i in ds.items(): if ds.ALTID not in i.dims: continue - if k not in fill_unavail and i.dtype not in fill_unavail: + if k not in fill_missing and i.dtype not in fill_missing: continue - filler = fill_unavail.get(k, fill_unavail[i.dtype]) + filler = fill_missing.get(k, fill_missing[i.dtype]) ds[k] = i.where(ds['_avail_']!=0, filler) else: for k, i in ds.items(): if ds.ALTID not in i.dims: continue - ds[k] = i.where(ds['_avail_']!=0, fill_unavail) + ds[k] = i.where(ds['_avail_']!=0, fill_missing) return ds @classmethod @@ -897,6 +904,10 @@ def from_idce(cls, df, crack=True, altnames=None, dim_name=None, alt_index='alt_ Returns ------- Dataset + + See Also + -------- + Dataset.from_idca : Construct a dense Dataset from an idca-format DataFrame. """ if df.index.nlevels != 2: raise ValueError("source idce dataframe must have a two "