From 9e0e9f1cbd13f3951a3d1a6bd7d76461b5a56872 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 5 Oct 2023 21:31:27 +0200 Subject: [PATCH 1/5] Add-concat-on-disk-examples (#1161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix bug of in_files being dict of len 1 * add simple example * add test dep * Create parent dir * simplify * don’t write to dataset dir * test value counts * more comprehensive * no more overwrite * Standardize behaviour: error if directory doesn't exist, error if no objects passed --------- Co-authored-by: Isaac Virshup --- anndata/_core/anndata.py | 2 +- anndata/experimental/merge.py | 114 ++++++++++++++++--------- anndata/tests/test_concatenate_disk.py | 15 ++++ pyproject.toml | 1 + 4 files changed, 93 insertions(+), 39 deletions(-) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b04e3b161..944fc66a4 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -1681,7 +1681,7 @@ def concatenate( ... dict(var_names=['d', 'c', 'b'], annoA=[0, 1, 2]), ... ) >>> adata3 = AnnData( - ... np.array([[1, 2, 3], [4, 5, 6]]), + ... np.array([[1, 2, 3], [4, 5, 6]]), ... dict(obs_names=['s1', 's2'], anno2=['d3', 'd4']), ... dict(var_names=['d', 'c', 'b'], annoA=[0, 2, 3], annoB=[0, 1, 2]), ... ) diff --git a/anndata/experimental/merge.py b/anndata/experimental/merge.py index 711bcd52d..2413b3348 100644 --- a/anndata/experimental/merge.py +++ b/anndata/experimental/merge.py @@ -2,7 +2,7 @@ import os import shutil -from collections.abc import Collection, Iterable, Mapping, MutableMapping, Sequence +from collections.abc import Collection, Iterable, Mapping, Sequence from functools import singledispatch from pathlib import Path from typing import ( @@ -105,7 +105,9 @@ def as_group(store, *args, **kwargs) -> ZarrGroup | H5Group: @as_group.register(os.PathLike) -def _(store: os.PathLike, *args, **kwargs) -> ZarrGroup | H5Group: +@as_group.register(str) +def _(store: os.PathLike | str, *args, **kwargs) -> ZarrGroup | H5Group: + store = Path(store) if store.suffix == ".h5ad": import h5py @@ -115,11 +117,6 @@ def _(store: os.PathLike, *args, **kwargs) -> ZarrGroup | H5Group: return zarr.open_group(store, *args, **kwargs) -@as_group.register(str) -def _(store: str, *args, **kwargs) -> ZarrGroup | H5Group: - return as_group(Path(store), *args, **kwargs) - - @as_group.register(ZarrGroup) @as_group.register(H5Group) def _(store, *args, **kwargs): @@ -395,33 +392,33 @@ def _write_dim_annot(groups, output_group, dim, concat_indices, label, label_col def concat_on_disk( - in_files: Collection[str | os.PathLike] | MutableMapping[str, str | os.PathLike], + in_files: Collection[str | os.PathLike] | Mapping[str, str | os.PathLike], out_file: str | os.PathLike, *, - overwrite: bool = False, max_loaded_elems: int = 100_000_000, axis: Literal[0, 1] = 0, join: Literal["inner", "outer"] = "inner", merge: StrategiesLiteral | Callable[[Collection[Mapping]], Mapping] | None = None, - uns_merge: StrategiesLiteral - | Callable[[Collection[Mapping]], Mapping] - | None = None, + uns_merge: ( + StrategiesLiteral | Callable[[Collection[Mapping]], Mapping] | None + ) = None, label: str | None = None, keys: Collection[str] | None = None, index_unique: str | None = None, fill_value: Any | None = None, pairwise: bool = False, ) -> None: - """Concatenates multiple AnnData objects along a specified axis using their + """\ + Concatenates multiple AnnData objects along a specified axis using their corresponding stores or paths, and writes 
the resulting AnnData object to a target location on disk. - Unlike the `concat` function, this method does not require + Unlike :func:`anndata.concat`, this method does not require loading the input AnnData objects into memory, making it a memory-efficient alternative for large datasets. The resulting object written to disk should be equivalent to the concatenation of the loaded AnnData objects using - the `concat` function. + :func:`anndata.concat`. To adjust the maximum amount of data loaded in memory; for sparse arrays use the max_loaded_elems argument; for dense arrays @@ -436,19 +433,16 @@ def concat_on_disk( argument and values are concatenated. out_file The target path or store to write the result in. - overwrite - If `False` while a file already exists it will raise an error, - otherwise it will overwrite. max_loaded_elems The maximum number of elements to load in memory when concatenating sparse arrays. Note that this number also includes the empty entries. Set to 100m by default meaning roughly 400mb will be loaded - to memory at simultaneously. + to memory simultaneously. axis Which axis to concatenate along. join - How to align values when concatenating. If "outer", the union of the other axis - is taken. If "inner", the intersection. See :doc:`concatenation <../concatenation>` + How to align values when concatenating. If `"outer"`, the union of the other axis + is taken. If `"inner"`, the intersection. See :doc:`concatenation <../concatenation>` for more. merge How elements not aligned to the axis being concatenated along are selected. @@ -471,7 +465,7 @@ def concat_on_disk( incrementing integer labels. index_unique Whether to make the index unique by using the keys. If provided, this - is the delimiter between "{orig_idx}{index_unique}{key}". When `None`, + is the delimiter between `"{orig_idx}{index_unique}{key}"`. When `None`, the original indices are kept. fill_value When `join="outer"`, this is the value that will be used to fill the introduced @@ -483,13 +477,58 @@ def concat_on_disk( Notes ----- - .. warning:: - - If you use `join='outer'` this fills 0s for sparse data when - variables are absent in a batch. Use this with care. Dense data is - filled with `NaN`. + If you use `join='outer'` this fills 0s for sparse data when + variables are absent in a batch. Use this with care. Dense data is + filled with `NaN`. + + Examples + -------- + + See :func:`anndata.concat` for the semantics. + The following examples highlight the differences this function has. + + First, let’s get some “big” datasets with a compatible ``var`` axis: + + >>> import httpx + >>> import scanpy as sc + >>> api_url = "https://api.cellxgene.cziscience.com/curation/v1" + >>> def get_cellxgene_data(id_: str): + ... out_path = sc.settings.datasetdir / f'{id_}.h5ad' + ... if out_path.exists(): + ... return out_path + ... ds_versions = httpx.get(f'{api_url}/datasets/{id_}/versions').raise_for_status().json() + ... ds = ds_versions[0] # newest + ... file_url = next(a['url'] for a in ds['assets'] if a['filetype'] == 'H5AD') + ... sc.settings.datasetdir.mkdir(parents=True, exist_ok=True) + ... with httpx.stream('GET', file_url) as r, out_path.open('wb') as f: + ... r.raise_for_status() + ... for data in r.iter_bytes(): + ... f.write(data) + ... 
return out_path + >>> path_b_cells = get_cellxgene_data('0895c838-e550-48a3-a777-dbcd35d30272') + >>> path_fetal = get_cellxgene_data('08e94873-c2a6-4f7d-ab72-aeaff3e3f929') + + Now we can concatenate them on-disk: + + >>> import anndata as ad + >>> ad.experimental.concat_on_disk( + ... dict(b_cells=path_b_cells, fetal=path_fetal), + ... 'merged.h5ad', + ... label='dataset', + ... ) + >>> adata = ad.read_h5ad('merged.h5ad', backed=True) + >>> adata.X + CSRDataset: backend hdf5, shape (490, 15585), data_dtype float32 + >>> adata.obs['dataset'].value_counts() + dataset + fetal 344 + b_cells 146 + Name: count, dtype: int64 """ + if len(in_files) == 0: + raise ValueError("No objects to concatenate.") + # Argument normalization if pairwise: raise NotImplementedError("pairwise concatenation not yet implemented") @@ -498,14 +537,11 @@ def concat_on_disk( merge = resolve_merge_strategy(merge) uns_merge = resolve_merge_strategy(uns_merge) - if len(in_files) <= 1: - if len(in_files) == 1: - if not overwrite and Path(out_file).is_file(): - raise FileExistsError( - f"File “{out_file}” already exists and `overwrite` is set to False" - ) - shutil.copy2(in_files[0], out_file) - return + + out_file = Path(out_file) + if not out_file.parent.exists(): + raise FileNotFoundError(f"Parent directory of {out_file} does not exist.") + if isinstance(in_files, Mapping): if keys is not None: raise TypeError( @@ -516,15 +552,17 @@ def concat_on_disk( else: in_files = list(in_files) + if len(in_files) == 1: + shutil.copy2(in_files[0], out_file) + return + if keys is None: keys = np.arange(len(in_files)).astype(str) _, dim = _resolve_dim(axis=axis) _, alt_dim = _resolve_dim(axis=1 - axis) - mode = "w" if overwrite else "w-" - - output_group = as_group(out_file, mode=mode) + output_group = as_group(out_file, mode="w") groups = [as_group(f) for f in in_files] use_reindexing = False diff --git a/anndata/tests/test_concatenate_disk.py b/anndata/tests/test_concatenate_disk.py index 0192df452..f9eab9540 100644 --- a/anndata/tests/test_concatenate_disk.py +++ b/anndata/tests/test_concatenate_disk.py @@ -250,3 +250,18 @@ def gen_index(n): def test_concatenate_obsm_inner(obsm_adatas, tmp_path, file_format): assert_eq_concat_on_disk(obsm_adatas, tmp_path, file_format, join="inner") + + +def test_output_dir_exists(tmp_path): + in_pth = tmp_path / "in.h5ad" + out_pth = tmp_path / "does_not_exist" / "out.h5ad" + + AnnData(X=np.ones((5, 1))).write_h5ad(in_pth) + + with pytest.raises(FileNotFoundError, match=f"{out_pth}"): + concat_on_disk([in_pth], out_pth) + + +def test_failure_w_no_args(tmp_path): + with pytest.raises(ValueError, match="No objects to concatenate"): + concat_on_disk([], tmp_path / "out.h5ad") diff --git a/pyproject.toml b/pyproject.toml index e375d4700..9a2f514ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,7 @@ test = [ "joblib", "boltons", "scanpy", + "httpx", # For data downloading "dask[array,distributed]", "awkward>=2.3", "pytest_memray", From 5e8102dad4c16282b2c02ef23c0ab9c4db1383b0 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Fri, 6 Oct 2023 13:25:34 +0200 Subject: [PATCH 2/5] Add zarr examples to fileformat docs (#1162) * Add zarr examples to fileformat docs * Release note + minor fixes * Apply changes from review * Add missing line of output --- docs/conf.py | 1 + docs/fileformat-prose.md | 458 ++++++++++++++++++++++++++++------- docs/release-notes/0.10.0.md | 2 + pyproject.toml | 1 + 4 files changed, 370 insertions(+), 92 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 
a25b0f6cf..d5c872c60 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,6 +51,7 @@ "sphinx.ext.autosummary", "sphinx_autodoc_typehints", # needs to be after napoleon "sphinx_issues", + "sphinx_design", "sphinxext.opengraph", "scanpydoc", # needs to be before linkcode "sphinx.ext.linkcode", diff --git a/docs/fileformat-prose.md b/docs/fileformat-prose.md index 340be1fdc..9843e9c81 100644 --- a/docs/fileformat-prose.md +++ b/docs/fileformat-prose.md @@ -1,7 +1,7 @@ # On-disk format ```{note} -These docs are written for anndata 0.8. +These docs are written for anndata 0.8+. Files written before this version may differ in some conventions, but will still be read by newer versions of the library. ``` @@ -10,21 +10,42 @@ AnnData objects are saved on disk to hierarchical array stores like [HDF5] (via {doc}`H5py `) and {doc}`zarr:index`. This allows us to have very similar structures in disk and on memory. -As an example we’ll look into a typical `.h5ad` object that’s been through an analysis. -This structure should be largely equivalent to Zarr structure, though there are a few minor differences. +As an example we’ll look into a typical `.h5ad`/ `.zarr` object that’s been through an analysis. +The structures are largely equivalent, though there are a few minor differences when it comes to type encoding. ## Elements - + +`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + ```python >>> import h5py ->>> f = h5py.File("02_processed.h5ad", "r") ->>> list(f.keys()) -['X', 'layers', 'obs', 'obsm', 'uns', 'var', 'varm'] +>>> store = h5py.File("for-ondisk-docs/cart-164k-processed.h5ad", mode="r") +>>> list(store.keys()) +['X', 'layers', 'obs', 'obsm', 'obsp', 'uns', 'var', 'varm', 'varp'] ``` +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> import zarr +>>> store = zarr.open("for-ondisk-docs/cart-164k-processed.zarr", mode="r") +>>> list(store.keys()) +['X', 'layers', 'obs', 'obsm', 'obsp', 'uns', 'var', 'varm', 'varp'] +``` + +```` + +````` + +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> store["X"].visititems(print) +data +indices +indptr +``` + +```` + +````` ### Sparse array specification (v0.1.0) @@ -148,14 +203,17 @@ DataFrames are saved as a columnar format in a group, so each column of a DataFr We save a little more information in the attributes here. ```python ->>> dict(f["obs"].attrs) -{'_index': 'Cell', - 'column-order': array(['sample', 'cell_type', 'n_genes_by_counts', - 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', - 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', - 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', - 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', - 'label_by_score'], dtype=object), +>>> dict(store["var"].attrs) +{'_index': 'ensembl_id', + 'column-order': ['highly_variable', + 'means', + 'variances', + 'variances_norm', + 'feature_is_filtered', + 'feature_name', + 'feature_reference', + 'feature_biotype', + 'mito'], 'encoding-type': 'dataframe', 'encoding-version': '0.2.0'} ``` @@ -163,19 +221,53 @@ We save a little more information in the attributes here. These attributes identify the index of the dataframe, as well as the original order of the columns. Each column in this dataframe is encoded as its own array. 
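Because each column is its own array, one column can be read back without touching the rest of the dataframe. A minimal sketch, assuming the `store` opened above and using the `means` column and `ensembl_id` index named in the attributes just shown (the same calls work for the HDF5 and Zarr stores):

```python
>>> import pandas as pd
>>> var = store["var"]
>>> index = var[var.attrs["_index"]][...].astype(str)  # row labels ("ensembl_id")
>>> means = pd.Series(var["means"][...], index=index)  # reads only this one column
```

Listing the group shows this one-array-per-column layout: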
+`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + ```python ->>> dict(f["obs"]["total_counts"].attrs) -{'encoding-type': 'array', 'encoding-version': '0.2.0'} +>>> store["var"].visititems(print) +ensembl_id +feature_biotype +feature_biotype/categories +feature_biotype/codes +feature_is_filtered +... +``` + +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> store["var"].visititems(print) +ensembl_id +feature_biotype +feature_biotype/categories +feature_biotype/codes +feature_is_filtered +... +``` + +```` ->>> dict(f["obs"]["cell_type"].attrs) +````` + +```python +>>> dict(store["var"]["feature_name"].attrs) {'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False} + +>>> dict(store["var"]["feature_is_filtered"].attrs) +{'encoding-type': 'array', 'encoding-version': '0.2.0'} ``` ### Dataframe Specification (v0.2.0) * A dataframe MUST be stored as a group * The group's metadata: - * MUST contain the field `"_index"`, whose value is the key of the array to be used as an index + * MUST contain the field `"_index"`, whose value is the key of the array to be used as an index/ row labels * MUST contain encoding metadata `"encoding-type": "dataframe"`, `"encoding-version": "0.2.0"` * MUST contain `"column-order"` an array of strings denoting the order of column entries * The group MUST contain an array for the index @@ -190,15 +282,40 @@ A `Group` is created for any `Mapping` in the AnnData object, including the standard `obsm`, `varm`, `layers`, and `uns`. Notably, this definition is used recursively within `uns`: +`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + ```python ->>> f["uns"].visititems(print) +>>> store["uns"].visititems(print) [...] -pca -pca/variance -pca/variance_ratio +pca +pca/variance +pca/variance_ratio [...] ``` +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> store["uns"].visititems(print) +[...] +pca +pca/variance +pca/variance_ratio +[...] +``` + +```` + +````` + + + ### Mapping specifications (v0.1.0) * Each mapping MUST be its own group @@ -209,14 +326,40 @@ pca/variance_ratio Zero dimensional arrays are used for scalar values (i.e. single values like strings, numbers or booleans). These should only occur inside of `uns`, and are commonly saved parameters: +`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + ```python ->>> f["uns/neighbors/params"].visititems(print) +>>> store["uns/neighbors/params"].visititems(print) method metric n_neighbors ->>> f["uns/neighbors/params/metric"][()] +random_state +``` + +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> store["uns/neighbors/params"].visititems(print) +method +metric +n_neighbors +random_state +``` + +```` + +````` + +```python +>>> store["uns/neighbors/params/metric"][()] 'euclidean' ->>> dict(f["uns/neighbors/params/metric"].attrs) +>>> dict(store["uns/neighbors/params/metric"].attrs) {'encoding-type': 'string', 'encoding-version': '0.2.0'} ``` @@ -234,7 +377,7 @@ n_neighbors ## Categorical arrays ```python ->>> categorical = f["obs"]["cell_type"] +>>> categorical = store["obs"]["development_stage"] >>> dict(categorical.attrs) {'encoding-type': 'categorical', 'encoding-version': '0.2.0', 'ordered': False} ``` @@ -245,12 +388,32 @@ Each entry in the `codes` array is the zero-based index of the encoded value in To represent a missing value, a code of `-1` is used. We store these two arrays separately. 
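Decoding is then a plain integer lookup, which pandas can do directly. A minimal sketch using the `categorical` group opened above (its `categories`/`codes` layout is shown in the listing below); `pandas.Categorical.from_codes` treats a code of `-1` as a missing value:

```python
>>> import pandas as pd
>>> decoded = pd.Categorical.from_codes(
...     codes=categorical["codes"][...],
...     categories=categorical["categories"][...].astype(str),
...     ordered=bool(categorical.attrs["ordered"]),
... )
```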
+`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + ```python >>> categorical.visititems(print) -categories -codes +categories +codes ``` +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> categorical.visititems(print) +categories +codes +``` + +```` + +````` + ### Categorical array specification (v0.2.0) * Categorical arrays MUST be stored as a group @@ -265,6 +428,30 @@ codes Arrays of strings are handled differently than numeric arrays since numpy doesn't really have a good way of representing arrays of unicode strings. `anndata` assumes strings are text-like data, so uses a variable length encoding. +`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + +```python +>>> store["var"][store["var"].attrs["_index"]] + +``` + +```` + +````{tab-item} Zarr +:sync: zarr + +```python +>>> store["var"][store["var"].attrs["_index"]] + +``` + +```` + +````` + ```python >>> dict(categorical["categories"].attrs) {'encoding-type': 'string-array', 'encoding-version': '0.2.0'} @@ -283,20 +470,56 @@ We support IO with Pandas nullable integer and boolean arrays. We represent these on disk similar to `numpy` masked arrays, `julia` nullable arrays, or `arrow` validity bitmaps (see {issue}`504` for more discussion). That is, we store an indicator array (or mask) of null values alongside the array of all values. +`````{tab-set} + +````{tab-item} HDF5 +:sync: hdf5 + +```python +>>> from anndata.experimental import write_elem +>>> null_store = h5py.File("tmp.h5", mode="w") +>>> int_array = pd.array([1, None, 3, 4]) +>>> int_array + +[1, , 3, 4] +Length: 4, dtype: Int64 + +>>> write_elem(null_store, "nullable_integer", int_array) + +>>> null_store.visititems(print) +nullable_integer +nullable_integer/mask +nullable_integer/values +``` + +```` + +````{tab-item} Zarr +:sync: zarr + ```python ->>> h5_file = h5py.File("anndata_format.h5", "a") +>>> from anndata.experimental import write_elem +>>> null_store = zarr.open() >>> int_array = pd.array([1, None, 3, 4]) >>> int_array [1, , 3, 4] Length: 4, dtype: Int64 ->>> write_elem(h5_file, "nullable_integer", int_array) ->>> h5_file["nullable_integer"].visititems(print) -mask -values +>>> write_elem(null_store, "nullable_integer", int_array) ->>> dict(h5_file["nullable_integer"].attrs) +>>> null_store.visititems(print) +nullable_integer +nullable_integer/mask +nullable_integer/values +``` + +```` + +````` + +```python +>>> dict(null_store["nullable_integer"].attrs) {'encoding-type': 'nullable-integer', 'encoding-version': '0.1.0'} ``` @@ -330,54 +553,80 @@ break down the awkward array into it’s constituent arrays using [`ak.to_buffers`](https://awkward-array.readthedocs.io/en/latest/_auto/ak.to_buffers.html) then writing these arrays using `anndata`’s methods. 
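To get a feel for what `ak.to_buffers` produces before anything is written, here is a minimal sketch on a toy ragged array (the toy array and the resulting buffer names are illustrative of the naming scheme, not taken from the file below):

```python
>>> import awkward as ak
>>> form, length, container = ak.to_buffers(ak.Array([[1, 2], [], [3]]))
>>> length  # number of outer entries
3
>>> sorted(container)  # flat buffers, keyed by form node and role
['node0-offsets', 'node1-data']
```

Each buffer in `container` is an ordinary flat array, so it can be stored with the same machinery as any other array, while `form` and `length` carry the structure needed to reassemble it.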
-The container of arrays is stored in a group called `"container"` +`````{tab-set} +````{tab-item} HDF5 +:sync: hdf5 ```python ->>> import zarr ->>> z = zarr.open("airr.zarr", "r") ->>> awkward_group = z["obsm/airr"] ->>> awkward_group.tree() +>>> store["varm/transcript"].visititems(print) +node1-mask +node10-data +node11-mask +node12-offsets +node13-mask +node14-data +node16-offsets +node17-data +node2-offsets +node3-data +node4-mask +node5-offsets +node6-data +node7-mask +node8-offsets +node9-mask ``` -``` -airr - └── container - ├── node0-offsets (17,) int64 - ├── node2-offsets (40,) int64 - ├── node3-data (117,) uint8 - ├── node4-offsets (40,) int64 - └── node5-data (117,) uint8 -``` +```` -The length of the array is saved to it’s own `"length"` attribute, -while metadata for the array structure is serialized and saved to the -`“form”` attribute. +````{tab-item} Zarr +:sync: zarr ```python ->>> dict(awkward_group.attrs) +>>> store["varm/transcript"].visititems(print) +node1-mask +node10-data +node11-mask +node12-offsets +node13-mask +node14-data +node16-offsets +node17-data +node2-offsets +node3-data +node4-mask +node5-offsets +node6-data +node7-mask +node8-offsets +node9-mask ``` +```` + +````` + + + +The length of the array is saved to it’s own `"length"` attribute, +while metadata for the array structure is serialized and saved to the +`“form”` attribute. ```python -{ - 'encoding-type': 'awkward-array', - 'encoding-version': '0.1.0', - 'form': '{"class": "ListOffsetArray", "offsets": "i64", "content": {"class": ' - '"RecordArray", "contents": {"locus": {"class": "ListOffsetArray", ' - '"offsets": "i64", "content": {"class": "NumpyArray", "primitive": ' - '"uint8", "inner_shape": [], "has_identifier": false, "parameters": ' - '{"__array__": "char"}, "form_key": "node3"}, "has_identifier": ' - 'false, "parameters": {"__array__": "string"}, "form_key": "node2"}, ' - '"junction_aa": {"class": "ListOffsetArray", "offsets": "i64", ' - '"content": {"class": "NumpyArray", "primitive": "uint8", ' - '"inner_shape": [], "has_identifier": false, "parameters": ' - '{"__array__": "char"}, "form_key": "node5"}, "has_identifier": ' - 'false, "parameters": {"__array__": "string"}, "form_key": "node4"}}, ' - '"has_identifier": false, "parameters": {}, "form_key": "node1"}, ' - '"has_identifier": false, "parameters": {}, "form_key": "node0"}' - 'length': 16 -} +>>> dict(store["varm/transcript"].attrs) +{'encoding-type': 'awkward-array', + 'encoding-version': '0.1.0', + 'form': '{"class": "RecordArray", "fields": ["tx_id", "seq_name", ' + '"exon_seq_start", "exon_seq_end", "ensembl_id"], "contents": ' + '[{"class": "BitMaskedArray", "mask": "u8", "valid_when": true, ' + '"lsb_order": true, "content": {"class": "ListOffsetArray", ' + '"offsets": "i64", "content": {"class": "NumpyArray", "primitive": ' + '"uint8", "inner_shape": [], "parameters": {"__array__": "char"}, ' + '"form_key": "node3"}, "parameters": {"__array__": "string"}, ' + '"form_key": "node2"}, "parameters": {}, "form_key": "node1"}, ' + ... + 'length': 40145} ``` These can be read back as awkward arrays using the @@ -387,15 +636,40 @@ function: ```python >>> import awkward as ak >>> from anndata.experimental import read_elem +>>> awkward_group = store["varm/transcript"] >>> ak.from_buffers( ... awkward_group.attrs["form"], ... awkward_group.attrs["length"], ... {k: read_elem(v) for k, v in awkward_group.items()} ... 
) -``` - -``` - +>>> transcript_models[:5] +[{tx_id: 'ENST00000450305', seq_name: '1', exon_seq_start: [...], ...}, + {tx_id: 'ENST00000488147', seq_name: '1', exon_seq_start: [...], ...}, + {tx_id: 'ENST00000473358', seq_name: '1', exon_seq_start: [...], ...}, + {tx_id: 'ENST00000477740', seq_name: '1', exon_seq_start: [...], ...}, + {tx_id: 'ENST00000495576', seq_name: '1', exon_seq_start: [...], ...}] +----------------------------------------------------------------------- +type: 5 * { + tx_id: ?string, + seq_name: ?string, + exon_seq_start: option[var * ?int64], + exon_seq_end: option[var * ?int64], + ensembl_id: ?string +} +>>> transcript_models[0] +{tx_id: 'ENST00000450305', + seq_name: '1', + exon_seq_start: [12010, 12179, 12613, 12975, 13221, 13453], + exon_seq_end: [12057, 12227, 12697, 13052, 13374, 13670], + ensembl_id: 'ENSG00000223972'} +------------------------------------------------------------ +type: { + tx_id: ?string, + seq_name: ?string, + exon_seq_start: option[var * ?int64], + exon_seq_end: option[var * ?int64], + ensembl_id: ?string +} ``` diff --git a/docs/release-notes/0.10.0.md b/docs/release-notes/0.10.0.md index d0809f85f..d419eca57 100644 --- a/docs/release-notes/0.10.0.md +++ b/docs/release-notes/0.10.0.md @@ -36,6 +36,8 @@ We expect to make a full release by October. ```{rubric} Documentation ``` +* Added zarr examples to {doc}`file format docs` {pr}`1162` {user}`ivirshup` + ```{rubric} Breaking changes ``` diff --git a/pyproject.toml b/pyproject.toml index 9a2f514ea..7bfbe496a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ doc = [ "awkward>=2.0.7", "IPython", # For syntax highlighting in notebooks "myst_parser", + "sphinx_design>=0.5.0", ] test = [ "loompy>=3.0.5", From a4f34eb6716cfd77409c1541172352f52487698b Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Fri, 6 Oct 2023 13:40:39 +0200 Subject: [PATCH 3/5] Update release notes (#1165) --- docs/release-notes/0.10.0.md | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/release-notes/0.10.0.md b/docs/release-notes/0.10.0.md index d419eca57..01520e33c 100644 --- a/docs/release-notes/0.10.0.md +++ b/docs/release-notes/0.10.0.md @@ -1,14 +1,4 @@ -### 0.10.0rc1 {small}`2023-09-09` - -````{note} -anndata 0.10.0 is currently available as a release candidate for testing. You can install this version of anndata with: - -``` -pip install -U --pre anndata -``` - -We expect to make a full release by October. 
-```` +### 0.10.0 {small}`2023-10-06` ```{rubric} Features ``` From c6dcffd380cbeefbc8f6f1cefdb449c6f3965a01 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Fri, 6 Oct 2023 14:12:37 +0200 Subject: [PATCH 4/5] Start 0.10.1 (#1166) --- docs/release-notes/0.10.1.md | 10 ++++++++++ docs/release-notes/release-latest.md | 3 +++ 2 files changed, 13 insertions(+) create mode 100644 docs/release-notes/0.10.1.md diff --git a/docs/release-notes/0.10.1.md b/docs/release-notes/0.10.1.md new file mode 100644 index 000000000..1b83f8906 --- /dev/null +++ b/docs/release-notes/0.10.1.md @@ -0,0 +1,10 @@ +### 0.10.1 {small}`the future` + +```{rubric} Bugfix +``` + +```{rubric} Documentation +``` + +```{rubric} Performance +``` diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md index 0a89b1582..3203b03e7 100644 --- a/docs/release-notes/release-latest.md +++ b/docs/release-notes/release-latest.md @@ -1,4 +1,7 @@ ## Version 0.10 +```{include} /release-notes/0.10.1.md +``` + ```{include} /release-notes/0.10.0.md ``` From 6a969eb4696029716bc49dbe8686a9fb823f6e4f Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Fri, 6 Oct 2023 14:32:19 +0200 Subject: [PATCH 5/5] start 0.11 (#1168) --- docs/release-notes/0.11.0.md | 13 +++++++++++++ docs/release-notes/release-latest.md | 5 +++++ 2 files changed, 18 insertions(+) create mode 100644 docs/release-notes/0.11.0.md diff --git a/docs/release-notes/0.11.0.md b/docs/release-notes/0.11.0.md new file mode 100644 index 000000000..32aabe87a --- /dev/null +++ b/docs/release-notes/0.11.0.md @@ -0,0 +1,13 @@ +### 0.11.0 {small}`the future` + +```{rubric} Features +``` + +```{rubric} Bugfix +``` + +```{rubric} Documentation +``` + +```{rubric} Performance +``` diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md index 3203b03e7..5337aa78f 100644 --- a/docs/release-notes/release-latest.md +++ b/docs/release-notes/release-latest.md @@ -1,3 +1,8 @@ +## Version 0.11 + +```{include} /release-notes/0.11.0.md +``` + ## Version 0.10 ```{include} /release-notes/0.10.1.md