Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix garbage collection #1119

Merged
merged 40 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a9a10af
Make creation of aligned mapping lazy
ivirshup Sep 4, 2023
ca0759a
Use weakref for filemanager
ivirshup Sep 4, 2023
92615d8
Merge branch 'main' into remove-backrefs
ilan-gold Jun 3, 2024
cb53b77
(chore): add benchmark
ilan-gold Jun 3, 2024
2df03b5
(fix): use right `gen_adata` function
ilan-gold Jun 3, 2024
38d218f
(fix): change name
ilan-gold Jun 3, 2024
a9eba7a
(chore): fewer runs
ilan-gold Jun 3, 2024
be8ae55
(fix): benchmark test name
ilan-gold Jun 4, 2024
82fc74c
Merge branch 'main' into remove-backrefs
ilan-gold Jun 4, 2024
e0bff0e
(fix): return `cls` for `None` obj
ilan-gold Jun 4, 2024
ccbdaf3
Merge branch 'remove-backrefs' of https://github.com/ivirshup/anndata…
ilan-gold Jun 4, 2024
6ed9963
(fix): try `track_peakmem`
ilan-gold Jun 4, 2024
ce4e2d8
(fix): remove `track_` name
ilan-gold Jun 5, 2024
0c9e563
(fix): docs
ilan-gold Jun 5, 2024
c971dfb
(fix): do custom peakmem track
ilan-gold Jun 5, 2024
9e08151
(fix): `n` -> `runs`
ilan-gold Jun 5, 2024
c771079
(fix): comment
ilan-gold Jun 5, 2024
9d3caad
(fix): `vals` typing for aligned mappings
ilan-gold Jun 5, 2024
eeb8865
(fix): don't use mutable argument
ilan-gold Jun 5, 2024
5267c2d
(fix): change profiler name to `track_peakmem_garbage_collection`
ilan-gold Jun 5, 2024
13d134a
(chore): delete rogue comment
ilan-gold Jun 5, 2024
4dec616
clarify reference use
flying-sheep Jun 7, 2024
5361188
consistency
flying-sheep Jun 7, 2024
b61549d
remove boilerplate
flying-sheep Jun 7, 2024
81d5933
Merge branch 'main' into remove-backrefs
ilan-gold Jun 7, 2024
1a85497
add types, undo moves
flying-sheep Jun 7, 2024
f750954
Add type hints for properties
flying-sheep Jun 7, 2024
ab4cd79
Merge branch 'main' into remove-backrefs
ilan-gold Jun 18, 2024
cf4377c
Merge branch 'main' into pr/ivirshup/1119
flying-sheep Jun 21, 2024
48ae69d
fmt
flying-sheep Jul 2, 2024
480b0a4
cleanup
flying-sheep Jul 2, 2024
24b89a0
more fmt
flying-sheep Jul 4, 2024
05ebee9
dedupe and test
flying-sheep Jul 4, 2024
866abbd
Simplify copy
flying-sheep Jul 4, 2024
3595197
Merge branch 'main' into pr/ivirshup/1119
flying-sheep Jul 8, 2024
015d6ad
docs and typing
flying-sheep Jul 22, 2024
3a1a007
fix parent_mapping type
flying-sheep Jul 22, 2024
73fc94c
Fix I
flying-sheep Jul 22, 2024
ba61e2b
fix docs
flying-sheep Jul 22, 2024
e138fc9
fix 3.9
flying-sheep Jul 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions benchmarks/benchmarks/anndata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

from .utils import gen_adata


class GarbargeCollectionSuite:
def track_peakmem_garbage_collection(self, *_):
for _ in range(10):
adata = gen_adata(10000, 10000, "X-csc") # noqa: F841
67 changes: 53 additions & 14 deletions src/anndata/_core/aligned_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@
from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
from anndata.compat import AwkArray

from ..utils import axis_len, deprecated, ensure_df_homogeneous, warn_once
from ..utils import (
axis_len,
convert_to_dict,
deprecated,
ensure_df_homogeneous,
warn_once,
)
from .access import ElementRef
from .index import _subset
from .views import as_view, view_update
Expand Down Expand Up @@ -115,7 +121,7 @@ def parent(self) -> AnnData | Raw:
return self._parent

def copy(self):
d = self._actual_class(self.parent, self._axis)
d = self._actual_class(self.parent, self._axis, {})
for k, v in self.items():
if isinstance(v, AwkArray):
# Shallow copy since awkward array buffers are immutable
Expand Down Expand Up @@ -272,6 +278,7 @@ def dim_names(self) -> pd.Index:
return (self.parent.obs_names, self.parent.var_names)[self._axis]


# TODO: vals can't be None
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
class AxisArrays(AlignedActualMixin, AxisArraysBase):
def __init__(
self,
Expand All @@ -283,9 +290,9 @@ def __init__(
if axis not in (0, 1):
raise ValueError()
self._axis = axis
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class AxisArraysView(AlignedViewMixin, AxisArraysBase):
Expand Down Expand Up @@ -317,18 +324,21 @@ class LayersBase(AlignedMapping):

# TODO: I thought I had a more elegant solution to overriding this...
def copy(self) -> Layers:
d = self._actual_class(self.parent)
d = self._actual_class(self.parent, vals={})
for k, v in self.items():
d[k] = v.copy()
return d


class Layers(AlignedActualMixin, LayersBase):
def __init__(self, parent: AnnData, vals: Mapping | None = None):
def __init__(
self, parent: AnnData, axis: tuple[int] = (0, 1), vals: Mapping | None = None
):
assert axis == (0, 1), axis
self._parent = parent
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class LayersView(AlignedViewMixin, LayersBase):
Expand Down Expand Up @@ -382,9 +392,9 @@ def __init__(
if axis not in (0, 1):
raise ValueError()
self._axis = axis
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase):
Expand All @@ -396,9 +406,38 @@ def __init__(
):
self.parent_mapping = parent_mapping
self._parent = parent_view
self.subset_idx = (subset_idx, subset_idx)
self.subset_idx = subset_idx
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
self._axis = parent_mapping._axis


PairwiseArraysBase._view_class = PairwiseArraysView
PairwiseArraysBase._actual_class = PairwiseArrays


class AlignedMappingProperty:
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
def __init__(self, name, cls, axis):
self.name = name
self.axis = axis
self.cls = cls

def __get__(self, obj, objtype=None):
if obj.is_view:
parent_anndata = obj._adata_ref
idxs = (obj._oidx, obj._vidx)
parent_aligned_mapping = getattr(parent_anndata, self.name)
return parent_aligned_mapping._view(
obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes)
)
# return self.cls._view_class()
else:
return self.cls(obj, self.axis, getattr(obj, "_" + self.name))

def __set__(self, obj, value):
value = convert_to_dict(value)
_ = self.cls(obj, self.axis, value) # Validate
if obj.is_view:
obj._init_as_actual(obj.copy())
setattr(obj, "_" + self.name, value)

def __delete__(self, obj):
setattr(obj, self.name, dict())
160 changes: 13 additions & 147 deletions src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,14 @@
_move_adj_mtx,
)
from ..logging import anndata_logger as logger
from ..utils import axis_len, convert_to_dict, deprecated, ensure_df_homogeneous
from ..utils import axis_len, deprecated, ensure_df_homogeneous
from .access import ElementRef
from .aligned_df import _gen_dataframe
from .aligned_mapping import (
AlignedMappingProperty,
AxisArrays,
AxisArraysView,
Layers,
LayersView,
PairwiseArrays,
PairwiseArraysView,
)
from .file_backing import AnnDataFileManager, to_memory
from .index import Index, Index1D, _normalize_indices, _subset, get_vector
Expand Down Expand Up @@ -318,11 +316,6 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
# views on attributes of adata_ref
obs_sub = adata_ref.obs.iloc[oidx]
var_sub = adata_ref.var.iloc[vidx]
self._obsm = adata_ref.obsm._view(self, (oidx,))
self._varm = adata_ref.varm._view(self, (vidx,))
self._layers = adata_ref.layers._view(self, (oidx, vidx))
self._obsp = adata_ref.obsp._view(self, oidx)
self._varp = adata_ref.varp._view(self, vidx)
# fix categories
uns = copy(adata_ref._uns)
if settings.should_remove_unused_categories:
Expand Down Expand Up @@ -472,12 +465,11 @@ def _init_as_actual(
# unstructured annotations
self.uns = uns or OrderedDict()

# TODO: Think about consequences of making obsm a group in hdf
self._obsm = AxisArrays(self, 0, vals=convert_to_dict(obsm))
self._varm = AxisArrays(self, 1, vals=convert_to_dict(varm))
self.obsm = obsm
self.varm = varm

self._obsp = PairwiseArrays(self, 0, vals=convert_to_dict(obsp))
self._varp = PairwiseArrays(self, 1, vals=convert_to_dict(varp))
self.obsp = obsp
self.varp = varp

# Backwards compat for connectivities matrices in uns["neighbors"]
_move_adj_mtx({"uns": self._uns, "obsp": self._obsp})
Expand All @@ -502,7 +494,7 @@ def _init_as_actual(
self._clean_up_old_format(uns)

# layers
self._layers = Layers(self, layers)
self.layers = layers

def __sizeof__(self, show_stratified=None, with_disk: bool = False) -> int:
def get_size(X) -> int:
Expand Down Expand Up @@ -675,45 +667,11 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None):
def X(self):
self.X = None

@property
def layers(self) -> Layers | LayersView:
"""\
Dictionary-like object with values of the same dimensions as :attr:`X`.

Layers in AnnData are inspired by loompy’s :ref:`loomlayers`.

Return the layer named `"unspliced"`::

adata.layers["unspliced"]

Create or replace the `"spliced"` layer::

adata.layers["spliced"] = ...

Assign the 10th column of layer `"spliced"` to the variable a::

a = adata.layers["spliced"][:, 10]

Delete the `"spliced"` layer::

del adata.layers["spliced"]

Return layers’ names::

adata.layers.keys()
"""
return self._layers

@layers.setter
def layers(self, value):
layers = Layers(self, vals=convert_to_dict(value))
if self.is_view:
self._init_as_actual(self.copy())
self._layers = layers

@layers.deleter
def layers(self):
self.layers = dict()
obsm = AlignedMappingProperty("obsm", AxisArrays, 0)
varm = AlignedMappingProperty("varm", AxisArrays, 1)
layers = AlignedMappingProperty("layers", Layers, (0, 1))
obsp = AlignedMappingProperty("obsp", PairwiseArrays, 0)
varp = AlignedMappingProperty("varp", PairwiseArrays, 1)
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved

@property
def raw(self) -> Raw:
Expand Down Expand Up @@ -822,7 +780,7 @@ def _set_dim_index(self, value: pd.Index, attr: str):
if self.is_view:
self._init_as_actual(self.copy())
getattr(self, attr).index = value
for v in getattr(self, f"{attr}m").values():
for v in getattr(self, f"_{attr}m").values():
if isinstance(v, pd.DataFrame):
v.index = value

Expand Down Expand Up @@ -896,98 +854,6 @@ def uns(self, value: MutableMapping):
def uns(self):
self.uns = OrderedDict()

@property
def obsm(self) -> AxisArrays | AxisArraysView:
"""\
Multi-dimensional annotation of observations
(mutable structured :class:`~numpy.ndarray`).

Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
of length `n_obs`.
Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`.
"""
return self._obsm

@obsm.setter
def obsm(self, value):
obsm = AxisArrays(self, 0, vals=convert_to_dict(value))
if self.is_view:
self._init_as_actual(self.copy())
self._obsm = obsm

@obsm.deleter
def obsm(self):
self.obsm = dict()

@property
def varm(self) -> AxisArrays | AxisArraysView:
"""\
Multi-dimensional annotation of variables/features
(mutable structured :class:`~numpy.ndarray`).

Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
of length `n_vars`.
Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
"""
return self._varm

@varm.setter
def varm(self, value):
varm = AxisArrays(self, 1, vals=convert_to_dict(value))
if self.is_view:
self._init_as_actual(self.copy())
self._varm = varm

@varm.deleter
def varm(self):
self.varm = dict()

@property
def obsp(self) -> PairwiseArrays | PairwiseArraysView:
"""\
Pairwise annotation of observations,
a mutable mapping with array-like values.

Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
whose first two dimensions are of length `n_obs`.
Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`.
"""
return self._obsp

@obsp.setter
def obsp(self, value):
obsp = PairwiseArrays(self, 0, vals=convert_to_dict(value))
if self.is_view:
self._init_as_actual(self.copy())
self._obsp = obsp

@obsp.deleter
def obsp(self):
self.obsp = dict()

@property
def varp(self) -> PairwiseArrays | PairwiseArraysView:
"""\
Pairwise annotation of variables/features,
a mutable mapping with array-like values.

Stores for each key a two or higher-dimensional :class:`~numpy.ndarray`
whose first two dimensions are of length `n_var`.
Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
"""
return self._varp

@varp.setter
def varp(self, value):
varp = PairwiseArrays(self, 1, vals=convert_to_dict(value))
if self.is_view:
self._init_as_actual(self.copy())
self._varp = varp

@varp.deleter
def varp(self):
self.varp = dict()

def obs_keys(self) -> list[str]:
"""List keys of observation annotation :attr:`obs`."""
return self._obs.keys().tolist()
Expand Down
16 changes: 15 additions & 1 deletion src/anndata/_core/file_backing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import weakref
from collections.abc import Iterator, Mapping
from functools import singledispatch
from pathlib import Path
Expand All @@ -25,13 +26,26 @@ def __init__(
filename: PathLike | None = None,
filemode: Literal["r", "r+"] | None = None,
):
self._adata = adata
self._adata_ref = weakref.ref(adata)
self.filename = filename
self._filemode = filemode
self._file = None
if filename:
self.open()

def __getstate__(self):
state = self.__dict__.copy()
state["_adata_ref"] = state["_adata_ref"]()
return state

def __setstate__(self, state):
self.__dict__ = state.copy()
self.__dict__["_adata_ref"] = weakref.ref(state["_adata_ref"])

@property
def _adata(self):
return self._adata_ref()

def __repr__(self) -> str:
if self.filename is None:
return "Backing file manager: no file is set."
Expand Down
Loading
Loading