Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix garbage collection #1119

Merged
merged 40 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a9a10af
Make creation of aligned mapping lazy
ivirshup Sep 4, 2023
ca0759a
Use weakref for filemanager
ivirshup Sep 4, 2023
92615d8
Merge branch 'main' into remove-backrefs
ilan-gold Jun 3, 2024
cb53b77
(chore): add benchmark
ilan-gold Jun 3, 2024
2df03b5
(fix): use right `gen_adata` function
ilan-gold Jun 3, 2024
38d218f
(fix): change name
ilan-gold Jun 3, 2024
a9eba7a
(chore): fewer runs
ilan-gold Jun 3, 2024
be8ae55
(fix): benchmark test name
ilan-gold Jun 4, 2024
82fc74c
Merge branch 'main' into remove-backrefs
ilan-gold Jun 4, 2024
e0bff0e
(fix): return `cls` for `None` obj
ilan-gold Jun 4, 2024
ccbdaf3
Merge branch 'remove-backrefs' of https://github.com/ivirshup/anndata…
ilan-gold Jun 4, 2024
6ed9963
(fix): try `track_peakmem`
ilan-gold Jun 4, 2024
ce4e2d8
(fix): remove `track_` name
ilan-gold Jun 5, 2024
0c9e563
(fix): docs
ilan-gold Jun 5, 2024
c971dfb
(fix): do custom peakmem track
ilan-gold Jun 5, 2024
9e08151
(fix): `n` -> `runs`
ilan-gold Jun 5, 2024
c771079
(fix): comment
ilan-gold Jun 5, 2024
9d3caad
(fix): `vals` typing for aligned mappings
ilan-gold Jun 5, 2024
eeb8865
(fix): don't use mutable argument
ilan-gold Jun 5, 2024
5267c2d
(fix): change profiler name to `track_peakmem_garbage_collection`
ilan-gold Jun 5, 2024
13d134a
(chore): delete rogue comment
ilan-gold Jun 5, 2024
4dec616
clarify reference use
flying-sheep Jun 7, 2024
5361188
consistency
flying-sheep Jun 7, 2024
b61549d
remove boilerplate
flying-sheep Jun 7, 2024
81d5933
Merge branch 'main' into remove-backrefs
ilan-gold Jun 7, 2024
1a85497
add types, undo moves
flying-sheep Jun 7, 2024
f750954
Add type hints for properties
flying-sheep Jun 7, 2024
ab4cd79
Merge branch 'main' into remove-backrefs
ilan-gold Jun 18, 2024
cf4377c
Merge branch 'main' into pr/ivirshup/1119
flying-sheep Jun 21, 2024
48ae69d
fmt
flying-sheep Jul 2, 2024
480b0a4
cleanup
flying-sheep Jul 2, 2024
24b89a0
more fmt
flying-sheep Jul 4, 2024
05ebee9
dedupe and test
flying-sheep Jul 4, 2024
866abbd
Simplify copy
flying-sheep Jul 4, 2024
3595197
Merge branch 'main' into pr/ivirshup/1119
flying-sheep Jul 8, 2024
015d6ad
docs and typing
flying-sheep Jul 22, 2024
3a1a007
fix parent_mapping type
flying-sheep Jul 22, 2024
73fc94c
Fix I
flying-sheep Jul 22, 2024
ba61e2b
fix docs
flying-sheep Jul 22, 2024
e138fc9
fix 3.9
flying-sheep Jul 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions benchmarks/benchmarks/anndata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

import tracemalloc

import numpy as np

from .utils import gen_adata


class GarbargeCollectionSuite:
runs = 10

# https://github.com/pythonprofilers/memory_profiler/issues/402 and other backend does not pick this up
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok @ivirshup @flying-sheep independent of this issue, were you aware that https://pypi.org/project/memory-profiler/ is a line-by-line profiler? I think this is probably not very good for us, no? When I read get_peakmem (i.e., the function in utils we wrote based on memory_profiler), I would think that it would really track the literal peak memory, and not the peak over individual line operations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def track_peakmem_write_compressed(self, *_):
def display_top(snapshot, key_type="lineno"):
snapshot = snapshot.filter_traces(
(
tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
tracemalloc.Filter(False, "<unknown>"),
)
)
top_stats = snapshot.statistics(key_type)
total = sum(stat.size for stat in top_stats)
return total

total = np.zeros(self.runs)
tracemalloc.start()
for i in range(self.runs):
data = gen_adata(10000, 10000, "X-csc") # noqa: F841
snapshot = tracemalloc.take_snapshot()
total[i] = display_top(snapshot)
tracemalloc.stop()
return max(total)
68 changes: 54 additions & 14 deletions src/anndata/_core/aligned_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@
from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
from anndata.compat import AwkArray

from ..utils import axis_len, deprecated, ensure_df_homogeneous, warn_once
from ..utils import (
axis_len,
convert_to_dict,
deprecated,
ensure_df_homogeneous,
warn_once,
)
from .access import ElementRef
from .index import _subset
from .views import as_view, view_update
Expand Down Expand Up @@ -115,7 +121,7 @@
return self._parent

def copy(self):
d = self._actual_class(self.parent, self._axis)
d = self._actual_class(self.parent, self._axis, {})
for k, v in self.items():
if isinstance(v, AwkArray):
# Shallow copy since awkward array buffers are immutable
Expand Down Expand Up @@ -272,6 +278,7 @@
return (self.parent.obs_names, self.parent.var_names)[self._axis]


# TODO: vals can't be None
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
class AxisArrays(AlignedActualMixin, AxisArraysBase):
def __init__(
self,
Expand All @@ -283,9 +290,9 @@
if axis not in (0, 1):
raise ValueError()
self._axis = axis
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class AxisArraysView(AlignedViewMixin, AxisArraysBase):
Expand Down Expand Up @@ -317,18 +324,21 @@

# TODO: I thought I had a more elegant solution to overriding this...
def copy(self) -> Layers:
d = self._actual_class(self.parent)
d = self._actual_class(self.parent, vals={})
for k, v in self.items():
d[k] = v.copy()
return d


class Layers(AlignedActualMixin, LayersBase):
def __init__(self, parent: AnnData, vals: Mapping | None = None):
def __init__(
self, parent: AnnData, axis: tuple[int] = (0, 1), vals: Mapping | None = None
):
assert axis == (0, 1), axis
self._parent = parent
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class LayersView(AlignedViewMixin, LayersBase):
Expand Down Expand Up @@ -382,9 +392,9 @@
if axis not in (0, 1):
raise ValueError()
self._axis = axis
self._data = dict()
if vals is not None:
self.update(vals)
for k, v in vals.items():
vals[k] = self._validate_value(v, k)
self._data = vals


class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase):
Expand All @@ -396,9 +406,39 @@
):
self.parent_mapping = parent_mapping
self._parent = parent_view
self.subset_idx = (subset_idx, subset_idx)
self.subset_idx = subset_idx
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
self._axis = parent_mapping._axis


PairwiseArraysBase._view_class = PairwiseArraysView
PairwiseArraysBase._actual_class = PairwiseArrays


class AlignedMappingProperty:
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
def __init__(self, name, cls, axis):
self.name = name
self.axis = axis
self.cls = cls

def __get__(self, obj, objtype=None):
if obj is None: # None check needed for AnnData.layers accessors
return self.cls

Check warning on line 425 in src/anndata/_core/aligned_mapping.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/_core/aligned_mapping.py#L425

Added line #L425 was not covered by tests
if obj.is_view:
parent_anndata = obj._adata_ref
idxs = (obj._oidx, obj._vidx)
parent_aligned_mapping = getattr(parent_anndata, self.name)
return parent_aligned_mapping._view(
obj, tuple(idxs[ax] for ax in parent_aligned_mapping.axes)
)
else:
return self.cls(obj, self.axis, getattr(obj, "_" + self.name))

def __set__(self, obj, value):
value = convert_to_dict(value)
_ = self.cls(obj, self.axis, value) # Validate
if obj.is_view:
obj._init_as_actual(obj.copy())
setattr(obj, "_" + self.name, value)

def __delete__(self, obj):
setattr(obj, self.name, dict())
Loading
Loading