Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: Use grid tree for counting grid cell inclusion #4529

Open
wants to merge 41 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
8b3a320
Enable using the grid tree for counting
matthewturk Jun 21, 2023
8b2c75a
Avoid reusing bitarrays for now
matthewturk Jun 21, 2023
196a2a2
Don't count selection with fast_index for grids
matthewturk Jun 21, 2023
a93664a
Avoid data collection
matthewturk Jun 21, 2023
45deabe
Make refine_by and ref_factor work in grid trees
matthewturk Jun 21, 2023
001c96e
Special case the slice counting.
matthewturk Jun 21, 2023
827bb9e
Split pytest tests out
matthewturk Jun 22, 2023
61235df
Replicate the mask filling for rays in visit_grid_cells
matthewturk Jun 22, 2023
8b49103
Avoid using 'fast_index' for ray objects
matthewturk Jun 22, 2023
831d4a7
Don't ignore access for pytest, but ignore checksums
matthewturk Jun 22, 2023
075db4b
Disable grid tree for AMRVAC, as it only loads leaf grids
matthewturk Jun 23, 2023
bd55726
Allow multiple parents for grid tree
matthewturk Jun 23, 2023
222764c
Don't consider AMRGridPatch objects iterable
matthewturk Jun 23, 2023
57ce059
Update other invocations of GridTree
matthewturk Jun 23, 2023
041c9f5
Handle non spatial hierarchy
matthewturk Jun 24, 2023
218467f
Set min_level for yt data hierarchy
matthewturk Jun 24, 2023
150c84b
Set default min level to 0.
matthewturk Jun 26, 2023
f89e48e
Merge branch 'main' of github.com:yt-project/yt into grid_tree_count
matthewturk Aug 22, 2023
b3098bb
This should improve speed
matthewturk Aug 22, 2023
d4d7124
Add more noexcept nogil
matthewturk Aug 23, 2023
f961194
Disable grid_tree for athena_pp
matthewturk Aug 23, 2023
188390a
enzo_e has negative level grids
matthewturk Aug 23, 2023
8ce607f
Grid Tree should only be used in 3D
matthewturk Aug 24, 2023
dd033e8
Some more missed noexcepts
matthewturk Aug 24, 2023
e28e4b1
Some cases of arrays of refine_by are not 3D
matthewturk Aug 24, 2023
9bd7462
Simplify checks
matthewturk Aug 24, 2023
301d272
Clip child bounds to parent for recursive descent.
matthewturk Aug 24, 2023
f65df99
Clip internal boundaries to parent.
matthewturk Aug 25, 2023
f390a2b
Avoid parentage checks and use a bitmask to only visit once.
matthewturk Aug 25, 2023
e061fa6
Scalar-ize GDF attributes
matthewturk Aug 25, 2023
e987baf
Merge branch 'grid_tree_count' of github.com:matthewturk/yt into grid…
matthewturk Aug 25, 2023
fdca0de
Merge branch 'main' into grid_tree_count
matthewturk Sep 20, 2023
1260e4a
Merge branch 'grid_tree_count' of github.com:matthewturk/yt into grid…
matthewturk Oct 5, 2023
189c39d
This type of annotation does work!
matthewturk Jan 23, 2024
cb576e5
Merge branch 'main' of github.com:yt-project/yt into grid_tree_count
matthewturk Mar 22, 2024
69a5f80
Merge branch 'main' of github.com:yt-project/yt into grid_tree_count
matthewturk Apr 26, 2024
51cdcc9
Merge branch 'main' of github.com:yt-project/yt into grid_tree_count
matthewturk Apr 26, 2024
86e32db
Apply suggestions from code review
matthewturk Apr 26, 2024
7544934
move comment
matthewturk Apr 26, 2024
2a7cf8e
Ignore the test file
matthewturk Apr 26, 2024
7dc292b
Merge branch 'grid_tree_count' of github.com:matthewturk/yt into grid…
matthewturk Apr 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nose_ignores.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@
--ignore-file=test_alt_ray_tracers\.py
--ignore-file=test_minimal_representation\.py
--ignore-file=test_set_log_level\.py
--ignore-file=test_dataset_access\.py
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ addopts = '''
--ignore='yt/data_objects/level_sets/tests/test_clump_finding.py'
--ignore='yt/data_objects/tests/test_connected_sets.py'
--ignore='yt/data_objects/tests/test_data_containers.py'
--ignore='yt/data_objects/tests/test_dataset_access.py'
--ignore='yt/data_objects/tests/test_dataset_checksums.py'
--ignore='yt/data_objects/tests/test_disks.py'
--ignore='yt/data_objects/tests/test_particle_filter.py'
--ignore='yt/data_objects/tests/test_particle_trajectories.py'
Expand Down
1 change: 1 addition & 0 deletions tests/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ other_tests:
- "--ignore-file=test_gadget_pytest\\.py"
- "--ignore-file=test_vr_orientation\\.py"
- "--ignore-file=test_particle_trajectories_pytest\\.py"
- "--ignore-file=test_dataset_access\\.py"
- "--exclude-test=yt.frontends.gdf.tests.test_outputs.TestGDF"
- "--exclude-test=yt.frontends.adaptahop.tests.test_outputs"
- "--exclude-test=yt.frontends.stream.tests.test_stream_particles.test_stream_non_cartesian_particles"
Expand Down
10 changes: 6 additions & 4 deletions yt/data_objects/index_subobjects/grid_patch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import weakref

import numpy as np
from more_itertools import always_iterable

import yt.geometry.particle_deposit as particle_deposit
from yt._typing import FieldKey
Expand Down Expand Up @@ -126,10 +127,11 @@ def _setup_dx(self):
id = self.id - self._id_offset
ds = self.ds
index = self.index
if self.Parent is not None:
if not hasattr(self.Parent, "dds"):
self.Parent._setup_dx()
self.dds = self.Parent.dds.d / self.ds.refine_by
parents = list(always_iterable(self.Parent, base_type=(AMRGridPatch,)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the base_type argument necessary here? Or, in other words, is AMRGridPatch iterable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is indeed necessary, because AMRGridPatch exposes an iteration-like interface. This is a long-time issue, unfortunately.

if len(parents) > 0:
if not hasattr(parents[0], "dds"):
parents[0]._setup_dx()
self.dds = parents[0].dds.d / self.ds.refine_by
else:
LE, RE = (index.grid_left_edge[id, :].d, index.grid_right_edge[id, :].d)
self.dds = (RE - LE) / self.ActiveDimensions
Expand Down
21 changes: 4 additions & 17 deletions yt/data_objects/tests/test_dataset_access.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import numpy as np
from nose.tools import assert_raises
import pytest
chrishavlin marked this conversation as resolved.
Show resolved Hide resolved
from numpy.testing import assert_almost_equal, assert_equal

from yt.testing import (
fake_amr_ds,
fake_particle_ds,
fake_random_ds,
requires_file,
requires_module,
)
from yt.utilities.answer_testing.framework import data_dir_load
from yt.utilities.exceptions import YTDimensionalityError
from yt.visualization.line_plot import LineBuffer

Expand Down Expand Up @@ -66,7 +63,7 @@ def test_region_from_d():
assert_equal(reg1["gas", "density"], reg2["gas", "density"])

# Test with bad boundary initialization
with assert_raises(RuntimeError):
with pytest.raises(RuntimeError):
ds.r[0.3:0.1, 0.4:0.6, :]

# Test region by creating an arbitrary grid
Expand Down Expand Up @@ -120,9 +117,9 @@ def test_point_from_r():
assert_equal(pt1["gas", "density"], pt2["gas", "density"])

# Test YTDimensionalityError
with assert_raises(YTDimensionalityError) as ex:
with pytest.raises(YTDimensionalityError) as ex:
ds.r[0.5, 0.1]
assert_equal(str(ex.exception), "Dimensionality specified was 2 but we need 3")
assert_equal(str(ex.exception), "Dimensionality specified was 2 but we need 3")


def test_ray_from_r():
Expand Down Expand Up @@ -177,13 +174,3 @@ def test_particle_counts():

pds = fake_particle_ds(npart=128)
assert pds.particle_type_counts == {"io": 128}


g30 = "IsolatedGalaxy/galaxy0030/galaxy0030"


@requires_module("h5py")
@requires_file(g30)
def test_checksum():
assert fake_random_ds(16).checksum == "notafile"
assert data_dir_load(g30).checksum == "6169536e4b9f737ce3d3ad440df44c58"
15 changes: 15 additions & 0 deletions yt/data_objects/tests/test_dataset_checksums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from yt.testing import (
fake_random_ds,
requires_file,
requires_module,
)
from yt.utilities.answer_testing.framework import data_dir_load

g30 = "IsolatedGalaxy/galaxy0030/galaxy0030"


@requires_module("h5py")
@requires_file(g30)
def test_checksum():
assert fake_random_ds(16).checksum == "notafile"
assert data_dir_load(g30).checksum == "6169536e4b9f737ce3d3ad440df44c58"
4 changes: 4 additions & 0 deletions yt/frontends/amrvac/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ def _populate_grid_objects(self):
g._prepare_grid()
g._setup_dx()

@property
def grid_tree(self):
return None


class AMRVACDataset(Dataset):
_index_class = AMRVACHierarchy
Expand Down
4 changes: 4 additions & 0 deletions yt/frontends/athena_pp/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ def _populate_grid_objects(self):
g._setup_dx()
self.max_level = self._handle.attrs["MaxLevel"]

@property
def grid_tree(self):
return None


class AthenaPPDataset(Dataset):
_field_info_class = AthenaPPFieldInfo
Expand Down
6 changes: 5 additions & 1 deletion yt/frontends/gdf/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,11 @@ def _parse_parameter_file(self):
]
else:
self.data_software = "unknown"
sp = self._handle["/simulation_parameters"].attrs
# We have to turn everything that *is* a scalar into a scalar.
sp = dict(self._handle["/simulation_parameters"].attrs)
for k, v in sp.items():
if getattr(v, "size", None) == 1:
sp[k] = v.item(0)
Comment on lines +248 to +250
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see a couple problems here

  • modifying a dict while iterating over it is unsafe and may raise warnings/errors (use a copy)
  • using the existence of a size attribute to detect array-like objects seems a bit fragile. Could this be done with isinstance instead ? If not, I would suggest using a EAFP pattern instead of LBYL (basically, switch to try/except)
  • the comment also doesn't explain why this conversion is needed. It'd be more helpful if it did.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the reason I'm using size rather than a base class is to avoid h5py as an instance; we're actually checking here if it's an HDF5 attribute array, rather than a numpy array. I'm not sure that I agree with the suggested change -- particularly because we really do want this specific check, less so than whether or not it can be accessed like a dict.

if self.geometry is None:
geometry = just_one(sp.get("geometry", 0))
try:
Expand Down
7 changes: 4 additions & 3 deletions yt/frontends/stream/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,21 @@ def assign_particle_data(ds, pdata, bbox):
if len(ds.stream_handler.fields) > 1:
pdata.pop("number_of_particles", None)
num_grids = len(ds.stream_handler.fields)
parent_ids = ds.stream_handler.parent_ids
num_children = np.zeros(num_grids, dtype="int64")
# We're going to do this the slow way
mask = np.empty(num_grids, dtype="bool")
for i in range(num_grids):
np.equal(parent_ids, i, mask)
np.equal(ds.stream_handler.parent_ids, i, mask)
num_children[i] = mask.sum()
levels = ds.stream_handler.levels.astype("int64").ravel()
# We have to convert these to a list of lists
parent_ids = [[_] for _ in ds.stream_handler.parent_ids]
grid_tree = GridTree(
num_grids,
ds.stream_handler.left_edges,
ds.stream_handler.right_edges,
ds.stream_handler.dimensions,
ds.stream_handler.parent_ids,
parent_ids,
levels,
num_children,
)
Expand Down
3 changes: 3 additions & 0 deletions yt/frontends/ytdata/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,8 @@ def Children(self):


class YTDataHierarchy(GridIndex):
min_level = 0

def __init__(self, ds, dataset_type=None):
self.dataset_type = dataset_type
self.float_type = "float64"
Expand Down Expand Up @@ -709,6 +711,7 @@ def Children(self):

class YTNonspatialHierarchy(YTDataHierarchy):
grid = YTNonspatialGrid
min_level = 0

def _populate_grid_objects(self):
for g in self.grids:
Expand Down
60 changes: 58 additions & 2 deletions yt/geometry/_selection_routines/ray_selector.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ cdef void dt_sampler(
void *data) noexcept nogil:
cdef IntegrationAccumulator *am = <IntegrationAccumulator *> data
cdef int di = (index[0]*vc.dims[1]+index[1])*vc.dims[2]+index[2]
if am.child_mask[di] == 0 or enter_t == exit_t:
return
if am.child_mask != NULL:
if am.child_mask[di] == 0 or enter_t == exit_t:
return
am.hits += 1
am.t[di] = enter_t
am.dt[di] = (exit_t - enter_t)
Expand Down Expand Up @@ -74,6 +75,61 @@ cdef class RaySelector(SelectorObject):
if total == 0: return None, 0
return mask.astype("bool"), total

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef void visit_grid_cells(self, GridVisitorData *data,
grid_visitor_function *func,
np.uint8_t *cached_mask = NULL) noexcept nogil:
# AT PRESENT THIS DOES NOT WORK.
matthewturk marked this conversation as resolved.
Show resolved Hide resolved
# I am not entirely sure why, which is why I have disallowed
# using fast index for the ray object for grids.
# It seems that there is undefined behavior, as I'm getting the results
# of empty arrays passed in, and it shows up in test_dataset_access.py.
# I am not keen on giving up, but I think it's a suitably short enough
# use case that it will suffice to use the old-style selection and
# indexing for it.
cdef int i
cdef int total = 0
cdef int this_level = 0
cdef int level = data.grid.level
if level < self.min_level or level > self.max_level:
return
if level == self.max_level:
this_level = 1
cdef np.uint64_t size = data.grid.dims[0] * data.grid.dims[1] * data.grid.dims[2]
cdef IntegrationAccumulator *ia
ia = <IntegrationAccumulator *> malloc(sizeof(IntegrationAccumulator))
cdef VolumeContainer vc
ia.t = <np.float64_t *> malloc(sizeof(np.float64_t) * size)
ia.dt = <np.float64_t *> malloc(sizeof(np.float64_t) * size)
for i in range(size):
ia.dt[i] = -1
ia.child_mask = NULL
ia.hits = 0
for i in range(3):
vc.left_edge[i] = data.grid.left_edge[i]
vc.right_edge[i] = data.grid.right_edge[i]
vc.dds[i] = (vc.right_edge[i] - vc.left_edge[i])/data.grid.dims[i]
vc.idds[i] = 1.0/vc.dds[i]
vc.dims[i] = data.grid.dims[i]
walk_volume(&vc, self.p1, self.vec, dt_sampler, <void*> ia)
# This is now a flat loop, since we're not using numpy arrays
for i in range(size):
if this_level == 1:
child_masked = 0
else:
child_masked = check_child_masked(data)
if child_masked == 0 and ia.dt[i] >= 0:
selected = 1
else:
selected = 0
func(data, selected)
data.global_index += 1
free(ia.t)
free(ia.dt)
free(ia)

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
Expand Down
25 changes: 13 additions & 12 deletions yt/geometry/_selection_routines/selector_object.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ cdef class SelectorObject:
@cython.cdivision(True)
cdef void visit_grid_cells(self, GridVisitorData *data,
grid_visitor_function *func,
np.uint8_t *cached_mask = NULL):
np.uint8_t *cached_mask = NULL) noexcept nogil:
# This function accepts a grid visitor function, the data that
# corresponds to the current grid being examined (the most important
# aspect of which is the .grid attribute, along with index values and
Expand All @@ -516,8 +516,8 @@ cdef class SelectorObject:
cdef np.float64_t left_edge[3]
cdef np.float64_t right_edge[3]
cdef np.float64_t dds[3]
cdef int dim[3]
cdef int this_level = 0, level, i
cdef np.int64_t si[3], ei[3]
cdef np.float64_t pos[3]
level = data.grid.level
if level < self.min_level or level > self.max_level:
Expand All @@ -529,17 +529,18 @@ cdef class SelectorObject:
left_edge[i] = data.grid.left_edge[i]
right_edge[i] = data.grid.right_edge[i]
dds[i] = (right_edge[i] - left_edge[i])/data.grid.dims[i]
dim[i] = data.grid.dims[i]
si[i] = 0
ei[i] = data.grid.dims[i]
with nogil:
pos[0] = left_edge[0] + dds[0] * 0.5
data.pos[0] = 0
for i in range(dim[0]):
pos[1] = left_edge[1] + dds[1] * 0.5
data.pos[1] = 0
for j in range(dim[1]):
pos[2] = left_edge[2] + dds[2] * 0.5
data.pos[2] = 0
for k in range(dim[2]):
pos[0] = left_edge[0] + dds[0] * (si[0] + 0.5)
data.pos[0] = si[0]
for i in range(si[0], ei[0]):
pos[1] = left_edge[1] + dds[1] * (si[1] + 0.5)
data.pos[1] = si[1]
for j in range(si[1], ei[1]):
pos[2] = left_edge[2] + dds[2] * (si[2] + 0.5)
data.pos[2] = si[2]
for k in range(si[2], ei[2]):
# We short-circuit if we have a cache; if we don't, we
# only set selected to true if it's *not* masked by a
# child and it *is* selected.
Expand Down
76 changes: 76 additions & 0 deletions yt/geometry/_selection_routines/slice_selector.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,82 @@ cdef class SliceSelector(SelectorObject):
if total == 0: return None, 0
return mask.astype("bool"), total

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef void visit_grid_cells(self, GridVisitorData *data,
grid_visitor_function *func,
np.uint8_t *cached_mask = NULL) noexcept nogil:
# This function accepts a grid visitor function, the data that
# corresponds to the current grid being examined (the most important
# aspect of which is the .grid attribute, along with index values and
# void* pointers to arrays) and a possibly-pre-generated cached mask.
# Each cell is visited with the grid visitor function.
cdef np.float64_t left_edge[3]
cdef np.float64_t right_edge[3]
cdef np.float64_t dds[3]
cdef int dim[3]
cdef int this_level = 0, level, i
cdef np.float64_t pos[3]
level = data.grid.level
if level < self.min_level or level > self.max_level:
return
if level == self.max_level:
this_level = 1
cdef np.uint8_t child_masked, selected
for i in range(3):
left_edge[i] = data.grid.left_edge[i]
right_edge[i] = data.grid.right_edge[i]
dds[i] = (right_edge[i] - left_edge[i])/data.grid.dims[i]
dim[i] = data.grid.dims[i]
#
# This is the special casing for slices
#
icoord = <np.uint64_t>(
(self.coord - left_edge[self.axis])/dds[self.axis])
# clip coordinate to avoid seg fault below if we're
# exactly at a grid boundary
ind = iclip(icoord, 0, dim[self.axis]-1)
with nogil:
pos[0] = left_edge[0] + dds[0] * 0.5
data.pos[0] = 0
for i in range(dim[0]):
pos[1] = left_edge[1] + dds[1] * 0.5
data.pos[1] = 0
for j in range(dim[1]):
pos[2] = left_edge[2] + dds[2] * 0.5
data.pos[2] = 0
for k in range(dim[2]):
# We short-circuit if we have a cache; if we don't, we
# only set selected to true if it's *not* masked by a
# child and it *is* selected.
if cached_mask != NULL:
selected = ba_get_value(cached_mask,
data.global_index)
else:
if this_level == 1:
child_masked = 0
else:
child_masked = check_child_masked(data)
if child_masked == 0:
if ((self.axis == 0 and i == ind) or
(self.axis == 1 and j == ind) or
(self.axis == 2 and k == ind)):
selected = 1
else:
selected = 0
#selected = self.select_cell(pos, dds)
else:
selected = 0
func(data, selected)
data.global_index += 1
pos[2] += dds[2]
data.pos[2] += 1
pos[1] += dds[1]
data.pos[1] += 1
pos[0] += dds[0]
data.pos[0] += 1

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
Expand Down
Loading