Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

write race checking: tighten condition based on address space #601

Merged
merged 3 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,6 @@ earlier:
acc_k = 0.0f;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
a_fetch[lid(0)] = a[16 * gid(0) + lid(0)];
barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
{
for (int k = 0; k <= 15; ++k)
Expand Down
47 changes: 36 additions & 11 deletions loopy/schedule/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,11 +335,15 @@ def get_return_from_kernel_mapping(kernel):

# {{{ check for write races in accesses

def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table):
def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table,
address_space):
"""
Returns *True* if the execution instances of *insn_a* and *insn_b*, accessing
the same variable via access maps *map_a* and *map_b*, result in an access race.

:arg address_space: An instance of :class:`loopy.kernel.data.AddressSpace`
of the variable whose accesses are being checked for a race.

.. note::

The accesses ``map_a``, ``map_b`` lead to write races iff there exists 2
Expand All @@ -348,9 +352,12 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table):
import pymbolic.primitives as p
from loopy.symbolic import isl_set_from_expr, aff_from_expr, aff_to_expr
from loopy.kernel.data import (filter_iname_tags_by_type,
HardwareConcurrentTag)
HardwareConcurrentTag,
AddressSpace)
from loopy.kernel.tools import get_hw_axis_base_for_codegen

assert address_space in [AddressSpace.LOCAL, AddressSpace.GLOBAL]

gsize, lsize = knl.get_grid_size_upper_bounds(callables_table,
return_dict=True)

Expand Down Expand Up @@ -470,25 +477,40 @@ def _check_for_access_races(map_a, insn_a, map_b, insn_b, knl, callables_table):
# {{{ Step 5: create the set any(l.i.A != l.i.B) OR any(g.i.A != g.i.B)

space = set_a.space
unequal_global_id_set = isl.Set.empty(set_a.get_space())
unequal_local_id_set = isl.Set.empty(set_a.get_space())
unequal_group_id_set = isl.Set.empty(set_a.get_space())
equal_group_id_set = isl.BasicSet.universe(set_a.get_space())

for i_l in lsize:
lid_a = p.Variable(f"l.{i_l}.A")
lid_b = p.Variable(f"l.{i_l}.B")
unequal_global_id_set |= (isl_set_from_expr(space,
p.Comparison(lid_a, "!=", lid_b))
)
unequal_local_id_set |= (isl_set_from_expr(space,
p.Comparison(lid_a, "!=", lid_b))
)

for i_g in gsize:
gid_a = p.Variable(f"g.{i_g}.A")
gid_b = p.Variable(f"g.{i_g}.B")
unequal_global_id_set |= (isl_set_from_expr(space,
p.Comparison(gid_a, "!=", gid_b))
)
unequal_group_id_set |= (isl_set_from_expr(space,
p.Comparison(gid_a, "!=", gid_b))
)
equal_group_id_set &= (isl_set_from_expr(space,
p.Comparison(gid_a, "==", gid_b))
)

# }}}

return not (set_a & set_b & unequal_global_id_set).is_empty()
if address_space == AddressSpace.GLOBAL:
return not (set_a
& set_b
& (unequal_local_id_set
| unequal_group_id_set)
).is_empty()
else:
return not (set_a
& set_b
& unequal_local_id_set
& equal_group_id_set).is_empty()


class AccessMapDescriptor(enum.Enum):
Expand Down Expand Up @@ -582,7 +604,10 @@ def do_accesses_result_in_races(self, insn1, insn1_dir, insn2, insn2_dir,

return _check_for_access_races(insn1_amap, self.kernel.id_to_insn[insn1],
insn2_amap, self.kernel.id_to_insn[insn2],
self.kernel, self.callables_table)
self.kernel, self.callables_table,
(self.kernel
.get_var_descriptor(var_name)
.address_space))

# }}}

Expand Down
24 changes: 24 additions & 0 deletions test/test_loopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3644,6 +3644,30 @@ def test_barrier_non_zero_hw_lbound():
assert barrier_between(knl, "w_a", "w_b")


def test_no_unnecessary_lbarrier(ctx_factory):
    """Assert that linearization places no barrier between the instruction
    writing the local temporary ``s_a`` and the instruction reading it.

    Regression test: this would fail on loopy.git <= 268a7f4
    (issue reported by @thilinarmtb).
    """
    domain = "{[i_outer, i_inner]: 0 <= i_outer < n and 0 <= i_inner < 16}"
    # Built from adjacent literals so the kernel text does not depend on how
    # this source file happens to be indented.
    instructions = (
        "\n"
        "<> s_a[i_inner] = ai[i_outer * 16 + i_inner] {id=write_s_a}\n"
        "ao[i_outer * 16 + i_inner] = 2.0 * s_a[i_inner] "
        "{id=write_ao, dep=write_s_a}\n"
    )
    prg = lp.make_kernel(domain, instructions, assumptions="n>=0")

    # s_a is indexed by i_inner only; i_inner becomes local axis 0 and
    # i_outer becomes group axis 0, with s_a placed in the local address space.
    prg = lp.add_dtypes(prg, {"ai": np.float32})
    prg = lp.tag_inames(prg, {"i_inner": "l.0", "i_outer": "g.0"})
    prg = lp.set_temporary_address_space(prg, "s_a", "local")
    prg = lp.prioritize_loops(prg, "i_outer,i_inner")

    prg = lp.preprocess_kernel(prg)
    linearized = lp.get_one_linearized_kernel(
        prg.default_entrypoint, prg.callables_table)

    assert not barrier_between(linearized, "write_s_a", "write_ao")


if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
Expand Down
Loading