Skip to content

Commit

Permalink
Merge pull request #389 from ecmwf-ifs/naan-skip-annotate
Browse files Browse the repository at this point in the history
Skip privatization of arrays with existing data declarations
  • Loading branch information
reuterbal authored Oct 15, 2024
2 parents b6eb817 + 66215b6 commit 2bbe704
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 31 deletions.
64 changes: 56 additions & 8 deletions loki/transformations/single_column/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

from collections import defaultdict
from loki.batch import Transformation
from loki.expression import symbols as sym, is_dimension_constant
from loki.ir import (
nodes as ir, FindNodes, FindVariables, Transformer,
pragmas_attached, is_loki_pragma, get_pragma_parameters
pragmas_attached, is_loki_pragma, get_pragma_parameters,
pragma_regions_attached
)
from loki.logging import info
from loki.tools import as_tuple, flatten
Expand Down Expand Up @@ -192,11 +194,55 @@ def transform_subroutine(self, routine, **kwargs):
# Mark all non-parallel loops as `!$acc loop seq`
self.annotate_sequential_loops(routine)

with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
for loop in driver_loops:
self.annotate_driver_loop(loop)
with pragma_regions_attached(routine):
with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
# Find variables with existing OpenACC data declarations
acc_vars = self.find_acc_vars(routine, targets)

driver_loops = find_driver_loops(section=routine.body, targets=targets)
for loop in driver_loops:
self.annotate_driver_loop(loop, acc_vars.get(loop, []))

def find_acc_vars(self, routine, targets):
    """
    Find variables already specified in OpenACC data clauses.

    Every ``!$acc data`` pragma region in the routine body that encloses
    at least one driver loop is inspected, and the names of variables
    covered by that region's data clauses are recorded per driver loop.

    Parameters
    ----------
    routine : :any:`Subroutine`
        Subroutine to apply this transformation to.
    targets : list or string
        List of subroutines that are to be considered as part of
        the transformation call tree.

    Returns
    -------
    defaultdict
        Mapping of each driver :any:`Loop` found inside an ``!$acc data``
        region to a list of lower-case variable names that are covered
        by the data clauses of that region. Loops outside any such
        region have no entry (lookup via ``.get(loop, [])``).
    """

    # Clauses whose arguments name variables with an explicit data declaration
    data_clauses = ('present', 'copy', 'copyin', 'copyout', 'deviceptr')

    acc_vars = defaultdict(list)

    for region in FindNodes(ir.PragmaRegion).visit(routine.body):
        if region.pragma.keyword.lower() != 'acc':
            continue

        parameters = get_pragma_parameters(
            region.pragma, starts_with='data', only_loki_pragmas=False
        )
        if not parameters:
            # Not an ``!$acc data`` region, or one without any clauses
            continue

        driver_loops = find_driver_loops(section=region.body, targets=targets)
        if not driver_loops:
            continue

        # Variables named explicitly in data clauses. Empty entries are
        # dropped: splitting the ``''`` fallback of a missing clause would
        # otherwise add an empty name for every absent category.
        explicit_vars = [
            name
            for category in data_clauses
            for name in (p.strip().lower() for p in (parameters.get(category) or '').split(','))
            if name
        ]

        # A ``default`` clause other than ``none`` implicitly covers every
        # variable used inside the loop; with ``default(none)`` only the
        # explicitly listed variables carry a data declaration.
        default = parameters.get('default', None)
        covers_all = bool(default) and 'none' not in [p.strip().lower() for p in default.split(',')]

        for loop in driver_loops:
            if covers_all:
                acc_vars[loop] += [var.name.lower() for var in FindVariables(unique=True).visit(loop)]
            if explicit_vars:
                acc_vars[loop] += explicit_vars

    return acc_vars

@classmethod
def device_alloc_column_locals(cls, routine, column_locals):
Expand All @@ -219,14 +265,16 @@ def device_alloc_column_locals(cls, routine, column_locals):
routine.body.prepend((ir.Comment(''), pragma, ir.Comment('')))
routine.body.append((ir.Comment(''), pragma_post, ir.Comment('')))

def annotate_driver_loop(self, loop):
def annotate_driver_loop(self, loop, acc_vars):
"""
Annotate driver block loop with ``'openacc'`` pragmas.
Parameters
----------
loop : :any:`Loop`
Driver :any:`Loop` to wrap in ``'opencc'`` pragmas.
Driver :any:`Loop` to wrap in ``'openacc'`` pragmas.
acc_vars : list
Variables already declared in ``'openacc'`` data directives.
"""

# Mark driver loop as "gang parallel".
Expand All @@ -239,7 +287,7 @@ def annotate_driver_loop(self, loop):
# Filter out arrays that are explicitly allocated with block dimension
sizes = self.block_dim.size_expressions
arrays = [v for v in arrays if not any(d in sizes for d in as_tuple(v.shape))]
private_arrays = ', '.join(set(v.name for v in arrays))
private_arrays = ', '.join(set(v.name for v in arrays if not v.name_parts[0].lower() in acc_vars))
private_clause = '' if not private_arrays else f' private({private_arrays})'

for pragma in as_tuple(loop.pragma):
Expand Down
29 changes: 20 additions & 9 deletions loki/transformations/single_column/tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from loki import Subroutine, Sourcefile, Dimension, fgen
from loki.batch import ProcedureItem
from loki.expression import Scalar, Array, IntLiteral, RangeIndex
from loki.expression import Scalar, Array, IntLiteral
from loki.frontend import available_frontends, OMNI, OFP
from loki.ir import (
FindNodes, Assignment, CallStatement, Conditional, Loop,
Expand Down Expand Up @@ -236,32 +236,40 @@ def test_scc_demote_transformation(frontend, horizontal):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_openacc(frontend, horizontal, blocking):
@pytest.mark.parametrize('acc_data', ['default', 'copyin', None])
def test_scc_annotate_openacc(frontend, horizontal, blocking, acc_data):
"""
Test the correct addition of OpenACC pragmas to SCC format code (no hoisting).
"""

fcode_driver = """
fcode_driver = f"""
SUBROUTINE column_driver(nlon, nproma, nlev, nz, q, nb)
INTEGER, INTENT(IN) :: nlon, nz, nb ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz,nb)
REAL :: other_var(nlon)
INTEGER :: b, start, end
start = 1
end = nlon
{'!$acc data default(present)' if acc_data == 'default' else ''}
{'!$acc data copyin(other_var)' if acc_data == 'copyin' else ''}
!
do b=1, nb
call compute_column(start, end, nlon, nproma, nz, q(:,:,b))
call compute_column(start, end, nlon, nproma, nz, q(:,:,b), other_var)
end do
!
{'!$acc end data' if acc_data else ''}
END SUBROUTINE column_driver
"""

fcode_kernel = """
SUBROUTINE compute_column(start, end, nlon, nproma, nlev, nz, q)
SUBROUTINE compute_column(start, end, nlon, nproma, nlev, nz, q, other_var)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL, INTENT(IN) :: other_var
REAL :: t(nlon,nz)
REAL :: a(nlon)
REAL :: d(nproma)
Expand Down Expand Up @@ -326,8 +334,11 @@ def test_scc_annotate_openacc(frontend, horizontal, blocking):
with pragmas_attached(driver, Loop):
driver_loops = FindNodes(Loop).visit(driver.body)
assert len(driver_loops) == 1
assert driver_loops[0].pragma[0].keyword == 'acc'
assert driver_loops[0].pragma[0].content == 'parallel loop gang vector_length(nlon)'
assert driver_loops[0].pragma[0].keyword.lower() == 'acc'
if acc_data:
assert driver_loops[0].pragma[0].content == 'parallel loop gang vector_length(nlon)'
else:
assert driver_loops[0].pragma[0].content == 'parallel loop gang private(other_var) vector_length(nlon)'


@pytest.mark.parametrize('frontend', available_frontends())
Expand Down Expand Up @@ -750,7 +761,7 @@ def test_scc_multiple_acc_pragmas(frontend, horizontal, blocking):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking):
def test_scc_annotate_routine_seq_pragma(frontend, blocking):
"""
Test that `!$loki routine seq` pragmas are replaced correctly by
`!$acc routine seq` pragmas.
Expand Down Expand Up @@ -790,7 +801,7 @@ def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_annotate_empty_data_clause(frontend, horizontal, blocking):
def test_scc_annotate_empty_data_clause(frontend, blocking):
"""
Test that we do not generate empty `!$acc data` clauses.
"""
Expand Down
4 changes: 2 additions & 2 deletions loki/transformations/single_column/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def process_driver(self, routine, targets=()):
"""

with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
driver_loops = find_driver_loops(section=routine.body, targets=targets)

# remove vector loops
driver_loop_map = {}
Expand Down Expand Up @@ -435,7 +435,7 @@ def transform_subroutine(self, routine, **kwargs):

if role == 'driver':
with pragmas_attached(routine, ir.Loop):
driver_loops = find_driver_loops(routine=routine, targets=targets)
driver_loops = find_driver_loops(section=routine.body, targets=targets)

for loop in driver_loops:
# Revector all marked sections within the driver loop body
Expand Down
16 changes: 8 additions & 8 deletions loki/transformations/tests/test_loop_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_1d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_1d_splitting_multi_var(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_2d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -192,7 +192,7 @@ def test_3d_splitting(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_1d_blocking(tmp_path, frontend, block_size, n):
routine = Subroutine.from_source(fcode, frontend=frontend)
loops = FindNodes(ir.Loop).visit(routine.ir)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)

num_loops = len(loops)
Expand Down Expand Up @@ -309,7 +309,7 @@ def test_1d_blocking_multi_intent(tmp_path, frontend, block_size, n):
routine = Subroutine.from_source(fcode, frontend=frontend)
loops = FindNodes(ir.Loop).visit(routine.ir)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)

num_loops = len(loops)
Expand Down Expand Up @@ -372,7 +372,7 @@ def test_2d_blocking(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down Expand Up @@ -432,7 +432,7 @@ def test_3d_blocking(tmp_path, frontend, block_size, n):
num_loops = len(loops)
num_vars = len(routine.variable_map)
with pragmas_attached(routine, Loop):
loops = find_driver_loops(routine,
loops = find_driver_loops(routine.body,
targets=None)
splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size)
loops = FindNodes(ir.Loop).visit(routine.ir)
Expand Down
8 changes: 4 additions & 4 deletions loki/transformations/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,16 +595,16 @@ def is_driver_loop(loop, targets):
return False


def find_driver_loops(routine, targets):
def find_driver_loops(section, targets):
"""
Find and return all driver loops of a given `routine`.
Find and return all driver loops in a given `section`.
A *driver loop* is specified either by a call to a routine within
`targets` or by the pragma `!$loki driver-loop`.
Parameters
----------
routine : :any:`Subroutine`
section : :any:`Section` or tuple
The section or tuple of nodes in which to find the driver loops.
targets : list or string
List of subroutines that are to be considered as part of
Expand All @@ -613,7 +613,7 @@ def find_driver_loops(routine, targets):

driver_loops = []
nested_driver_loops = []
for loop in FindNodes(ir.Loop).visit(routine.body):
for loop in FindNodes(ir.Loop).visit(section):
if loop in nested_driver_loops:
continue

Expand Down

0 comments on commit 2bbe704

Please sign in to comment.