Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCC re-vector at driver level and OpenMP offload #480

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion loki/transformations/build_system/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from loki.backend import fgen
from loki.batch import Transformation
from loki.ir import (
CallStatement, Import, Interface, FindNodes, FindInlineCalls, Transformer
CallStatement, Import, Interface, FindNodes, FindInlineCalls, Transformer,
Pragma, get_pragma_parameters
)
from loki.logging import warning
from loki.module import Module
Expand Down Expand Up @@ -131,6 +132,8 @@ def transform_module(self, module, **kwargs):
),
)

self.rename_omp_target_declare_pragmas(module)

targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets')))
if self.replace_ignore_items and (item := kwargs.get('item')):
targets += tuple(str(i).lower() for i in item.ignore)
Expand Down Expand Up @@ -188,6 +191,17 @@ def transform_subroutine(self, routine, **kwargs):
# Re-generate C-style interface header
self.generate_interfaces(routine)


def rename_omp_target_declare_pragmas(self, module):
    """
    Append the transformation suffix to the names listed in
    ``!$omp declare target(<name>[, <name>, ...])`` pragmas.

    All :any:`Pragma` nodes found in the specification part of
    :data:`module` are inspected and matching pragmas are updated
    in place.

    Parameters
    ----------
    module : :any:`Module`
        The module whose ``spec`` is searched for pragmas.

    Notes
    -----
    The pragma content is rewritten to ``declare target(...)`` only;
    any further clauses on the original pragma are not carried over.
    """
    # Hoisted out of the loop; an unset suffix is treated as "no suffix"
    suffix = (self.suffix or '').lower()

    for pragma in FindNodes(Pragma).visit(module.spec):
        # ``declare target`` is an OpenMP directive; leave pragmas of
        # other keywords untouched
        if str(pragma.keyword).lower() != 'omp':
            continue

        pragma_parameters = get_pragma_parameters(pragma, starts_with='declare', only_loki_pragmas=False)
        if 'target' not in pragma_parameters:
            continue

        names = pragma_parameters['target']
        # NOTE(review): a bare ``declare target`` without a parenthesised
        # list presumably yields a non-string value here (e.g. ``None``)
        # - confirm against ``get_pragma_parameters``. There is nothing
        # to rename in that case.
        if not isinstance(names, str):
            continue

        # Rename every entry of the list, not just the last one
        renamed = [f'{name.strip()}{suffix}' for name in names.split(',') if name.strip()]
        if not renamed:
            continue

        pragma._update(content=f"declare target({', '.join(renamed)})")


def remove_inactive_ir_nodes(self, body, transformed_scope_name, **kwargs):
"""
Utility to filter :any:`Scope` nodes in :data:`body` to include only
Expand Down
59 changes: 38 additions & 21 deletions loki/transformations/data_offload/offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class DataOffloadTransformation(Transformation):

Parameters
----------
directive : str, optional
Pragma/Directive language to be used (OpenACC via 'openacc'
or OpenMP offload via 'omp-gpu')
remove_openmp : bool
Remove any existing OpenMP pragmas inside the marked region.
present_on_device : bool
Expand All @@ -37,9 +40,11 @@ class DataOffloadTransformation(Transformation):
is being managed outside of structured OpenACC data regions.
"""

def __init__(self, **kwargs):
def __init__(self, directive='openacc', **kwargs):
# We need to record if we actually added any, so
# that down-stream processing can use that info
self.directive = directive
assert self.directive in ['openacc', 'omp-gpu']
self.has_data_regions = False
self.remove_openmp = kwargs.get('remove_openmp', False)
self.assume_deviceptr = kwargs.get('assume_deviceptr', False)
Expand Down Expand Up @@ -148,28 +153,40 @@ def insert_data_offload_pragmas(self, routine, targets):
outargs = tuple(dict.fromkeys(outargs))
inoutargs = tuple(dict.fromkeys(inoutargs))

# Now generate the pre- and post pragmas (OpenACC)
if self.present_on_device:
if self.assume_deviceptr:
offload_args = inargs + outargs + inoutargs
if offload_args:
deviceptr = f' deviceptr({", ".join(offload_args)})'
# Now generate the pre- and post pragmas (OpenACC or OpenMP)
pragma = None
pragma_post = None
if self.directive == 'openacc':
if self.present_on_device:
if self.assume_deviceptr:
offload_args = inargs + outargs + inoutargs
if offload_args:
deviceptr = f' deviceptr({", ".join(offload_args)})'
else:
deviceptr = ''
pragma = Pragma(keyword='acc', content=f'data{deviceptr}')
else:
deviceptr = ''
pragma = Pragma(keyword='acc', content=f'data{deviceptr}')
offload_args = inargs + outargs + inoutargs
if offload_args:
present = f' present({", ".join(offload_args)})'
else:
present = ''
pragma = Pragma(keyword='acc', content=f'data{present}')
else:
offload_args = inargs + outargs + inoutargs
if offload_args:
present = f' present({", ".join(offload_args)})'
else:
present = ''
pragma = Pragma(keyword='acc', content=f'data{present}')
else:
copyin = f'copyin({", ".join(inargs)})' if inargs else ''
copy = f'copy({", ".join(inoutargs)})' if inoutargs else ''
copyout = f'copyout({", ".join(outargs)})' if outargs else ''
pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}')
pragma_post = Pragma(keyword='acc', content='end data')
copyin = f'copyin({", ".join(inargs)})' if inargs else ''
copy = f'copy({", ".join(inoutargs)})' if inoutargs else ''
copyout = f'copyout({", ".join(outargs)})' if outargs else ''
pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}')
pragma_post = Pragma(keyword='acc', content='end data')
elif self.directive == 'omp-gpu':
if self.present_on_device:
... # TODO: OpenMP offload if self.present_on_device
else:
copyin = f'map(to: {", ".join(inargs)})' if inargs else ''
copy = f'map(tofrom:{", ".join(inoutargs)})' if inoutargs else ''
copyout = f'map(from: {", ".join(outargs)})' if outargs else ''
pragma = Pragma(keyword='omp', content=f'target data {copyin} {copy} {copyout}')
pragma_post = Pragma(keyword='omp', content='end target data')
Comment on lines +156 to +189
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think something that would be great here would be an abstraction of the encoded concept: We inject statements to perform data allocation and movement, by

  1. specifying a list of variables that are in/out/inout/create.
  2. specifying the programming model.
  3. specifying the IR (PragmaRegion) to apply this to

I would create an abstract interface that takes this information and applies it to a region, with implementations for OpenACC, OpenMP, FIELD_API etc. That makes this easier to test standalone and then to re-use everywhere (offload trafo, pool allocator, global var offload, ...)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think we have a rudimentary version of this for FIELD-API only already, although it would probably need extending/generalising:
https://github.com/ecmwf-ifs/loki/blob/main/loki/transformations/field_api.py#L33

I fully agree though, that having a common abstraction for this would be great.

pragma_map[region.pragma] = pragma
pragma_map[region.pragma_post] = pragma_post

Expand Down
8 changes: 8 additions & 0 deletions loki/transformations/pool_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,14 @@ def _get_stack_storage_and_size_var(self, routine, stack_size):
body_prepend += [pragma_data_start]
pragma_data_end = Pragma(keyword='acc', content='end data')
body_append += [pragma_data_end]
elif self.directive == 'omp-gpu':
pragma_data_start = Pragma(
keyword='omp',
content=f'target enter data map(alloc: {stack_storage.name})' # pylint: disable=no-member
)
body_prepend += [pragma_data_start]
pragma_data_end = Pragma(keyword='omp', content=f'target exit data map(delete: {stack_storage.name})') # pylint: disable=no-member
body_append += [pragma_data_end]
body_append += [stack_dealloc]

# Inject new variables and body nodes
Expand Down
Loading
Loading