Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DEVICEPTR annotations to data region in driver loop #145

Merged
merged 3 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmake/loki_transform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ endmacro()

function( loki_transform_convert )

set( options CPP DATA_OFFLOAD REMOVE_OPENMP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS )
set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS )
set( oneValueArgs MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH )
set( multiValueArgs OUTPUT DEPENDS INCLUDES INCLUDE HEADERS HEADER DEFINITIONS DEFINE OMNI_INCLUDE XMOD )

Expand Down Expand Up @@ -236,6 +236,10 @@ function( loki_transform_convert )
list( APPEND _ARGS --remove-openmp )
endif()

if( ${_PAR_ASSUME_DEVICEPTR} )
list( APPEND _ARGS --assume-deviceptr )
endif()

if( ${_PAR_GLOBAL_VAR_OFFLOAD} )
list( APPEND _ARGS --global-var-offload )
endif()
Expand Down
6 changes: 4 additions & 2 deletions scripts/loki_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def cli(debug):
help='Run transformation to insert custom data offload regions.')
@click.option('--remove-openmp', is_flag=True, default=False,
help='Removes existing OpenMP pragmas in "!$loki data" regions.')
@click.option('--assume-deviceptr', is_flag=True, default=False,
help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.')
@click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']),
help='Frontend parser to use (default FP)')
@click.option('--trim-vector-sections', is_flag=True, default=False,
Expand All @@ -102,7 +104,7 @@ def cli(debug):
help="Remove derived-type arguments and replace with canonical arguments")
def convert(
mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod,
data_offload, remove_openmp, frontend, trim_vector_sections,
data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections,
global_var_offload, remove_derived_args
):
"""
Expand Down Expand Up @@ -165,7 +167,7 @@ def convert(
# Insert data offload regions for GPUs and remove OpenMP threading directives
use_claw_offload = True
if data_offload:
offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp)
offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp, assume_deviceptr=assume_deviceptr)
scheduler.process(transformation=offload_transform)
use_claw_offload = not offload_transform.has_data_regions

Expand Down
27 changes: 17 additions & 10 deletions transformations/tests/test_data_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@

from loki import (
Sourcefile, FindNodes, Pragma, PragmaRegion, Loop,
CallStatement, pragma_regions_attached
CallStatement, pragma_regions_attached, get_pragma_parameters
)
from conftest import available_frontends
from transformations import DataOffloadTransformation


@pytest.mark.parametrize('frontend', available_frontends())
def test_data_offload_region_openacc(frontend):
@pytest.mark.parametrize('assume_deviceptr', [True, False])
def test_data_offload_region_openacc(frontend, assume_deviceptr):
"""
Test the creation of a simple device data offload region
(`!$acc update`) from a `!$loki data` region with a single
Expand Down Expand Up @@ -56,14 +57,20 @@ def test_data_offload_region_openacc(frontend):
kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine']
driver.enrich_calls(kernel)

driver.apply(DataOffloadTransformation(), role='driver', targets=['kernel_routine'])

assert len(FindNodes(Pragma).visit(driver.body)) == 2
assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body))
transformed = driver.to_fortran()
assert 'copyin( a )' in transformed
assert 'copy( b )' in transformed
assert 'copyout( c )' in transformed
driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr), role='driver', targets=['kernel_routine'])

pragmas = FindNodes(Pragma).visit(driver.body)
assert len(pragmas) == 2
assert all(p.keyword == 'acc' for p in pragmas)
if assume_deviceptr:
assert 'deviceptr' in pragmas[0].content
params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False)
assert all(var in params['deviceptr'] for var in ('a', 'b', 'c'))
else:
transformed = driver.to_fortran()
assert 'copyin( a )' in transformed
assert 'copy( b )' in transformed
assert 'copyout( c )' in transformed


@pytest.mark.parametrize('frontend', available_frontends())
Expand Down
20 changes: 16 additions & 4 deletions transformations/transformations/data_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,17 @@ class DataOffloadTransformation(Transformation):
----------
remove_openmp : bool
Remove any existing OpenMP pragmas inside the marked region.
assume_deviceptr : bool
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring and actual argument are out of sync here now. Can we call the argument assume_deviceptr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another silly oversight. Thanks for spotting this!

Mark all offloaded arrays as true device-pointers if data offload
is being managed outside of structured OpenACC data regions.
"""

def __init__(self, **kwargs):
# Start out with no data regions recorded; this flag exists so that
# down-stream processing can tell whether any data regions were added
self.has_data_regions = False
# Optional keyword flags; each falls back to False when not supplied
self.remove_openmp = kwargs.get('remove_openmp', False)
self.assume_deviceptr = kwargs.get('assume_deviceptr', False)

def transform_subroutine(self, routine, **kwargs):
"""
Expand Down Expand Up @@ -134,10 +138,18 @@ def insert_data_offload_pragmas(self, routine, targets):
inoutargs = tuple(dict.fromkeys(inoutargs))

# Now generate the pre- and post-pragmas (OpenACC)
copyin = f'copyin({", ".join(inargs)})' if inargs else ''
copy = f'copy({", ".join(inoutargs)})' if inoutargs else ''
copyout = f'copyout({", ".join(outargs)})' if outargs else ''
pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}')
if self.assume_deviceptr:
offload_args = inargs + outargs + inoutargs
if offload_args:
deviceptr = f' deviceptr({", ".join(offload_args)})'
else:
deviceptr = ''
pragma = Pragma(keyword='acc', content=f'data{deviceptr}')
else:
copyin = f'copyin({", ".join(inargs)})' if inargs else ''
copy = f'copy({", ".join(inoutargs)})' if inoutargs else ''
copyout = f'copyout({", ".join(outargs)})' if outargs else ''
pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}')
pragma_post = Pragma(keyword='acc', content='end data')
pragma_map[region.pragma] = pragma
pragma_map[region.pragma_post] = pragma_post
Expand Down
Loading