From 10d3945440cf66fff5dec6134d36e152dc234f60 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 7 Sep 2023 12:07:18 +0200 Subject: [PATCH] DataOffloadTrafo: added option to declare kernel args as true device pointers --- cmake/loki_transform.cmake | 6 ++++- scripts/loki_transform.py | 6 +++-- transformations/tests/test_data_offload.py | 27 ++++++++++++------- .../transformations/data_offload.py | 18 ++++++++++--- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index e812d4093..acabaf4e0 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -199,7 +199,7 @@ endmacro() function( loki_transform_convert ) - set( options CPP DATA_OFFLOAD REMOVE_OPENMP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) + set( options CPP DATA_OFFLOAD REMOVE_OPENMP DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) set( oneValueArgs MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH ) set( multiValueArgs OUTPUT DEPENDS INCLUDES INCLUDE HEADERS HEADER DEFINITIONS DEFINE OMNI_INCLUDE XMOD ) @@ -236,6 +236,10 @@ function( loki_transform_convert ) list( APPEND _ARGS --remove-openmp ) endif() + if( ${_PAR_DEVICEPTR} ) + list( APPEND _ARGS --deviceptr ) + endif() + if( ${_PAR_GLOBAL_VAR_OFFLOAD} ) list( APPEND _ARGS --global-var-offload ) endif() diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 32be60cb5..bb886cf3d 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -96,6 +96,8 @@ def cli(debug): help='Run transformation to insert custom data offload regions.') @click.option('--remove-openmp', is_flag=True, default=False, help='Removes existing OpenMP pragmas in "!$loki data" regions.') +@click.option('--deviceptr', is_flag=True, default=False, + help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), help='Frontend parser to use (default FP)') @click.option('--trim-vector-sections', is_flag=True, default=False, @@ -106,7 +108,7 @@ def cli(debug): help="Remove derived-type arguments and replace with canonical arguments") def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, - data_offload, remove_openmp, frontend, trim_vector_sections, + data_offload, remove_openmp, deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args ): """ @@ -169,7 +171,7 @@ def convert( # Insert data offload regions for GPUs and remove OpenMP threading directives use_claw_offload = True if data_offload: - offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp) + offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp, deviceptr=deviceptr) scheduler.process(transformation=offload_transform) use_claw_offload = not offload_transform.has_data_regions diff --git a/transformations/tests/test_data_offload.py b/transformations/tests/test_data_offload.py index d2d8577d0..f6562ae12 100644 --- a/transformations/tests/test_data_offload.py +++ b/transformations/tests/test_data_offload.py @@ -9,14 +9,15 @@ from loki import ( Sourcefile, FindNodes, Pragma, PragmaRegion, Loop, - CallStatement, pragma_regions_attached + CallStatement, pragma_regions_attached, get_pragma_parameters ) from conftest import available_frontends from transformations import DataOffloadTransformation @pytest.mark.parametrize('frontend', available_frontends()) -def test_data_offload_region_openacc(frontend): +@pytest.mark.parametrize('deviceptr', [True, False]) +def test_data_offload_region_openacc(frontend, deviceptr): """ Test the creation of a simple device data offload region (`!$acc update`) from a `!$loki data` region with a single @@ -56,14 +57,20 @@ def test_data_offload_region_openacc(frontend): kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] driver.enrich_calls(kernel) - driver.apply(DataOffloadTransformation(), role='driver', targets=['kernel_routine']) - - assert len(FindNodes(Pragma).visit(driver.body)) == 2 - assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) - transformed = driver.to_fortran() - assert 'copyin( a )' in transformed - assert 'copy( b )' in transformed - assert 'copyout( c )' in transformed + driver.apply(DataOffloadTransformation(deviceptr=deviceptr), role='driver', targets=['kernel_routine']) + + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword == 'acc' for p in pragmas) + if deviceptr: + assert 'deviceptr' in pragmas[0].content + params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) + assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) + else: + transformed = driver.to_fortran() + assert 'copyin( a )' in transformed + assert 'copy( b )' in transformed + assert 'copyout( c )' in transformed @pytest.mark.parametrize('frontend', available_frontends()) diff --git a/transformations/transformations/data_offload.py b/transformations/transformations/data_offload.py index 62c431d19..643e71ede 100644 --- a/transformations/transformations/data_offload.py +++ b/transformations/transformations/data_offload.py @@ -27,6 +27,9 @@ class DataOffloadTransformation(Transformation): ---------- remove_openmp : bool Remove any existing OpenMP pragmas inside the marked region. + deviceptr : bool + Mark all offloaded arrays as true device-pointers if data offload + is being managed outside of structured Openacc data regions. """ def __init__(self, **kwargs): @@ -34,6 +37,7 @@ def __init__(self, **kwargs): # that down-stream processing can use that info self.has_data_regions = False self.remove_openmp = kwargs.get('remove_openmp', False) + self.deviceptr = kwargs.get('deviceptr', False) def transform_subroutine(self, routine, **kwargs): """ @@ -134,10 +138,16 @@ def insert_data_offload_pragmas(self, routine, targets): inoutargs = tuple(dict.fromkeys(inoutargs)) # Now geenerate the pre- and post pragmas (OpenACC) - copyin = f'copyin({", ".join(inargs)})' if inargs else '' - copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' - copyout = f'copyout({", ".join(outargs)})' if outargs else '' - pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}') + if self.deviceptr: + deviceptr = '' + if inargs+outargs+inoutargs: + deviceptr = f'deviceptr({", ".join(inargs+outargs+inoutargs)})' + pragma = Pragma(keyword='acc', content=f'data {deviceptr}') + else: + copyin = f'copyin({", ".join(inargs)})' if inargs else '' + copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' + copyout = f'copyout({", ".join(outargs)})' if outargs else '' + pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}') pragma_post = Pragma(keyword='acc', content='end data') pragma_map[region.pragma] = pragma pragma_map[region.pragma_post] = pragma_post