From 7be19350d39b7369fb3f5b9c4cbd9336f3915f78 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 7 Sep 2023 12:07:18 +0200 Subject: [PATCH 1/3] DataOffloadTrafo: added option to declare kernel args as true device pointers --- cmake/loki_transform.cmake | 6 ++++- scripts/loki_transform.py | 6 +++-- transformations/tests/test_data_offload.py | 27 ++++++++++++------- .../transformations/data_offload.py | 18 ++++++++++--- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index e812d4093..acabaf4e0 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -199,7 +199,7 @@ endmacro() function( loki_transform_convert ) - set( options CPP DATA_OFFLOAD REMOVE_OPENMP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) + set( options CPP DATA_OFFLOAD REMOVE_OPENMP DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) set( oneValueArgs MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH ) set( multiValueArgs OUTPUT DEPENDS INCLUDES INCLUDE HEADERS HEADER DEFINITIONS DEFINE OMNI_INCLUDE XMOD ) @@ -236,6 +236,10 @@ function( loki_transform_convert ) list( APPEND _ARGS --remove-openmp ) endif() + if( ${_PAR_DEVICEPTR} ) + list( APPEND _ARGS --deviceptr ) + endif() + if( ${_PAR_GLOBAL_VAR_OFFLOAD} ) list( APPEND _ARGS --global-var-offload ) endif() diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 3ea735565..053c3bb8c 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -92,6 +92,8 @@ def cli(debug): help='Run transformation to insert custom data offload regions.') @click.option('--remove-openmp', is_flag=True, default=False, help='Removes existing OpenMP pragmas in "!$loki data" regions.') +@click.option('--deviceptr', is_flag=True, default=False, + help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), help='Frontend parser to use (default FP)') @click.option('--trim-vector-sections', is_flag=True, default=False, @@ -102,7 +104,7 @@ def cli(debug): help="Remove derived-type arguments and replace with canonical arguments") def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, - data_offload, remove_openmp, frontend, trim_vector_sections, + data_offload, remove_openmp, deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args ): """ @@ -165,7 +167,7 @@ def convert( # Insert data offload regions for GPUs and remove OpenMP threading directives use_claw_offload = True if data_offload: - offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp) + offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp, deviceptr=deviceptr) scheduler.process(transformation=offload_transform) use_claw_offload = not offload_transform.has_data_regions diff --git a/transformations/tests/test_data_offload.py b/transformations/tests/test_data_offload.py index d2d8577d0..f6562ae12 100644 --- a/transformations/tests/test_data_offload.py +++ b/transformations/tests/test_data_offload.py @@ -9,14 +9,15 @@ from loki import ( Sourcefile, FindNodes, Pragma, PragmaRegion, Loop, - CallStatement, pragma_regions_attached + CallStatement, pragma_regions_attached, get_pragma_parameters ) from conftest import available_frontends from transformations import DataOffloadTransformation @pytest.mark.parametrize('frontend', available_frontends()) -def test_data_offload_region_openacc(frontend): +@pytest.mark.parametrize('deviceptr', [True, False]) +def test_data_offload_region_openacc(frontend, deviceptr): """ Test the creation of a simple device data offload region (`!$acc update`) from a `!$loki data` region with a single @@ -56,14 +57,20 @@ def test_data_offload_region_openacc(frontend): kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] driver.enrich_calls(kernel) - driver.apply(DataOffloadTransformation(), role='driver', targets=['kernel_routine']) - - assert len(FindNodes(Pragma).visit(driver.body)) == 2 - assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) - transformed = driver.to_fortran() - assert 'copyin( a )' in transformed - assert 'copy( b )' in transformed - assert 'copyout( c )' in transformed + driver.apply(DataOffloadTransformation(deviceptr=deviceptr), role='driver', targets=['kernel_routine']) + + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword == 'acc' for p in pragmas) + if deviceptr: + assert 'deviceptr' in pragmas[0].content + params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) + assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) + else: + transformed = driver.to_fortran() + assert 'copyin( a )' in transformed + assert 'copy( b )' in transformed + assert 'copyout( c )' in transformed @pytest.mark.parametrize('frontend', available_frontends()) diff --git a/transformations/transformations/data_offload.py b/transformations/transformations/data_offload.py index 62c431d19..643e71ede 100644 --- a/transformations/transformations/data_offload.py +++ b/transformations/transformations/data_offload.py @@ -27,6 +27,9 @@ class DataOffloadTransformation(Transformation): ---------- remove_openmp : bool Remove any existing OpenMP pragmas inside the marked region. + deviceptr : bool + Mark all offloaded arrays as true device-pointers if data offload + is being managed outside of structured Openacc data regions. """ def __init__(self, **kwargs): @@ -34,6 +37,7 @@ def __init__(self, **kwargs): # that down-stream processing can use that info self.has_data_regions = False self.remove_openmp = kwargs.get('remove_openmp', False) + self.deviceptr = kwargs.get('deviceptr', False) def transform_subroutine(self, routine, **kwargs): """ @@ -134,10 +138,16 @@ def insert_data_offload_pragmas(self, routine, targets): inoutargs = tuple(dict.fromkeys(inoutargs)) # Now geenerate the pre- and post pragmas (OpenACC) - copyin = f'copyin({", ".join(inargs)})' if inargs else '' - copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' - copyout = f'copyout({", ".join(outargs)})' if outargs else '' - pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}') + if self.deviceptr: + deviceptr = '' + if inargs+outargs+inoutargs: + deviceptr = f'deviceptr({", ".join(inargs+outargs+inoutargs)})' + pragma = Pragma(keyword='acc', content=f'data {deviceptr}') + else: + copyin = f'copyin({", ".join(inargs)})' if inargs else '' + copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' + copyout = f'copyout({", ".join(outargs)})' if outargs else '' + pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}') pragma_post = Pragma(keyword='acc', content='end data') pragma_map[region.pragma] = pragma pragma_map[region.pragma_post] = pragma_post From 9bc5142cb01b2a65f3f1df0630daabdc30e020e3 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 13 Sep 2023 13:34:24 +0200 Subject: [PATCH 2/3] Changed option name to assume_deviceptr --- cmake/loki_transform.cmake | 6 +++--- scripts/loki_transform.py | 6 +++--- transformations/tests/test_data_offload.py | 8 ++++---- .../transformations/data_offload.py | 18 ++++++++++-------- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index acabaf4e0..b50e243e4 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -199,7 +199,7 @@ endmacro() function( loki_transform_convert ) - set( options CPP DATA_OFFLOAD REMOVE_OPENMP DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) + set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) set( oneValueArgs MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH ) set( multiValueArgs OUTPUT DEPENDS INCLUDES INCLUDE HEADERS HEADER DEFINITIONS DEFINE OMNI_INCLUDE XMOD ) @@ -236,8 +236,8 @@ function( loki_transform_convert ) list( APPEND _ARGS --remove-openmp ) endif() - if( ${_PAR_DEVICEPTR} ) - list( APPEND _ARGS --deviceptr ) + if( ${_PAR_ASSUME_DEVICEPTR} ) + list( APPEND _ARGS --assume-deviceptr ) endif() if( ${_PAR_GLOBAL_VAR_OFFLOAD} ) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 053c3bb8c..1286edd6a 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -92,7 +92,7 @@ def cli(debug): help='Run transformation to insert custom data offload regions.') @click.option('--remove-openmp', is_flag=True, default=False, help='Removes existing OpenMP pragmas in "!$loki data" regions.') -@click.option('--deviceptr', is_flag=True, default=False, +@click.option('--assume-deviceptr', is_flag=True, default=False, help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), help='Frontend parser to use (default FP)') @@ -104,7 +104,7 @@ def cli(debug): help="Remove derived-type arguments and replace with canonical arguments") def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, - data_offload, remove_openmp, deviceptr, frontend, trim_vector_sections, + data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args ): """ @@ -167,7 +167,7 @@ def convert( # Insert data offload regions for GPUs and remove OpenMP threading directives use_claw_offload = True if data_offload: - offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp, deviceptr=deviceptr) + offload_transform = DataOffloadTransformation(remove_openmp=remove_openmp, assume_deviceptr=assume_deviceptr) scheduler.process(transformation=offload_transform) use_claw_offload = not offload_transform.has_data_regions diff --git a/transformations/tests/test_data_offload.py b/transformations/tests/test_data_offload.py index f6562ae12..1d7255a66 100644 --- a/transformations/tests/test_data_offload.py +++ b/transformations/tests/test_data_offload.py @@ -16,8 +16,8 @@ @pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('deviceptr', [True, False]) -def test_data_offload_region_openacc(frontend, deviceptr): +@pytest.mark.parametrize('assume_deviceptr', [True, False]) +def test_data_offload_region_openacc(frontend, assume_deviceptr): """ Test the creation of a simple device data offload region (`!$acc update`) from a `!$loki data` region with a single @@ -57,12 +57,12 @@ def test_data_offload_region_openacc(frontend, deviceptr): kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] driver.enrich_calls(kernel) - driver.apply(DataOffloadTransformation(deviceptr=deviceptr), role='driver', targets=['kernel_routine']) + driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr), role='driver', targets=['kernel_routine']) pragmas = FindNodes(Pragma).visit(driver.body) assert len(pragmas) == 2 assert all(p.keyword == 'acc' for p in pragmas) - if deviceptr: + if assume_deviceptr: assert 'deviceptr' in pragmas[0].content params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) diff --git a/transformations/transformations/data_offload.py b/transformations/transformations/data_offload.py index 643e71ede..120f4df17 100644 --- a/transformations/transformations/data_offload.py +++ b/transformations/transformations/data_offload.py @@ -27,9 +27,9 @@ class DataOffloadTransformation(Transformation): ---------- remove_openmp : bool Remove any existing OpenMP pragmas inside the marked region. - deviceptr : bool + assume_deviceptr : bool Mark all offloaded arrays as true device-pointers if data offload - is being managed outside of structured Openacc data regions. + is being managed outside of structured OpenACC data regions. """ def __init__(self, **kwargs): @@ -37,7 +37,7 @@ def __init__(self, **kwargs): # that down-stream processing can use that info self.has_data_regions = False self.remove_openmp = kwargs.get('remove_openmp', False) - self.deviceptr = kwargs.get('deviceptr', False) + self.assume_deviceptr = kwargs.get('assume_deviceptr', False) def transform_subroutine(self, routine, **kwargs): """ @@ -138,11 +138,13 @@ def insert_data_offload_pragmas(self, routine, targets): inoutargs = tuple(dict.fromkeys(inoutargs)) # Now geenerate the pre- and post pragmas (OpenACC) - if self.deviceptr: - deviceptr = '' - if inargs+outargs+inoutargs: - deviceptr = f'deviceptr({", ".join(inargs+outargs+inoutargs)})' - pragma = Pragma(keyword='acc', content=f'data {deviceptr}') + if self.assume_deviceptr: + offload_args = inargs + outargs + inoutargs + if offload_args: + deviceptr = f' deviceptr({", ".join(offload_args)})' + else: + deviceptr = '' + pragma = Pragma(keyword='acc', content=f'data{deviceptr}') else: copyin = f'copyin({", ".join(inargs)})' if inargs else '' copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' From 3ac1f46dfa0c653580a975b3dab1e50291550134 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 13 Sep 2023 17:51:29 +0200 Subject: [PATCH 3/3] Appease linter --- transformations/tests/test_data_offload.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transformations/tests/test_data_offload.py b/transformations/tests/test_data_offload.py index 1d7255a66..2afa4abfc 100644 --- a/transformations/tests/test_data_offload.py +++ b/transformations/tests/test_data_offload.py @@ -57,7 +57,8 @@ def test_data_offload_region_openacc(frontend, assume_deviceptr): kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] driver.enrich_calls(kernel) - driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr), role='driver', targets=['kernel_routine']) + driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr), role='driver', + targets=['kernel_routine']) pragmas = FindNodes(Pragma).visit(driver.body) assert len(pragmas) == 2