diff --git a/ci/base.yml b/ci/base.yml index 2b1732888c..24d9a01314 100644 --- a/ci/base.yml +++ b/ci/base.yml @@ -43,3 +43,4 @@ variables: VIRTUALENV_SYSTEM_SITE_PACKAGES: 1 CSCS_NEEDED_DATA: icon4py TEST_DATA_PATH: "/project/d121/icon4py/ci/testdata" + ICON_GRID_LOC: "/project/d121/icon4py/ci/testdata/grids/mch_ch_r04b09_dsl" diff --git a/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/cached.py b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/cached.py new file mode 100644 index 0000000000..30834c291d --- /dev/null +++ b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/cached.py @@ -0,0 +1,79 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from icon4py.model.atmosphere.diffusion.diffusion_utils import ( + copy_field as copy_field_orig, + init_diffusion_local_fields_for_regular_timestep as init_diffusion_local_fields_for_regular_timestep_orig, + scale_k as scale_k_orig, + setup_fields_for_initial_step as setup_fields_for_initial_step_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_vn import ( + apply_diffusion_to_vn as apply_diffusion_to_vn_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence import ( + apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence as apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.calculate_diagnostic_quantities_for_turbulence import ( + calculate_diagnostic_quantities_for_turbulence as calculate_diagnostic_quantities_for_turbulence_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools import ( + calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools as calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_and_smag_coefficients_for_vn import ( + calculate_nabla2_and_smag_coefficients_for_vn as calculate_nabla2_and_smag_coefficients_for_vn_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_for_theta import ( + calculate_nabla2_for_theta as calculate_nabla2_for_theta_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.truly_horizontal_diffusion_nabla_of_theta_over_steep_points import ( + truly_horizontal_diffusion_nabla_of_theta_over_steep_points as truly_horizontal_diffusion_nabla_of_theta_over_steep_points_orig, +) +from icon4py.model.atmosphere.diffusion.stencils.update_theta_and_exner import ( + update_theta_and_exner as update_theta_and_exner_orig, +) +from icon4py.model.common.caching import CachedProgram +from icon4py.model.common.interpolation.stencils.mo_intp_rbf_rbf_vec_interpol_vertex import ( + mo_intp_rbf_rbf_vec_interpol_vertex as mo_intp_rbf_rbf_vec_interpol_vertex_orig, +) + + +# diffusion run stencils +apply_diffusion_to_vn = CachedProgram(apply_diffusion_to_vn_orig) +apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence = CachedProgram( + apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence_orig +) +calculate_diagnostic_quantities_for_turbulence = CachedProgram( + calculate_diagnostic_quantities_for_turbulence_orig +) +calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools = CachedProgram( + calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools_orig +) +calculate_nabla2_and_smag_coefficients_for_vn = CachedProgram( + calculate_nabla2_and_smag_coefficients_for_vn_orig +) +calculate_nabla2_for_theta = CachedProgram(calculate_nabla2_for_theta_orig) +truly_horizontal_diffusion_nabla_of_theta_over_steep_points = CachedProgram( + truly_horizontal_diffusion_nabla_of_theta_over_steep_points_orig +) +update_theta_and_exner = CachedProgram(update_theta_and_exner_orig) + +mo_intp_rbf_rbf_vec_interpol_vertex = CachedProgram(mo_intp_rbf_rbf_vec_interpol_vertex_orig) + + +# model init stencils +setup_fields_for_initial_step = CachedProgram(setup_fields_for_initial_step_orig, with_domain=False) +copy_field = CachedProgram(copy_field_orig, with_domain=False) +init_diffusion_local_fields_for_regular_timestep = CachedProgram( + init_diffusion_local_fields_for_regular_timestep_orig, with_domain=False +) +scale_k = CachedProgram(scale_k_orig, with_domain=False) diff --git a/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py index ac9a332aaa..e9c5f962d0 100644 --- a/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py +++ b/model/atmosphere/diffusion/src/icon4py/model/atmosphere/diffusion/diffusion.py @@ -28,35 +28,27 @@ DiffusionMetricState, ) from icon4py.model.atmosphere.diffusion.diffusion_utils import ( - copy_field, - init_diffusion_local_fields_for_regular_timestep, init_nabla2_factor_in_upper_damping_zone, - scale_k, - setup_fields_for_initial_step, zero_field, ) -from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_vn import apply_diffusion_to_vn -from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence import ( - apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence, -) -from icon4py.model.atmosphere.diffusion.stencils.calculate_diagnostic_quantities_for_turbulence import ( - calculate_diagnostic_quantities_for_turbulence, -) -from icon4py.model.atmosphere.diffusion.stencils.calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools import ( - calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools, -) -from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_and_smag_coefficients_for_vn import ( + +# cached program import +from icon4py.model.atmosphere.diffusion.cached import ( + init_diffusion_local_fields_for_regular_timestep, + setup_fields_for_initial_step, + scale_k, calculate_nabla2_and_smag_coefficients_for_vn, -) -from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_for_theta import ( calculate_nabla2_for_theta, -) -from icon4py.model.atmosphere.diffusion.stencils.truly_horizontal_diffusion_nabla_of_theta_over_steep_points import ( truly_horizontal_diffusion_nabla_of_theta_over_steep_points, -) -from icon4py.model.atmosphere.diffusion.stencils.update_theta_and_exner import ( + apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence, + apply_diffusion_to_vn, + calculate_diagnostic_quantities_for_turbulence, + calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools, update_theta_and_exner, + copy_field, + mo_intp_rbf_rbf_vec_interpol_vertex, ) + from icon4py.model.common.constants import ( CPD, DEFAULT_PHYSICS_DYNAMICS_TIMESTEP_RATIO, @@ -68,9 +60,6 @@ from icon4py.model.common.grid.horizontal import CellParams, EdgeParams, HorizontalMarkerIndex from icon4py.model.common.grid.icon import IconGrid from icon4py.model.common.grid.vertical import VerticalModelParams -from icon4py.model.common.interpolation.stencils.mo_intp_rbf_rbf_vec_interpol_vertex import ( - mo_intp_rbf_rbf_vec_interpol_vertex, -) from icon4py.model.common.states.prognostic_state import PrognosticState from icon4py.model.common.settings import xp @@ -752,6 +741,7 @@ def _do_diffusion_step( ) # TODO (magdalena) get rid of this copying. So far passing an empty buffer instead did not verify? copy_field(prognostic_state.w, self.w_tmp, offset_provider={}) + apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence( area=self.cell_params.area, geofac_n2s=self.interpolation_state.geofac_n2s, @@ -784,6 +774,7 @@ def _do_diffusion_step( log.debug( "running fused stencils 11 12 (calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools): start" ) + calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools( theta_v=prognostic_state.theta_v, theta_ref_mc=self.metric_state.theta_ref_mc, @@ -799,6 +790,7 @@ def _do_diffusion_step( log.debug( "running stencils 11 12 (calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools): end" ) + log.debug("running stencils 13 14 (calculate_nabla2_for_theta): start") calculate_nabla2_for_theta( kh_smag_e=self.kh_smag_e, diff --git a/model/atmosphere/diffusion/tests/diffusion_stencil_tests/test_temporary_fields_for_turbulence_diagnostics.py b/model/atmosphere/diffusion/tests/diffusion_stencil_tests/test_temporary_fields_for_turbulence_diagnostics.py index e45018c211..bcfd669274 100644 --- a/model/atmosphere/diffusion/tests/diffusion_stencil_tests/test_temporary_fields_for_turbulence_diagnostics.py +++ b/model/atmosphere/diffusion/tests/diffusion_stencil_tests/test_temporary_fields_for_turbulence_diagnostics.py @@ -43,12 +43,15 @@ def reference( **kwargs, ) -> dict: c2e = grid.connectivities[C2EDim] + c2ce = grid.get_offset_provider("C2CE").table + geofac_div = np.expand_dims(geofac_div, axis=-1) - vn_geofac = vn[c2e] * geofac_div[grid.get_offset_provider("C2CE").table] - div = np.sum(vn_geofac, axis=1) e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1) diff_multfac_smag = np.expand_dims(diff_multfac_smag, axis=0) - mul = kh_smag_ec[c2e] * e_bln_c_s[grid.get_offset_provider("C2CE").table] + + vn_geofac = vn[c2e] * geofac_div[c2ce] + div = np.sum(vn_geofac, axis=1) + mul = kh_smag_ec[c2e] * e_bln_c_s[c2ce] summed = np.sum(mul, axis=1) kh_c = summed / diff_multfac_smag diff --git a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_add_interpolated_horizontal_advection_of_w.py b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_add_interpolated_horizontal_advection_of_w.py index ce8cf650ba..e53ca35c58 100644 --- a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_add_interpolated_horizontal_advection_of_w.py +++ b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_add_interpolated_horizontal_advection_of_w.py @@ -27,8 +27,10 @@ def add_interpolated_horizontal_advection_of_w_numpy( grid, e_bln_c_s: np.array, z_v_grad_w: np.array, ddt_w_adv: np.array, **kwargs ) -> np.array: e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1) + c2ce = grid.get_offset_provider("C2CE").table + ddt_w_adv = ddt_w_adv + np.sum( - z_v_grad_w[grid.connectivities[C2EDim]] * e_bln_c_s[grid.get_offset_provider("C2CE").table], + z_v_grad_w[grid.connectivities[C2EDim]] * e_bln_c_s[c2ce], axis=1, ) return ddt_w_adv diff --git a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_interpolate_to_cell_center.py b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_interpolate_to_cell_center.py index 19ce7bbac2..1e24efcf7e 100644 --- a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_interpolate_to_cell_center.py +++ b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_interpolate_to_cell_center.py @@ -30,9 +30,10 @@ def interpolate_to_cell_center_numpy( grid, interpolant: np.array, e_bln_c_s: np.array, **kwargs ) -> np.array: e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1) + c2ce = grid.get_offset_provider("C2CE").table + interpolation = np.sum( - interpolant[grid.connectivities[C2EDim]] - * e_bln_c_s[grid.get_offset_provider("C2CE").table], + interpolant[grid.connectivities[C2EDim]] * e_bln_c_s[c2ce], axis=1, ) return interpolation diff --git a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_mcompute_divergence_of_fluxes_of_rho_and_theta.py b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_mcompute_divergence_of_fluxes_of_rho_and_theta.py index f1fff490a3..7e6f2f8cd4 100644 --- a/model/atmosphere/dycore/tests/dycore_stencil_tests/test_mcompute_divergence_of_fluxes_of_rho_and_theta.py +++ b/model/atmosphere/dycore/tests/dycore_stencil_tests/test_mcompute_divergence_of_fluxes_of_rho_and_theta.py @@ -28,7 +28,7 @@ from icon4py.model.common.type_alias import vpfloat, wpfloat -class TestComputeDivergenceOfFluxesOfRhoAndTheta(StencilTest): +class TestComputeDivergenconnectivityceOfFluxesOfRhoAndTheta(StencilTest): PROGRAM = compute_divergence_of_fluxes_of_rho_and_theta OUTPUTS = ("z_flxdiv_mass", "z_flxdiv_theta") @@ -42,12 +42,14 @@ def reference( ) -> tuple[np.array]: c2e = grid.connectivities[C2EDim] geofac_div = np.expand_dims(geofac_div, axis=-1) + c2ce = grid.get_offset_provider("C2CE").table + z_flxdiv_mass = np.sum( - geofac_div[grid.get_offset_provider("C2CE").table] * mass_fl_e[c2e], + geofac_div[c2ce] * mass_fl_e[c2e], axis=1, ) z_flxdiv_theta = np.sum( - geofac_div[grid.get_offset_provider("C2CE").table] * z_theta_v_fl_e[c2e], + geofac_div[c2ce] * z_theta_v_fl_e[c2e], axis=1, ) return dict(z_flxdiv_mass=z_flxdiv_mass, z_flxdiv_theta=z_flxdiv_theta) diff --git a/model/common/src/icon4py/model/common/caching.py b/model/common/src/icon4py/model/common/caching.py new file mode 100644 index 0000000000..a10e36f420 --- /dev/null +++ b/model/common/src/icon4py/model/common/caching.py @@ -0,0 +1,135 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import dataclasses +from typing import Any, Callable, Optional + +import numpy as np +from gt4py import next as gtx +from gt4py.next.otf import workflow +from gt4py.next.program_processors.runners.gtfn import extract_connectivity_args + +from icon4py.model.common.settings import device + + +try: + import cupy as cp + from gt4py.next.embedded.nd_array_field import CuPyArrayField +except ImportError: + cp: Optional = None # type:ignore[no-redef] + +from gt4py.next.embedded.nd_array_field import NumPyArrayField + + +def handle_numpy_integer(value): + return int(value) + + +def handle_common_field(value, sizes): + sizes.extend(value.shape) + return value # Return the value unmodified, but side-effect on sizes + + +def handle_default(value): + return value # Return the value unchanged + + +if cp: + type_handlers = { + np.integer: handle_numpy_integer, + NumPyArrayField: handle_common_field, + CuPyArrayField: handle_common_field, + } +else: + type_handlers = { + np.integer: handle_numpy_integer, + NumPyArrayField: handle_common_field, + } + + +def process_arg(value, sizes): + handler = type_handlers.get(type(value), handle_default) + return handler(value, sizes) if handler == handle_common_field else handler(value) + + +@dataclasses.dataclass +class CachedProgram: + """Class to handle caching and compilation of GT4Py programs. + + This class is responsible for caching and compiling GT4Py programs + with optional domain information. The compiled program and its + connectivity arguments are stored for efficient execution. + + Attributes: + program (gtx.ffront.decorator.Program): The GT4Py program to be cached and compiled. + with_domain (bool): Flag to indicate if the program should be compiled with domain information. Defaults to True. + _compiled_program (Optional[Callable]): The compiled GT4Py program. + _conn_args (Any): Connectivity arguments extracted from the offset provider. + _compiled_args (tuple): Arguments used during the compilation of the program. + + Properties: + compiled_program (Callable): Returns the compiled GT4Py program. + conn_args (Any): Returns the connectivity arguments. + + Note: + This functionality will be provided by GT4Py in the future. + """ + + program: gtx.ffront.decorator.Program + with_domain: bool = True + _compiled_program: Optional[Callable] = None + _conn_args: Any = None + _compiled_args: tuple = dataclasses.field(default_factory=tuple) + + @property + def compiled_program(self) -> Callable: + return self._compiled_program + + @property + def conn_args(self) -> Callable: + return self._conn_args + + def compile_the_program( + self, *args, offset_provider: dict[str, gtx.Dimension], **kwargs: Any + ) -> Callable: + backend = self.program.backend + program_call = backend.transforms_prog( + workflow.InputWithArgs( + data=self.program.definition_stage, + args=args, + kwargs=kwargs | {"offset_provider": offset_provider}, + ) + ) + self._compiled_args = program_call.args + return backend.executor.otf_workflow(program_call) + + def __call__(self, *args, offset_provider: dict[str, gtx.Dimension], **kwargs: Any) -> None: + if not self.compiled_program: + self._compiled_program = self.compile_the_program( + *args, offset_provider=offset_provider, **kwargs + ) + self._conn_args = extract_connectivity_args(offset_provider, device) + + kwargs_as_tuples = tuple(kwargs.values()) + program_args = list(args) + list(kwargs_as_tuples) + sizes = [] + + # Convert numpy integers in args to int and handle gtx.common.Field + for i in range(len(program_args)): + program_args[i] = process_arg(program_args[i], sizes) + + if not self.with_domain: + program_args.extend(sizes) + + # todo(samkellerhals): if we merge gt4py PR we can also pass connectivity args here conn_args=self.conn_args + return self.compiled_program(*program_args, offset_provider=offset_provider) diff --git a/model/common/src/icon4py/model/common/config.py b/model/common/src/icon4py/model/common/config.py index f48034bf07..f7f4389dcc 100644 --- a/model/common/src/icon4py/model/common/config.py +++ b/model/common/src/icon4py/model/common/config.py @@ -69,3 +69,7 @@ def device(self): } device = device_map[self.icon4py_backend] return device + + @cached_property + def limited_area(self): + return os.environ.get("ICON4PY_LAM", False) diff --git a/model/common/src/icon4py/model/common/grid/base.py b/model/common/src/icon4py/model/common/grid/base.py index ede8df26d3..fc65c00742 100644 --- a/model/common/src/icon4py/model/common/grid/base.py +++ b/model/common/src/icon4py/model/common/grid/base.py @@ -24,6 +24,7 @@ from icon4py.model.common.dimension import CellDim, EdgeDim, KDim, VertexDim from icon4py.model.common.grid.utils import neighbortable_offset_provider_for_1d_sparse_fields from icon4py.model.common.grid.vertical import VerticalGridSize +from icon4py.model.common.settings import xp from icon4py.model.common.utils import builder @@ -128,11 +129,6 @@ def _get_offset_provider(self, dim, from_dim, to_dim): if dim not in self.connectivities: raise MissingConnectivity() - if self.config.on_gpu: - import cupy as xp - else: - xp = np - return NeighborTableOffsetProvider( xp.asarray(self.connectivities[dim]), from_dim, diff --git a/model/common/src/icon4py/model/common/grid/horizontal.py b/model/common/src/icon4py/model/common/grid/horizontal.py index 484974f2c5..7ac2050f3b 100644 --- a/model/common/src/icon4py/model/common/grid/horizontal.py +++ b/model/common/src/icon4py/model/common/grid/horizontal.py @@ -348,13 +348,13 @@ def __init__( @dataclass(frozen=True) class CellParams: #: Latitude at the cell center. The cell center is defined to be the circumcenter of a triangle. - cell_center_lat: Field[[CellDim], float] + cell_center_lat: Field[[CellDim], float] = None #: Longitude at the cell center. The cell center is defined to be the circumcenter of a triangle. - cell_center_lon: Field[[CellDim], float] + cell_center_lon: Field[[CellDim], float] = None #: Area of a cell, defined in ICON in mo_model_domain.f90:t_grid_cells%area - area: Field[[CellDim], float] + area: Field[[CellDim], float] = None #: Mean area of a cell [m^2] = total surface area / numer of cells defined in ICON in in mo_model_domimp_patches.f90 - mean_cell_area: float + mean_cell_area: float = None length_rescale_factor: float = 1.0 @classmethod diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py index 840d60f32e..e8a02984e8 100644 --- a/model/common/src/icon4py/model/common/grid/utils.py +++ b/model/common/src/icon4py/model/common/grid/utils.py @@ -14,6 +14,8 @@ import numpy as np from gt4py.next import Dimension, NeighborTableOffsetProvider +from icon4py.model.common.settings import xp + def neighbortable_offset_provider_for_1d_sparse_fields( old_shape: tuple[int, int], @@ -21,7 +23,7 @@ def neighbortable_offset_provider_for_1d_sparse_fields( neighbor_axis: Dimension, has_skip_values: bool, ): - table = np.arange(old_shape[0] * old_shape[1]).reshape(old_shape) + table = xp.asarray(np.arange(old_shape[0] * old_shape[1]).reshape(old_shape)) return NeighborTableOffsetProvider( table, origin_axis, diff --git a/model/common/src/icon4py/model/common/settings.py b/model/common/src/icon4py/model/common/settings.py index 0f5d1a82c0..b6d482eee5 100644 --- a/model/common/src/icon4py/model/common/settings.py +++ b/model/common/src/icon4py/model/common/settings.py @@ -17,3 +17,4 @@ backend = config.gt4py_runner xp = config.array_ns device = config.device +limited_area = config.limited_area diff --git a/model/common/src/icon4py/model/common/test_utils/grid_utils.py b/model/common/src/icon4py/model/common/test_utils/grid_utils.py index b3ff01df84..57e3bdd439 100644 --- a/model/common/src/icon4py/model/common/test_utils/grid_utils.py +++ b/model/common/src/icon4py/model/common/test_utils/grid_utils.py @@ -17,6 +17,7 @@ from icon4py.model.common.grid.grid_manager import GridManager, ToGt4PyTransformation from icon4py.model.common.grid.icon import IconGrid from icon4py.model.common.grid.vertical import VerticalGridSize +from icon4py.model.common.test_utils.data_handling import download_and_extract from icon4py.model.common.test_utils.datatest_utils import ( GLOBAL_EXPERIMENT, GRID_URIS, @@ -34,7 +35,7 @@ @functools.cache def get_icon_grid_from_gridfile(experiment: str, on_gpu: bool = False) -> IconGrid: if experiment == GLOBAL_EXPERIMENT: - return _load_from_gridfile( + return _download_and_load_from_gridfile( R02B04_GLOBAL, "icon_grid_0013_R02B04_R.nc", num_levels=GLOBAL_NUM_LEVELS, @@ -42,7 +43,7 @@ def get_icon_grid_from_gridfile(experiment: str, on_gpu: bool = False) -> IconGr limited_area=False, ) elif experiment == REGIONAL_EXPERIMENT: - return _load_from_gridfile( + return _download_and_load_from_gridfile( REGIONAL_EXPERIMENT, "grid.nc", num_levels=MCH_CH_R04B09_LEVELS, @@ -53,18 +54,20 @@ def get_icon_grid_from_gridfile(experiment: str, on_gpu: bool = False) -> IconGr raise ValueError(f"Unknown experiment: {experiment}") -def _load_from_gridfile( - file_path: str, filename: str, num_levels: int, on_gpu: bool, limited_area: bool -) -> IconGrid: +def download_grid_file(file_path: str, filename: str): grid_file = GRIDS_PATH.joinpath(file_path, filename) if not grid_file.exists(): - from icon4py.model.common.test_utils.data_handling import download_and_extract - download_and_extract( GRID_URIS[file_path], grid_file.parent, grid_file.parent, ) + return grid_file + + +def load_grid_from_file( + grid_file: str, num_levels: int, on_gpu: bool, limited_area: bool +) -> IconGrid: gm = GridManager( ToGt4PyTransformation(), str(grid_file), @@ -74,6 +77,13 @@ def _load_from_gridfile( return gm.get_grid() +def _download_and_load_from_gridfile( + file_path: str, filename: str, num_levels: int, on_gpu: bool, limited_area: bool +) -> IconGrid: + grid_file = download_grid_file(file_path, filename) + return load_grid_from_file(grid_file, num_levels, on_gpu, limited_area) + + @pytest.fixture def grid(request): return request.param diff --git a/model/requirements.txt b/model/requirements.txt index 3330309e2e..8da0b03ad2 100644 --- a/model/requirements.txt +++ b/model/requirements.txt @@ -2,5 +2,5 @@ ./atmosphere/dycore ./atmosphere/diffusion ./atmosphere/advection -./common +./common[netcdf] ./driver diff --git a/tools/README.md b/tools/README.md index 72132757df..f4d8b7b398 100644 --- a/tools/README.md +++ b/tools/README.md @@ -344,7 +344,7 @@ Options: ## Important Environment Variables When embedding granules it may be necessary to use an ICON grid file, as is the case in the diffusion granule wrapper. -The granule expects an `ICON_GRID_LOC` environment variable with the path to the folder holding the grid netcdf file. +The granule expects an `ICON_GRID_LOC` environment variable with the path to the folder containing the different grids. ### Example diff --git a/tools/requirements-dev.txt b/tools/requirements-dev.txt index ab8d302ab9..8aee68ae06 100644 --- a/tools/requirements-dev.txt +++ b/tools/requirements-dev.txt @@ -2,5 +2,5 @@ -e ../model/atmosphere/dycore -e ../model/atmosphere/diffusion -e ../model/atmosphere/advection --e ../model/common +-e ../model/common[netcdf] -e .[all] diff --git a/tools/requirements.txt b/tools/requirements.txt index 3fe8f73e35..f5357c839c 100644 --- a/tools/requirements.txt +++ b/tools/requirements.txt @@ -2,5 +2,5 @@ ../model/atmosphere/dycore ../model/atmosphere/advection ../model/atmosphere/diffusion -../model/common +../model/common[netcdf] . diff --git a/tools/src/icon4pytools/icon4pygen/bindings/codegen/type_conversion.py b/tools/src/icon4pytools/icon4pygen/bindings/codegen/type_conversion.py index 752763439b..cb86c05a65 100644 --- a/tools/src/icon4pytools/icon4pygen/bindings/codegen/type_conversion.py +++ b/tools/src/icon4pytools/icon4pygen/bindings/codegen/type_conversion.py @@ -28,3 +28,10 @@ ts.ScalarKind.INT32: "int", ts.ScalarKind.INT64: "long", } +BUILTIN_TO_NUMPY_TYPE: dict[ts.ScalarKind, str] = { + ts.ScalarKind.FLOAT64: "xp.float64", + ts.ScalarKind.FLOAT32: "xp.float32", + ts.ScalarKind.BOOL: "xp.int32", + ts.ScalarKind.INT32: "xp.int32", + ts.ScalarKind.INT64: "xp.int64", +} diff --git a/tools/src/icon4pytools/py2fgen/cli.py b/tools/src/icon4pytools/py2fgen/cli.py index 99db23a5c8..0a80ae9333 100644 --- a/tools/src/icon4pytools/py2fgen/cli.py +++ b/tools/src/icon4pytools/py2fgen/cli.py @@ -57,6 +57,7 @@ def parse_comma_separated_list(ctx, param, value) -> list[str]: is_flag=True, help="Enable debug mode to log additional Python runtime information.", ) +@click.option("--limited-area", is_flag=True, help="Enable limited area mode.") def main( module_import_path: str, functions: list[str], @@ -64,6 +65,7 @@ def main( output_path: pathlib.Path, debug_mode: bool, backend: str, + limited_area: str, ) -> None: """Generate C and F90 wrappers and C library for embedding a Python module in C and Fortran.""" output_path.mkdir(exist_ok=True, parents=True) @@ -71,8 +73,8 @@ def main( plugin = parse(module_import_path, functions, plugin_name) c_header = generate_c_header(plugin) - python_wrapper = generate_python_wrapper(plugin, backend, debug_mode) - f90_interface = generate_f90_interface(plugin) + python_wrapper = generate_python_wrapper(plugin, backend, debug_mode, limited_area) + f90_interface = generate_f90_interface(plugin, limited_area) generate_and_compile_cffi_plugin(plugin.plugin_name, c_header, python_wrapper, output_path) write_string(f90_interface, output_path, f"{plugin.plugin_name}.f90") diff --git a/tools/src/icon4pytools/py2fgen/generate.py b/tools/src/icon4pytools/py2fgen/generate.py index 9812d8a3f8..14a92890a1 100644 --- a/tools/src/icon4pytools/py2fgen/generate.py +++ b/tools/src/icon4pytools/py2fgen/generate.py @@ -45,7 +45,9 @@ def generate_c_header(plugin: CffiPlugin) -> str: return codegen.format_source("cpp", generated_code, style="LLVM") -def generate_python_wrapper(plugin: CffiPlugin, backend: Optional[str], debug_mode: bool) -> str: +def generate_python_wrapper( + plugin: CffiPlugin, backend: Optional[str], debug_mode: bool, limited_area: str +) -> str: """ Generate Python wrapper code. @@ -65,13 +67,14 @@ def generate_python_wrapper(plugin: CffiPlugin, backend: Optional[str], debug_mo imports=plugin.imports, backend=backend, debug_mode=debug_mode, + limited_area=limited_area, ) generated_code = PythonWrapperGenerator.apply(python_wrapper) return codegen.format_source("python", generated_code) -def generate_f90_interface(plugin: CffiPlugin) -> str: +def generate_f90_interface(plugin: CffiPlugin, limited_area: str) -> str: """ Generate Fortran 90 interface code. @@ -79,5 +82,7 @@ def generate_f90_interface(plugin: CffiPlugin) -> str: plugin: The CffiPlugin instance containing information for code generation. """ logger.info("Generating Fortran interface...") - generated_code = F90InterfaceGenerator.apply(F90Interface(cffi_plugin=plugin)) + generated_code = F90InterfaceGenerator.apply( + F90Interface(cffi_plugin=plugin, limited_area=limited_area) + ) return format_fortran_code(generated_code) diff --git a/tools/src/icon4pytools/py2fgen/plugin.py b/tools/src/icon4pytools/py2fgen/plugin.py index 8a85c90594..fb49f08b62 100644 --- a/tools/src/icon4pytools/py2fgen/plugin.py +++ b/tools/src/icon4pytools/py2fgen/plugin.py @@ -11,6 +11,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later import logging +import math import typing from pathlib import Path @@ -48,7 +49,7 @@ def unpack(ptr, *sizes: int) -> NDArray: This array shares the underlying data with the original Fortran code, allowing modifications made through the array to affect the original data. """ - length = np.prod(sizes) + length = math.prod(sizes) c_type = ffi.getctype(ffi.typeof(ptr).item) # Map C data types to NumPy dtypes @@ -88,7 +89,7 @@ def unpack_gpu(ptr, *sizes: int): if not sizes: raise ValueError("Sizes must be provided to determine the array shape.") - length = np.prod(sizes) + length = math.prod(sizes) c_type = ffi.getctype(ffi.typeof(ptr).item) dtype_map = { @@ -108,7 +109,6 @@ def unpack_gpu(ptr, *sizes: int): mem = cp.cuda.UnownedMemory(ptr_val, total_size, owner=ptr, device_id=current_device.id) memptr = cp.cuda.MemoryPointer(mem, 0) arr = cp.ndarray(shape=sizes, dtype=dtype, memptr=memptr, order="F") - return arr diff --git a/tools/src/icon4pytools/py2fgen/template.py b/tools/src/icon4pytools/py2fgen/template.py index 406c3d03d3..f258ac5a55 100644 --- a/tools/src/icon4pytools/py2fgen/template.py +++ b/tools/src/icon4pytools/py2fgen/template.py @@ -22,9 +22,11 @@ from icon4pytools.icon4pygen.bindings.codegen.type_conversion import ( BUILTIN_TO_CPP_TYPE, BUILTIN_TO_ISO_C_TYPE, + BUILTIN_TO_NUMPY_TYPE, ) from icon4pytools.py2fgen.plugin import int_array_to_bool_array, unpack, unpack_gpu from icon4pytools.py2fgen.utils import flatten_and_get_unique_elts +from icon4pytools.py2fgen.wrappers.experiments import UNINITIALISED_ARRAYS CFFI_DECORATOR = "@ffi.def_extern()" @@ -44,16 +46,20 @@ class FuncParameter(Node): size_args: list[str] = datamodels.field(init=False) is_array: bool = datamodels.field(init=False) gtdims: list[str] = datamodels.field(init=False) + size_args_len: int = datamodels.field(init=False) + np_type: str = datamodels.field(init=False) def __post_init__(self): self.size_args = dims_to_size_strings(self.dimensions) + self.size_args_len = len(self.size_args) self.is_array = True if len(self.dimensions) >= 1 else False # We need some fields to have nlevp1 levels on the fortran wrapper side, which we make # happen by using KHalfDim as a type hint. However, this is not yet supported on the icon4py - # side. So before generating the python wrapper code, we replace occurences of KHalfDim with KDim + # side. So before generating the python wrapper code, we replace occurrences of KHalfDim with KDim self.gtdims = [ dimension.value.replace("KHalf", "K") + "Dim" for dimension in self.dimensions ] + self.np_type = to_np_type(self.d_type) class Func(Node): @@ -78,6 +84,7 @@ class CffiPlugin(Node): class PythonWrapper(CffiPlugin): backend: str debug_mode: bool + limited_area: bool cffi_decorator: str = CFFI_DECORATOR cffi_unpack: str = inspect.getsource(unpack) cffi_unpack_gpu: str = inspect.getsource(unpack_gpu) @@ -88,6 +95,11 @@ class PythonWrapper(CffiPlugin): def __post_init__(self, *args: Any, **kwargs: Any) -> None: self.gt4py_backend = GT4PyBackend[self.backend].value self.is_gt4py_program_present = any(func.is_gt4py_program for func in self.functions) + self.uninitialised_arrays = get_uninitialised_arrays(self.limited_area) + + +def get_uninitialised_arrays(limited_area: bool): + return UNINITIALISED_ARRAYS if not limited_area else [] def build_array_size_args() -> dict[str, str]: @@ -109,6 +121,11 @@ def to_c_type(scalar_type: ScalarKind) -> str: return BUILTIN_TO_CPP_TYPE[scalar_type] +def to_np_type(scalar_type: ScalarKind) -> str: + """Convert a scalar type to its corresponding numpy type.""" + return BUILTIN_TO_NUMPY_TYPE[scalar_type] + + def to_iso_c_type(scalar_type: ScalarKind) -> str: """Convert a scalar type to its corresponding ISO C type.""" return BUILTIN_TO_ISO_C_TYPE[scalar_type] @@ -196,11 +213,13 @@ class PythonWrapperGenerator(TemplatedGenerator): """\ # imports for generated wrapper code import logging +import math from {{ plugin_name }} import ffi import numpy as np {% if _this_node.backend == 'GPU' %}import cupy as cp {% endif %} from numpy.typing import NDArray from gt4py.next.iterator.embedded import np_as_located_field +from icon4py.model.common.settings import xp {% if _this_node.is_gt4py_program_present %} # necessary imports when embedding a gt4py program directly @@ -260,7 +279,12 @@ def {{ func.name }}_wrapper( msg = '{{ arg.name }} before unpacking: %s' % str({{ arg.name}}) logging.debug(msg) {% endif %} + + {%- if arg.name in _this_node.uninitialised_arrays -%} + {{ arg.name }} = xp.ones((1,) * {{ arg.size_args_len }}, dtype={{arg.np_type}}, order="F") + {%- else -%} {{ arg.name }} = unpack{%- if _this_node.backend == 'GPU' -%}_gpu{%- endif -%}({{ arg.name }}, {{ ", ".join(arg.size_args) }}) + {%- endif -%} {%- if arg.d_type.name == "BOOL" %} {{ arg.name }} = int_array_to_bool_array({{ arg.name }}) @@ -354,23 +378,33 @@ class DimensionPosition(Node): class F90FunctionDefinition(Func): - dimension_size_declarations: Sequence[DimensionPosition] = datamodels.field(init=False) + limited_area: bool + dimension_positions: Sequence[DimensionPosition] = datamodels.field(init=False) def __post_init__(self, *args: Any, **kwargs: Any) -> None: super().__post_init__() # call Func __post_init__ + self.dimension_positions = self.extract_dimension_positions() + self.uninitialised_arrays = get_uninitialised_arrays(self.limited_area) - dim_positions = [] + def extract_dimension_positions(self) -> Sequence[DimensionPosition]: + """Extract a unique set of dimension positions which are used to infer dimension sizes at runtime.""" + dim_positions: list[DimensionPosition] = [] + unique_size_args: set[str] = set() for arg in self.args: for index, size_arg in enumerate(arg.size_args): - dim_positions.append( - DimensionPosition(variable=str(arg.name), size_arg=size_arg, index=index + 1) - ) # Use Fortran indexing - - self.dimension_size_declarations = dim_positions + if size_arg not in unique_size_args: + dim_positions.append( + DimensionPosition( + variable=str(arg.name), size_arg=size_arg, index=index + 1 + ) + ) # Use Fortran indexing + unique_size_args.add(size_arg) + return dim_positions class F90Interface(Node): cffi_plugin: CffiPlugin + limited_area: bool function_declaration: list[F90FunctionDeclaration] = datamodels.field(init=False) function_definition: list[F90FunctionDefinition] = datamodels.field(init=False) @@ -381,7 +415,12 @@ def __post_init__(self, *args: Any, **kwargs: Any) -> None: for f in functions ] self.function_definition = [ - F90FunctionDefinition(name=f.name, args=f.args, is_gt4py_program=f.is_gt4py_program) + F90FunctionDefinition( + name=f.name, + args=f.args, + is_gt4py_program=f.is_gt4py_program, + limited_area=self.limited_area, + ) for f in functions ] @@ -435,18 +474,22 @@ def visit_F90FunctionDefinition(self, func: F90FunctionDefinition, **kwargs): arg_names = ", &\n ".join(map(lambda x: x.name, func.args)) param_names_with_size_args = arg_names + ",&\n" + ", &\n".join(func.global_size_args) + return_code_param = ",&\nrc" if len(func.args) >= 1 else "rc" + return self.generic_visit( func, assumed_size_array=False, param_names=arg_names, param_names_with_size_args=param_names_with_size_args, - arrays=[arg for arg in func.args if arg.is_array], + arrays=set([arg.name for arg in func.args if arg.is_array]).difference( + set(func.uninitialised_arrays) + ), + return_code_param=return_code_param, ) - # todo(samkellerhals): Consider using unique SIZE args F90FunctionDefinition = as_jinja( """ -subroutine {{name}}({{param_names}}, &\nrc) +subroutine {{name}}({{param_names}} {{ return_code_param }}) use, intrinsic :: iso_c_binding {% for size_arg in global_size_args %} integer(c_int) :: {{ size_arg }} @@ -456,19 +499,23 @@ def visit_F90FunctionDefinition(self, func: F90FunctionDefinition, **kwargs): {% endfor %} integer(c_int) :: rc ! Stores the return code + {% if arrays | length >= 1 %} !$ACC host_data use_device( & {%- for arr in arrays %} - !$ACC {{ arr.name }}{% if not loop.last %}, &{% else %} &{% endif %} + !$ACC {{ arr }}{% if not loop.last %}, &{% else %} &{% endif %} {%- endfor %} !$ACC ) + {% endif %} - {% for d in _this_node.dimension_size_declarations %} + {% for d in _this_node.dimension_positions %} {{ d.size_arg }} = SIZE({{ d.variable }}, {{ d.index }}) {% endfor %} rc = {{ name }}_wrapper({{ param_names_with_size_args }}) + {% if arrays | length >= 1 %} !$acc end host_data + {% endif %} end subroutine {{name}} """ ) diff --git a/tools/src/icon4pytools/py2fgen/utils.py b/tools/src/icon4pytools/py2fgen/utils.py index 0313c04aa6..3fad1c34b8 100644 --- a/tools/src/icon4pytools/py2fgen/utils.py +++ b/tools/src/icon4pytools/py2fgen/utils.py @@ -31,14 +31,14 @@ def flatten_and_get_unique_elts(list_of_lists: list[list[str]]) -> list[str]: return sorted(set(item for sublist in list_of_lists for item in sublist)) -def get_local_test_grid(): +def get_local_test_grid(grid_folder: str): test_folder = "testdata" module_spec = importlib.util.find_spec("icon4pytools") if module_spec and module_spec.origin: # following namespace package conventions the root is three levels down repo_root = Path(module_spec.origin).parents[3] - return os.path.join(repo_root, test_folder) + return os.path.join(repo_root, test_folder, "grids", grid_folder) else: raise FileNotFoundError( "The `icon4pytools` package could not be found. Ensure the package is installed " @@ -52,7 +52,9 @@ def get_icon_grid_loc(): if env_path is not None: return env_path else: - return get_local_test_grid() + raise ValueError( + "Need to define ICON_GRID_LOC environment variable specifying absolute path to folder containing grid." + ) def get_grid_filename(): diff --git a/tools/src/icon4pytools/py2fgen/wrappers/diffusion.py b/tools/src/icon4pytools/py2fgen/wrappers/diffusion.py index b4c7fd7391..d264b17b31 100644 --- a/tools/src/icon4pytools/py2fgen/wrappers/diffusion.py +++ b/tools/src/icon4pytools/py2fgen/wrappers/diffusion.py @@ -20,6 +20,9 @@ - all arguments needed from external sources are passed. - passing of scalar types or fields of simple types """ +import cProfile +import os +import pstats from gt4py.next.common import Field from gt4py.next.ffront.fbuiltins import float64, int32 @@ -52,9 +55,9 @@ ) from icon4py.model.common.grid.horizontal import CellParams, EdgeParams from icon4py.model.common.grid.vertical import VerticalModelParams -from icon4py.model.common.settings import device +from icon4py.model.common.settings import device, limited_area from icon4py.model.common.states.prognostic_state import PrognosticState -from icon4py.model.common.test_utils.grid_utils import _load_from_gridfile +from icon4py.model.common.test_utils.grid_utils import load_grid_from_file from icon4py.model.common.test_utils.helpers import as_1D_sparse_field, flatten_first_two_dims from icon4pytools.common.logger import setup_logger @@ -66,6 +69,19 @@ # global diffusion object diffusion_granule: Diffusion = Diffusion() +# global profiler object +profiler = cProfile.Profile() + + +def profile_enable(): + profiler.enable() + + +def profile_disable(): + profiler.disable() + stats = pstats.Stats(profiler) + stats.dump_stats(f"{__name__}.profile") + def diffusion_init( vct_a: Field[[KHalfDim], float64], @@ -98,6 +114,11 @@ def diffusion_init( hdiff_efdt_ratio: float64, smagorinski_scaling_factor: float64, hdiff_temp: bool, + thslp_zdiffu: float, + thhgtd_zdiffu: float, + denom_diffu_v: float, + nudge_max_coeff: float, + itype_sher: int32, tangent_orientation: Field[[EdgeDim], float64], inverse_primal_edge_lengths: Field[[EdgeDim], float64], inv_dual_edge_length: Field[[EdgeDim], float64], @@ -122,12 +143,13 @@ def diffusion_init( else: on_gpu = False - icon_grid = _load_from_gridfile( - file_path=get_icon_grid_loc(), - filename=get_grid_filename(), + grid_file_path = os.path.join(get_icon_grid_loc(), get_grid_filename()) + + icon_grid = load_grid_from_file( + grid_file=grid_file_path, num_levels=num_levels, on_gpu=on_gpu, - limited_area=True, + limited_area=True if limited_area else False, ) # Edge geometry @@ -163,11 +185,11 @@ def diffusion_init( smagorinski_scaling_factor=smagorinski_scaling_factor, hdiff_temp=hdiff_temp, n_substeps=ndyn_substeps, - thslp_zdiffu=0.02, - thhgtd_zdiffu=125.0, - velocity_boundary_diffusion_denom=150.0, - max_nudging_coeff=0.075, - shear_type=TurbulenceShearForcingType.VERTICAL_HORIZONTAL_OF_HORIZONTAL_VERTICAL_WIND, + thslp_zdiffu=thslp_zdiffu, + thhgtd_zdiffu=thhgtd_zdiffu, + velocity_boundary_diffusion_denom=denom_diffu_v, + max_nudging_coeff=nudge_max_coeff, + shear_type=TurbulenceShearForcingType(itype_sher), ) diffusion_params = DiffusionParams(config) diff --git a/tools/src/icon4pytools/py2fgen/wrappers/experiments.py b/tools/src/icon4pytools/py2fgen/wrappers/experiments.py new file mode 100644 index 0000000000..ab78122b62 --- /dev/null +++ b/tools/src/icon4pytools/py2fgen/wrappers/experiments.py @@ -0,0 +1,25 @@ +# ICON4Py - ICON inspired code in Python and GT4Py +# +# Copyright (c) 2022, ETH Zurich and MeteoSwiss +# All rights reserved. +# +# This file is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# these arrays are not initialised in global experiments (e.g. ape_r02b04) and are not used +# therefore unpacking needs to be skipped as otherwise it will trigger an error. +UNINITIALISED_ARRAYS = [ + "mask_hdiff", + "zd_diffcoef", + "zd_vertoffset", + "zd_intcoef", + "hdef_ic", + "div_ic", + "dwdx", + "dwdy", +] diff --git a/tools/src/icon4pytools/py2fgen/wrappers/simple.py b/tools/src/icon4pytools/py2fgen/wrappers/simple.py index f7e072ff02..afe53e5fd3 100644 --- a/tools/src/icon4pytools/py2fgen/wrappers/simple.py +++ b/tools/src/icon4pytools/py2fgen/wrappers/simple.py @@ -11,13 +11,35 @@ # # SPDX-License-Identifier: GPL-3.0-or-later # mypy: ignore-errors +import cProfile +import pstats + from gt4py.next.common import GridType from gt4py.next.ffront.decorator import field_operator, program -from gt4py.next.ffront.fbuiltins import Field, float64, int32 -from icon4py.model.common.dimension import CellDim, EdgeDim, KDim +from gt4py.next.ffront.fbuiltins import Field, float64, int32, neighbor_sum +from icon4py.model.common.caching import CachedProgram +from icon4py.model.common.dimension import C2CE, C2E, C2EDim, CEDim, CellDim, EdgeDim, KDim +from icon4py.model.common.grid.simple import SimpleGrid +from icon4py.model.common.settings import backend from icon4py.model.common.type_alias import wpfloat +# global profiler object +profiler = cProfile.Profile() + +grid = SimpleGrid() + + +def profile_enable(): + profiler.enable() + + +def profile_disable(): + profiler.disable() + stats = pstats.Stats(profiler) + stats.dump_stats(f"{__name__}.profile") + + @field_operator def _square( inp: Field[[CellDim, KDim], float64], @@ -25,7 +47,7 @@ def _square( return inp**2 -@program(grid_type=GridType.UNSTRUCTURED) +@program(grid_type=GridType.UNSTRUCTURED, backend=backend) def square( inp: Field[[CellDim, KDim], float64], result: Field[[CellDim, KDim], float64], @@ -46,20 +68,25 @@ def _multi_return( mass_fl_e: Field[[EdgeDim, KDim], wpfloat], vn_traj: Field[[EdgeDim, KDim], wpfloat], mass_flx_me: Field[[EdgeDim, KDim], wpfloat], + geofac_div: Field[[CEDim], wpfloat], + z_nabla2_e: Field[[EdgeDim, KDim], wpfloat], r_nsubsteps: wpfloat, ) -> tuple[Field[[EdgeDim, KDim], wpfloat], Field[[EdgeDim, KDim], wpfloat]]: """accumulate_prep_adv_fields stencil formerly known as _mo_solve_nonhydro_stencil_34.""" vn_traj_wp = vn_traj + r_nsubsteps * z_vn_avg mass_flx_me_wp = mass_flx_me + r_nsubsteps * mass_fl_e + z_temp_wp = neighbor_sum(z_nabla2_e(C2E) * geofac_div(C2CE), axis=C2EDim) # noqa: F841 return vn_traj_wp, mass_flx_me_wp -@program(grid_type=GridType.UNSTRUCTURED) +@program(grid_type=GridType.UNSTRUCTURED, backend=backend) def multi_return( z_vn_avg: Field[[EdgeDim, KDim], wpfloat], mass_fl_e: Field[[EdgeDim, KDim], wpfloat], vn_traj: Field[[EdgeDim, KDim], wpfloat], mass_flx_me: Field[[EdgeDim, KDim], wpfloat], + geofac_div: Field[[CEDim], wpfloat], + z_nabla2_e: Field[[EdgeDim, KDim], wpfloat], r_nsubsteps: wpfloat, horizontal_start: int32, horizontal_end: int32, @@ -71,6 +98,8 @@ def multi_return( mass_fl_e, vn_traj, mass_flx_me, + geofac_div, + z_nabla2_e, r_nsubsteps, out=(vn_traj, mass_flx_me), domain={ @@ -85,3 +114,35 @@ def square_error( result: Field[[CellDim, KDim], float64], ): raise Exception("Exception foo occurred") + + +multi_return_cached = CachedProgram(multi_return) + + +def multi_return_from_function( + z_vn_avg: Field[[EdgeDim, KDim], wpfloat], + mass_fl_e: Field[[EdgeDim, KDim], wpfloat], + vn_traj: Field[[EdgeDim, KDim], wpfloat], + mass_flx_me: Field[[EdgeDim, KDim], wpfloat], + geofac_div: Field[[CEDim], wpfloat], + z_nabla2_e: Field[[EdgeDim, KDim], wpfloat], + r_nsubsteps: wpfloat, + horizontal_start: int32, + horizontal_end: int32, + vertical_start: int32, + vertical_end: int32, +): + multi_return_cached( + z_vn_avg, + mass_fl_e, + vn_traj, + mass_flx_me, + geofac_div, + z_nabla2_e, + r_nsubsteps, + horizontal_start, + horizontal_end, + vertical_start, + vertical_end, + offset_provider=grid.offset_providers, + ) diff --git a/tools/tests/py2fgen/fortran_samples/test_diffusion.f90 b/tools/tests/py2fgen/fortran_samples/test_diffusion.f90 index 460de43512..fbb3685375 100644 --- a/tools/tests/py2fgen/fortran_samples/test_diffusion.f90 +++ b/tools/tests/py2fgen/fortran_samples/test_diffusion.f90 @@ -98,6 +98,7 @@ program diffusion_simulation implicit none integer(c_int) :: rc + integer(c_int) :: n ! Constants and types integer(c_int), parameter :: num_cells = 20480 @@ -125,7 +126,7 @@ program diffusion_simulation real(c_double), parameter :: hdiff_efdt_ratio = 24.0 real(c_double), parameter :: smagorinski_scaling_factor = 0.025 logical(c_int), parameter :: hdiff_temp = .true. - logical(c_int), parameter :: linit = .true. + logical(c_int), parameter :: linit = .false. ! Declaring arrays for diffusion_init and diffusion_run real(c_double), dimension(:), allocatable :: vct_a @@ -298,8 +299,12 @@ program diffusion_simulation call exit(1) end if + do n = 1, 60 ! Call diffusion_run + call profile_enable(rc) call diffusion_run(w, vn, exner, theta_v, rho, hdef_ic, div_ic, dwdx, dwdy, dtime, linit, rc) + call profile_disable(rc) + end do print *, "Python exit code = ", rc if (rc /= 0) then diff --git a/tools/tests/py2fgen/fortran_samples/test_multi_return.f90 b/tools/tests/py2fgen/fortran_samples/test_multi_return.f90 index 86c13b7495..f0ae430bac 100644 --- a/tools/tests/py2fgen/fortran_samples/test_multi_return.f90 +++ b/tools/tests/py2fgen/fortran_samples/test_multi_return.f90 @@ -1,27 +1,33 @@ program call_multi_return_cffi_plugin use, intrinsic :: iso_c_binding - use multi_return_plugin + use multi_return_from_function_plugin implicit none - integer(c_int) :: edim, kdim, i, j, horizontal_start, horizontal_end, vertical_start, vertical_end, rc + integer(c_int) :: cdim, edim, kdim, cedim, i, j, horizontal_start, horizontal_end, vertical_start, vertical_end, rc, n logical :: computation_correct character(len=100) :: str_buffer real(c_double) :: r_nsubsteps - real(c_double), dimension(:, :), allocatable :: z_vn_avg, mass_fl_e, vn_traj, mass_flx_me + real(c_double), dimension(:, :), allocatable :: z_vn_avg, mass_fl_e, vn_traj, mass_flx_me, z_nabla2_e + real(c_double), dimension(:), allocatable :: geofac_div ! array dimensions + cdim = 18 edim = 27 kdim = 10 + cedim = cdim * edim !$ACC enter data create(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me) ! allocate arrays (allocate in column-major order) + allocate(geofac_div(cedim)) + allocate(z_nabla2_e(edim, kdim)) allocate (z_vn_avg(edim, kdim)) allocate (mass_fl_e(edim, kdim)) allocate (vn_traj(edim, kdim)) allocate (mass_flx_me(edim, kdim)) ! initialize arrays and variables + geofac_div = 3.5d0 z_vn_avg = 1.0d0 mass_fl_e = 2.0d0 vn_traj = 3.0d0 @@ -46,9 +52,32 @@ program call_multi_return_cffi_plugin print *, trim(str_buffer) print * + ! call once just so that we compile the code once (profiling becomes easier later) + call multi_return_from_function(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me, geofac_div, z_nabla2_e, r_nsubsteps, & + horizontal_start, horizontal_end, vertical_start, vertical_end, rc) + ! call the cffi plugin - call multi_return(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me, r_nsubsteps, & + call profile_enable(rc) + do n = 1, 1000 + call multi_return_from_function(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me, geofac_div, z_nabla2_e, r_nsubsteps, & horizontal_start, horizontal_end, vertical_start, vertical_end, rc) + + ! print array shapes and values + print *, "Arrays after computation:" + print *, "First value of vn_traj:", vn_traj(1, 1) + print *, "First value of mass_flx_me:", mass_flx_me(1, 1) + write (str_buffer, '("Shape of z_vn_avg = ", I2, ",", I2)') size(z_vn_avg, 1), size(z_vn_avg, 2) + print *, trim(str_buffer) + write (str_buffer, '("Shape of mass_fl_e = ", I2, ",", I2)') size(mass_fl_e, 1), size(mass_fl_e, 2) + print *, trim(str_buffer) + write (str_buffer, '("Shape of vn_traj = ", I2, ",", I2)') size(vn_traj, 1), size(vn_traj, 2) + print *, trim(str_buffer) + write (str_buffer, '("Shape of mass_flx_me = ", I2, ",", I2)') size(mass_flx_me, 1), size(mass_flx_me, 2) + print *, trim(str_buffer) + print *, "passed" + + end do + call profile_disable(rc) print *, "Python exit code = ", rc if (rc /= 0) then call exit(1) @@ -56,29 +85,6 @@ program call_multi_return_cffi_plugin !$ACC update host(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me) - ! print array shapes and values before computation - print *, "Arrays after computation:" - write (str_buffer, '("Shape of z_vn_avg = ", I2, ",", I2)') size(z_vn_avg, 1), size(z_vn_avg, 2) - print *, trim(str_buffer) - write (str_buffer, '("Shape of mass_fl_e = ", I2, ",", I2)') size(mass_fl_e, 1), size(mass_fl_e, 2) - print *, trim(str_buffer) - write (str_buffer, '("Shape of vn_traj = ", I2, ",", I2)') size(vn_traj, 1), size(vn_traj, 2) - print *, trim(str_buffer) - write (str_buffer, '("Shape of mass_flx_me = ", I2, ",", I2)') size(mass_flx_me, 1), size(mass_flx_me, 2) - print *, trim(str_buffer) - print * - - ! Assert vn_traj == 12 and mass_flx_me == 22 - computation_correct = .true. - do i = 1, edim - do j = 1, kdim - if (vn_traj(i, j) /= 12.0d0 .or. mass_flx_me(i, j) /= 22.0d0) then - computation_correct = .false. - exit - end if - end do - if (.not. computation_correct) exit - end do !$ACC end data !$ACC exit data delete(z_vn_avg, mass_fl_e, vn_traj, mass_flx_me) @@ -86,11 +92,4 @@ program call_multi_return_cffi_plugin ! deallocate arrays deallocate (z_vn_avg, mass_fl_e, vn_traj, mass_flx_me) - ! Check and print the result of the assertion - if (computation_correct) then - print *, "passed: vn_traj and mass_flx_me have expected values." - else - print *, "failed: vn_traj or mass_flx_me does not have the expected values." - stop 1 - end if end program call_multi_return_cffi_plugin diff --git a/tools/tests/py2fgen/fortran_samples/test_square.f90 b/tools/tests/py2fgen/fortran_samples/test_square.f90 index 24bebd0ea0..29098bcdf4 100644 --- a/tools/tests/py2fgen/fortran_samples/test_square.f90 +++ b/tools/tests/py2fgen/fortran_samples/test_square.f90 @@ -39,6 +39,10 @@ program call_square_wrapper_cffi_plugin call square_from_function(input, result, rc) #elif USE_SQUARE_ERROR call square_error(input, result, rc) +#elif PROFILE_SQUARE_FROM_FUNCTION + call profile_enable(rc) + call square_from_function(input, result, rc) + call profile_disable(rc) #else call square(input, result, rc) #endif diff --git a/tools/tests/py2fgen/test_cli.py b/tools/tests/py2fgen/test_cli.py index ce51e8eb30..2111fd083e 100644 --- a/tools/tests/py2fgen/test_cli.py +++ b/tools/tests/py2fgen/test_cli.py @@ -39,7 +39,7 @@ def run_test_case( module: str, function: str, plugin_name: str, - backend: str, + py2fbackend: str, samples_path: Path, fortran_driver: str, compiler: str = "gfortran", @@ -47,7 +47,7 @@ def run_test_case( expected_error_code: int = 0, ): with cli.isolated_filesystem(): - result = cli.invoke(main, [module, function, plugin_name, "-b", backend, "-d"]) + result = cli.invoke(main, [module, function, plugin_name, "-b", py2fbackend]) assert result.exit_code == 0, "CLI execution failed" try: @@ -102,7 +102,7 @@ def run_fortran_executable(plugin_name: str): @pytest.mark.parametrize( - "backend, extra_flags", + "py2fbackend, extra_flags", [ ("CPU", ("-DUSE_SQUARE_FROM_FUNCTION",)), ("ROUNDTRIP", ""), @@ -111,7 +111,7 @@ def run_fortran_executable(plugin_name: str): ], ) def test_py2fgen_compilation_and_execution_square_cpu( - cli_runner, backend, samples_path, wrapper_module, extra_flags + cli_runner, py2fbackend, samples_path, wrapper_module, extra_flags ): """Tests embedding Python functions, and GT4Py program directly. Also tests embedding multiple functions in one shared library. @@ -121,7 +121,7 @@ def test_py2fgen_compilation_and_execution_square_cpu( wrapper_module, "square,square_from_function", "square_plugin", - backend, + py2fbackend, samples_path, "test_square", extra_compiler_flags=extra_flags, @@ -143,17 +143,17 @@ def test_py2fgen_python_error_propagation_to_fortran(cli_runner, samples_path, w ) -@pytest.mark.parametrize("backend", ("CPU", "ROUNDTRIP")) -def test_py2fgen_compilation_and_execution_multi_return( - cli_runner, backend, samples_path, wrapper_module +@pytest.mark.parametrize("py2fbackend", ("CPU",)) +def test_py2fgen_compilation_and_execution_multi_return_profile( + cli_runner, py2fbackend, samples_path, wrapper_module ): """Tests embedding multi return gt4py program.""" run_test_case( cli_runner, wrapper_module, - "multi_return", - "multi_return_plugin", - backend, + "multi_return_from_function,profile_enable,profile_disable", + "multi_return_from_function_plugin", + py2fbackend, samples_path, "test_multi_return", ) @@ -165,7 +165,7 @@ def test_py2fgen_compilation_and_execution_diffusion(cli_runner, samples_path): run_test_case( cli_runner, "icon4pytools.py2fgen.wrappers.diffusion", - "diffusion_init,diffusion_run", + "diffusion_init,diffusion_run,profile_enable,profile_disable", "diffusion_plugin", "CPU", samples_path, @@ -176,7 +176,7 @@ def test_py2fgen_compilation_and_execution_diffusion(cli_runner, samples_path): # todo: enable on CI @pytest.mark.skip("Requires setting various environment variables.") @pytest.mark.parametrize( - "function_name, plugin_name, test_name, backend, extra_flags", + "function_name, plugin_name, test_name, py2fbackend, extra_flags", [ ("square", "square_plugin", "test_square", "GPU", ("-acc", "-Minfo=acc")), ("multi_return", "multi_return_plugin", "test_multi_return", "GPU", ("-acc", "-Minfo=acc")), @@ -187,7 +187,7 @@ def test_py2fgen_compilation_and_execution_gpu( function_name, plugin_name, test_name, - backend, + py2fbackend, samples_path, wrapper_module, extra_flags, @@ -197,7 +197,7 @@ def test_py2fgen_compilation_and_execution_gpu( wrapper_module, function_name, plugin_name, - backend, + py2fbackend, samples_path, test_name, "/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvfortran", # Ensure NVFORTRAN_COMPILER is set in your environment variables @@ -209,11 +209,11 @@ def test_py2fgen_compilation_and_execution_gpu( # Need to compile using nvfortran, and set CUDACXX path to nvcc cuda compiler. Also need to set ICON_GRID_LOC for path to gridfile, and ICON4PY_BACKEND to determine device at runtime. @pytest.mark.skip("Requires setting various environment variables.") @pytest.mark.parametrize( - "backend, extra_flags", + "py2fbackend, extra_flags", [("GPU", ("-acc", "-Minfo=acc"))], ) def test_py2fgen_compilation_and_execution_diffusion_gpu( - cli_runner, samples_path, backend, extra_flags + cli_runner, samples_path, py2fbackend, extra_flags ): # todo: requires setting ICON_GRID_LOC run_test_case( @@ -221,9 +221,31 @@ def test_py2fgen_compilation_and_execution_diffusion_gpu( "icon4pytools.py2fgen.wrappers.diffusion", "diffusion_init,diffusion_run", "diffusion_plugin", - backend, + py2fbackend, samples_path, "test_diffusion", "/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvfortran", # todo: set nvfortran location in base.yml file. extra_flags, ) + + +@pytest.mark.parametrize( + "py2fbackend, extra_flags", + [ + ("CPU", ("-DPROFILE_SQUARE_FROM_FUNCTION",)), + ], +) +def test_py2fgen_compilation_and_profiling( + cli_runner, py2fbackend, samples_path, wrapper_module, extra_flags +): + """Test profiling using cProfile of the generated wrapper.""" + run_test_case( + cli_runner, + wrapper_module, + "square_from_function,profile_enable,profile_disable", + "square_plugin", + py2fbackend, + samples_path, + "test_square", + extra_compiler_flags=extra_flags, + ) diff --git a/tools/tests/py2fgen/test_codegen.py b/tools/tests/py2fgen/test_codegen.py index 1ff57f9410..319048a62a 100644 --- a/tools/tests/py2fgen/test_codegen.py +++ b/tools/tests/py2fgen/test_codegen.py @@ -118,7 +118,7 @@ def dummy_plugin(): def test_fortran_interface(dummy_plugin): - interface = generate_f90_interface(dummy_plugin) + interface = generate_f90_interface(dummy_plugin, limited_area=True) expected = """ module libtest_plugin use, intrinsic :: iso_c_binding @@ -238,15 +238,17 @@ def test_fortran_interface(dummy_plugin): def test_python_wrapper(dummy_plugin): - interface = generate_python_wrapper(dummy_plugin, "GPU", False) + interface = generate_python_wrapper(dummy_plugin, "GPU", False, limited_area=True) expected = ''' # imports for generated wrapper code import logging +import math from libtest_plugin import ffi import numpy as np import cupy as cp from numpy.typing import NDArray from gt4py.next.iterator.embedded import np_as_located_field +from icon4py.model.common.settings import xp # logger setup log_format = '%(asctime)s.%(msecs)03d - %(levelname)s - %(message)s' @@ -282,11 +284,10 @@ def unpack_gpu(ptr, *sizes: int): This array shares the underlying data with the original Fortran code, allowing modifications made through the array to affect the original data. """ - if not sizes: raise ValueError("Sizes must be provided to determine the array shape.") - length = np.prod(sizes) + length = math.prod(sizes) c_type = ffi.getctype(ffi.typeof(ptr).item) dtype_map = { @@ -306,7 +307,6 @@ def unpack_gpu(ptr, *sizes: int): mem = cp.cuda.UnownedMemory(ptr_val, total_size, owner=ptr, device_id=current_device.id) memptr = cp.cuda.MemoryPointer(mem, 0) arr = cp.ndarray(shape=sizes, dtype=dtype, memptr=memptr, order="F") - return arr def int_array_to_bool_array(int_array: NDArray) -> NDArray: diff --git a/tools/tests/py2fgen/test_diffusion_wrapper.py b/tools/tests/py2fgen/test_diffusion_wrapper.py index cb49e594c1..36b1705385 100644 --- a/tools/tests/py2fgen/test_diffusion_wrapper.py +++ b/tools/tests/py2fgen/test_diffusion_wrapper.py @@ -11,7 +11,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later # type: ignore -import pytest from gt4py.next import as_field from icon4py.model.atmosphere.diffusion.diffusion import DiffusionType from icon4py.model.common.dimension import ( @@ -27,30 +26,29 @@ VertexDim, ) from icon4py.model.common.settings import xp +from icon4py.model.common.test_utils.grid_utils import MCH_CH_R04B09_LEVELS from icon4pytools.py2fgen.wrappers.diffusion import diffusion_init, diffusion_run -# todo(samkellerhals): turn on and off using a marker/option (required ICON_GRID_LOC) -@pytest.mark.skip -def test_diffusion_wrapper_py(): +def test_diffusion_wrapper_interface(): # grid parameters - num_cells = 20480 - num_edges = 30720 - num_vertices = 10242 - num_levels = 60 + num_cells = 20896 + num_edges = 31558 + num_vertices = 10663 + num_levels = MCH_CH_R04B09_LEVELS num_c2ec2o = 4 num_v2e = 6 - num_c2e = 2 + num_c2e = 3 num_e2c2v = 4 num_c2e2c = 3 - num_e2c = 3 + num_e2c = 2 mean_cell_area = 24907282236.708576 # other configuration parameters ndyn_substeps = 2 - dtime = 2.0 - rayleigh_damping_height = 50000 + dtime = 10.0 + rayleigh_damping_height = 12500.0 nflatlev = 30 nflat_gradp = 59 @@ -64,6 +62,11 @@ def test_diffusion_wrapper_py(): hdiff_efdt_ratio = 24.0 smagorinski_scaling_factor = 0.025 hdiff_temp = True + thslp_zdiffu = 0.02 + thhgtd_zdiffu = 125.0 + denom_diffu_v = 150.0 + nudge_max_coeff = 0.075 + itype_sher = 2 # TurbulenceShearForcingType.VERTICAL_HORIZONTAL_OF_HORIZONTAL_VERTICAL_WIND # input data - numpy rng = xp.random.default_rng() @@ -183,6 +186,11 @@ def test_diffusion_wrapper_py(): hdiff_efdt_ratio=hdiff_efdt_ratio, smagorinski_scaling_factor=smagorinski_scaling_factor, hdiff_temp=hdiff_temp, + thslp_zdiffu=thslp_zdiffu, + thhgtd_zdiffu=thhgtd_zdiffu, + denom_diffu_v=denom_diffu_v, + nudge_max_coeff=nudge_max_coeff, + itype_sher=itype_sher, tangent_orientation=tangent_orientation, inverse_primal_edge_lengths=inverse_primal_edge_lengths, inv_dual_edge_length=inv_dual_edge_length, diff --git a/tools/tox.ini b/tools/tox.ini index 0c8b7eb934..ad79aebbda 100644 --- a/tools/tox.ini +++ b/tools/tox.ini @@ -11,6 +11,7 @@ skipsdist = true passenv = PIP_USER PYTHONUSERBASE + ICON_GRID_LOC deps = -r {toxinidir}/requirements-dev.txt commands = @@ -25,7 +26,6 @@ allowlist_externals = rm [testenv:dev] -basepython = python3.10 setenv = PIP_SRC = _external_src skip_install = true