Skip to content

Commit

Permalink
[Py2F]: add profiling support & optimisations (#449)
Browse files Browse the repository at this point in the history
Adds CachedProgram to diffusion stencils, as well as other optimisations for py2f and changes so that APE experiments can be run with py2f.
  • Loading branch information
samkellerhals authored May 29, 2024
1 parent c918d52 commit 1752f87
Show file tree
Hide file tree
Showing 34 changed files with 602 additions and 165 deletions.
1 change: 1 addition & 0 deletions ci/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ variables:
VIRTUALENV_SYSTEM_SITE_PACKAGES: 1
CSCS_NEEDED_DATA: icon4py
TEST_DATA_PATH: "/project/d121/icon4py/ci/testdata"
ICON_GRID_LOC: "/project/d121/icon4py/ci/testdata/grids/mch_ch_r04b09_dsl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# ICON4Py - ICON inspired code in Python and GT4Py
#
# Copyright (c) 2022, ETH Zurich and MeteoSwiss
# All rights reserved.
#
# This file is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or any later
# version. See the LICENSE.txt file at the top-level directory of this
# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

from icon4py.model.atmosphere.diffusion.diffusion_utils import (
copy_field as copy_field_orig,
init_diffusion_local_fields_for_regular_timestep as init_diffusion_local_fields_for_regular_timestep_orig,
scale_k as scale_k_orig,
setup_fields_for_initial_step as setup_fields_for_initial_step_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_vn import (
apply_diffusion_to_vn as apply_diffusion_to_vn_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence import (
apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence as apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_diagnostic_quantities_for_turbulence import (
calculate_diagnostic_quantities_for_turbulence as calculate_diagnostic_quantities_for_turbulence_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools import (
calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools as calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_and_smag_coefficients_for_vn import (
calculate_nabla2_and_smag_coefficients_for_vn as calculate_nabla2_and_smag_coefficients_for_vn_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_for_theta import (
calculate_nabla2_for_theta as calculate_nabla2_for_theta_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.truly_horizontal_diffusion_nabla_of_theta_over_steep_points import (
truly_horizontal_diffusion_nabla_of_theta_over_steep_points as truly_horizontal_diffusion_nabla_of_theta_over_steep_points_orig,
)
from icon4py.model.atmosphere.diffusion.stencils.update_theta_and_exner import (
update_theta_and_exner as update_theta_and_exner_orig,
)
from icon4py.model.common.caching import CachedProgram
from icon4py.model.common.interpolation.stencils.mo_intp_rbf_rbf_vec_interpol_vertex import (
mo_intp_rbf_rbf_vec_interpol_vertex as mo_intp_rbf_rbf_vec_interpol_vertex_orig,
)


# diffusion run stencils
apply_diffusion_to_vn = CachedProgram(apply_diffusion_to_vn_orig)
apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence = CachedProgram(
apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence_orig
)
calculate_diagnostic_quantities_for_turbulence = CachedProgram(
calculate_diagnostic_quantities_for_turbulence_orig
)
calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools = CachedProgram(
calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools_orig
)
calculate_nabla2_and_smag_coefficients_for_vn = CachedProgram(
calculate_nabla2_and_smag_coefficients_for_vn_orig
)
calculate_nabla2_for_theta = CachedProgram(calculate_nabla2_for_theta_orig)
truly_horizontal_diffusion_nabla_of_theta_over_steep_points = CachedProgram(
truly_horizontal_diffusion_nabla_of_theta_over_steep_points_orig
)
update_theta_and_exner = CachedProgram(update_theta_and_exner_orig)

mo_intp_rbf_rbf_vec_interpol_vertex = CachedProgram(mo_intp_rbf_rbf_vec_interpol_vertex_orig)


# model init stencils
setup_fields_for_initial_step = CachedProgram(setup_fields_for_initial_step_orig, with_domain=False)
copy_field = CachedProgram(copy_field_orig, with_domain=False)
init_diffusion_local_fields_for_regular_timestep = CachedProgram(
init_diffusion_local_fields_for_regular_timestep_orig, with_domain=False
)
scale_k = CachedProgram(scale_k_orig, with_domain=False)
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,27 @@
DiffusionMetricState,
)
from icon4py.model.atmosphere.diffusion.diffusion_utils import (
copy_field,
init_diffusion_local_fields_for_regular_timestep,
init_nabla2_factor_in_upper_damping_zone,
scale_k,
setup_fields_for_initial_step,
zero_field,
)
from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_vn import apply_diffusion_to_vn
from icon4py.model.atmosphere.diffusion.stencils.apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence import (
apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_diagnostic_quantities_for_turbulence import (
calculate_diagnostic_quantities_for_turbulence,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools import (
calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_and_smag_coefficients_for_vn import (

# cached program import
from icon4py.model.atmosphere.diffusion.cached import (
init_diffusion_local_fields_for_regular_timestep,
setup_fields_for_initial_step,
scale_k,
calculate_nabla2_and_smag_coefficients_for_vn,
)
from icon4py.model.atmosphere.diffusion.stencils.calculate_nabla2_for_theta import (
calculate_nabla2_for_theta,
)
from icon4py.model.atmosphere.diffusion.stencils.truly_horizontal_diffusion_nabla_of_theta_over_steep_points import (
truly_horizontal_diffusion_nabla_of_theta_over_steep_points,
)
from icon4py.model.atmosphere.diffusion.stencils.update_theta_and_exner import (
apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence,
apply_diffusion_to_vn,
calculate_diagnostic_quantities_for_turbulence,
calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools,
update_theta_and_exner,
copy_field,
mo_intp_rbf_rbf_vec_interpol_vertex,
)

from icon4py.model.common.constants import (
CPD,
DEFAULT_PHYSICS_DYNAMICS_TIMESTEP_RATIO,
Expand All @@ -68,9 +60,6 @@
from icon4py.model.common.grid.horizontal import CellParams, EdgeParams, HorizontalMarkerIndex
from icon4py.model.common.grid.icon import IconGrid
from icon4py.model.common.grid.vertical import VerticalModelParams
from icon4py.model.common.interpolation.stencils.mo_intp_rbf_rbf_vec_interpol_vertex import (
mo_intp_rbf_rbf_vec_interpol_vertex,
)
from icon4py.model.common.states.prognostic_state import PrognosticState
from icon4py.model.common.settings import xp

Expand Down Expand Up @@ -752,6 +741,7 @@ def _do_diffusion_step(
)
# TODO (magdalena) get rid of this copying. So far passing an empty buffer instead did not verify?
copy_field(prognostic_state.w, self.w_tmp, offset_provider={})

apply_diffusion_to_w_and_compute_horizontal_gradients_for_turbulence(
area=self.cell_params.area,
geofac_n2s=self.interpolation_state.geofac_n2s,
Expand Down Expand Up @@ -784,6 +774,7 @@ def _do_diffusion_step(
log.debug(
"running fused stencils 11 12 (calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools): start"
)

calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools(
theta_v=prognostic_state.theta_v,
theta_ref_mc=self.metric_state.theta_ref_mc,
Expand All @@ -799,6 +790,7 @@ def _do_diffusion_step(
log.debug(
"running stencils 11 12 (calculate_enhanced_diffusion_coefficients_for_grid_point_cold_pools): end"
)

log.debug("running stencils 13 14 (calculate_nabla2_for_theta): start")
calculate_nabla2_for_theta(
kh_smag_e=self.kh_smag_e,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,15 @@ def reference(
**kwargs,
) -> dict:
c2e = grid.connectivities[C2EDim]
c2ce = grid.get_offset_provider("C2CE").table

geofac_div = np.expand_dims(geofac_div, axis=-1)
vn_geofac = vn[c2e] * geofac_div[grid.get_offset_provider("C2CE").table]
div = np.sum(vn_geofac, axis=1)
e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1)
diff_multfac_smag = np.expand_dims(diff_multfac_smag, axis=0)
mul = kh_smag_ec[c2e] * e_bln_c_s[grid.get_offset_provider("C2CE").table]

vn_geofac = vn[c2e] * geofac_div[c2ce]
div = np.sum(vn_geofac, axis=1)
mul = kh_smag_ec[c2e] * e_bln_c_s[c2ce]
summed = np.sum(mul, axis=1)
kh_c = summed / diff_multfac_smag

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ def add_interpolated_horizontal_advection_of_w_numpy(
grid, e_bln_c_s: np.array, z_v_grad_w: np.array, ddt_w_adv: np.array, **kwargs
) -> np.array:
e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1)
c2ce = grid.get_offset_provider("C2CE").table

ddt_w_adv = ddt_w_adv + np.sum(
z_v_grad_w[grid.connectivities[C2EDim]] * e_bln_c_s[grid.get_offset_provider("C2CE").table],
z_v_grad_w[grid.connectivities[C2EDim]] * e_bln_c_s[c2ce],
axis=1,
)
return ddt_w_adv
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def interpolate_to_cell_center_numpy(
grid, interpolant: np.array, e_bln_c_s: np.array, **kwargs
) -> np.array:
e_bln_c_s = np.expand_dims(e_bln_c_s, axis=-1)
c2ce = grid.get_offset_provider("C2CE").table

interpolation = np.sum(
interpolant[grid.connectivities[C2EDim]]
* e_bln_c_s[grid.get_offset_provider("C2CE").table],
interpolant[grid.connectivities[C2EDim]] * e_bln_c_s[c2ce],
axis=1,
)
return interpolation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from icon4py.model.common.type_alias import vpfloat, wpfloat


class TestComputeDivergenceOfFluxesOfRhoAndTheta(StencilTest):
class TestComputeDivergenconnectivityceOfFluxesOfRhoAndTheta(StencilTest):
PROGRAM = compute_divergence_of_fluxes_of_rho_and_theta
OUTPUTS = ("z_flxdiv_mass", "z_flxdiv_theta")

Expand All @@ -42,12 +42,14 @@ def reference(
) -> tuple[np.array]:
c2e = grid.connectivities[C2EDim]
geofac_div = np.expand_dims(geofac_div, axis=-1)
c2ce = grid.get_offset_provider("C2CE").table

z_flxdiv_mass = np.sum(
geofac_div[grid.get_offset_provider("C2CE").table] * mass_fl_e[c2e],
geofac_div[c2ce] * mass_fl_e[c2e],
axis=1,
)
z_flxdiv_theta = np.sum(
geofac_div[grid.get_offset_provider("C2CE").table] * z_theta_v_fl_e[c2e],
geofac_div[c2ce] * z_theta_v_fl_e[c2e],
axis=1,
)
return dict(z_flxdiv_mass=z_flxdiv_mass, z_flxdiv_theta=z_flxdiv_theta)
Expand Down
135 changes: 135 additions & 0 deletions model/common/src/icon4py/model/common/caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# ICON4Py - ICON inspired code in Python and GT4Py
#
# Copyright (c) 2022, ETH Zurich and MeteoSwiss
# All rights reserved.
#
# This file is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or any later
# version. See the LICENSE.txt file at the top-level directory of this
# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

import dataclasses
from typing import Any, Callable, Optional

import numpy as np
from gt4py import next as gtx
from gt4py.next.otf import workflow
from gt4py.next.program_processors.runners.gtfn import extract_connectivity_args

from icon4py.model.common.settings import device


try:
import cupy as cp
from gt4py.next.embedded.nd_array_field import CuPyArrayField
except ImportError:
cp: Optional = None # type:ignore[no-redef]

from gt4py.next.embedded.nd_array_field import NumPyArrayField


def handle_numpy_integer(value):
return int(value)


def handle_common_field(value, sizes):
sizes.extend(value.shape)
return value # Return the value unmodified, but side-effect on sizes


def handle_default(value):
return value # Return the value unchanged


if cp:
type_handlers = {
np.integer: handle_numpy_integer,
NumPyArrayField: handle_common_field,
CuPyArrayField: handle_common_field,
}
else:
type_handlers = {
np.integer: handle_numpy_integer,
NumPyArrayField: handle_common_field,
}


def process_arg(value, sizes):
handler = type_handlers.get(type(value), handle_default)
return handler(value, sizes) if handler == handle_common_field else handler(value)


@dataclasses.dataclass
class CachedProgram:
"""Class to handle caching and compilation of GT4Py programs.
This class is responsible for caching and compiling GT4Py programs
with optional domain information. The compiled program and its
connectivity arguments are stored for efficient execution.
Attributes:
program (gtx.ffront.decorator.Program): The GT4Py program to be cached and compiled.
with_domain (bool): Flag to indicate if the program should be compiled with domain information. Defaults to True.
_compiled_program (Optional[Callable]): The compiled GT4Py program.
_conn_args (Any): Connectivity arguments extracted from the offset provider.
_compiled_args (tuple): Arguments used during the compilation of the program.
Properties:
compiled_program (Callable): Returns the compiled GT4Py program.
conn_args (Any): Returns the connectivity arguments.
Note:
This functionality will be provided by GT4Py in the future.
"""

program: gtx.ffront.decorator.Program
with_domain: bool = True
_compiled_program: Optional[Callable] = None
_conn_args: Any = None
_compiled_args: tuple = dataclasses.field(default_factory=tuple)

@property
def compiled_program(self) -> Callable:
return self._compiled_program

@property
def conn_args(self) -> Callable:
return self._conn_args

def compile_the_program(
self, *args, offset_provider: dict[str, gtx.Dimension], **kwargs: Any
) -> Callable:
backend = self.program.backend
program_call = backend.transforms_prog(
workflow.InputWithArgs(
data=self.program.definition_stage,
args=args,
kwargs=kwargs | {"offset_provider": offset_provider},
)
)
self._compiled_args = program_call.args
return backend.executor.otf_workflow(program_call)

def __call__(self, *args, offset_provider: dict[str, gtx.Dimension], **kwargs: Any) -> None:
if not self.compiled_program:
self._compiled_program = self.compile_the_program(
*args, offset_provider=offset_provider, **kwargs
)
self._conn_args = extract_connectivity_args(offset_provider, device)

kwargs_as_tuples = tuple(kwargs.values())
program_args = list(args) + list(kwargs_as_tuples)
sizes = []

# Convert numpy integers in args to int and handle gtx.common.Field
for i in range(len(program_args)):
program_args[i] = process_arg(program_args[i], sizes)

if not self.with_domain:
program_args.extend(sizes)

# todo(samkellerhals): if we merge gt4py PR we can also pass connectivity args here conn_args=self.conn_args
return self.compiled_program(*program_args, offset_provider=offset_provider)
4 changes: 4 additions & 0 deletions model/common/src/icon4py/model/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,7 @@ def device(self):
}
device = device_map[self.icon4py_backend]
return device

@cached_property
def limited_area(self):
return os.environ.get("ICON4PY_LAM", False)
Loading

0 comments on commit 1752f87

Please sign in to comment.