diff --git a/compiler/mx.compiler/mx_graal_benchmark.py b/compiler/mx.compiler/mx_graal_benchmark.py
index 38245a0c0b9e..37c8d3b094d5 100644
--- a/compiler/mx.compiler/mx_graal_benchmark.py
+++ b/compiler/mx.compiler/mx_graal_benchmark.py
@@ -26,12 +26,15 @@
 import re
 import os
 from tempfile import mkstemp
+from typing import List, Optional

 import mx
 import mx_benchmark
 import mx_sdk_benchmark
 import mx_compiler
 from mx_java_benchmarks import DaCapoBenchmarkSuite, ScalaDaCapoBenchmarkSuite
+from mx_benchmark import DataPoints
+from mx_sdk_benchmark import SUCCESSFUL_STAGE_PATTERNS

 _suite = mx.suite('compiler')

@@ -406,7 +409,45 @@ def benchSuiteName(self, bmSuiteArgs=None):

 mx_benchmark.add_bm_suite(ScalaDaCapoTimingBenchmarkSuite())


-class JMHNativeImageBenchmarkMixin(mx_sdk_benchmark.NativeImageBenchmarkMixin):
+class JMHNativeImageBenchmarkMixin(mx_benchmark.JMHBenchmarkSuiteBase, mx_sdk_benchmark.NativeImageBenchmarkMixin):
+
+    def get_jmh_result_file(self, bm_suite_args: List[str]) -> Optional[str]:
+        """
+        Only generate a JMH result file in the run stage. Otherwise the file-based rule (see
+        :class:`mx_benchmark.JMHJsonRule`) will produce datapoints at every stage, based on results from a previous
+        stage.
+        """
+        if self.is_native_mode(bm_suite_args) and not self.stages_info.fallback_mode:
+            # At this point, the StagesInfo class may not have all the information yet; in that case we rely on the
+            # requested stage. But if this function is called again later, when it is fully set up, we have to use the
+            # effective stage instead.
+            # This is important so that the JMH parsing rule is only enabled when the stage actually ran (if it is
+            # skipped, it would otherwise pick up a previous result file).
+            if self.stages_info.is_set_up:
+                current_stage = self.stages_info.effective_stage
+            else:
+                current_stage = self.stages_info.requested_stage
+
+            if current_stage not in ["agent", "instrument-run", "run"]:
+                return None
+
+        return super().get_jmh_result_file(bm_suite_args)
+
+    def fallback_mode_reason(self, bm_suite_args: List[str]) -> Optional[str]:
+        """
+        JMH benchmarks need to use the fallback mode if --jmh-run-individually is used.
+        The flag causes one native image to be built per JMH benchmark. This is fundamentally incompatible with the
+        default benchmarking mode of running each stage on its own because a benchmark will overwrite the intermediate
+        files of the previous benchmark if not all stages are run at once.
+
+        In the fallback mode, collection of performance data is limited. Only performance data of the ``run`` stage can
+        reliably be collected. Other metrics, such as image build statistics or profiling performance, cannot reliably be
+        collected because they cannot be attributed to a specific individual JMH benchmark.
+ """ + if self.jmhArgs(bm_suite_args).jmh_run_individually: + return "--jmh-run-individually is not compatible with selecting individual stages" + else: + return None def extra_image_build_argument(self, benchmark, args): # JMH does HotSpot-specific field offset checks in class initializers @@ -462,6 +503,9 @@ def group(self): def subgroup(self): return "graal-compiler" + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + mx_benchmark.add_bm_suite(JMHRunnerGraalCoreBenchmarkSuite()) @@ -477,6 +521,9 @@ def group(self): def subgroup(self): return "graal-compiler" + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + mx_benchmark.add_bm_suite(JMHJarGraalCoreBenchmarkSuite()) @@ -492,10 +539,16 @@ def group(self): def subgroup(self): return "graal-compiler" + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def filter_distribution(self, dist): return super(JMHDistGraalCoreBenchmarkSuite, self).filter_distribution(dist) and \ not JMHDistWhiteboxBenchmarkSuite.is_whitebox_dependency(dist) + def successPatterns(self): + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS + mx_benchmark.add_bm_suite(JMHDistGraalCoreBenchmarkSuite()) @@ -511,6 +564,9 @@ def group(self): def subgroup(self): return "graal-compiler" + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + @staticmethod def is_whitebox_dependency(dist): return hasattr(dist, 'graalWhiteboxDistribution') and dist.graalWhiteboxDistribution @@ -542,5 +598,8 @@ def getJMHEntry(self, bmSuiteArgs): assert self.dist return [mx.distribution(self.dist).mainClass] + def successPatterns(self): + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS + mx_benchmark.add_bm_suite(JMHDistWhiteboxBenchmarkSuite()) diff --git a/java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py b/java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py index 0b0b036ae30d..2d2bf32dafac 100644 --- a/java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py +++ b/java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py @@ -35,7 +35,7 @@ import mx import mx_benchmark -from mx_benchmark import ParserEntry +from mx_benchmark import ParserEntry, DataPoints import mx_sdk_benchmark from mx_sdk_benchmark import NativeImageBundleBasedBenchmarkMixin import mx_sdk_vm_impl @@ -161,12 +161,8 @@ def skip_agent_assertions(self, benchmark, args): else: return [] - def stages(self, args): - # This method overrides NativeImageMixin.stages - parsed_arg = mx_sdk_benchmark.parse_prefixed_arg('-Dnative-image.benchmark.stages=', args, 'Native Image benchmark stages should only be specified once.') - return parsed_arg.split(',') if parsed_arg else self.default_stages() - def default_stages(self): + # This method is used by NativeImageMixin.stages raise NotImplementedError() @@ -186,7 +182,6 @@ def get_application_startup_units(self): return 's' def default_stages(self): - # This method overrides NativeImageMixin.stages return ['instrument-image', 'instrument-run', 'image', 'run'] def uses_bundles(self): @@ -420,6 +415,9 @@ def benchmarkList(self, bmSuiteArgs): def default_stages(self): return ['image'] + def run(self, benchmarks, bmSuiteArgs): + self.intercept_run(super(), benchmarks, bmSuiteArgs) + def createCommandLineArgs(self, benchmarks, bmSuiteArgs): if benchmarks is None: mx.abort("Suite can only run a 
single benchmark per VM instance.") @@ -1952,7 +1950,7 @@ def rules(self, out, benchmarks, bmSuiteArgs): ) ] - def run(self, benchmarks, bmSuiteArgs): + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: results = super(RenaissanceBenchmarkSuite, self).run(benchmarks, bmSuiteArgs) self.addAverageAcrossLatestResults(results) return results @@ -2029,7 +2027,7 @@ def getExtraIterationCount(self, iterations): # We average over the last 2 out of 3 total iterations done by this suite. return 2 - def run(self, benchmarks, bmSuiteArgs): + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: runretval = self.runAndReturnStdOut(benchmarks, bmSuiteArgs) retcode, out, dims = runretval self.validateStdoutWithDimensions( @@ -2164,7 +2162,7 @@ def rules(self, out, benchmarks, bmSuiteArgs): ) ] - def run(self, benchmarks, bmSuiteArgs): + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: results = super(AWFYBenchmarkSuite, self).run(benchmarks, bmSuiteArgs) self.addAverageAcrossLatestResults(results) return results diff --git a/sdk/mx.sdk/mx_sdk_benchmark.py b/sdk/mx.sdk/mx_sdk_benchmark.py index 4029d71014b0..7ee326c858b2 100644 --- a/sdk/mx.sdk/mx_sdk_benchmark.py +++ b/sdk/mx.sdk/mx_sdk_benchmark.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # The Universal Permissive License (UPL), Version 1.0 @@ -47,6 +47,8 @@ import threading import json import argparse +from typing import List, Optional, Set + import mx import mx_benchmark import datetime @@ -55,6 +57,27 @@ import urllib.request import mx_sdk_vm_impl +from mx_benchmark import DataPoints, DataPoint, BenchmarkSuite + +STAGE_LAST_SUCCESSFUL_PREFIX: str = "Successfully finished the last specified stage:" +STAGE_SUCCESSFUL_PREFIX: str = "Successfully finished stage:" +STAGE_SKIPPED_PREFIX: str = "Skipping stage:" + +SUCCESSFUL_STAGE_PATTERNS = [re.compile(p, re.MULTILINE) for p in [ + # Produced when the last stage as requested by the user (see: NativeImageBenchmarkMixin.stages) finished + rf"{STAGE_LAST_SUCCESSFUL_PREFIX}.*$", + # Produced when any other stage finishes + rf"{STAGE_SUCCESSFUL_PREFIX}.*$", + # Produced when a stage is skipped for some reason (e.g. the specific configuration does not require it) + rf"{STAGE_SKIPPED_PREFIX}.*$", +]] +""" +List of regex patterns to use in successPatterns() to match the successful completion of a Native Image benchmark stage. +Native Image benchmarks run in stages and not all stages have the expected success pattern for the benchmark suite, +which generally only matches the benchmark run output and nothing in the build output. +Instead, each benchmark stage produces one of the following messages to signal that the stage completed and bypass the +original success pattern check. +""" def parse_prefixed_args(prefix, args): @@ -162,16 +185,287 @@ def _strip_arg_with_number_gen(_strip_arg, _args): return list(result) +class StagesInfo: + """ + Holds information about benchmark stages that should be persisted across multiple stages in the same + ``mx benchmark`` command. + + Is used to pass data between the benchmark suite and the underlying :class:`mx_benchmark.Vm`. 
+
+    The information about the stages comes from two layers:
+
+    * The stages requested by the user and passed into the object during creation
+    * And the effectively executed stages, which are determined in the `NativeImageBenchmarkConfig` class and passed to
+      this object the first time we call into the ``NativeImageVM``.
+      The effective stages are determined by removing stages that don't need to run in the current benchmark
+      configuration from the requested stages.
+      This information is only available if :attr:`is_set_up` returns ``True``.
+    """
+
+    def __init__(self, requested_stages: List[str], fallback_mode: bool = False):
+        """
+        :param requested_stages: List of stages requested by the user. See also :meth:`NativeImageBenchmarkMixin.stages`
+                                 and :attr:`StagesInfo.effective_stages`
+        """
+        self._is_set_up: bool = False
+        self._requested_stages = requested_stages
+        self._removed_stages: Set[str] = set()
+        self._effective_stages: Optional[List[str]] = None
+        self._stages_till_now: List[str] = []
+        self._requested_stage: Optional[str] = None
+        # Computed lazily
+        self._skip_current_stage: Optional[bool] = None
+        self._failed: bool = False
+        self._fallback_mode = fallback_mode
+
+    @property
+    def fallback_mode(self) -> bool:
+        return self._fallback_mode
+
+    @property
+    def is_set_up(self) -> bool:
+        return self._is_set_up
+
+    def setup(self, removed_stages: Set[str]) -> None:
+        """
+        Fully configures the object with information about removed stages.
+
+        From that, the effective list of stages can be computed.
+        Only after this method is called for the first time can the effective stages be accessed.
+
+        :param removed_stages: Set of stages that should not be executed under this benchmark configuration
+        """
+        if self.is_set_up:
+            # This object is used again for an additional stage.
+            # Sanity check to make sure the removed stages are the same as in the first call
+            assert self._removed_stages == removed_stages, f"Removed stages differ between executed stages: {self._removed_stages} != {removed_stages}"
+        else:
+            self._removed_stages = removed_stages
+            # requested_stages - removed_stages while preserving order of requested_stages
+            self._effective_stages = [s for s in self._requested_stages if s not in removed_stages]
+            self._is_set_up = True
+
+    @property
+    def effective_stages(self) -> List[str]:
+        """
+        List of stages that are actually executed for this benchmark (is equal to requested_stages - removed_stages)
+        """
+        assert self.is_set_up
+        return self._effective_stages
+
+    @property
+    def skip_current_stage(self) -> bool:
+        if self._skip_current_stage is None:
+            self._skip_current_stage = self._requested_stage not in self.effective_stages
+        return self._skip_current_stage
+
+    @property
+    def requested_stage(self) -> str:
+        """
+        The stage that was last requested to be executed.
+        It is not guaranteed that this stage will be executed; it could be a skipped stage (see
+        :attr:`StagesInfo.skip_current_stage`).
+
+        Use this for informational output; prefer :attr:`StagesInfo.effective_stage` to compare against the effectively
+        executed stage.
+        """
+        assert self._requested_stage, "No current stage set"
+        return self._requested_stage
+
+    @property
+    def effective_stage(self) -> Optional[str]:
+        """
+        Same as :meth:`StagesInfo.requested_stage`, but returns None if the stage is skipped.
+ """ + return None if self.skip_current_stage else self.requested_stage + + @property + def last_stage(self) -> str: + return self.effective_stages[-1] + + @property + def failed(self) -> bool: + return self._failed + + @property + def stages_till_now(self) -> List[str]: + """ + List of stages executed so far, all of which have been successful. + + Does not include the current stage. + """ + return self._stages_till_now + + def change_stage(self, stage_name: str) -> None: + self._requested_stage = stage_name + # Force recomputation + self._skip_current_stage = None + + def success(self) -> None: + assert self.is_set_up + """Called when the current stage has finished successfully""" + self._stages_till_now.append(self.requested_stage) + + def fail(self) -> None: + assert self.is_set_up + """Called when the current stage finished with an error""" + self._failed = True + + class NativeImageBenchmarkMixin(object): + """ + Mixin extended by :class:`BenchmarkSuite` classes to enable a JVM bench suite to run as a Native Image benchmark. + + IMPORTANT: All Native Image benchmarks (including JVM benchmarks that are also used in Native Image benchmarks) must + explicitly call :meth:`NativeImageBenchmarkMixin.intercept_run` in order for benchmarking to work. + See description of that method for more information. + + Native Image benchmarks are run in stages: agent, instrument-image, instrument-run, image, run + Each stage produces intermediate files required by subsequent phases until the final ``run`` stage runs the final + Native Image executable to produce performance results. + However, it is worth collecting certain performance metrics from any of the other stages as well (e.g. compiler + performance). + + The mixin's ``intercept_run`` method calls into the ``mx_vm_benchmark.NativeImageVM`` once per stage to run that + stage and produce datapoints for that stage only. + This is a bit of a hack since each stage can be seen as its own full benchmark execution (does some operations and + produces datapoints), but it works well in most cases. + + Limitations + ----------- + + This mode of benchmarking cannot fully support arbitrary benchmarking suites without modification. + + Because of each stage effectively being its own benchmark execution, rules that unconditionally produce datapoints + will misbehave as they will produce datapoints in each stage (even though, it is likely only meant to produce + benchmark performance datapoints). + For example, if a benchmark suite has rules that read the performance data from a file (e.g. JMH), those rules will + happily read that file and produce performance data in every stage (even the ``image`` stages). + Such rules need to be modified to only trigger in the desired stages. Either by parsing the file location out of the + benchmark output or by writing some Native Image specific logic (with :meth:`is_native_mode`) + An example for such a workaround are :class:`mx_benchmark.JMHBenchmarkSuiteBase` and its subclasses (see + ``get_jmh_result_file``, its usages and its Native Image specific implementation in + :class:`mx_java_benchmark.JMHNativeImageBenchmarkMixin`) + + If the benchmark suite itself dispatches into the VM multiple times (in addition to the mixin doing it once per + stage), care must be taken in which order this happens. + If these multiple dispatches happen in a (transitive) callee of ``intercept_run``, each dispatch will first happen + for the first stage and only after the next stage will be run. 
In that order, a subsequent dispatch may overwrite + intermediate files of the previous dispatch of the same stage (e.g. executables). + For this to work as expected, ``intercept_run`` needs to be a callee of these multiple dispatches, i.e. these + multiple dispatches also need to happen in the ``run`` method and (indirectly) call ``intercept_run``. + + If these limitations cannot be worked around, using the fallback mode may be required, with the caveat that it + provides limited functionality. + + Fallback Mode + ------------- + + Fallback mode is for benchmarks that are fundamentally incompatible with how this mixin dispatches into the + ``NativeImageVM`` once per stage (e.g. JMH with the ``--jmh-run-individually`` flag). + The conditional logic to enable fallback mode can be implemented by overriding :meth:`fallback_mode_reason`. + + In fallback mode, we only call into the VM once and it runs all stages in sequence. This limits what kind of + performance data we can accurately collect (e.g. it is impossible to distinguish benchmark output from the + ``instrument-run`` and ``run`` phases). + Because of that, only the output of the ``image`` and ``run`` stages is returned from the VM (the remainder is still + printed, but not used for regex matching when creating datapoints). + + Additionally, the user cannot select only a subset of stages to run (using ``-Dnative-image.benchmark.stages``). + All stages required for that benchmark are always run together. + """ def __init__(self): self.benchmark_name = None + self.stages_info: Optional[StagesInfo] = None def benchmarkName(self): if not self.benchmark_name: raise NotImplementedError() return self.benchmark_name + def fallback_mode_reason(self, bm_suite_args: List[str]) -> Optional[str]: + """ + Reason why this Native Image benchmark should run in fallback mode. + + :return: None if no fallback is required. Otherwise, a non-empty string describing why fallback mode is necessary + """ + return None + + def intercept_run(self, super_delegate: BenchmarkSuite, benchmarks, bm_suite_args: List[str]) -> DataPoints: + """ + Intercepts the main benchmark execution (:meth:`BenchmarkSuite.run`) and runs a series of benchmark stages + required for Native Image benchmarks in series. + For non-native-image benchmarks, this simply delegates to the caller's ``super().run`` method. + + The stages are requested by the user (see :meth:`NativeImageBenchmarkMixin.stages`). + + There are no good ways to just intercept ``run`` in arbitrary ``BenchmarkSuite``s, so each + :class:`BenchmarkSuite` subclass that is intended for Native Image benchmarking needs to make sure that the + :meth:`BenchmarkSuite.run` calls into this method like this:: + + def run(self, benchmarks, bm_suite_args: List[str]) -> DataPoints: + return self.intercept_run(super(), benchmarks, bm_suite_args) + + It is fine if this implemented in a common (Native Image-specific) superclass of multiple benchmark suites, as + long as the method is not overriden in a subclass in an incompatible way. + + :param super_delegate: A reference to the caller class' superclass in method-resolution order (MRO). 
+ :param benchmarks: Passed to :meth:`BenchmarkSuite.run` + :param bm_suite_args: Passed to :meth:`BenchmarkSuite.run` + :return: Datapoints accumulated from all stages + """ + if not self.is_native_mode(bm_suite_args): + # This is not a Native Image benchmark, just run the benchmark as regular + return super_delegate.run(benchmarks, bm_suite_args) + + datapoints: List[DataPoint] = [] + + requested_stages = self.stages(bm_suite_args) + + fallback_reason = self.fallback_mode_reason(bm_suite_args) + if fallback_reason: + # In fallback mode, all stages are run at once. There is matching code in `NativeImageVM.run_java` for this. + mx.log(f"Running benchmark in fallback mode (reason: {fallback_reason})") + self.stages_info = StagesInfo(requested_stages, True) + datapoints += super_delegate.run(benchmarks, bm_suite_args) + else: + self.stages_info = StagesInfo(requested_stages) + + for stage in requested_stages: + self.stages_info.change_stage(stage) + # Start the actual benchmark execution. The stages_info attribute will be used by the NativeImageVM to + # determine which stage to run this time. + stage_dps = super_delegate.run(benchmarks, bm_suite_args) + NativeImageBenchmarkMixin._inject_stage_keys(stage_dps, stage) + datapoints += stage_dps + + self.stages_info = None + return datapoints + + @staticmethod + def _inject_stage_keys(dps: DataPoints, stage: str) -> None: + """ + Modifies the ``host-vm-config`` key based on the current stage. + For the agent and instrument stages ``-agent`` and ``-instrument`` are appended to distinguish the datapoints + from the main ``image`` and ``run`` phases. + + :param dps: List of datapoints, modified in-place + :param stage: The stage the datapoints were generated in + """ + + if stage == "agent": + host_vm_suffix = "-agent" + elif stage in ["instrument-image", "instrument-run"]: + host_vm_suffix = "-instrument" + elif stage in ["image", "run"]: + host_vm_suffix = "" + else: + raise ValueError(f"Unknown stage {stage}") + + for dp in dps: + dp["host-vm-config"] += host_vm_suffix + def run_stage(self, vm, stage, command, out, err, cwd, nonZeroIsFatal): final_command = command if stage == 'run': @@ -179,6 +473,10 @@ def run_stage(self, vm, stage, command, out, err, cwd, nonZeroIsFatal): return mx.run(final_command, out=out, err=err, cwd=cwd, nonZeroIsFatal=nonZeroIsFatal) + def is_native_mode(self, bm_suite_args: List[str]): + """Checks whether the given arguments request a Native Image benchmark""" + return "native-image" in self.jvm(bm_suite_args) + def apply_command_mapper_hooks(self, cmd, vm): return mx.apply_command_mapper_hooks(cmd, vm.command_mapper_hooks) @@ -253,9 +551,28 @@ def benchmark_output_dir(self, _, args): else: return None - def stages(self, args): + def stages(self, bm_suite_args: List[str]) -> List[str]: + """ + Benchmark stages requested by the user with ``-Dnative-image.benchmark.stages=``. + + Falls back to :meth:`NativeImageBenchmarkMixin.default_stages` if not specified. 
+ """ + args = self.vmArgs(bm_suite_args) parsed_arg = parse_prefixed_arg('-Dnative-image.benchmark.stages=', args, 'Native Image benchmark stages should only be specified once.') - return parsed_arg.split(',') if parsed_arg else ['agent', 'instrument-image', 'instrument-run', 'image', 'run'] + + fallback_reason = self.fallback_mode_reason(bm_suite_args) + if parsed_arg and fallback_reason: + mx.abort( + "This benchmarking configuration is running in fallback mode and does not support selection of benchmark stages using -Dnative-image.benchmark.stages" + f"Reason: {fallback_reason}\n" + f"Arguments: {bm_suite_args}" + ) + + return parsed_arg.split(',') if parsed_arg else self.default_stages() + + def default_stages(self) -> List[str]: + """Default list of stages to run if none have been specified.""" + return ["agent", "instrument-image", "instrument-run", "image", "run"] def skip_agent_assertions(self, _, args): parsed_args = parse_prefixed_args('-Dnative-image.benchmark.skip-agent-assertions=', args) @@ -422,9 +739,6 @@ def requestHeaders(self): """ return {} - def inNativeMode(self): - return "native-image" in self.jvm(self.bmSuiteArgs) - def createCommandLineArgs(self, benchmarks, bmSuiteArgs): return self.vmArgs(bmSuiteArgs) + ["-jar", self.applicationPath()] @@ -451,12 +765,12 @@ def runAndReturnStdOut(self, benchmarks, bmSuiteArgs): ret_code, applicationOutput, dims = super(BaseMicroserviceBenchmarkSuite, self).runAndReturnStdOut(benchmarks, bmSuiteArgs) result = ret_code, "\n".join(self.timeToFirstResponseOutputs) + '\n' + self.startupOutput + '\n' + self.peakOutput + '\n' + self.latencyOutput + '\n' + applicationOutput, dims - # For HotSpot, the rules are executed after every execution. So, it is necessary to reset the data to avoid duplication of datapoints. - if not self.inNativeMode(): - self.timeToFirstResponseOutputs = [] - self.startupOutput = '' - self.peakOutput = '' - self.latencyOutput = '' + # For HotSpot, the rules are executed after every execution and for Native Image the rules are applied after each stage. + # So, it is necessary to reset the data to avoid duplication of datapoints. 
+ self.timeToFirstResponseOutputs = [] + self.startupOutput = '' + self.peakOutput = '' + self.latencyOutput = '' return result @@ -642,7 +956,7 @@ def computePeakThroughputRSS(self, datapoints): else: return None - def validateStdoutWithDimensions(self, out, benchmarks, bmSuiteArgs, retcode=None, dims=None, extraRules=None): + def validateStdoutWithDimensions(self, out, benchmarks, bmSuiteArgs, retcode=None, dims=None, extraRules=None) -> DataPoints: datapoints = super(BaseMicroserviceBenchmarkSuite, self).validateStdoutWithDimensions( out=out, benchmarks=benchmarks, bmSuiteArgs=bmSuiteArgs, retcode=retcode, dims=dims, extraRules=extraRules) @@ -652,7 +966,7 @@ def validateStdoutWithDimensions(self, out, benchmarks, bmSuiteArgs, retcode=Non return datapoints - def run(self, benchmarks, bmSuiteArgs): + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: if len(benchmarks) > 1: mx.abort("A single benchmark should be specified for {0}.".format(BaseMicroserviceBenchmarkSuite.__name__)) self.bmSuiteArgs = bmSuiteArgs @@ -664,7 +978,7 @@ def run(self, benchmarks, bmSuiteArgs): self.measureStartup = not args.skip_startup_measurements self.measurePeak = not args.skip_peak_measurements - if not self.inNativeMode(): + if not self.is_native_mode(self.bmSuiteArgs): datapoints = [] if self.measureFirstResponse: # Measure time-to-first-response (without any command mapper hooks as those affect the measurement significantly) @@ -847,8 +1161,8 @@ def testLatency(self): def tailDatapointsToSkip(self, results): return int(len(results) * .10) - def run(self, benchmarks, bmSuiteArgs): - results = super(BaseJMeterBenchmarkSuite, self).run(benchmarks, bmSuiteArgs) + def run(self, benchmarks, bmSuiteArgs) -> DataPoints: + results = self.intercept_run(super(), benchmarks, bmSuiteArgs) results = results[:len(results) - self.tailDatapointsToSkip(results)] self.addAverageAcrossLatestResults(results, "throughput") return results @@ -1189,6 +1503,9 @@ def getOS(self): else: mx.abort("{0} not supported in {1}.".format(BaseWrkBenchmarkSuite.__name__, mx.get_os())) + def run(self, benchmarks, bmSuiteArgs): + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def rules(self, out, benchmarks, bmSuiteArgs): # Example of wrk output: # "Requests/sec: 5453.61" diff --git a/substratevm/mx.substratevm/mx_substratevm_benchmark.py b/substratevm/mx.substratevm/mx_substratevm_benchmark.py index e3be4f49c8c9..34041bc8bf6e 100644 --- a/substratevm/mx.substratevm/mx_substratevm_benchmark.py +++ b/substratevm/mx.substratevm/mx_substratevm_benchmark.py @@ -26,7 +26,6 @@ from __future__ import print_function import os -import re import tempfile import zipfile from glob import glob @@ -35,9 +34,9 @@ import mx_benchmark import mx_java_benchmarks import mx_sdk_benchmark +from mx_sdk_benchmark import SUCCESSFUL_STAGE_PATTERNS _suite = mx.suite("substratevm") -_successful_stage_pattern = re.compile(r'Successfully finished the last specified stage:.*$', re.MULTILINE) def extract_archive(path, extracted_name): @@ -191,6 +190,9 @@ def standalone_jar_path(self, benchmark_name): standalone_jars_directory = "single" return os.path.join(self.renaissance_unpacked(), standalone_jars_directory, "{}.jar".format(benchmark_name)) + def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def extra_run_arg(self, benchmark, args, image_run_args): run_args = super(RenaissanceNativeImageBenchmarkSuite, self).extra_run_arg(benchmark, args, image_run_args) if benchmark == 
"dotty" and self.version() not in ["0.9.0", "0.10.0", "0.11.0", "0.12.0", "0.13.0"]: @@ -262,9 +264,7 @@ def createCommandLineArgs(self, benchmarks, bmSuiteArgs): return vm_args + ["-jar", self.standalone_jar_path(self.benchmarkName())] + run_args + [self.benchmarkName()] def successPatterns(self): - return super(RenaissanceNativeImageBenchmarkSuite, self).successPatterns() + [ - _successful_stage_pattern - ] + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS mx_benchmark.add_bm_suite(RenaissanceNativeImageBenchmarkSuite()) @@ -439,6 +439,9 @@ def daCapoIterations(self): def benchmark_resources(self, benchmark): return _dacapo_resources[benchmark] + def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def extra_agent_run_arg(self, benchmark, args, image_run_args): user_args = super(DaCapoNativeImageBenchmarkSuite, self).extra_agent_run_arg(benchmark, args, image_run_args) # remove -n X argument from image run args @@ -483,9 +486,7 @@ def create_classpath(self, benchmark): return cp def successPatterns(self): - return super(DaCapoNativeImageBenchmarkSuite, self).successPatterns() + [ - _successful_stage_pattern - ] + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS mx_benchmark.add_bm_suite(DaCapoNativeImageBenchmarkSuite()) @@ -560,6 +561,9 @@ def daCapoIterations(self): def benchmark_resources(self, benchmark): return _scala_dacapo_resources[benchmark] + def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def extra_agent_run_arg(self, benchmark, args, image_run_args): user_args = super(ScalaDaCapoNativeImageBenchmarkSuite, self).extra_agent_run_arg(benchmark, args, image_run_args) # remove -n X argument from image run args @@ -606,9 +610,7 @@ def create_classpath(self, benchmark): return cp def successPatterns(self): - return super(ScalaDaCapoNativeImageBenchmarkSuite, self).successPatterns() + [ - _successful_stage_pattern - ] + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS @staticmethod def substitution_path(): @@ -632,6 +634,9 @@ def name(self): def benchSuiteName(self, bmSuiteArgs=None): return 'console' + def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def createCommandLineArgs(self, benchmarks, bmSuiteArgs): args = super(ConsoleNativeImageBenchmarkSuite, self).createCommandLineArgs(benchmarks, bmSuiteArgs) self.benchmark_name = benchmarks[0] @@ -659,6 +664,9 @@ def name(self): def benchSuiteName(self, bmSuiteArgs=None): return 'specjvm2008' + def run(self, benchmarks, bmSuiteArgs) -> mx_benchmark.DataPoints: + return self.intercept_run(super(), benchmarks, bmSuiteArgs) + def createCommandLineArgs(self, benchmarks, bmSuiteArgs): args = super().createCommandLineArgs(benchmarks, bmSuiteArgs) @@ -685,6 +693,6 @@ def extra_run_arg(self, benchmark, args, image_run_args): return super().extra_run_arg(benchmark, args, image_run_args) + SpecJVM2008NativeImageBenchmarkSuite.long_run_args def successPatterns(self): - return super().successPatterns() + [_successful_stage_pattern] + return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS mx_benchmark.add_bm_suite(SpecJVM2008NativeImageBenchmarkSuite()) diff --git a/vm/mx.vm/mx_vm_benchmark.py b/vm/mx.vm/mx_vm_benchmark.py index 83e40148631c..86c80c52663e 100644 --- a/vm/mx.vm/mx_vm_benchmark.py +++ b/vm/mx.vm/mx_vm_benchmark.py @@ -22,6 +22,7 @@ # or visit 
www.oracle.com if you need additional information or have any # questions. # +from __future__ import annotations import datetime import os @@ -30,17 +31,21 @@ import tempfile import json from genericpath import exists -from os.path import basename, dirname, getsize, join +from os.path import basename, dirname, getsize from traceback import print_tb -import inspect import subprocess import zipfile +from typing import Iterable, Optional import mx import mx_benchmark +import mx_sdk_benchmark +import mx_util import mx_sdk_vm import mx_sdk_vm_impl from mx_sdk_vm_impl import svm_experimental_options +from mx_benchmark import DataPoint, DataPoints, BenchmarkSuite +from mx_sdk_benchmark import StagesInfo, NativeImageBenchmarkMixin _suite = mx.suite('vm') _polybench_vm_registry = mx_benchmark.VmRegistry('PolyBench', 'polybench-vm') @@ -120,149 +125,293 @@ def run_launcher(self, cmd, args, cwd): return code, out, dims +class NativeImageBenchmarkConfig: + def __init__(self, vm: "NativeImageVM", bm_suite: BenchmarkSuite | NativeImageBenchmarkMixin, args): + self.bm_suite = bm_suite + self.benchmark_suite_name = bm_suite.benchSuiteName(args) + self.benchmark_name = bm_suite.benchmarkName() + self.executable, self.classpath_arguments, self.modulepath_arguments, self.system_properties, self.image_vm_args, image_run_args, self.split_run = NativeImageVM.extract_benchmark_arguments(args) + self.extra_image_build_arguments = bm_suite.extra_image_build_argument(self.benchmark_name, args) + # use list() to create fresh copies to safeguard against accidental modification + self.image_run_args = bm_suite.extra_run_arg(self.benchmark_name, args, list(image_run_args)) + self.extra_jvm_args = bm_suite.extra_jvm_arg(self.benchmark_name, args) + self.extra_agent_run_args = bm_suite.extra_agent_run_arg(self.benchmark_name, args, list(image_run_args)) + self.extra_agentlib_options = bm_suite.extra_agentlib_options(self.benchmark_name, args, list(image_run_args)) + for option in self.extra_agentlib_options: + if option.startswith('config-output-dir'): + mx.abort("config-output-dir must not be set in the extra_agentlib_options.") + # Do not strip the run arguments if safepoint-sampler configuration is active. + self.extra_profile_run_args = bm_suite.extra_profile_run_arg(self.benchmark_name, args, list(image_run_args), not vm.safepoint_sampler) + self.extra_agent_profile_run_args = bm_suite.extra_agent_profile_run_arg(self.benchmark_name, args, list(image_run_args)) + self.benchmark_output_dir = bm_suite.benchmark_output_dir(self.benchmark_name, args) + self.params = ['extra-image-build-argument', 'extra-jvm-arg', 'extra-run-arg', 'extra-agent-run-arg', 'extra-profile-run-arg', + 'extra-agent-profile-run-arg', 'benchmark-output-dir', 'stages', 'skip-agent-assertions'] + + # These stages are not executed, even if explicitly requested. 
+ # Some configurations don't need to/can't run certain stages + removed_stages = set() + + if vm.jdk_profiles_collect: + # forbid image build/run in the profile collection execution mode + removed_stages.update(["image", "run"]) + if vm.profile_inference_feature_extraction: + # do not run the image in the profile inference feature extraction mode + removed_stages.add("run") + self.skip_agent_assertions = bm_suite.skip_agent_assertions(self.benchmark_name, args) + self.root_dir = self.benchmark_output_dir if self.benchmark_output_dir else mx.suite('vm').get_output_root(platformDependent=False, jdkDependent=False) + unique_suite_name = f"{self.bm_suite.benchSuiteName()}-{self.bm_suite.version().replace('.', '-')}" if self.bm_suite.version() != 'unknown' else self.bm_suite.benchSuiteName() + self.executable_name = (unique_suite_name + '-' + self.benchmark_name).lower() if self.benchmark_name else unique_suite_name.lower() + self.instrumentation_executable_name = self.executable_name + "-instrument" + self.final_image_name = self.executable_name + '-' + vm.config_name() + self.output_dir = os.path.join(os.path.abspath(self.root_dir), 'native-image-benchmarks', self.executable_name + '-' + vm.config_name()) + self.profile_path = os.path.join(self.output_dir, self.executable_name) + ".iprof" + self.config_dir = os.path.join(self.output_dir, 'config') + self.log_dir = self.output_dir + base_image_build_args = ['--no-fallback', '-g'] + base_image_build_args += ['-H:+VerifyGraalGraphs', '-H:+VerifyPhases', '--diagnostics-mode'] if vm.is_gate else [] + base_image_build_args += ['-H:+ReportExceptionStackTraces'] + base_image_build_args += bm_suite.build_assertions(self.benchmark_name, vm.is_gate) + + base_image_build_args += self.system_properties + self.bundle_path = self.get_bundle_path_if_present() + self.bundle_create_path = self.get_bundle_create_path_if_present() + if not self.bundle_path: + base_image_build_args += self.classpath_arguments + base_image_build_args += self.modulepath_arguments + base_image_build_args += self.executable + base_image_build_args += ['-H:Path=' + self.output_dir] + base_image_build_args += [ + '-H:ConfigurationFileDirectories=' + self.config_dir, + '-H:+PrintAnalysisStatistics', + '-H:+PrintCallEdges', + '-H:+CollectImageBuildStatistics', + ] + self.image_build_reports_directory = os.path.join(self.output_dir, 'reports') + if self.bundle_create_path is not None: + self.image_build_reports_directory = os.path.join(self.output_dir, self.bundle_create_path) + self.image_build_stats_file = os.path.join(self.image_build_reports_directory, 'image_build_statistics.json') + + if vm.is_quickbuild: + base_image_build_args += ['-Ob'] + if vm.use_string_inlining: + base_image_build_args += ['-H:+UseStringInlining'] + if vm.is_llvm: + base_image_build_args += ['--features=org.graalvm.home.HomeFinderFeature'] + ['-H:CompilerBackend=llvm', '-H:DeadlockWatchdogInterval=0'] + if vm.gc: + base_image_build_args += ['--gc=' + vm.gc] + ['-H:+SpawnIsolates'] + if vm.native_architecture: + base_image_build_args += ['-march=native'] + if vm.analysis_context_sensitivity: + base_image_build_args += ['-H:AnalysisContextSensitivity=' + vm.analysis_context_sensitivity, '-H:-RemoveSaturatedTypeFlows', '-H:+AliasArrayTypeFlows'] + if vm.no_inlining_before_analysis: + base_image_build_args += ['-H:-InlineBeforeAnalysis'] + if vm.optimization_level: + base_image_build_args += ['-' + vm.optimization_level] + if vm.async_sampler: + base_image_build_args += ['-R:+FlightRecorder', + 
'-R:StartFlightRecording=filename=default.jfr', + '--enable-monitoring=jfr', + '-R:+JfrBasedExecutionSamplerStatistics' + ] + removed_stages.update(["instrument-image", "instrument-run"]) + if not vm.pgo_instrumentation: + removed_stages.update(["instrument-image", "instrument-run"]) + if self.image_vm_args is not None: + base_image_build_args += self.image_vm_args + self.is_runnable = self.check_runnable() + base_image_build_args += self.extra_image_build_arguments + + # Inform the StagesInfo object about removed stages + bm_suite.stages_info.setup(removed_stages) + + # benchmarks are allowed to use experimental options + self.base_image_build_args = [os.path.join(vm.home(), 'bin', 'native-image')] + svm_experimental_options(base_image_build_args) + + def check_runnable(self): + # TODO remove once there is load available for the specified benchmarks + if self.benchmark_suite_name in ["mushop", "quarkus"]: + return False + return True + + def get_bundle_path_if_present(self): + bundle_apply_arg = "--bundle-apply=" + for i in range(len(self.extra_image_build_arguments)): + if self.extra_image_build_arguments[i].startswith(bundle_apply_arg): + # The bundle output is produced next to the bundle file, which in the case of + # benchmarks is in the mx cache, so we make a local copy. + cached_bundle_path = self.extra_image_build_arguments[i][len(bundle_apply_arg):] + bundle_copy_path = os.path.join(self.output_dir, basename(cached_bundle_path)) + mx_util.ensure_dirname_exists(bundle_copy_path) + mx.copyfile(cached_bundle_path, bundle_copy_path) + self.extra_image_build_arguments[i] = bundle_apply_arg + bundle_copy_path + return bundle_copy_path + + return None + + def get_bundle_create_path_if_present(self): + bundle_create_arg = "--bundle-create" + bundle_arg_idx = [idx for idx, arg in enumerate(self.extra_image_build_arguments) if arg.startswith(bundle_create_arg)] + if len(bundle_arg_idx) == 1: + bp = os.path.join(self.extra_image_build_arguments[bundle_arg_idx[0] + 1] + ".output", "default", "reports") + return bp + + return None + + +class NativeImageStages: + def __init__(self, stages_info: StagesInfo, config, bench_out, bench_err, is_gate, non_zero_is_fatal, cwd): + self.stages_info = stages_info + self.config: NativeImageBenchmarkConfig = config + self.bench_out = bench_out + self.bench_err = bench_err + self.final_image_name = config.final_image_name + self.is_gate = is_gate + self.non_zero_is_fatal = non_zero_is_fatal + self.cwd = cwd + + self.exit_code = None + self.command = None + self.stderr_path = None + self.stdout_path = None + + def reset_stage(self): + self.exit_code = None + self.command = None + self.stderr_path = None + self.stdout_path = None + + def __enter__(self): + self.stdout_path = os.path.abspath(os.path.join(self.config.log_dir, self.final_image_name + '-' + self.stages_info.requested_stage + '-stdout.log')) + self.stderr_path = os.path.abspath(os.path.join(self.config.log_dir, self.final_image_name + '-' + self.stages_info.requested_stage + '-stderr.log')) + self.stdout_file = open(self.stdout_path, 'w') + self.stderr_file = open(self.stderr_path, 'w') + + self.separator_line() + mx.log(self.get_timestamp() + 'Entering stage: ' + self.stages_info.requested_stage + ' for ' + self.final_image_name) + self.separator_line() + + mx.log('Running: ') + mx.log(' '.join(self.command)) + + if self.stdout_path: + mx.log('The standard output is saved to ' + str(self.stdout_path)) + if self.stderr_path: + mx.log('The standard error is saved to ' + str(self.stderr_path)) + + 
return self + + def __exit__(self, tp, value, tb): + self.stdout_file.flush() + self.stderr_file.flush() + + if self.exit_code == 0 and (tb is None): + self.stages_info.success() + if self.config.split_run: + with open(self.config.split_run, 'a') as stdout: + stdout.write(self.get_timestamp() + self.config.bm_suite.name() + ':' + self.config.benchmark_name + ' ' + self.stages_info.requested_stage + ': PASS\n') + if self.stages_info.requested_stage == self.stages_info.last_stage: + self.bench_out(f"{self.get_timestamp()}{mx_sdk_benchmark.STAGE_LAST_SUCCESSFUL_PREFIX} {self.stages_info.requested_stage} for {self.final_image_name}") + else: + self.bench_out(f"{self.get_timestamp()}{mx_sdk_benchmark.STAGE_SUCCESSFUL_PREFIX} {self.stages_info.requested_stage}") + + self.separator_line() + else: + self.stages_info.fail() + if self.config.split_run: + with open(self.config.split_run, 'a') as stdout: + stdout.write(self.get_timestamp() + self.config.bm_suite.name() + ':' + self.config.benchmark_name + ' ' + self.stages_info.requested_stage + ': FAILURE\n') + if self.exit_code is not None and self.exit_code != 0: + mx.log(mx.colorize(self.get_timestamp() + 'Failed in stage ' + self.stages_info.requested_stage + ' for ' + self.final_image_name + ' with exit code ' + str(self.exit_code), 'red')) + + if self.stdout_path: + mx.log(mx.colorize('--------- Standard output:', 'blue')) + with open(self.stdout_path, 'r') as stdout: + mx.log(stdout.read()) + + if self.stderr_path: + mx.log(mx.colorize('--------- Standard error:', 'red')) + with open(self.stderr_path, 'r') as stderr: + mx.log(stderr.read()) + + if tb: + mx.log(mx.colorize(self.get_timestamp() + 'Failed in stage ' + self.stages_info.requested_stage + ' with ', 'red')) + print_tb(tb) + + self.separator_line() + + mx.log(mx.colorize('--------- To run the failed benchmark execute the following: ', 'green')) + mx.log(mx.current_mx_command()) + + if self.stages_info.stages_till_now: + mx.log(mx.colorize('--------- To only prepare the benchmark add the following to the end of the previous command: ', 'green')) + mx.log('-Dnative-image.benchmark.stages=' + ','.join(self.stages_info.stages_till_now)) + + mx.log(mx.colorize('--------- To only run the failed stage add the following to the end of the previous command: ', 'green')) + mx.log('-Dnative-image.benchmark.stages=' + self.stages_info.requested_stage) + + mx.log(mx.colorize('--------- Additional arguments that can be used for debugging the benchmark go after the final --: ', 'green')) + for param in self.config.params: + mx.log('-Dnative-image.benchmark.' 
+ param + '=') + + self.separator_line() + if self.non_zero_is_fatal: + mx.abort(self.get_timestamp() + 'Exiting the benchmark due to the failure.') + + self.stdout_file.close() + self.stderr_file.close() + self.reset_stage() + + def stdout(self, include_bench_out=False): + def writeFun(s): + v = self.stdout_file.write(s) + if include_bench_out: + self.bench_out(s) + else: + mx.log(s, end='') + return v + return writeFun + + def stderr(self, include_bench_err=False): + def writeFun(s): + v = self.stdout_file.write(s) + if include_bench_err: + self.bench_err(s) + else: + mx.log(s, end='') + return v + return writeFun + + @staticmethod + def separator_line(): + mx.log(mx.colorize('-' * 120, 'green')) + + @staticmethod + def get_timestamp(): + return '[' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '] ' + + def set_command(self, command): + self.command = command + return self + + def execute_command(self, vm=None): + write_output = self.stages_info.effective_stage in ["run", "image"] or self.is_gate + + cmd = self.command + self.exit_code = self.config.bm_suite.run_stage(vm, self.stages_info.effective_stage, cmd, self.stdout(write_output), self.stderr(write_output), self.cwd, False) + if "image" not in self.stages_info.effective_stage and self.config.bm_suite.validateReturnCode(self.exit_code): + self.exit_code = 0 + + class NativeImageVM(GraalVm): """ - This is a VM that should be used for running all Native Image benchmarks. This VM should support all the benchmarks - that a regular Java VM supports as it: - 1) Runs a benchmark with the Native Image Agent. - 2) Builds an image based on the configuration collected by the agent. - 3) Runs the image of the benchmark with supported VM arguments and with run-time arguments. - """ + A VM implementation to build and run Native Image benchmarks. - class BenchmarkConfig: - def __init__(self, vm, bm_suite, args): - self.bmSuite = bm_suite - self.benchmark_suite_name = bm_suite.benchSuiteName(args) if len(inspect.getfullargspec(bm_suite.benchSuiteName).args) > 1 else bm_suite.benchSuiteName() - self.benchmark_name = bm_suite.benchmarkName() - self.executable, self.classpath_arguments, self.modulepath_arguments, self.system_properties, self.image_vm_args, image_run_args, self.split_run = NativeImageVM.extract_benchmark_arguments(args) - self.extra_image_build_arguments = bm_suite.extra_image_build_argument(self.benchmark_name, args) - # use list() to create fresh copies to safeguard against accidental modification - self.image_run_args = bm_suite.extra_run_arg(self.benchmark_name, args, list(image_run_args)) - self.extra_jvm_args = bm_suite.extra_jvm_arg(self.benchmark_name, args) - self.extra_agent_run_args = bm_suite.extra_agent_run_arg(self.benchmark_name, args, list(image_run_args)) - self.extra_agentlib_options = bm_suite.extra_agentlib_options(self.benchmark_name, args, list(image_run_args)) - for option in self.extra_agentlib_options: - if option.startswith('config-output-dir'): - mx.abort("config-output-dir must not be set in the extra_agentlib_options.") - # Do not strip the run arguments if safepoint-sampler configuration is active. 
- self.extra_profile_run_args = bm_suite.extra_profile_run_arg(self.benchmark_name, args, list(image_run_args), not vm.safepoint_sampler) - self.extra_agent_profile_run_args = bm_suite.extra_agent_profile_run_arg(self.benchmark_name, args, list(image_run_args)) - self.benchmark_output_dir = bm_suite.benchmark_output_dir(self.benchmark_name, args) - self.params = ['extra-image-build-argument', 'extra-jvm-arg', 'extra-run-arg', 'extra-agent-run-arg', 'extra-profile-run-arg', - 'extra-agent-profile-run-arg', 'benchmark-output-dir', 'stages', 'skip-agent-assertions'] - - self.profile_file_extension = '.iprof' - self.stages = bm_suite.stages(args) - if vm.jdk_profiles_collect: # forbid image build/run in the profile collection execution mode - for stage in ('image', 'run'): - if stage in self.stages: - self.stages.remove(stage) - if vm.profile_inference_feature_extraction: # do not run the image in the profile inference feature extraction mode - if 'run' in self.stages: - self.stages.remove('run') - self.last_stage = self.stages[-1] - self.skip_agent_assertions = bm_suite.skip_agent_assertions(self.benchmark_name, args) - self.root_dir = self.benchmark_output_dir if self.benchmark_output_dir else mx.suite('vm').get_output_root(platformDependent=False, jdkDependent=False) - unique_suite_name = f"{self.bmSuite.benchSuiteName()}-{self.bmSuite.version().replace('.', '-')}" if self.bmSuite.version() != 'unknown' else self.bmSuite.benchSuiteName() - self.executable_name = (unique_suite_name + '-' + self.benchmark_name).lower() if self.benchmark_name else unique_suite_name.lower() - self.final_image_name = self.executable_name + '-' + vm.config_name() - self.output_dir = mx.join(os.path.abspath(self.root_dir), 'native-image-benchmarks', self.executable_name + '-' + vm.config_name()) - self.profile_path_no_extension = os.path.join(self.output_dir, self.executable_name) - self.latest_profile_path = self.profile_path_no_extension + '-latest' + self.profile_file_extension - self.config_dir = os.path.join(self.output_dir, 'config') - self.log_dir = self.output_dir - base_image_build_args = ['--no-fallback', '-g'] - base_image_build_args += ['-H:+VerifyGraalGraphs', '-H:+VerifyPhases', '--diagnostics-mode'] if vm.is_gate else [] - base_image_build_args += ['-H:+ReportExceptionStackTraces'] - base_image_build_args += bm_suite.build_assertions(self.benchmark_name, vm.is_gate) - - base_image_build_args += self.system_properties - self.bundle_path = self.get_bundle_path_if_present() - self.bundle_create_path = self.get_bundle_create_path_if_present() - if not self.bundle_path: - base_image_build_args += self.classpath_arguments - base_image_build_args += self.modulepath_arguments - base_image_build_args += self.executable - base_image_build_args += ['-H:Path=' + self.output_dir] - base_image_build_args += [ - '-H:ConfigurationFileDirectories=' + self.config_dir, - '-H:+PrintAnalysisStatistics', - '-H:+PrintCallEdges', - '-H:+CollectImageBuildStatistics', - ] - self.image_build_reports_directory = os.path.join(self.output_dir, 'reports') - if self.bundle_create_path is not None: - self.image_build_reports_directory = os.path.join(self.output_dir, self.bundle_create_path) - self.image_build_stats_file = os.path.join(self.image_build_reports_directory, 'image_build_statistics.json') - - if vm.is_quickbuild: - base_image_build_args += ['-Ob'] - if vm.use_string_inlining: - base_image_build_args += ['-H:+UseStringInlining'] - if vm.is_llvm: - base_image_build_args += ['--features=org.graalvm.home.HomeFinderFeature'] 
+ ['-H:CompilerBackend=llvm', '-H:DeadlockWatchdogInterval=0'] - if vm.gc: - base_image_build_args += ['--gc=' + vm.gc] + ['-H:+SpawnIsolates'] - if vm.native_architecture: - base_image_build_args += ['-march=native'] - if vm.analysis_context_sensitivity: - base_image_build_args += ['-H:AnalysisContextSensitivity=' + vm.analysis_context_sensitivity, '-H:-RemoveSaturatedTypeFlows', '-H:+AliasArrayTypeFlows'] - if vm.no_inlining_before_analysis: - base_image_build_args += ['-H:-InlineBeforeAnalysis'] - if vm.optimization_level: - base_image_build_args += ['-' + vm.optimization_level] - if vm.async_sampler: - base_image_build_args += ['-R:+FlightRecorder', - '-R:StartFlightRecording=filename=default.jfr', - '--enable-monitoring=jfr', - '-R:+JfrBasedExecutionSamplerStatistics' - ] - for stage in ('instrument-image', 'instrument-run'): - if stage in self.stages: - self.stages.remove(stage) - if self.image_vm_args is not None: - base_image_build_args += self.image_vm_args - self.is_runnable = self.check_runnable() - base_image_build_args += self.extra_image_build_arguments - # benchmarks are allowed to use experimental options - self.base_image_build_args = [os.path.join(vm.home(), 'bin', 'native-image')] + svm_experimental_options(base_image_build_args) - - def check_runnable(self): - # TODO remove once there is load available for the specified benchmarks - if self.benchmark_suite_name in ["mushop", "quarkus"]: - return False - return True - - def get_bundle_path_if_present(self): - bundle_apply_arg = "--bundle-apply=" - for i in range(len(self.extra_image_build_arguments)): - if self.extra_image_build_arguments[i].startswith(bundle_apply_arg): - # The bundle output is produced next to the bundle file, which in the case of - # benchmarks is in the mx cache, so we make a local copy. - cached_bundle_path = self.extra_image_build_arguments[i][len(bundle_apply_arg):] - bundle_copy_path = join(self.output_dir, basename(cached_bundle_path)) - mx.ensure_dirname_exists(bundle_copy_path) - mx.copyfile(cached_bundle_path, bundle_copy_path) - self.extra_image_build_arguments[i] = bundle_apply_arg + bundle_copy_path - return bundle_copy_path - - return None - - def get_bundle_create_path_if_present(self): - bundle_create_arg = "--bundle-create" - bundle_arg_idx = [idx for idx, arg in enumerate(self.extra_image_build_arguments) if arg.startswith(bundle_create_arg)] - if len(bundle_arg_idx) == 1: - bp = join(self.extra_image_build_arguments[bundle_arg_idx[0] + 1] + ".output", "default", "reports") - return bp - - return None - - def __init__(self, name, config_name, extra_java_args=None, extra_launcher_args=None, **kwargs): - super(NativeImageVM, self).__init__(name, config_name, extra_java_args, extra_launcher_args) - if len(kwargs) > 0: - mx.log_deprecation("Ignoring NativeImageVM custom configuration! Use named configuration instead.") - mx.warn(f"Ignoring: {kwargs}") + Runs individual stages of the benchmarking process (or all stages in sequence in the fallback mode). + See also :class:`NativeImageBenchmarkMixin` for more information on the Native Image benchmarking process. 
+ """ + def __init__(self, name, config_name, extra_java_args=None, extra_launcher_args=None): + super().__init__(name, config_name, extra_java_args, extra_launcher_args) self.vm_args = None self.pgo_instrumentation = False self.pgo_context_sensitive = True @@ -274,8 +423,9 @@ def __init__(self, name, config_name, extra_java_args=None, extra_launcher_args= self.native_architecture = False self.use_upx = False self.graalvm_edition = None - self.config = None - self.stages = None + self.config: Optional[NativeImageBenchmarkConfig] = None + self.stages_info: Optional[StagesInfo] = None + self.stages: Optional[NativeImageStages] = None self.jdk_profiles_collect = False self.adopted_jdk_pgo = False self.async_sampler = False @@ -364,7 +514,7 @@ def _configure_from_name(self, config_name): def generate_profiling_package_prefixes(): # run the native-image-configure tool to gather the jdk package prefixes graalvm_home_bin = os.path.join(mx_sdk_vm.graalvm_home(), 'bin') - native_image_configure_command = mx.cmd_suffix(join(graalvm_home_bin, 'native-image-configure')) + native_image_configure_command = mx.cmd_suffix(os.path.join(graalvm_home_bin, 'native-image-configure')) if not exists(native_image_configure_command): mx.abort('Failed to find the native-image-configure command at {}. \nContent {}: \n\t{}'.format(native_image_configure_command, graalvm_home_bin, '\n\t'.join(os.listdir(graalvm_home_bin)))) tmp = tempfile.NamedTemporaryFile() @@ -518,163 +668,6 @@ def extract_benchmark_arguments(args): return executable, classpath_arguments, modulepath_arguments, system_properties, image_vm_args, image_run_args, split_run - class Stages: - def __init__(self, config, bench_out, bench_err, is_gate, non_zero_is_fatal, cwd): - self.stages_till_now = [] - self.successfully_finished_stages = [] - self.config = config - self.bench_out = bench_out - self.bench_err = bench_err - self.final_image_name = config.final_image_name - self.is_gate = is_gate - self.non_zero_is_fatal = non_zero_is_fatal - self.cwd = cwd - self.failed = False - - self.current_stage = '' - self.exit_code = None - self.command = None - self.stderr_path = None - self.stdout_path = None - - def reset_stage(self): - self.current_stage = '' - self.exit_code = None - self.command = None - self.stderr_path = None - self.stdout_path = None - - def __enter__(self): - self.stdout_path = os.path.abspath(os.path.join(self.config.log_dir, self.final_image_name + '-' + self.current_stage + '-stdout.log')) - self.stderr_path = os.path.abspath(os.path.join(self.config.log_dir, self.final_image_name + '-' + self.current_stage + '-stderr.log')) - self.stdout_file = open(self.stdout_path, 'w') - self.stderr_file = open(self.stderr_path, 'w') - - self.separator_line() - mx.log(self.get_timestamp() + 'Entering stage: ' + self.current_stage + ' for ' + self.final_image_name) - self.separator_line() - - mx.log('Running: ') - mx.log(' '.join(self.command)) - - if self.stdout_path: - mx.log('The standard output is saved to ' + str(self.stdout_path)) - if self.stderr_path: - mx.log('The standard error is saved to ' + str(self.stderr_path)) - - return self - - def __exit__(self, tp, value, tb): - self.stdout_file.flush() - self.stderr_file.flush() - - if self.exit_code == 0 and (tb is None): - if self.config.split_run: - with open(self.config.split_run, 'a') as stdout: - stdout.write(self.get_timestamp() + self.config.bmSuite.name() + ':' + self.config.benchmark_name + ' ' + self.current_stage + ': PASS\n') - 
-                self.successfully_finished_stages.append(self.current_stage)
-                if self.current_stage.startswith(self.config.last_stage):
-                    self.bench_out(self.get_timestamp() + 'Successfully finished the last specified stage:' + ' ' + self.current_stage + ' for ' + self.final_image_name)
-                else:
-                    mx.log(self.get_timestamp() + 'Successfully finished stage:' + ' ' + self.current_stage)
-
-                self.separator_line()
-            else:
-                if self.config.split_run:
-                    with open(self.config.split_run, 'a') as stdout:
-                        stdout.write(self.get_timestamp() + self.config.bmSuite.name() + ':' + self.config.benchmark_name + ' ' + self.current_stage + ': FAILURE\n')
-                self.failed = True
-                if self.exit_code is not None and self.exit_code != 0:
-                    mx.log(mx.colorize(self.get_timestamp() + 'Failed in stage ' + self.current_stage + ' for ' + self.final_image_name + ' with exit code ' + str(self.exit_code), 'red'))
-
-                if self.stdout_path:
-                    mx.log(mx.colorize('--------- Standard output:', 'blue'))
-                    with open(self.stdout_path, 'r') as stdout:
-                        mx.log(stdout.read())
-
-                if self.stderr_path:
-                    mx.log(mx.colorize('--------- Standard error:', 'red'))
-                    with open(self.stderr_path, 'r') as stderr:
-                        mx.log(stderr.read())
-
-                if tb:
-                    mx.log(mx.colorize(self.get_timestamp() + 'Failed in stage ' + self.current_stage + ' with ', 'red'))
-                    print_tb(tb)
-
-                self.separator_line()
-
-                if len(self.stages_till_now) > 0:
-                    mx.log(mx.colorize('--------- To run the failed benchmark execute the following: ', 'green'))
-                    mx.log(mx.current_mx_command())
-
-                    if len(self.stages_till_now[:-1]) > 0:
-                        mx.log(mx.colorize('--------- To only prepare the benchmark add the following to the end of the previous command: ', 'green'))
-                        mx.log('-Dnative-image.benchmark.stages=' + ','.join(self.stages_till_now[:-1]))
-
-                    mx.log(mx.colorize('--------- To only run the failed stage add the following to the end of the previous command: ', 'green'))
-                    mx.log('-Dnative-image.benchmark.stages=' + self.current_stage)
-
-                    mx.log(mx.colorize('--------- Additional arguments that can be used for debugging the benchmark go after the final --: ', 'green'))
-                    for param in self.config.params:
-                        mx.log('-Dnative-image.benchmark.' + param + '=')
-
-                self.separator_line()
-                if self.non_zero_is_fatal:
-                    mx.abort(self.get_timestamp() + 'Exiting the benchmark due to the failure.')
-
-            self.stdout_file.close()
-            self.stderr_file.close()
-            self.reset_stage()
-
-        def stdout(self, include_bench_out=False):
-            def writeFun(s):
-                v = self.stdout_file.write(s)
-                if include_bench_out:
-                    self.bench_out(s)
-                else:
-                    mx.log(s, end='')
-                return v
-            return writeFun
-
-        def stderr(self, include_bench_err=False):
-            def writeFun(s):
-                v = self.stdout_file.write(s)
-                if include_bench_err:
-                    self.bench_err(s)
-                else:
-                    mx.log(s, end='')
-                return v
-            return writeFun
-
-        def change_stage(self, *argv):
-            if self.failed:
-                return False
-
-            stage_name = '-'.join(argv)
-            self.stages_till_now.append(stage_name)
-            self.current_stage = stage_name
-            stage_applies = argv[0] in self.config.stages or stage_name in self.config.stages
-            return stage_applies
-
-        @staticmethod
-        def separator_line():
-            mx.log(mx.colorize('-' * 120, 'green'))
-
-        @staticmethod
-        def get_timestamp():
-            return '[' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '] '
-
-        def set_command(self, command):
-            self.command = command
-            return self
-
-        def execute_command(self, vm=None):
-            write_output = self.current_stage == 'run' or self.current_stage == 'image' or self.is_gate
-            cmd = self.command
-            self.exit_code = self.config.bmSuite.run_stage(vm, self.current_stage, cmd, self.stdout(write_output), self.stderr(write_output), self.cwd, False)
-            if "image" not in self.current_stage and self.config.bmSuite.validateReturnCode(self.exit_code):
-                self.exit_code = 0
-
     def image_build_rules(self, output, benchmarks, bmSuiteArgs):
         return self.image_build_general_rules(output, benchmarks, bmSuiteArgs) + self.image_build_analysis_rules(output, benchmarks, bmSuiteArgs) \
                + self.image_build_statistics_rules(output, benchmarks, bmSuiteArgs) + self.image_build_timers_rules(output, benchmarks, bmSuiteArgs)
@@ -872,10 +865,10 @@ def __call__(self, *args, **kwargs):
         return rules

     def rules(self, output, benchmarks, bmSuiteArgs):
-        rules = super(NativeImageVM, self).rules(output, benchmarks, bmSuiteArgs)
+        rules = super().rules(output, benchmarks, bmSuiteArgs)

-        image_build_finished = 'image' in self.stages.successfully_finished_stages or 'instrument-image' in self.stages.successfully_finished_stages
-        if image_build_finished:
+        if self.stages_info.fallback_mode or self.stages_info.effective_stage == "image":
+            # Only apply image build rules for the image build stages
             rules += self.image_build_rules(output, benchmarks, bmSuiteArgs)

         return rules
@@ -888,14 +881,14 @@ def copy_bundle_output(config):
         """
         bundle_dir = os.path.dirname(config.bundle_path)
         bundle_name = os.path.basename(config.bundle_path)
-        bundle_output = join(bundle_dir, bundle_name[:-len(".nib")] + ".output", "default")
+        bundle_output = os.path.join(bundle_dir, bundle_name[:-len(".nib")] + ".output", "default")
         shutil.copytree(bundle_output, config.output_dir, dirs_exist_ok=True)
         mx.rmtree(bundle_output)

-    def run_stage_agent(self, config, stages):
-        hotspot_vm_args = ['-ea', '-esa'] if self.is_gate and not config.skip_agent_assertions else []
-        hotspot_vm_args += config.extra_jvm_args
-        agentlib_options = ['native-image-agent=config-output-dir=' + str(config.config_dir)] + config.extra_agentlib_options
+    def run_stage_agent(self):
+        hotspot_vm_args = ['-ea', '-esa'] if self.is_gate and not self.config.skip_agent_assertions else []
+        hotspot_vm_args += self.config.extra_jvm_args
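+        # The native-image-agent writes the configuration it collects into the benchmark's config directory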
+        agentlib_options = ['native-image-agent=config-output-dir=' + str(self.config.config_dir)] + self.config.extra_agentlib_options
         hotspot_vm_args += ['-agentlib:' + ','.join(agentlib_options)]

         # Native Image has the following option enabled by default. In order to create lambda classes in the same way
@@ -909,32 +902,32 @@ def run_stage_agent(self, config, stages):
         if mx.cpu_count() > 8:
             hotspot_vm_args += ['-XX:ActiveProcessorCount=8']

-        if config.image_vm_args is not None:
-            hotspot_vm_args += config.image_vm_args
+        if self.config.image_vm_args is not None:
+            hotspot_vm_args += self.config.image_vm_args

-        hotspot_args = hotspot_vm_args + config.classpath_arguments + config.modulepath_arguments + config.system_properties + config.executable + config.extra_agent_run_args
-        with stages.set_command(self.generate_java_command(hotspot_args)) as s:
+        hotspot_args = hotspot_vm_args + self.config.classpath_arguments + self.config.modulepath_arguments + self.config.system_properties + self.config.executable + self.config.extra_agent_run_args
+        with self.stages.set_command(self.generate_java_command(hotspot_args)) as s:
             s.execute_command()

-        path = os.path.join(config.config_dir, "config.zip")
+        path = os.path.join(self.config.config_dir, "config.zip")
         with zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for root, _, files in os.walk(config.config_dir):
+            for root, _, files in os.walk(self.config.config_dir):
                 for file in files:
                     if file.endswith(".json"):
                         zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.join(path, '..')))

-    def run_stage_instrument_image(self, config, stages, out, instrumentation_image_name, image_path, profile_path):
-        executable_name_args = ['-o', instrumentation_image_name]
-        instrument_args = ['--pgo-instrument', '-R:ProfilesDumpFile=' + profile_path]
+    def run_stage_instrument_image(self, out):
+        executable_name_args = ['-o', self.config.instrumentation_executable_name]
+        instrument_args = ['--pgo-instrument', '-R:ProfilesDumpFile=' + self.config.profile_path]
         if self.jdk_profiles_collect:
             instrument_args += svm_experimental_options(['-H:+AOTPriorityInline', '-H:-SamplingCollect', f'-H:ProfilingPackagePrefixes={self.generate_profiling_package_prefixes()}'])

-        with stages.set_command(config.base_image_build_args + executable_name_args + instrument_args) as s:
+        with self.stages.set_command(self.config.base_image_build_args + executable_name_args + instrument_args) as s:
             s.execute_command()
-            if config.bundle_path is not None:
-                NativeImageVM.copy_bundle_output(config)
+            if self.config.bundle_path is not None:
+                NativeImageVM.copy_bundle_output(self.config)
             if s.exit_code == 0:
-                image_size = os.stat(image_path).st_size
+                image_size = os.stat(os.path.join(self.config.output_dir, self.config.instrumentation_executable_name)).st_size
                 out('Instrumented image size: ' + str(image_size) + ' B')

     def _ensureSamplesAreInProfile(self, profile_path):
@@ -953,30 +946,29 @@ def _ensureSamplesAreInProfile(self, profile_path):
                     assert len(sample["records"]) == 1, "Sampling profiles seem to be missing records in file " + profile_path
                     assert sample["records"][0] > 0, "Sampling profiles seem to have a 0 in records in file " + profile_path

-    def run_stage_instrument_run(self, config, stages, image_path, profile_path):
-        image_run_cmd = [image_path]
-        image_run_cmd += config.extra_jvm_args
-        image_run_cmd += config.extra_profile_run_args
-        with stages.set_command(image_run_cmd) as s:
+    def run_stage_instrument_run(self):
+        image_run_cmd = [os.path.join(self.config.output_dir, self.config.instrumentation_executable_name)]
+        image_run_cmd += self.config.extra_jvm_args
+        image_run_cmd += self.config.extra_profile_run_args
+        with self.stages.set_command(image_run_cmd) as s:
             s.execute_command()
             if s.exit_code == 0:
-                mx.copyfile(profile_path, config.latest_profile_path)
-                print(f"Profile file {config.latest_profile_path} sha1 is {mx.sha1OfFile(config.latest_profile_path)}")
-                self._ensureSamplesAreInProfile(config.latest_profile_path)
+                print(f"Profile file {self.config.profile_path} sha1 is {mx.sha1OfFile(self.config.profile_path)}")
+                self._ensureSamplesAreInProfile(self.config.profile_path)
             else:
-                print(f"Profile file {config.latest_profile_path} not dumped. Instrument run failed with exit code {s.exit_code}")
+                print(f"Profile file {self.config.profile_path} not dumped. Instrument run failed with exit code {s.exit_code}")

-    def _print_binary_size(self, config, out):
+    def _print_binary_size(self, out):
         # The image size for benchmarks is tracked by printing on stdout and matching the rule.
-        image_path = os.path.join(config.output_dir, config.final_image_name)
-        if config.bundle_create_path is not None:
-            image_path = os.path.join(config.output_dir, config.bundle_create_path[:-len("reports")], config.bundle_create_path.split(".")[0])
+        image_path = os.path.join(self.config.output_dir, self.config.final_image_name)
+        if self.config.bundle_create_path is not None:
+            image_path = os.path.join(self.config.output_dir, self.config.bundle_create_path[:-len("reports")], self.config.bundle_create_path.split(".")[0])
         image_size = os.stat(image_path).st_size
-        out(f'The executed image size for benchmark {config.benchmark_suite_name}:{config.benchmark_name} is {image_size} B')
+        out(f'The executed image size for benchmark {self.config.benchmark_suite_name}:{self.config.benchmark_name} is {image_size} B')

-    def run_stage_image(self, config, stages, out):
-        executable_name_args = ['-o', config.final_image_name]
-        pgo_args = ['--pgo=' + config.latest_profile_path]
+    def run_stage_image(self, out):
+        executable_name_args = ['-o', self.config.final_image_name]
+        pgo_args = ['--pgo=' + self.config.profile_path]
         pgo_args += svm_experimental_options(['-H:' + ('+' if self.pgo_context_sensitive else '-') + 'PGOContextSensitivityEnabled'])
         if self.adopted_jdk_pgo:
             # choose appropriate profiles
@@ -988,102 +980,106 @@ def run_stage_image(self, config, stages, out):
                 adopted_profile = os.path.join(adopted_profiles_dir, 'jdk_profile.iprof')
             else:
                 mx.warn(f'SubstrateVM Enterprise with JDK{jdk_version} does not contain JDK profiles.')
-                adopted_profile = join(mx.suite('substratevm-enterprise').mxDir, 'empty.iprof')
+                adopted_profile = os.path.join(mx.suite('substratevm-enterprise').mxDir, 'empty.iprof')
             jdk_profiles_args = svm_experimental_options([f'-H:AdoptedPGOEnabled={adopted_profile}'])
         else:
             jdk_profiles_args = []
         if self.profile_inference_feature_extraction:
             ml_args = svm_experimental_options(['-H:+MLGraphFeaturesExtraction', '-H:+ProfileInferenceDumpFeatures'])
             dump_file_flag = 'ProfileInferenceDumpFile'
-            if dump_file_flag not in ''.join(config.base_image_build_args):
+            if dump_file_flag not in ''.join(self.config.base_image_build_args):
                 mx.warn("To dump the profile inference features to a specific location, please set the '{}' flag.".format(dump_file_flag))
         else:
             ml_args = []

-        final_image_command = config.base_image_build_args + executable_name_args + (pgo_args if self.pgo_instrumentation else []) + jdk_profiles_args + ml_args
-        with stages.set_command(final_image_command) as s:
+        final_image_command = self.config.base_image_build_args + executable_name_args + (pgo_args if self.pgo_instrumentation else []) + jdk_profiles_args + ml_args
+        with self.stages.set_command(final_image_command) as s:
             s.execute_command()
-            if config.bundle_path is not None:
-                NativeImageVM.copy_bundle_output(config)
-            self._print_binary_size(config, out)
-            if self.use_upx:
-                image_path = os.path.join(config.output_dir, config.final_image_name)
-                upx_directory = mx.library("UPX", True).get_path(True)
-                upx_path = os.path.join(upx_directory, mx.exe_suffix("upx"))
-                upx_cmd = [upx_path, image_path]
-                mx.log(f"Compressing image: {' '.join(upx_cmd)}")
-                mx.run(upx_cmd, s.stdout(True), s.stderr(True))
-
-    def run_stage_run(self, config, stages, out):
-        if not config.is_runnable:
-            mx.abort(f"Benchmark {config.benchmark_suite_name}:{config.benchmark_name} is not runnable.")
-        image_path = os.path.join(config.output_dir, config.final_image_name)
-        with stages.set_command([image_path] + config.extra_jvm_args + config.image_run_args) as s:
-            s.execute_command(vm=self)
+            if self.config.bundle_path is not None:
+                NativeImageVM.copy_bundle_output(self.config)
+
             if s.exit_code == 0:
-                self._print_binary_size(config, out)
+                image_path = os.path.join(self.config.output_dir, self.config.final_image_name)
+                if self.use_upx:
+                    upx_directory = mx.library("UPX", True).get_path(True)
+                    upx_path = os.path.join(upx_directory, mx.exe_suffix("upx"))
+                    upx_cmd = [upx_path, image_path]
+                    mx.log(f"Compressing image: {' '.join(upx_cmd)}")
+                    mx.run(upx_cmd, s.stdout(True), s.stderr(True))
+
+                self._print_binary_size(out)
                 image_sections_command = "objdump -h " + image_path
                 out(subprocess.check_output(image_sections_command, shell=True, universal_newlines=True))
                 for config_type in ['jni', 'proxy', 'predefined-classes', 'reflect', 'resource', 'serialization']:
-                    config_path = os.path.join(config.config_dir, config_type + '-config.json')
+                    config_path = os.path.join(self.config.config_dir, config_type + '-config.json')
                     if os.path.exists(config_path):
                         config_size = os.stat(config_path).st_size
-                        out('The ' + config_type + ' configuration size for benchmark ' + config.benchmark_suite_name + ':' + config.benchmark_name + ' is ' + str(config_size) + ' B')
+                        out('The ' + config_type + ' configuration size for benchmark ' + self.config.benchmark_suite_name + ':' + self.config.benchmark_name + ' is ' + str(config_size) + ' B')

-    def run_java(self, args, out=None, err=None, cwd=None, nonZeroIsFatal=False):
+    def run_stage_run(self, out):
+        if not self.config.is_runnable:
+            mx.abort(f"Benchmark {self.config.benchmark_suite_name}:{self.config.benchmark_name} is not runnable.")
+        image_path = os.path.join(self.config.output_dir, self.config.final_image_name)
+        with self.stages.set_command([image_path] + self.config.extra_jvm_args + self.config.image_run_args) as s:
+            s.execute_command(vm=self)
+
+    def run_java(self, args, out=None, err=None, cwd=None, nonZeroIsFatal=False):
+        # This is also called with -version to gather information about the Java VM. Since this is not technically a
+        # Java VM, we delegate to the superclass
         if '-version' in args:
             return super(NativeImageVM, self).run_java(args, out=out, err=err, cwd=cwd, nonZeroIsFatal=nonZeroIsFatal)

-        if self.bmSuite is None:
-            mx.abort("Benchmark suite was not registered.")
-
-        if not callable(getattr(self.bmSuite, "run_stage", None)):
-            mx.abort("Benchmark suite is not a NativeImageMixin.")
-
-        # never fatal, we handle it ourselves
-        config = NativeImageVM.BenchmarkConfig(self, self.bmSuite, args)
-        self.config = config
-        stages = NativeImageVM.Stages(config, out, err, self.is_gate, True if self.is_gate else nonZeroIsFatal, os.path.abspath(cwd if cwd else os.getcwd()))
-        self.stages = stages
-
-        if not os.path.exists(config.output_dir):
-            os.makedirs(config.output_dir)
+        assert self.bmSuite, "Benchmark suite was not registered."
+        assert callable(getattr(self.bmSuite, "run_stage", None)), "Benchmark suite is not a NativeImageMixin."

-        if not os.path.exists(config.config_dir):
-            os.makedirs(config.config_dir)
+        if not self.bmSuite.stages_info:
+            def fullname(cls):
+                return cls.__module__ + '.' + cls.__qualname__

-        if stages.change_stage('agent'):
-            self.run_stage_agent(config, stages)
+            mx.abort(
+                f"Invalid Native Image benchmark setup for {fullname(self.bmSuite.__class__)}.\n"
+                f"Please see {fullname(NativeImageBenchmarkMixin)} for more information.",
+            )

-        # Native Image profile collection
-        profile_path = config.profile_path_no_extension + config.profile_file_extension
-        instrumentation_image_name = config.executable_name + '-instrument'
+        self.stages_info: StagesInfo = self.bmSuite.stages_info
+        assert not self.stages_info.failed, "In case of a failed benchmark, no further calls into the VM should be made"

-        if self.pgo_instrumentation:
-            image_path = os.path.join(config.output_dir, instrumentation_image_name)
-            if stages.change_stage('instrument-image'):
-                self.run_stage_instrument_image(config, stages, out, instrumentation_image_name, image_path, profile_path)
-
-            if stages.change_stage('instrument-run'):
-                self.run_stage_instrument_run(config, stages, image_path, profile_path)
-
-        # Build the final image
-        if stages.change_stage('image'):
-            self.run_stage_image(config, stages, out)
-
-        # Execute the benchmark
-        if stages.change_stage('run'):
-            self.run_stage_run(config, stages, out)
+        # never fatal, we handle it ourselves
+        self.config = NativeImageBenchmarkConfig(self, self.bmSuite, args)
+        self.stages = NativeImageStages(self.stages_info, self.config, out, err, self.is_gate, True if self.is_gate else nonZeroIsFatal, os.path.abspath(cwd if cwd else os.getcwd()))
+
+        os.makedirs(self.config.output_dir, exist_ok=True)
+        os.makedirs(self.config.config_dir, exist_ok=True)
+
+        if self.stages_info.fallback_mode:
+            # In fallback mode, we have to run all requested stages in the same `run_java` invocation.
+            # We simply emulate the dispatching of the individual stages as in `NativeImageBenchmarkMixin.intercept_run`
+            for stage in self.stages_info.effective_stages:
+                self.stages_info.change_stage(stage)
+                self.run_single_stage(out)
+        else:
+            self.run_single_stage(out)

-        if stages.failed:
+        if self.stages_info.failed:
             mx.abort('Exiting the benchmark due to the failure.')

-    def create_log_files(self, config, executable_name, stage):
-        stdout_path = os.path.abspath(
-            os.path.join(config.log_dir, executable_name + '-' + stage.current_stage + '-stdout.log'))
-        stderr_path = os.path.abspath(
-            os.path.join(config.log_dir, executable_name + '-' + stage.current_stage + '-stderr.log'))
-        return stderr_path, stdout_path
+    def run_single_stage(self, out):
+        if self.stages_info.skip_current_stage:
+            self.stages.bench_out(f"{mx_sdk_benchmark.STAGE_SKIPPED_PREFIX} {self.stages_info.requested_stage}")
+            return
+
+        stage_to_run = self.stages_info.effective_stage
+        if stage_to_run == "agent":
+            self.run_stage_agent()
+        elif stage_to_run == "instrument-image":
+            self.run_stage_instrument_image(out)
+        elif stage_to_run == "instrument-run":
+            self.run_stage_instrument_run()
+        elif stage_to_run == "image":
+            self.run_stage_image(out)
+        elif stage_to_run == "run":
+            self.run_stage_run(out)
+        else:
+            raise ValueError(f"Unknown stage {stage_to_run}")


 class AnalysisReportJsonFileRule(mx_benchmark.JsonBaseRule):
@@ -1103,8 +1099,8 @@ def getJsonFiles(self, text):
             json_file_name = os.path.basename(json_file_path)
             base_search_dir = self.report_directory
             if self.is_diagnostics_mode:
-                base_search_dir = join(base_search_dir, os.path.basename(os.path.dirname(json_file_path)))
-            expected_json_file_path = join(base_search_dir, json_file_name)
+                base_search_dir = os.path.join(base_search_dir, os.path.basename(os.path.dirname(json_file_path)))
+            expected_json_file_path = os.path.join(base_search_dir, json_file_name)
             if exists(expected_json_file_path):
                 found_json_files.append(expected_json_file_path)
             else:
@@ -1172,7 +1168,7 @@ def createCommandLineArgs(self, benchmarks, bmSuiteArgs):
         return self.vmArgs(bmSuiteArgs) + super(AgentScriptJsBenchmarkSuite, self).createCommandLineArgs(benchmarks, bmSuiteArgs)

     def workingDirectory(self, benchmarks, bmSuiteArgs):
-        return join(_suite.dir, 'benchmarks', 'agentscript')
+        return os.path.join(_suite.dir, 'benchmarks', 'agentscript')

     def createVmCommandLineArgs(self, benchmarks, runArgs):
         if not benchmarks:
@@ -1184,7 +1180,7 @@ def createVmCommandLineArgs(self, benchmarks, runArgs):
     def get_vm_registry(self):
         return mx_benchmark.js_vm_registry

-    def run(self, benchmarks, bmSuiteArgs):
+    def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
         results = super(AgentScriptJsBenchmarkSuite, self).run(benchmarks, bmSuiteArgs)
         self.addAverageAcrossLatestResults(results)
         return results
@@ -1197,7 +1193,7 @@ def __init__(self, *args, **kwargs):
         self.startPattern = re.compile(kwargs.pop('startPattern'))
         super(ExcludeWarmupRule, self).__init__(*args, **kwargs)

-    def parse(self, text):
+    def parse(self, text) -> Iterable[DataPoint]:
         m = self.startPattern.search(text)
         if m:
             return super(ExcludeWarmupRule, self).parse(text[m.end()+1:])
@@ -1423,7 +1419,7 @@ def runAndReturnStdOut(self, benchmarks, bmSuiteArgs):
         output_root = mx_sdk_vm_impl.get_final_graalvm_distribution().get_output_root()

         def get_size_message(image_name, image_location):
-            return FileSizeBenchmarkSuite.SZ_MSG_PATTERN.format(image_name, getsize(join(output_root, image_location)), image_location, output_root)
+            return FileSizeBenchmarkSuite.SZ_MSG_PATTERN.format(image_name, getsize(os.path.join(output_root, image_location)), image_location, output_root)

         for location in mx_sdk_vm_impl.get_all_native_image_locations(include_libraries=True, include_launchers=False, abs_path=False):
             lib_name = 'lib:' + mx_sdk_vm_impl.remove_lib_prefix_suffix(basename(location))
diff --git a/wasm/mx.wasm/mx_wasm_benchmark.py b/wasm/mx.wasm/mx_wasm_benchmark.py
index 859d725ad22d..ac9032d422e1 100644
--- a/wasm/mx.wasm/mx_wasm_benchmark.py
+++ b/wasm/mx.wasm/mx_wasm_benchmark.py
@@ -241,7 +241,7 @@ def getBenchmarkName(self, bmSuiteArgs):

     def rules(self, out, benchmarks, bmSuiteArgs):
         return [
-            WasmJMHJsonRule(mx_benchmark.JMHBenchmarkSuiteBase.jmh_result_file, self.benchSuiteName(bmSuiteArgs)),
+            WasmJMHJsonRule(self.get_jmh_result_file(bmSuiteArgs), self.benchSuiteName(bmSuiteArgs)),
             mx_benchmark.StdOutRule(
                 r"Iteration (?P[0-9]+), result = -?[0-9]+, sec = ([0-9]+\.[0-9]+), ops / sec = (?P([0-9]+\.[0-9]+))",  # pylint: disable=line-too-long
                 {