[GR-50022] Refactor Native Image benchmarking to better control individual stages

PullRequest: graal/16688
patrick96 committed Feb 15, 2024
2 parents 8cf9604 + c159722 commit c2b403a
Showing 6 changed files with 832 additions and 454 deletions.
61 changes: 60 additions & 1 deletion compiler/mx.compiler/mx_graal_benchmark.py
@@ -26,12 +26,15 @@
import re
import os
from tempfile import mkstemp
from typing import List, Optional

import mx
import mx_benchmark
import mx_sdk_benchmark
import mx_compiler
from mx_java_benchmarks import DaCapoBenchmarkSuite, ScalaDaCapoBenchmarkSuite
from mx_benchmark import DataPoints
from mx_sdk_benchmark import SUCCESSFUL_STAGE_PATTERNS

_suite = mx.suite('compiler')

@@ -406,7 +409,45 @@ def benchSuiteName(self, bmSuiteArgs=None):
mx_benchmark.add_bm_suite(ScalaDaCapoTimingBenchmarkSuite())


class JMHNativeImageBenchmarkMixin(mx_sdk_benchmark.NativeImageBenchmarkMixin):
class JMHNativeImageBenchmarkMixin(mx_benchmark.JMHBenchmarkSuiteBase, mx_sdk_benchmark.NativeImageBenchmarkMixin):

def get_jmh_result_file(self, bm_suite_args: List[str]) -> Optional[str]:
"""
Only generate a JMH result file in stages that actually execute the benchmark (agent, instrument-run, run). Otherwise the file-based rule (see
:class:`mx_benchmark.JMHJsonRule`) will produce datapoints at every stage, based on results from a previous
stage.
"""
if self.is_native_mode(bm_suite_args) and not self.stages_info.fallback_mode:
# At this point, the StagesInfo class may not have all the information yet; in that case we rely on the
# requested stage. If this function is called again later, once it is fully set up, we have to use the
# effective stage instead.
# This is important so that the JMH parsing rule is only enabled when the stage actually ran (if the stage is
# skipped, the rule would otherwise pick up a previous result file).
if self.stages_info.is_set_up:
current_stage = self.stages_info.effective_stage
else:
current_stage = self.stages_info.requested_stage

if current_stage not in ["agent", "instrument-run", "run"]:
return None

return super().get_jmh_result_file(bm_suite_args)

def fallback_mode_reason(self, bm_suite_args: List[str]) -> Optional[str]:
"""
JMH benchmarks need to use the fallback mode if --jmh-run-individually is used.
The flag causes one native image to be built per JMH benchmark. This is fundamentally incompatible with the
default benchmarking mode of running each stage on its own, because each benchmark would overwrite the
intermediate files of the previous benchmark unless all stages are run at once.
In fallback mode, collection of performance data is limited. Only performance data of the ``run`` stage can
reliably be collected. Other metrics, such as image build statistics or profiling performance, cannot reliably
be collected because they cannot be attributed to a specific individual JMH benchmark.
"""
if self.jmhArgs(bm_suite_args).jmh_run_individually:
return "--jmh-run-individually is not compatible with selecting individual stages"
else:
return None
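
Purely for illustration (not part of this commit), a minimal sketch of how a driver could consult fallback_mode_reason before dispatching stages; run_all_stages and run_requested_stages are hypothetical helper names, and the real orchestration lives in mx_sdk_benchmark.NativeImageBenchmarkMixin:

def dispatch(suite, bm_suite_args):
    # Hypothetical sketch, not the actual mx_sdk_benchmark implementation.
    # Relies on the module-level ``import mx`` already present in this file.
    reason = suite.fallback_mode_reason(bm_suite_args)
    if reason is not None:
        # Fallback: execute every stage in a single pass; only the 'run'
        # stage then yields reliable performance data.
        mx.warn(f"Running all stages at once: {reason}")
        return suite.run_all_stages(bm_suite_args)        # hypothetical helper
    # Otherwise each requested stage can be executed and measured on its own.
    return suite.run_requested_stages(bm_suite_args)      # hypothetical helper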

def extra_image_build_argument(self, benchmark, args):
# JMH does HotSpot-specific field offset checks in class initializers
@@ -462,6 +503,9 @@ def group(self):
def subgroup(self):
return "graal-compiler"

def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
return self.intercept_run(super(), benchmarks, bmSuiteArgs)
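
Each of the JMH suites below now overrides run() with this same one-liner so the Native Image mixin can intercept the run and drive individual stages. As a rough, hypothetical sketch of that pattern (the actual intercept_run is defined in mx_sdk_benchmark.NativeImageBenchmarkMixin and may differ in its details):

# Hypothetical sketch of the intercept_run pattern, for illustration only.
# 'delegate' is the suite's superclass (passed in as super()); its plain run()
# is invoked once per stage that the mixin decides to execute.
def intercept_run(self, delegate, benchmarks, bm_suite_args):
    if not self.is_native_mode(bm_suite_args):
        # Non-Native-Image VMs keep the original single-pass behaviour.
        return delegate.run(benchmarks, bm_suite_args)
    datapoints = []
    for stage in self.stages_to_run(bm_suite_args):    # hypothetical helper
        self.stages_info.setup_stage(stage)            # hypothetical helper
        datapoints += delegate.run(benchmarks, bm_suite_args)
    return datapoints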


mx_benchmark.add_bm_suite(JMHRunnerGraalCoreBenchmarkSuite())

@@ -477,6 +521,9 @@ def group(self):
def subgroup(self):
return "graal-compiler"

def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
return self.intercept_run(super(), benchmarks, bmSuiteArgs)


mx_benchmark.add_bm_suite(JMHJarGraalCoreBenchmarkSuite())

@@ -492,10 +539,16 @@ def group(self):
def subgroup(self):
return "graal-compiler"

def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
return self.intercept_run(super(), benchmarks, bmSuiteArgs)

def filter_distribution(self, dist):
return super(JMHDistGraalCoreBenchmarkSuite, self).filter_distribution(dist) and \
not JMHDistWhiteboxBenchmarkSuite.is_whitebox_dependency(dist)

def successPatterns(self):
return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS
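
SUCCESSFUL_STAGE_PATTERNS is appended so that output validation also accepts runs whose stdout only reports a successfully completed stage (for example an image build) rather than a full benchmark result. As a minimal sketch of how such pattern lists are typically checked; the pattern text below is an assumption, the real list comes from mx_sdk_benchmark.SUCCESSFUL_STAGE_PATTERNS:

import re

# Assumed pattern text, for illustration only.
SUCCESSFUL_STAGE_PATTERNS = [re.compile(r"Successfully finished stage: .+", re.MULTILINE)]

def stdout_indicates_success(output, patterns):
    # A run passes validation if any success pattern matches its output.
    return any(p.search(output) for p in patterns)

stdout_indicates_success("Successfully finished stage: image", SUCCESSFUL_STAGE_PATTERNS)  # True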


mx_benchmark.add_bm_suite(JMHDistGraalCoreBenchmarkSuite())

@@ -511,6 +564,9 @@ def group(self):
def subgroup(self):
return "graal-compiler"

def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
return self.intercept_run(super(), benchmarks, bmSuiteArgs)

@staticmethod
def is_whitebox_dependency(dist):
return hasattr(dist, 'graalWhiteboxDistribution') and dist.graalWhiteboxDistribution
@@ -542,5 +598,8 @@ def getJMHEntry(self, bmSuiteArgs):
assert self.dist
return [mx.distribution(self.dist).mainClass]

def successPatterns(self):
return super().successPatterns() + SUCCESSFUL_STAGE_PATTERNS


mx_benchmark.add_bm_suite(JMHDistWhiteboxBenchmarkSuite())
18 changes: 8 additions & 10 deletions java-benchmarks/mx.java-benchmarks/mx_java_benchmarks.py
@@ -35,7 +35,7 @@

import mx
import mx_benchmark
from mx_benchmark import ParserEntry
from mx_benchmark import ParserEntry, DataPoints
import mx_sdk_benchmark
from mx_sdk_benchmark import NativeImageBundleBasedBenchmarkMixin
import mx_sdk_vm_impl
@@ -161,12 +161,8 @@ def skip_agent_assertions(self, benchmark, args):
else:
return []

def stages(self, args):
# This method overrides NativeImageMixin.stages
parsed_arg = mx_sdk_benchmark.parse_prefixed_arg('-Dnative-image.benchmark.stages=', args, 'Native Image benchmark stages should only be specified once.')
return parsed_arg.split(',') if parsed_arg else self.default_stages()

def default_stages(self):
# This method is used by NativeImageMixin.stages
raise NotImplementedError()
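
The stages() override removed above parsed the -Dnative-image.benchmark.stages property; for context, a standalone sketch of that selection logic (the helper name and error handling are illustrative, and the parsing now lives in the shared mx_sdk_benchmark code rather than here):

def selected_stages(args, default_stages):
    # Illustrative re-implementation of the removed parsing, not part of this commit.
    prefix = '-Dnative-image.benchmark.stages='
    values = [a[len(prefix):] for a in args if a.startswith(prefix)]
    if len(values) > 1:
        raise ValueError('Native Image benchmark stages should only be specified once.')
    return values[0].split(',') if values else default_stages

# e.g. selected_stages(['-Dnative-image.benchmark.stages=instrument-image,instrument-run,image,run'],
#                      ['image', 'run'])
# -> ['instrument-image', 'instrument-run', 'image', 'run']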


@@ -186,7 +182,6 @@ def get_application_startup_units(self):
return 's'

def default_stages(self):
# This method overrides NativeImageMixin.stages
return ['instrument-image', 'instrument-run', 'image', 'run']

def uses_bundles(self):
@@ -420,6 +415,9 @@ def benchmarkList(self, bmSuiteArgs):
def default_stages(self):
return ['image']

def run(self, benchmarks, bmSuiteArgs):
return self.intercept_run(super(), benchmarks, bmSuiteArgs)

def createCommandLineArgs(self, benchmarks, bmSuiteArgs):
if benchmarks is None:
mx.abort("Suite can only run a single benchmark per VM instance.")
@@ -1952,7 +1950,7 @@ def rules(self, out, benchmarks, bmSuiteArgs):
)
]

def run(self, benchmarks, bmSuiteArgs):
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
results = super(RenaissanceBenchmarkSuite, self).run(benchmarks, bmSuiteArgs)
self.addAverageAcrossLatestResults(results)
return results
@@ -2029,7 +2027,7 @@ def getExtraIterationCount(self, iterations):
# We average over the last 2 out of 3 total iterations done by this suite.
return 2

def run(self, benchmarks, bmSuiteArgs):
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
runretval = self.runAndReturnStdOut(benchmarks, bmSuiteArgs)
retcode, out, dims = runretval
self.validateStdoutWithDimensions(
@@ -2164,7 +2162,7 @@ def rules(self, out, benchmarks, bmSuiteArgs):
)
]

def run(self, benchmarks, bmSuiteArgs):
def run(self, benchmarks, bmSuiteArgs) -> DataPoints:
results = super(AWFYBenchmarkSuite, self).run(benchmarks, bmSuiteArgs)
self.addAverageAcrossLatestResults(results)
return results