Skip to content

Commit

Permalink
Merge pull request #46 from zakkak/2025-02-12-backports-23.1.5-batch1
Browse files Browse the repository at this point in the history
Merge upstream vm-23.1.5 tag 1st batch
  • Loading branch information
zakkak authored Feb 17, 2025
2 parents d1351c0 + a5083fd commit daeea07
Show file tree
Hide file tree
Showing 49 changed files with 1,056 additions and 340 deletions.
48 changes: 29 additions & 19 deletions ci/ci_common/bench-common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@
else true
},

# Mixin capping the number of threads to use for benchmarking in general.
# The goal is to limit parallelism on very large servers, which may not be representative of real-world scenarios.
bench_max_threads:: {
# thread count requested for the benchmark command (hidden field, meant to be mixed into a suite definition)
restrict_threads:: 36
},

# Mixin for suites that must not be thread-capped: it resets restrict_threads to null
# and sets should_use_hwloc to false.
# NOTE(review): presumably intended for suites that do their own CPU binding — confirm against the users of this mixin.
bench_no_thread_cap:: {
restrict_threads:: null,
should_use_hwloc:: false
},

bench_hw:: {
_bench_machine:: {
targets+: ["bench"],
Expand All @@ -42,15 +53,16 @@
numa_nodes:: [],
is_numa:: std.length(self.numa_nodes) > 0,
num_threads:: error "num_threads must bet set!",
hyperthreading:: true,
threads_per_node:: if self.is_numa then self.num_threads / std.length(self.numa_nodes) else self.num_threads,
},

x52:: common.linux_amd64 + self._bench_machine + {
machine_name:: "x52",
capabilities+: ["no_frequency_scaling", "tmpfs25g"],
e3:: common.linux_amd64 + self._bench_machine + {
machine_name:: "e3",
capabilities: ["e3", "tmpfs25g", "linux", "amd64"],
numa_nodes:: [0, 1],
default_numa_node:: 0,
num_threads:: 72
default_numa_node:: 1,
num_threads:: 256
},
x82:: common.linux_amd64 + self._bench_machine + {
machine_name:: "x82",
Expand All @@ -62,31 +74,29 @@
xgene3:: common.linux_aarch64 + self._bench_machine + {
machine_name:: "xgene3",
capabilities+: [],
num_threads:: 32
num_threads:: 32,
hyperthreading:: false
},
a12c:: common.linux_aarch64 + self._bench_machine + {
machine_name:: "a12c",
capabilities+: ["no_frequency_scaling", "tmpfs25g"],
numa_nodes:: [0, 1],
default_numa_node:: 0,
num_threads:: 160
num_threads:: 160,
hyperthreading:: false
}
},

hwlocIfNuma(numa, cmd, node=0)::
if numa then
hwloc_cmd(cmd, num_threads, node, hyperthreading, max_threads_per_node)::
if num_threads == null then
["hwloc-bind", "--cpubind", "node:"+node, "--membind", "node:"+node, "--"] + cmd
else
cmd,

parallelHwloc(cmd_node0, cmd_node1)::
// Returns a list of commands that will run cmd_node0 on NUMA node 0
// concurrently with cmd_node1 on NUMA node 1 and then wait for both to complete.
[
$.hwlocIfNuma(true, cmd_node0, node=0) + ["&"],
$.hwlocIfNuma(true, cmd_node1, node=1) + ["&"],
["wait"]
],
local threads = if num_threads != null then num_threads else max_threads_per_node;
assert if hyperthreading then threads % 2 == 0 else true: "It is required to bind to an even number of threads on hyperthreaded machines. Got requested "+threads+" threads";
assert threads <= max_threads_per_node: "Benchmarking must run on a single NUMA node for stability reasons. Got requested "+threads+" threads but the machine has only "+max_threads_per_node+" threads per node"; local cores = if hyperthreading then "0-"+((threads/2)-1)+".pu:0-1" else "0-"+(threads-1)+".pu:0";
local cpu_bind = if hyperthreading then "node:"+node+".core:"+cores else "node:"+node+".core:"+cores+".pu:0";
["hwloc-bind", "--cpubind", cpu_bind, "--membind", "node:"+node, "--"] + cmd
,

// building block used to generate fork builds
many_forks_benchmarking:: common.build_base + {
Expand Down
6 changes: 3 additions & 3 deletions common.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
"labsjdk-ce-21": {"name": "labsjdk", "version": "ce-21.0.2+13-jvmci-23.1-b30", "platformspecific": true },
"labsjdk-ce-21Debug": {"name": "labsjdk", "version": "ce-21.0.2+13-jvmci-23.1-b30-debug", "platformspecific": true },
"labsjdk-ce-21-llvm": {"name": "labsjdk", "version": "ce-21.0.2+13-jvmci-23.1-b30-sulong", "platformspecific": true },
"labsjdk-ee-21": {"name": "labsjdk", "version": "ee-21.0.3+7-jvmci-23.1-b37", "platformspecific": true },
"labsjdk-ee-21Debug": {"name": "labsjdk", "version": "ee-21.0.3+7-jvmci-23.1-b37-debug", "platformspecific": true },
"labsjdk-ee-21-llvm": {"name": "labsjdk", "version": "ee-21.0.3+7-jvmci-23.1-b37-sulong", "platformspecific": true },
"labsjdk-ee-21": {"name": "labsjdk", "version": "ee-21.0.4+3-jvmci-23.1-b38", "platformspecific": true },
"labsjdk-ee-21Debug": {"name": "labsjdk", "version": "ee-21.0.4+3-jvmci-23.1-b38-debug", "platformspecific": true },
"labsjdk-ee-21-llvm": {"name": "labsjdk", "version": "ee-21.0.4+3-jvmci-23.1-b38-sulong", "platformspecific": true },

"oraclejdk22": {"name": "jpg-jdk", "version": "22", "build_id": "2", "release": true, "platformspecific": true, "extrabundles": ["static-libs"]}
},
Expand Down
50 changes: 25 additions & 25 deletions compiler/ci/ci_common/benchmark-builders.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,40 @@

local main_builds = std.flattenArrays([
[
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.dacapo + { unicorn_pull_request_benchmarking:: {name: 'libgraal', metrics: ['time']}},
c.weekly + hw.x52 + jdk + cc.libgraal + bench.dacapo_size_variants,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.dacapo_timing,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.scala_dacapo + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.weekly + hw.x52 + jdk + cc.libgraal + bench.scala_dacapo_size_variants,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.scala_dacapo_timing,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.renaissance + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.specjvm2008 + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.weekly + hw.x52 + jdk + cc.libgraal + bench.specjbb2015,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.specjbb2015_full_machine,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.renaissance_0_11,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.awfy + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.daily + hw.x52 + jdk + cc.libgraal + bench.microservice_benchmarks,
c.daily + hw.x52 + jdk + cc.libgraal + bench.renaissance_legacy,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_graal_whitebox,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_graal_dist,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_misc_graal_dist,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_shootout_graal_dist,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.dacapo + { unicorn_pull_request_benchmarking:: {name: 'libgraal', metrics: ['time']}},
c.weekly + hw.e3 + jdk + cc.libgraal + bench.dacapo_size_variants,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.dacapo_timing,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.scala_dacapo + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.weekly + hw.e3 + jdk + cc.libgraal + bench.scala_dacapo_size_variants,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.scala_dacapo_timing,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.renaissance + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.specjvm2008 + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.weekly + hw.e3 + jdk + cc.libgraal + bench.specjbb2015,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.specjbb2015_full_machine,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.renaissance_0_11,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.awfy + {unicorn_pull_request_benchmarking:: 'libgraal'},
c.daily + hw.e3 + jdk + cc.libgraal + bench.microservice_benchmarks,
c.daily + hw.e3 + jdk + cc.libgraal + bench.renaissance_legacy,
c.daily + hw.e3 + jdk + cc.libgraal + bench.micros_graal_whitebox,
c.daily + hw.e3 + jdk + cc.libgraal + bench.micros_graal_dist,
c.daily + hw.e3 + jdk + cc.libgraal + bench.micros_misc_graal_dist,
c.daily + hw.e3 + jdk + cc.libgraal + bench.micros_shootout_graal_dist,
]
for jdk in cc.bench_jdks
]),

local profiling_builds = std.flattenArrays([
[
c.weekly + hw.x52 + jdk + cc.libgraal + suite + cc.enable_profiling + { job_prefix:: "bench-compiler-profiling" },
c.weekly + hw.x52 + jdk + cc.libgraal + suite + cc.footprint_tracking + { job_prefix:: "bench-compiler-footprint" }
c.monthly + hw.e3 + jdk + cc.libgraal + suite + cc.enable_profiling + { job_prefix:: "bench-compiler-profiling" },
c.monthly + hw.e3 + jdk + cc.libgraal + suite + cc.footprint_tracking + { job_prefix:: "bench-compiler-footprint" }
]
for jdk in cc.bench_jdks
for suite in bench.groups.profiled_suites
]),

local weekly_amd64_forks_builds = std.flattenArrays([
bc.generate_fork_builds(c.weekly + hw.x52 + jdk + cc.libgraal + suite, subdir='compiler') +
bc.generate_fork_builds(c.monthly + hw.x52 + jdk + cc.jargraal + suite, subdir='compiler')
bc.generate_fork_builds(c.weekly + hw.e3 + jdk + cc.libgraal + suite, subdir='compiler') +
bc.generate_fork_builds(c.monthly + hw.e3 + jdk + cc.jargraal + suite, subdir='compiler')
for jdk in cc.bench_jdks
for suite in bench.groups.weekly_forks_suites
]),
Expand All @@ -63,7 +63,7 @@
],

local zgc_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.zgc_mode + suite,
c.weekly + hw.e3 + jdk + cc.libgraal + cc.zgc_mode + suite,
for jdk in cc.bench_jdks
for suite in bench.groups.main_suites + [bench.specjbb2015]
],
Expand All @@ -76,13 +76,13 @@
],

local no_tiered_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.no_tiered_comp + suite,
c.monthly + hw.e3 + jdk + cc.libgraal + cc.no_tiered_comp + suite,
for jdk in cc.bench_jdks
for suite in bench.groups.main_suites
],

local no_profile_info_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.no_profile_info + suite,
c.monthly + hw.e3 + jdk + cc.libgraal + cc.no_profile_info + suite,
for jdk in cc.bench_jdks
for suite in bench.groups.main_suites
],
Expand Down
38 changes: 11 additions & 27 deletions compiler/ci/ci_common/benchmark-suites.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

// suite definitions
// *****************
awfy: cc.compiler_benchmark + c.heap.small + {
awfy: cc.compiler_benchmark + c.heap.small + bc.bench_max_threads + {
suite:: "awfy",
run+: [
self.benchmark_cmd + ["awfy:*", "--"] + self.extra_vm_args
Expand All @@ -38,7 +38,7 @@
max_jdk_version:: null
},

dacapo: cc.compiler_benchmark + c.heap.default + {
dacapo: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "dacapo",
run+: [
self.benchmark_cmd + ["dacapo:*", "--"] + self.extra_vm_args
Expand All @@ -50,7 +50,7 @@
max_jdk_version:: null
},

dacapo_size_variants: cc.compiler_benchmark + c.heap.default + {
dacapo_size_variants: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "dacapo-size-variants",
run+: [
self.benchmark_cmd + ["dacapo-small:*", "--"] + self.extra_vm_args,
Expand All @@ -76,7 +76,7 @@
max_jdk_version:: null
},

scala_dacapo: cc.compiler_benchmark + c.heap.default + {
scala_dacapo: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "scala-dacapo",
run+: [
self.benchmark_cmd + ["scala-dacapo:*", "--"] + self.extra_vm_args
Expand All @@ -88,7 +88,7 @@
max_jdk_version:: null
},

scala_dacapo_size_variants: cc.compiler_benchmark + c.heap.default + {
scala_dacapo_size_variants: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "scala-dacapo-size-variants",
run+: [
self.benchmark_cmd + ["scala-dacapo-tiny:*", "--"] + self.extra_vm_args,
Expand Down Expand Up @@ -119,7 +119,7 @@
max_jdk_version:: null
},

renaissance_template(suite_version=null, suite_name="renaissance", max_jdk_version=null):: cc.compiler_benchmark + c.heap.default + {
renaissance_template(suite_version=null, suite_name="renaissance", max_jdk_version=null):: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: suite_name,
local suite_version_args = if suite_version != null then ["--bench-suite-version=" + suite_version] else [],
run+: [
Expand Down Expand Up @@ -208,7 +208,7 @@
},

// Microservice benchmarks
microservice_benchmarks: cc.compiler_benchmark + {
microservice_benchmarks: cc.compiler_benchmark + bc.bench_no_thread_cap + { # no thread cap here since hwloc is handled at the mx level for microservices
suite:: "microservices",
packages+: {
"pip:psutil": "==5.8.0"
Expand All @@ -221,16 +221,8 @@
local hwlocBind_16C_32T = ["--hwloc-bind=--cpubind node:0.core:0-15.pu:0-1 --membind node:0"],
run+: [
# shopcart-wrk
self.benchmark_cmd + ["shopcart-wrk:mixed-tiny"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms32m", "-Xmx112m", "-XX:ActiveProcessorCount=1", "-XX:MaxDirectMemorySize=256m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-small"] + hwlocBind_2C_2T + ["--"] + self.extra_vm_args + ["-Xms64m", "-Xmx224m", "-XX:ActiveProcessorCount=2", "-XX:MaxDirectMemorySize=512m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-medium"] + hwlocBind_4C_4T + ["--"] + self.extra_vm_args + ["-Xms128m", "-Xmx512m", "-XX:ActiveProcessorCount=4", "-XX:MaxDirectMemorySize=1024m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-large"] + hwlocBind_16C_16T + ["--"] + self.extra_vm_args + ["-Xms512m", "-Xmx3072m", "-XX:ActiveProcessorCount=16", "-XX:MaxDirectMemorySize=4096m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-huge"] + hwlocBind_16C_32T + ["--"] + self.extra_vm_args + ["-Xms1024m", "-Xmx8192m", "-XX:ActiveProcessorCount=32", "-XX:MaxDirectMemorySize=8192m"],
bench_upload,

# tika-wrk odt
self.benchmark_cmd + ["tika-wrk:odt-tiny"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms32m", "-Xmx150m", "-XX:ActiveProcessorCount=1"],
Expand All @@ -249,16 +241,8 @@
bench_upload,

# petclinic-wrk
self.benchmark_cmd + ["petclinic-wrk:mixed-tiny"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms32m", "-Xmx100m", "-XX:ActiveProcessorCount=1"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-small"] + hwlocBind_2C_2T + ["--"] + self.extra_vm_args + ["-Xms40m", "-Xmx144m", "-XX:ActiveProcessorCount=2"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-medium"] + hwlocBind_4C_4T + ["--"] + self.extra_vm_args + ["-Xms80m", "-Xmx256m", "-XX:ActiveProcessorCount=4"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-large"] + hwlocBind_16C_16T + ["--"] + self.extra_vm_args + ["-Xms320m", "-Xmx1280m", "-XX:ActiveProcessorCount=16"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-huge"] + hwlocBind_16C_32T + ["--"] + self.extra_vm_args + ["-Xms640m", "-Xmx3072m", "-XX:ActiveProcessorCount=32"],
bench_upload,

# helloworld-wrk
self.benchmark_cmd + ["micronaut-helloworld-wrk:helloworld"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms8m", "-Xmx64m", "-XX:ActiveProcessorCount=1", "-XX:MaxDirectMemorySize=256m"],
Expand All @@ -275,7 +259,7 @@
},

// JMH microbenchmarks
micros_graal_whitebox: cc.compiler_benchmark + c.heap.default + {
micros_graal_whitebox: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-graal-whitebox",
run+: [
self.benchmark_cmd + ["jmh-whitebox:*", "--"] + self.extra_vm_args
Expand All @@ -285,7 +269,7 @@
max_jdk_version:: null
},

micros_graal_dist: cc.compiler_benchmark + c.heap.default + {
micros_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_COMPILER_MICRO_BENCHMARKS", "--"] + self.extra_vm_args
Expand All @@ -295,7 +279,7 @@
max_jdk_version:: null
},

micros_misc_graal_dist: cc.compiler_benchmark + c.heap.default + {
micros_misc_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-misc-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_BENCH_MISC", "--"] + self.extra_vm_args
Expand All @@ -305,7 +289,7 @@
max_jdk_version:: null
},

micros_shootout_graal_dist: cc.compiler_benchmark + c.heap.default {
micros_shootout_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-shootout-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_BENCH_SHOOTOUT", "--"] + self.extra_vm_args
Expand Down
3 changes: 2 additions & 1 deletion compiler/ci/ci_common/compiler-common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@
"${BENCH_RESULTS_FILE_PATH}",
"--machine-name=${MACHINE_NAME}"] +
(if std.objectHasAll(self.environment, 'MX_TRACKER') then ["--tracker=" + self.environment['MX_TRACKER']] else ["--tracker=rss"]),
benchmark_cmd:: bench_common.hwlocIfNuma(self.should_use_hwloc, self.plain_benchmark_cmd, node=self.default_numa_node),
restrict_threads:: null, # can be overridden to restrict the benchmark to the given number of threads. If null, will use one full NUMA node
benchmark_cmd:: if self.should_use_hwloc then bench_common.hwloc_cmd(self.plain_benchmark_cmd, self.restrict_threads, self.default_numa_node, self.hyperthreading, self.threads_per_node) else self.plain_benchmark_cmd,
min_heap_size:: if std.objectHasAll(self.environment, 'XMS') then ["-Xms${XMS}"] else [],
max_heap_size:: if std.objectHasAll(self.environment, 'XMX') then ["-Xmx${XMX}"] else [],
_WarnMissingIntrinsic:: true, # won't be needed after GR-34642
Expand Down
Loading

0 comments on commit daeea07

Please sign in to comment.