diff --git a/.github/scripts/gen-build-failure-report.sh b/.github/scripts/gen-build-failure-report.sh
index fd3215fc7fe2d..c8336a5f7a1f9 100644
--- a/.github/scripts/gen-build-failure-report.sh
+++ b/.github/scripts/gen-build-failure-report.sh
@@ -24,12 +24,19 @@
# questions.
#
+# Import common utils
+. report-utils.sh
+
GITHUB_STEP_SUMMARY="$1"
BUILD_DIR="$(ls -d build/*)"
# Send signal to the do-build action that we failed
touch "$BUILD_DIR/build-failure"
+# Collect hs_errs for build-time crashes, e.g. javac, jmod, jlink, CDS.
+# These usually land in make/
+hs_err_files=$(ls make/hs_err*.log 2> /dev/null || true)
+
(
echo '### :boom: Build failure summary'
echo ''
@@ -46,6 +53,20 @@ touch "$BUILD_DIR/build-failure"
echo ''
echo ''
+ for hs_err in $hs_err_files; do
+ echo "<details><summary>View HotSpot error log $hs_err</summary>"
+ echo ''
+ echo '```'
+ echo "$hs_err:"
+ echo ''
+ cat "$hs_err"
+ echo '```'
+ echo '</details>'
+ echo ''
+ done
+
echo ''
echo ':arrow_right: To see the entire test log, click the job in the list to the left. To download logs, see the `failure-logs` [artifact above](#artifacts).'
) >> $GITHUB_STEP_SUMMARY
+
+truncate_summary
diff --git a/.github/scripts/gen-test-results.sh b/.github/scripts/gen-test-results.sh
index 9e85eef4dc08d..6c6cbaa3740f6 100644
--- a/.github/scripts/gen-test-results.sh
+++ b/.github/scripts/gen-test-results.sh
@@ -24,6 +24,9 @@
# questions.
#
+# Import common utils
+. report-utils.sh
+
GITHUB_STEP_SUMMARY="$1"
test_suite_name=$(cat build/run-test-prebuilt/test-support/test-last-ids.txt)
@@ -89,18 +92,6 @@ for test in $failures $errors; do
fi
done >> $GITHUB_STEP_SUMMARY
-# With many failures, the summary can easily exceed 1024 kB, the limit set by Github
-# Trim it down if so.
-summary_size=$(wc -c < $GITHUB_STEP_SUMMARY)
-if [[ $summary_size -gt 1000000 ]]; then
- # Trim to below 1024 kB, and cut off after the last detail group
- head -c 1000000 $GITHUB_STEP_SUMMARY | tac | sed -n -e '/<\/details>/,$ p' | tac > $GITHUB_STEP_SUMMARY.tmp
- mv $GITHUB_STEP_SUMMARY.tmp $GITHUB_STEP_SUMMARY
- (
- echo ''
- echo ':x: **WARNING: Summary is too large and has been truncated.**'
- echo ''
- ) >> $GITHUB_STEP_SUMMARY
-fi
-
echo ':arrow_right: To see the entire test log, click the job in the list to the left.' >> $GITHUB_STEP_SUMMARY
+
+truncate_summary
diff --git a/.github/scripts/report-utils.sh b/.github/scripts/report-utils.sh
new file mode 100644
index 0000000000000..da5b6c04b3cbe
--- /dev/null
+++ b/.github/scripts/report-utils.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation. Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+function truncate_summary() {
+ # With large hs_errs, the summary can easily exceed 1024 kB, the limit set by Github
+ # Trim it down if so.
+ summary_size=$(wc -c < $GITHUB_STEP_SUMMARY)
+ if [[ $summary_size -gt 1000000 ]]; then
+ # Trim to below 1024 kB, and cut off after the last detail group
+ head -c 1000000 $GITHUB_STEP_SUMMARY | tac | sed -n -e '/<\/details>/,$ p' | tac > $GITHUB_STEP_SUMMARY.tmp
+ mv $GITHUB_STEP_SUMMARY.tmp $GITHUB_STEP_SUMMARY
+ (
+ echo ''
+ echo ':x: **WARNING: Summary is too large and has been truncated.**'
+ echo ''
+ ) >> $GITHUB_STEP_SUMMARY
+ fi
+}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e4c05acb684b7..d5958853701ca 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -384,6 +384,7 @@ jobs:
- build-windows-aarch64
- test-linux-x64
- test-macos-x64
+ - test-macos-aarch64
- test-windows-x64
steps:
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000000000..f4c5e7e67cb46
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,3 @@
+# JDK Vulnerabilities
+
+Please follow the process outlined in the [OpenJDK Vulnerability Policy](https://openjdk.org/groups/vulnerability/report) to disclose vulnerabilities in the JDK.
diff --git a/doc/building.html b/doc/building.html
index 707531553124b..c91d876246cde 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -614,10 +614,9 @@
clang
--with-toolchain-type=clang
.
Apple Xcode
The oldest supported version of Xcode is 13.0.
-You will need the Xcode command line developer tools to be able to
-build the JDK. (Actually, only the command line tools are
-needed, not the IDE.) The simplest way to install these is to run:
-xcode-select --install
+You will need to download Xcode either from the App Store or specific
+versions can be easily located via the Xcode Releases website.
When updating Xcode, it is advisable to keep an older version for
building the JDK. To use a specific version of Xcode you have multiple
options:
diff --git a/doc/building.md b/doc/building.md
index 51ac0cad7d98b..47ad9e7c72b4c 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -422,13 +422,9 @@ To use clang instead of gcc on Linux, use `--with-toolchain-type=clang`.
The oldest supported version of Xcode is 13.0.
-You will need the Xcode command line developer tools to be able to build the
-JDK. (Actually, *only* the command line tools are needed, not the IDE.) The
-simplest way to install these is to run:
-
-```
-xcode-select --install
-```
+You will need to download Xcode either from the App Store or specific versions
+can be easily located via the [Xcode Releases](https://xcodereleases.com)
+website.
When updating Xcode, it is advisable to keep an older version for building the
JDK. To use a specific version of Xcode you have multiple options:
diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4
index bd33315090fce..9695644bafe63 100644
--- a/make/autoconf/jvm-features.m4
+++ b/make/autoconf/jvm-features.m4
@@ -479,6 +479,22 @@ AC_DEFUN([JVM_FEATURES_CALCULATE_ACTIVE],
$JVM_FEATURES_ENABLED, $JVM_FEATURES_DISABLED)
])
+################################################################################
+# Filter the unsupported feature combinations.
+# This is called after JVM_FEATURES_ACTIVE are fully populated.
+#
+AC_DEFUN([JVM_FEATURES_FILTER_UNSUPPORTED],
+[
+ # G1 late barrier expansion in C2 is not implemented for some platforms.
+ # Choose not to support G1 in this configuration.
+ if JVM_FEATURES_IS_ACTIVE(compiler2); then
+ if test "x$OPENJDK_TARGET_CPU" = "xx86"; then
+ AC_MSG_NOTICE([G1 cannot be used with C2 on this platform, disabling G1])
+ UTIL_GET_NON_MATCHING_VALUES(JVM_FEATURES_ACTIVE, $JVM_FEATURES_ACTIVE, "g1gc")
+ fi
+ fi
+])
+
################################################################################
# Helper function for JVM_FEATURES_VERIFY. Check if the specified JVM
# feature is active. To be used in shell if constructs, like this:
@@ -554,6 +570,9 @@ AC_DEFUN_ONCE([JVM_FEATURES_SETUP],
# The result is stored in JVM_FEATURES_ACTIVE.
JVM_FEATURES_CALCULATE_ACTIVE($variant)
+ # Filter unsupported feature combinations from JVM_FEATURES_ACTIVE.
+ JVM_FEATURES_FILTER_UNSUPPORTED
+
# Verify consistency for JVM_FEATURES_ACTIVE.
JVM_FEATURES_VERIFY($variant)
diff --git a/make/autoconf/toolchain.m4 b/make/autoconf/toolchain.m4
index 75c8d2b61d084..d84ae447e541a 100644
--- a/make/autoconf/toolchain.m4
+++ b/make/autoconf/toolchain.m4
@@ -307,7 +307,7 @@ AC_DEFUN_ONCE([TOOLCHAIN_POST_DETECTION],
[
# Restore old path, except for the microsoft toolchain, which requires the
# toolchain path to remain in place. Otherwise the compiler will not work in
- # some siutations in later configure checks.
+ # some situations in later configure checks.
if test "x$TOOLCHAIN_TYPE" != "xmicrosoft"; then
PATH="$OLD_PATH"
fi
@@ -316,10 +316,6 @@ AC_DEFUN_ONCE([TOOLCHAIN_POST_DETECTION],
# This is necessary since AC_PROG_CC defaults CFLAGS to "-g -O2"
CFLAGS="$ORG_CFLAGS"
CXXFLAGS="$ORG_CXXFLAGS"
-
- # filter out some unwanted additions autoconf may add to CXX; we saw this on macOS with autoconf 2.72
- UTIL_GET_NON_MATCHING_VALUES(cxx_filtered, $CXX, -std=c++11 -std=gnu++11)
- CXX="$cxx_filtered"
])
# Check if a compiler is of the toolchain type we expect, and save the version
@@ -358,6 +354,11 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_COMPILER_VERSION],
# Copyright (C) 2013 Free Software Foundation, Inc.
# This is free software; see the source for copying conditions. There is NO
# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ # or look like
+ # gcc (GCC) 10.2.1 20200825 (Alibaba 10.2.1-3.8 2.32)
+ # Copyright (C) 2020 Free Software Foundation, Inc.
+ # This is free software; see the source for copying conditions. There is NO
+ # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
COMPILER_VERSION_OUTPUT=`$COMPILER --version 2>&1`
# Check that this is likely to be GCC.
$ECHO "$COMPILER_VERSION_OUTPUT" | $GREP "Free Software Foundation" > /dev/null
@@ -371,7 +372,8 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_COMPILER_VERSION],
COMPILER_VERSION_STRING=`$ECHO $COMPILER_VERSION_OUTPUT | \
$SED -e 's/ *Copyright .*//'`
COMPILER_VERSION_NUMBER=`$ECHO $COMPILER_VERSION_OUTPUT | \
- $SED -e 's/^.* \(@<:@1-9@:>@<:@0-9@:>@*\.@<:@0-9.@:>@*\)@<:@^0-9.@:>@.*$/\1/'`
+ $AWK -F ')' '{print [$]2}' | \
+ $AWK '{print [$]1}'`
elif test "x$TOOLCHAIN_TYPE" = xclang; then
# clang --version output typically looks like
# Apple clang version 15.0.0 (clang-1500.3.9.4)
diff --git a/make/autoconf/util.m4 b/make/autoconf/util.m4
index 6beadb4c942c7..5a6142d509202 100644
--- a/make/autoconf/util.m4
+++ b/make/autoconf/util.m4
@@ -25,6 +25,70 @@
m4_include([util_paths.m4])
+###############################################################################
+# Overwrite the existing version of AC_PROG_CC with our own custom variant.
+# Unlike the regular AC_PROG_CC, the compiler list must always be passed.
+AC_DEFUN([AC_PROG_CC],
+[
+ AC_LANG_PUSH(C)
+ AC_ARG_VAR([CC], [C compiler command])
+ AC_ARG_VAR([CFLAGS], [C compiler flags])
+
+ _AC_ARG_VAR_LDFLAGS()
+ _AC_ARG_VAR_LIBS()
+ _AC_ARG_VAR_CPPFLAGS()
+
+ AC_CHECK_TOOLS(CC, [$1])
+
+ test -z "$CC" && AC_MSG_FAILURE([no acceptable C compiler found in \$PATH])
+
+ # Provide some information about the compiler.
+ _AS_ECHO_LOG([checking for _AC_LANG compiler version])
+ set X $ac_compile
+ ac_compiler=$[2]
+ for ac_option in --version -v -V -qversion -version; do
+ _AC_DO_LIMIT([$ac_compiler $ac_option >&AS_MESSAGE_LOG_FD])
+ done
+
+ m4_expand_once([_AC_COMPILER_EXEEXT])
+ m4_expand_once([_AC_COMPILER_OBJEXT])
+
+ _AC_PROG_CC_G
+
+ AC_LANG_POP(C)
+])
+
+###############################################################################
+# Overwrite the existing version of AC_PROG_CXX with our own custom variant.
+# Unlike the regular AC_PROG_CXX, the compiler list must always be passed.
+AC_DEFUN([AC_PROG_CXX],
+[
+ AC_LANG_PUSH(C++)
+ AC_ARG_VAR([CXX], [C++ compiler command])
+ AC_ARG_VAR([CXXFLAGS], [C++ compiler flags])
+
+ _AC_ARG_VAR_LDFLAGS()
+ _AC_ARG_VAR_LIBS()
+ _AC_ARG_VAR_CPPFLAGS()
+
+ AC_CHECK_TOOLS(CXX, [$1])
+
+ # Provide some information about the compiler.
+ _AS_ECHO_LOG([checking for _AC_LANG compiler version])
+ set X $ac_compile
+ ac_compiler=$[2]
+ for ac_option in --version -v -V -qversion; do
+ _AC_DO_LIMIT([$ac_compiler $ac_option >&AS_MESSAGE_LOG_FD])
+ done
+
+ m4_expand_once([_AC_COMPILER_EXEEXT])
+ m4_expand_once([_AC_COMPILER_OBJEXT])
+
+ _AC_PROG_CXX_G
+
+ AC_LANG_POP(C++)
+])
+
################################################################################
# Create a function/macro that takes a series of named arguments. The call is
# similar to AC_DEFUN, but the setup of the function looks like this:
diff --git a/make/conf/github-actions.conf b/make/conf/github-actions.conf
index eca6c05033d88..a6b383daa8fd4 100644
--- a/make/conf/github-actions.conf
+++ b/make/conf/github-actions.conf
@@ -29,21 +29,21 @@ GTEST_VERSION=1.14.0
JTREG_VERSION=7.4+1
LINUX_X64_BOOT_JDK_EXT=tar.gz
-LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_linux-x64_bin.tar.gz
-LINUX_X64_BOOT_JDK_SHA256=41536f115668308ecf4eba92aaf6acaeb0936225828b741efd83b6173ba82963
+LINUX_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_linux-x64_bin.tar.gz
+LINUX_X64_BOOT_JDK_SHA256=08fea92724127c6fa0f2e5ea0b07ff4951ccb1e2f22db3c21eebbd7347152a67
ALPINE_LINUX_X64_BOOT_JDK_EXT=tar.gz
-ALPINE_LINUX_X64_BOOT_JDK_URL=https://github.com/adoptium/temurin22-binaries/releases/download/jdk-22.0.2%2B9/OpenJDK22U-jdk_x64_alpine-linux_hotspot_22.0.2_9.tar.gz
-ALPINE_LINUX_X64_BOOT_JDK_SHA256=49f73414824b1a7c268a611225fa4d7ce5e25600201e0f1cd59f94d1040b5264
+ALPINE_LINUX_X64_BOOT_JDK_URL=https://github.com/adoptium/temurin23-binaries/releases/download/jdk-23%2B37/OpenJDK23U-jdk_x64_alpine-linux_hotspot_23_37.tar.gz
+ALPINE_LINUX_X64_BOOT_JDK_SHA256=bff4c78f30d8d173e622bf2f40c36113df47337fc6d1ee5105ed2459841165aa
MACOS_AARCH64_BOOT_JDK_EXT=tar.gz
-MACOS_AARCH64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_macos-aarch64_bin.tar.gz
-MACOS_AARCH64_BOOT_JDK_SHA256=3dab98730234e1a87aec14bcb8171d2cae101e96ff4eed1dab96abbb08e843fd
+MACOS_AARCH64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_macos-aarch64_bin.tar.gz
+MACOS_AARCH64_BOOT_JDK_SHA256=9527bf080a74ae6dca51df413aa826f0c011c6048885e4c8ad112172be8815f3
MACOS_X64_BOOT_JDK_EXT=tar.gz
-MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_macos-x64_bin.tar.gz
-MACOS_X64_BOOT_JDK_SHA256=e8b3ec7a7077711223d31156e771f11723cd7af31c2017f1bd2eda20855940fb
+MACOS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_macos-x64_bin.tar.gz
+MACOS_X64_BOOT_JDK_SHA256=5c3a909fd2079d0e376dd43c85c4f7d02d08914866f196480bd47784b2a0121e
WINDOWS_X64_BOOT_JDK_EXT=zip
-WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_windows-x64_bin.zip
-WINDOWS_X64_BOOT_JDK_SHA256=f2a9b9ab944e71a64637fcdc6b13a1188cf02d4eb9ecf71dc927e98b3e45f5dc
+WINDOWS_X64_BOOT_JDK_URL=https://download.java.net/java/GA/jdk23/3c5b90190c68498b986a97f276efd28a/37/GPL/openjdk-23_windows-x64_bin.zip
+WINDOWS_X64_BOOT_JDK_SHA256=cba5013874ba50cae543c86fe6423453816c77281e2751a8a9a633d966f1dc04
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index 30c45d4cde161..a85c20b2098ea 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -390,8 +390,8 @@ var getJibProfilesCommon = function (input, data) {
};
};
- common.boot_jdk_version = "22";
- common.boot_jdk_build_number = "36";
+ common.boot_jdk_version = "23";
+ common.boot_jdk_build_number = "37";
common.boot_jdk_home = input.get("boot_jdk", "install_path") + "/jdk-"
+ common.boot_jdk_version
+ (input.build_os == "macosx" ? ".jdk/Contents/Home" : "");
diff --git a/make/conf/version-numbers.conf b/make/conf/version-numbers.conf
index 1d47c2cddd001..055f9ca886618 100644
--- a/make/conf/version-numbers.conf
+++ b/make/conf/version-numbers.conf
@@ -37,6 +37,6 @@ DEFAULT_VERSION_DATE=2025-03-18
DEFAULT_VERSION_CLASSFILE_MAJOR=68 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
DEFAULT_VERSION_CLASSFILE_MINOR=0
DEFAULT_VERSION_DOCS_API_SINCE=11
-DEFAULT_ACCEPTABLE_BOOT_VERSIONS="22 23 24"
+DEFAULT_ACCEPTABLE_BOOT_VERSIONS="23 24"
DEFAULT_JDK_SOURCE_TARGET_VERSION=24
DEFAULT_PROMOTED_VERSION_PRE=ea
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index 8dada3cec0a1d..ddb2c3e33e513 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -200,6 +200,13 @@ ifeq ($(call check-jvm-feature, compiler2), true)
)))
endif
+ ifeq ($(call check-jvm-feature, g1gc), true)
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU).ad \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU_ARCH).ad \
+ )))
+ endif
+
SINGLE_AD_SRCFILE := $(ADLC_SUPPORT_DIR)/all-ad-src.ad
INSERT_FILENAME_AWK_SCRIPT := \
diff --git a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
index 630d3a390d18a..426d0bb10ede1 100644
--- a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
+++ b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesCompiler.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -273,7 +273,7 @@ private void outputFile(Path dstFile, String version,
// link version-region-rules
out.writeShort(builtZones.size());
for (Map.Entry entry : builtZones.entrySet()) {
- int regionIndex = Arrays.binarySearch(regionArray, entry.getKey());
+ int regionIndex = findRegionIndex(regionArray, entry.getKey());
int rulesIndex = rulesList.indexOf(entry.getValue());
out.writeShort(regionIndex);
out.writeShort(rulesIndex);
@@ -281,8 +281,8 @@ private void outputFile(Path dstFile, String version,
// alias-region
out.writeShort(links.size());
for (Map.Entry entry : links.entrySet()) {
- int aliasIndex = Arrays.binarySearch(regionArray, entry.getKey());
- int regionIndex = Arrays.binarySearch(regionArray, entry.getValue());
+ int aliasIndex = findRegionIndex(regionArray, entry.getKey());
+ int regionIndex = findRegionIndex(regionArray, entry.getValue());
out.writeShort(aliasIndex);
out.writeShort(regionIndex);
}
@@ -294,6 +294,14 @@ private void outputFile(Path dstFile, String version,
}
}
+ private static int findRegionIndex(String[] regionArray, String region) {
+ int index = Arrays.binarySearch(regionArray, region);
+ if (index < 0) {
+ throw new IllegalArgumentException("Unknown region: " + region);
+ }
+ return index;
+ }
+
/** Whether to output verbose messages. */
private boolean verbose;
diff --git a/make/modules/jdk.hotspot.agent/Lib.gmk b/make/modules/jdk.hotspot.agent/Lib.gmk
index f0ede594d0ce8..12f1c1f2a9077 100644
--- a/make/modules/jdk.hotspot.agent/Lib.gmk
+++ b/make/modules/jdk.hotspot.agent/Lib.gmk
@@ -59,9 +59,7 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBSAPROC, \
OPTIMIZATION := HIGH, \
EXTRA_HEADER_DIRS := java.base:libjvm, \
DISABLED_WARNINGS_gcc := sign-compare, \
- DISABLED_WARNINGS_gcc_LinuxDebuggerLocal.cpp := unused-variable, \
DISABLED_WARNINGS_gcc_ps_core.c := pointer-arith, \
- DISABLED_WARNINGS_gcc_symtab.c := unused-but-set-variable, \
DISABLED_WARNINGS_clang := sign-compare, \
DISABLED_WARNINGS_clang_libproc_impl.c := format-nonliteral, \
DISABLED_WARNINGS_clang_MacosxDebuggerLocal.m := unused-variable, \
diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index 0620549f05cd7..bf6ace6f97f7c 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -37,3 +37,21 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO
TARGETS += $(BUILD_LIBJSVML)
endif
+
+################################################################################
+## Build libsleef
+################################################################################
+
+ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
+ $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
+ NAME := sleef, \
+ OPTIMIZATION := HIGH, \
+ SRC := libsleef/lib, \
+ EXTRA_SRC := libsleef/generated, \
+ DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
+ DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
+ CFLAGS := -march=rv64gcv, \
+ ))
+
+ TARGETS += $(BUILD_LIBSLEEF)
+endif
diff --git a/make/test/JtregNativeJdk.gmk b/make/test/JtregNativeJdk.gmk
index d9f1e334a5cf8..90055cb5c0114 100644
--- a/make/test/JtregNativeJdk.gmk
+++ b/make/test/JtregNativeJdk.gmk
@@ -115,6 +115,8 @@ ifeq ($(call isTargetOs, linux), true)
# stripping during the test libraries' build.
BUILD_JDK_JTREG_LIBRARIES_CFLAGS_libFib := -g
BUILD_JDK_JTREG_LIBRARIES_STRIP_SYMBOLS_libFib := false
+ # nio tests' libCreationTimeHelper native needs -ldl linker flag
+ BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libCreationTimeHelper := -ldl
endif
ifeq ($(ASAN_ENABLED), true)
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index fced9cfc35e57..d9c77a2f52926 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1244,7 +1244,7 @@ source %{
// r27 is not allocatable when compressed oops is on and heapbase is not
// zero, compressed klass pointers doesn't use r27 after JDK-8234794
- if (UseCompressedOops && (CompressedOops::ptrs_base() != nullptr)) {
+ if (UseCompressedOops && (CompressedOops::base() != nullptr)) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
@@ -2307,10 +2307,6 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return new TypeVectMask(elemTy, length);
-}
-
// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
return false;
@@ -2620,7 +2616,8 @@ static bool is_vector_bitwise_not_pattern(Node* n, Node* m) {
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
if (is_vshift_con_pattern(n, m) ||
is_vector_bitwise_not_pattern(n, m) ||
- is_valid_sve_arith_imm_pattern(n, m)) {
+ is_valid_sve_arith_imm_pattern(n, m) ||
+ is_encode_and_store_pattern(n, m)) {
mstack.push(m, Visit);
return true;
}
@@ -2720,7 +2717,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
{
Address addr = mem2address(opcode, base, index, scale, disp);
if (addr.getMode() == Address::base_plus_offset) {
- // Fix up any out-of-range offsets.
+ /* Fix up any out-of-range offsets. */
assert_different_registers(rscratch1, base);
assert_different_registers(rscratch1, reg);
addr = __ legitimize_address(addr, size_in_memory, rscratch1);
@@ -2761,11 +2758,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
int opcode, Register base, int index, int size, int disp)
{
if (index == -1) {
- // Fix up any out-of-range offsets.
- assert_different_registers(rscratch1, base);
- Address addr = Address(base, disp);
- addr = __ legitimize_address(addr, (1 << T), rscratch1);
- (masm->*insn)(reg, T, addr);
+ (masm->*insn)(reg, T, Address(base, disp));
} else {
assert(disp == 0, "unsupported address mode");
(masm->*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
@@ -2820,7 +2813,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrsbw(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2828,7 +2821,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrsb(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2836,7 +2829,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrb(iRegI dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2844,7 +2837,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldrb(iRegL dst, memory1 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2852,7 +2845,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrshw(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2860,7 +2853,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrsh(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2868,7 +2861,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrh(iRegI dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2876,7 +2869,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldrh(iRegL dst, memory2 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2884,7 +2877,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
+ enc_class aarch64_enc_ldrw(iRegI dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2892,7 +2885,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldrw(iRegL dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2900,7 +2893,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldrsw(iRegL dst, memory4 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2908,7 +2901,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldr(iRegL dst, memory8 mem) %{
Register dst_reg = as_Register($dst$$reg);
loadStore(masm, &MacroAssembler::ldr, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -2916,7 +2909,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
+ enc_class aarch64_enc_ldrs(vRegF dst, memory4 mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(masm, &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2924,7 +2917,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
+ enc_class aarch64_enc_ldrd(vRegD dst, memory8 mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(masm, &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -2932,7 +2925,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strb(iRegI src, memory mem) %{
+ enc_class aarch64_enc_strb(iRegI src, memory1 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(masm, &MacroAssembler::strb, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -2940,14 +2933,14 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strb0(memory mem) %{
+ enc_class aarch64_enc_strb0(memory1 mem) %{
loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strh(iRegI src, memory mem) %{
+ enc_class aarch64_enc_strh(iRegI src, memory2 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(masm, &MacroAssembler::strh, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
@@ -2955,14 +2948,14 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strh0(memory mem) %{
+ enc_class aarch64_enc_strh0(memory2 mem) %{
loadStore(masm, &MacroAssembler::strh, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 2);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strw(iRegI src, memory mem) %{
+ enc_class aarch64_enc_strw(iRegI src, memory4 mem) %{
Register src_reg = as_Register($src$$reg);
loadStore(masm, &MacroAssembler::strw, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -2970,14 +2963,14 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strw0(memory mem) %{
+ enc_class aarch64_enc_strw0(memory4 mem) %{
loadStore(masm, &MacroAssembler::strw, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_str(iRegL src, memory mem) %{
+ enc_class aarch64_enc_str(iRegL src, memory8 mem) %{
Register src_reg = as_Register($src$$reg);
// we sometimes get asked to store the stack pointer into the
// current thread -- we cannot do that directly on AArch64
@@ -2992,14 +2985,14 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_str0(memory mem) %{
+ enc_class aarch64_enc_str0(memory8 mem) %{
loadStore(masm, &MacroAssembler::str, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
%}
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strs(vRegF src, memory mem) %{
+ enc_class aarch64_enc_strs(vRegF src, memory4 mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(masm, &MacroAssembler::strs, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
@@ -3007,7 +3000,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strd(vRegD src, memory mem) %{
+ enc_class aarch64_enc_strd(vRegD src, memory8 mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(masm, &MacroAssembler::strd, src_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
@@ -3015,7 +3008,7 @@ encode %{
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strb0_ordered(memory mem) %{
+ enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
__ membar(Assembler::StoreStore);
loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
@@ -3217,7 +3210,7 @@ encode %{
// synchronized read/update encodings
- enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
+ enc_class aarch64_enc_ldaxr(iRegL dst, memory8 mem) %{
Register dst_reg = as_Register($dst$$reg);
Register base = as_Register($mem$$base);
int index = $mem$$index;
@@ -3245,7 +3238,7 @@ encode %{
}
%}
- enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
+ enc_class aarch64_enc_stlxr(iRegLNoSp src, memory8 mem) %{
Register src_reg = as_Register($src$$reg);
Register base = as_Register($mem$$base);
int index = $mem$$index;
@@ -4173,10 +4166,60 @@ operand immIU7()
interface(CONST_INTER);
%}
-// Offset for immediate loads and stores
+// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
- predicate(n->get_int() >= -256 && n->get_int() <= 65520);
+ predicate(Address::offset_ok_for_immed(n->get_int(), 0));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset1()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_int(), 0));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset2()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_int(), 1));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset4()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_int(), 2));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset8()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_int(), 3));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immIOffset16()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_int(), 4));
match(ConI);
op_cost(0);
@@ -4194,6 +4237,56 @@ operand immLOffset()
interface(CONST_INTER);
%}
+operand immLoffset1()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_long(), 0));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immLoffset2()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_long(), 1));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immLoffset4()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_long(), 2));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immLoffset8()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_long(), 3));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immLoffset16()
+%{
+ predicate(Address::offset_ok_for_immed(n->get_long(), 4));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// 5 bit signed long integer
operand immL5()
%{
@@ -5106,7 +5199,105 @@ operand indIndex(iRegP reg, iRegL lreg)
%}
%}
-operand indOffI(iRegP reg, immIOffset off)
+operand indOffI1(iRegP reg, immIOffset1 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffI2(iRegP reg, immIOffset2 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffI4(iRegP reg, immIOffset4 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffI8(iRegP reg, immIOffset8 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffI16(iRegP reg, immIOffset16 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffL1(iRegP reg, immLoffset1 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffL2(iRegP reg, immLoffset2 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffL4(iRegP reg, immLoffset4 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
@@ -5120,7 +5311,21 @@ operand indOffI(iRegP reg, immIOffset off)
%}
%}
-operand indOffL(iRegP reg, immLOffset off)
+operand indOffL8(iRegP reg, immLoffset8 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand indOffL16(iRegP reg, immLoffset16 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
@@ -5496,7 +5701,10 @@ operand iRegL2P(iRegL reg) %{
interface(REG_INTER)
%}
-opclass vmem(indirect, indIndex, indOffI, indOffL, indOffIN, indOffLN);
+opclass vmem2(indirect, indIndex, indOffI2, indOffL2);
+opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
+opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
+opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
@@ -5508,9 +5716,23 @@ opclass vmem(indirect, indIndex, indOffI, indOffL, indOffIN, indOffLN);
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
-opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
- indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN,
- indOffLN, indirectX2P, indOffX2P);
+opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
+
+opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
+
+opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
+
+opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
+
+// All of the memory operands. For the pipeline description.
+opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
+ indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
+
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
@@ -6212,7 +6434,7 @@ define %{
// Load Instructions
// Load Byte (8 bit signed)
-instruct loadB(iRegINoSp dst, memory mem)
+instruct loadB(iRegINoSp dst, memory1 mem)
%{
match(Set dst (LoadB mem));
predicate(!needs_acquiring_load(n));
@@ -6226,7 +6448,7 @@ instruct loadB(iRegINoSp dst, memory mem)
%}
// Load Byte (8 bit signed) into long
-instruct loadB2L(iRegLNoSp dst, memory mem)
+instruct loadB2L(iRegLNoSp dst, memory1 mem)
%{
match(Set dst (ConvI2L (LoadB mem)));
predicate(!needs_acquiring_load(n->in(1)));
@@ -6240,7 +6462,7 @@ instruct loadB2L(iRegLNoSp dst, memory mem)
%}
// Load Byte (8 bit unsigned)
-instruct loadUB(iRegINoSp dst, memory mem)
+instruct loadUB(iRegINoSp dst, memory1 mem)
%{
match(Set dst (LoadUB mem));
predicate(!needs_acquiring_load(n));
@@ -6254,7 +6476,7 @@ instruct loadUB(iRegINoSp dst, memory mem)
%}
// Load Byte (8 bit unsigned) into long
-instruct loadUB2L(iRegLNoSp dst, memory mem)
+instruct loadUB2L(iRegLNoSp dst, memory1 mem)
%{
match(Set dst (ConvI2L (LoadUB mem)));
predicate(!needs_acquiring_load(n->in(1)));
@@ -6268,7 +6490,7 @@ instruct loadUB2L(iRegLNoSp dst, memory mem)
%}
// Load Short (16 bit signed)
-instruct loadS(iRegINoSp dst, memory mem)
+instruct loadS(iRegINoSp dst, memory2 mem)
%{
match(Set dst (LoadS mem));
predicate(!needs_acquiring_load(n));
@@ -6282,7 +6504,7 @@ instruct loadS(iRegINoSp dst, memory mem)
%}
// Load Short (16 bit signed) into long
-instruct loadS2L(iRegLNoSp dst, memory mem)
+instruct loadS2L(iRegLNoSp dst, memory2 mem)
%{
match(Set dst (ConvI2L (LoadS mem)));
predicate(!needs_acquiring_load(n->in(1)));
@@ -6296,7 +6518,7 @@ instruct loadS2L(iRegLNoSp dst, memory mem)
%}
// Load Char (16 bit unsigned)
-instruct loadUS(iRegINoSp dst, memory mem)
+instruct loadUS(iRegINoSp dst, memory2 mem)
%{
match(Set dst (LoadUS mem));
predicate(!needs_acquiring_load(n));
@@ -6310,7 +6532,7 @@ instruct loadUS(iRegINoSp dst, memory mem)
%}
// Load Short/Char (16 bit unsigned) into long
-instruct loadUS2L(iRegLNoSp dst, memory mem)
+instruct loadUS2L(iRegLNoSp dst, memory2 mem)
%{
match(Set dst (ConvI2L (LoadUS mem)));
predicate(!needs_acquiring_load(n->in(1)));
@@ -6324,7 +6546,7 @@ instruct loadUS2L(iRegLNoSp dst, memory mem)
%}
// Load Integer (32 bit signed)
-instruct loadI(iRegINoSp dst, memory mem)
+instruct loadI(iRegINoSp dst, memory4 mem)
%{
match(Set dst (LoadI mem));
predicate(!needs_acquiring_load(n));
@@ -6338,7 +6560,7 @@ instruct loadI(iRegINoSp dst, memory mem)
%}
// Load Integer (32 bit signed) into long
-instruct loadI2L(iRegLNoSp dst, memory mem)
+instruct loadI2L(iRegLNoSp dst, memory4 mem)
%{
match(Set dst (ConvI2L (LoadI mem)));
predicate(!needs_acquiring_load(n->in(1)));
@@ -6352,7 +6574,7 @@ instruct loadI2L(iRegLNoSp dst, memory mem)
%}
// Load Integer (32 bit unsigned) into long
-instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
+instruct loadUI2L(iRegLNoSp dst, memory4 mem, immL_32bits mask)
%{
match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
@@ -6366,7 +6588,7 @@ instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%}
// Load Long (64 bit signed)
-instruct loadL(iRegLNoSp dst, memory mem)
+instruct loadL(iRegLNoSp dst, memory8 mem)
%{
match(Set dst (LoadL mem));
predicate(!needs_acquiring_load(n));
@@ -6380,7 +6602,7 @@ instruct loadL(iRegLNoSp dst, memory mem)
%}
// Load Range
-instruct loadRange(iRegINoSp dst, memory mem)
+instruct loadRange(iRegINoSp dst, memory4 mem)
%{
match(Set dst (LoadRange mem));
@@ -6393,7 +6615,7 @@ instruct loadRange(iRegINoSp dst, memory mem)
%}
// Load Pointer
-instruct loadP(iRegPNoSp dst, memory mem)
+instruct loadP(iRegPNoSp dst, memory8 mem)
%{
match(Set dst (LoadP mem));
predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));
@@ -6407,10 +6629,10 @@ instruct loadP(iRegPNoSp dst, memory mem)
%}
// Load Compressed Pointer
-instruct loadN(iRegNNoSp dst, memory mem)
+instruct loadN(iRegNNoSp dst, memory4 mem)
%{
match(Set dst (LoadN mem));
- predicate(!needs_acquiring_load(n));
+ predicate(!needs_acquiring_load(n) && n->as_Load()->barrier_data() == 0);
ins_cost(4 * INSN_COST);
format %{ "ldrw $dst, $mem\t# compressed ptr" %}
@@ -6421,7 +6643,7 @@ instruct loadN(iRegNNoSp dst, memory mem)
%}
// Load Klass Pointer
-instruct loadKlass(iRegPNoSp dst, memory mem)
+instruct loadKlass(iRegPNoSp dst, memory8 mem)
%{
match(Set dst (LoadKlass mem));
predicate(!needs_acquiring_load(n));
@@ -6435,7 +6657,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem)
%}
// Load Narrow Klass Pointer
-instruct loadNKlass(iRegNNoSp dst, memory mem)
+instruct loadNKlass(iRegNNoSp dst, memory4 mem)
%{
match(Set dst (LoadNKlass mem));
predicate(!needs_acquiring_load(n));
@@ -6449,7 +6671,7 @@ instruct loadNKlass(iRegNNoSp dst, memory mem)
%}
// Load Float
-instruct loadF(vRegF dst, memory mem)
+instruct loadF(vRegF dst, memory4 mem)
%{
match(Set dst (LoadF mem));
predicate(!needs_acquiring_load(n));
@@ -6463,7 +6685,7 @@ instruct loadF(vRegF dst, memory mem)
%}
// Load Double
-instruct loadD(vRegD dst, memory mem)
+instruct loadD(vRegD dst, memory8 mem)
%{
match(Set dst (LoadD mem));
predicate(!needs_acquiring_load(n));
@@ -6666,38 +6888,8 @@ instruct loadConD(vRegD dst, immD con) %{
// Store Instructions
-// Store CMS card-mark Immediate
-instruct storeimmCM0(immI0 zero, memory mem)
-%{
- match(Set mem (StoreCM mem zero));
-
- ins_cost(INSN_COST);
- format %{ "storestore (elided)\n\t"
- "strb zr, $mem\t# byte" %}
-
- ins_encode(aarch64_enc_strb0(mem));
-
- ins_pipe(istore_mem);
-%}
-
-// Store CMS card-mark Immediate with intervening StoreStore
-// needed when using CMS with no conditional card marking
-instruct storeimmCM0_ordered(immI0 zero, memory mem)
-%{
- match(Set mem (StoreCM mem zero));
-
- ins_cost(INSN_COST * 2);
- format %{ "storestore\n\t"
- "dmb ishst"
- "\n\tstrb zr, $mem\t# byte" %}
-
- ins_encode(aarch64_enc_strb0_ordered(mem));
-
- ins_pipe(istore_mem);
-%}
-
// Store Byte
-instruct storeB(iRegIorL2I src, memory mem)
+instruct storeB(iRegIorL2I src, memory1 mem)
%{
match(Set mem (StoreB mem src));
predicate(!needs_releasing_store(n));
@@ -6711,7 +6903,7 @@ instruct storeB(iRegIorL2I src, memory mem)
%}
-instruct storeimmB0(immI0 zero, memory mem)
+instruct storeimmB0(immI0 zero, memory1 mem)
%{
match(Set mem (StoreB mem zero));
predicate(!needs_releasing_store(n));
@@ -6725,7 +6917,7 @@ instruct storeimmB0(immI0 zero, memory mem)
%}
// Store Char/Short
-instruct storeC(iRegIorL2I src, memory mem)
+instruct storeC(iRegIorL2I src, memory2 mem)
%{
match(Set mem (StoreC mem src));
predicate(!needs_releasing_store(n));
@@ -6738,7 +6930,7 @@ instruct storeC(iRegIorL2I src, memory mem)
ins_pipe(istore_reg_mem);
%}
-instruct storeimmC0(immI0 zero, memory mem)
+instruct storeimmC0(immI0 zero, memory2 mem)
%{
match(Set mem (StoreC mem zero));
predicate(!needs_releasing_store(n));
@@ -6753,7 +6945,7 @@ instruct storeimmC0(immI0 zero, memory mem)
// Store Integer
-instruct storeI(iRegIorL2I src, memory mem)
+instruct storeI(iRegIorL2I src, memory4 mem)
%{
match(Set mem(StoreI mem src));
predicate(!needs_releasing_store(n));
@@ -6766,7 +6958,7 @@ instruct storeI(iRegIorL2I src, memory mem)
ins_pipe(istore_reg_mem);
%}
-instruct storeimmI0(immI0 zero, memory mem)
+instruct storeimmI0(immI0 zero, memory4 mem)
%{
match(Set mem(StoreI mem zero));
predicate(!needs_releasing_store(n));
@@ -6780,7 +6972,7 @@ instruct storeimmI0(immI0 zero, memory mem)
%}
// Store Long (64 bit signed)
-instruct storeL(iRegL src, memory mem)
+instruct storeL(iRegL src, memory8 mem)
%{
match(Set mem (StoreL mem src));
predicate(!needs_releasing_store(n));
@@ -6794,7 +6986,7 @@ instruct storeL(iRegL src, memory mem)
%}
// Store Long (64 bit signed)
-instruct storeimmL0(immL0 zero, memory mem)
+instruct storeimmL0(immL0 zero, memory8 mem)
%{
match(Set mem (StoreL mem zero));
predicate(!needs_releasing_store(n));
@@ -6808,7 +7000,7 @@ instruct storeimmL0(immL0 zero, memory mem)
%}
// Store Pointer
-instruct storeP(iRegP src, memory mem)
+instruct storeP(iRegP src, memory8 mem)
%{
match(Set mem (StoreP mem src));
predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
@@ -6822,7 +7014,7 @@ instruct storeP(iRegP src, memory mem)
%}
// Store Pointer
-instruct storeimmP0(immP0 zero, memory mem)
+instruct storeimmP0(immP0 zero, memory8 mem)
%{
match(Set mem (StoreP mem zero));
predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
@@ -6836,10 +7028,10 @@ instruct storeimmP0(immP0 zero, memory mem)
%}
// Store Compressed Pointer
-instruct storeN(iRegN src, memory mem)
+instruct storeN(iRegN src, memory4 mem)
%{
match(Set mem (StoreN mem src));
- predicate(!needs_releasing_store(n));
+ predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
ins_cost(INSN_COST);
format %{ "strw $src, $mem\t# compressed ptr" %}
@@ -6849,10 +7041,10 @@ instruct storeN(iRegN src, memory mem)
ins_pipe(istore_reg_mem);
%}
-instruct storeImmN0(immN0 zero, memory mem)
+instruct storeImmN0(immN0 zero, memory4 mem)
%{
match(Set mem (StoreN mem zero));
- predicate(!needs_releasing_store(n));
+ predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0);
ins_cost(INSN_COST);
format %{ "strw zr, $mem\t# compressed ptr" %}
@@ -6863,7 +7055,7 @@ instruct storeImmN0(immN0 zero, memory mem)
%}
// Store Float
-instruct storeF(vRegF src, memory mem)
+instruct storeF(vRegF src, memory4 mem)
%{
match(Set mem (StoreF mem src));
predicate(!needs_releasing_store(n));
@@ -6880,7 +7072,7 @@ instruct storeF(vRegF src, memory mem)
// implement storeImmF0 and storeFImmPacked
// Store Double
-instruct storeD(vRegD src, memory mem)
+instruct storeD(vRegD src, memory8 mem)
%{
match(Set mem (StoreD mem src));
predicate(!needs_releasing_store(n));
@@ -6894,7 +7086,7 @@ instruct storeD(vRegD src, memory mem)
%}
// Store Compressed Klass Pointer
-instruct storeNKlass(iRegN src, memory mem)
+instruct storeNKlass(iRegN src, memory4 mem)
%{
predicate(!needs_releasing_store(n));
match(Set mem (StoreNKlass mem src));
@@ -6913,7 +7105,7 @@ instruct storeNKlass(iRegN src, memory mem)
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).
-instruct prefetchalloc( memory mem ) %{
+instruct prefetchalloc( memory8 mem ) %{
match(PrefetchAllocation mem);
ins_cost(INSN_COST);
@@ -7086,6 +7278,7 @@ instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "ldarw $dst, $mem\t# compressed ptr" %}
@@ -7253,6 +7446,7 @@ instruct storeimmP0_volatile(immP0 zero, /* sync_memory*/indirect mem)
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
match(Set mem (StoreN mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "stlrw $src, $mem\t# compressed ptr" %}
@@ -7265,6 +7459,7 @@ instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
instruct storeimmN0_volatile(immN0 zero, /* sync_memory*/indirect mem)
%{
match(Set mem (StoreN mem zero));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "stlrw zr, $mem\t# compressed ptr" %}
@@ -7482,7 +7677,7 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
ins_pipe(pipe_class_default);
%}
-instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
+instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
match(Set dst (PopCountI (LoadI mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
@@ -7523,7 +7718,7 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
ins_pipe(pipe_class_default);
%}
-instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
+instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
match(Set dst (PopCountL (LoadL mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
@@ -8061,6 +8256,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@@ -8175,7 +8371,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
@@ -8280,6 +8476,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8389,7 +8586,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8501,6 +8698,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@@ -8620,7 +8818,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
@@ -8681,6 +8879,7 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
%}
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
ins_cost(2 * VOLATILE_REF_COST);
format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
@@ -8724,7 +8923,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
%}
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
- predicate(needs_acquiring_load_exclusive(n));
+ predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
@@ -16672,7 +16871,7 @@ instruct compressBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
ins_pipe(pipe_slow);
%}
-instruct compressBitsI_memcon(iRegINoSp dst, memory mem, immI mask,
+instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
vRegF tdst, vRegF tsrc, vRegF tmask) %{
match(Set dst (CompressBits (LoadI mem) mask));
effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16709,7 +16908,7 @@ instruct compressBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
ins_pipe(pipe_slow);
%}
-instruct compressBitsL_memcon(iRegLNoSp dst, memory mem, immL mask,
+instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
vRegF tdst, vRegF tsrc, vRegF tmask) %{
match(Set dst (CompressBits (LoadL mem) mask));
effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16746,7 +16945,7 @@ instruct expandBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
ins_pipe(pipe_slow);
%}
-instruct expandBitsI_memcon(iRegINoSp dst, memory mem, immI mask,
+instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
vRegF tdst, vRegF tsrc, vRegF tmask) %{
match(Set dst (ExpandBits (LoadI mem) mask));
effect(TEMP tdst, TEMP tsrc, TEMP tmask);
@@ -16784,7 +16983,7 @@ instruct expandBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
%}
-instruct expandBitsL_memcon(iRegINoSp dst, memory mem, immL mask,
+instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
vRegF tdst, vRegF tsrc, vRegF tmask) %{
match(Set dst (ExpandBits (LoadL mem) mask));
effect(TEMP tdst, TEMP tsrc, TEMP tmask);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index cdbc4103df89a..0d3a240cecfd3 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -345,7 +345,7 @@ source %{
// ------------------------------ Vector load/store ----------------------------
// Load Vector (16 bits)
-instruct loadV2(vReg dst, vmem mem) %{
+instruct loadV2(vReg dst, vmem2 mem) %{
predicate(n->as_LoadVector()->memory_size() == 2);
match(Set dst (LoadVector mem));
format %{ "loadV2 $dst, $mem\t# vector (16 bits)" %}
@@ -354,7 +354,7 @@ instruct loadV2(vReg dst, vmem mem) %{
%}
// Store Vector (16 bits)
-instruct storeV2(vReg src, vmem mem) %{
+instruct storeV2(vReg src, vmem2 mem) %{
predicate(n->as_StoreVector()->memory_size() == 2);
match(Set mem (StoreVector mem src));
format %{ "storeV2 $mem, $src\t# vector (16 bits)" %}
@@ -363,7 +363,7 @@ instruct storeV2(vReg src, vmem mem) %{
%}
// Load Vector (32 bits)
-instruct loadV4(vReg dst, vmem mem) %{
+instruct loadV4(vReg dst, vmem4 mem) %{
predicate(n->as_LoadVector()->memory_size() == 4);
match(Set dst (LoadVector mem));
format %{ "loadV4 $dst, $mem\t# vector (32 bits)" %}
@@ -372,7 +372,7 @@ instruct loadV4(vReg dst, vmem mem) %{
%}
// Store Vector (32 bits)
-instruct storeV4(vReg src, vmem mem) %{
+instruct storeV4(vReg src, vmem4 mem) %{
predicate(n->as_StoreVector()->memory_size() == 4);
match(Set mem (StoreVector mem src));
format %{ "storeV4 $mem, $src\t# vector (32 bits)" %}
@@ -381,7 +381,7 @@ instruct storeV4(vReg src, vmem mem) %{
%}
// Load Vector (64 bits)
-instruct loadV8(vReg dst, vmem mem) %{
+instruct loadV8(vReg dst, vmem8 mem) %{
predicate(n->as_LoadVector()->memory_size() == 8);
match(Set dst (LoadVector mem));
format %{ "loadV8 $dst, $mem\t# vector (64 bits)" %}
@@ -390,7 +390,7 @@ instruct loadV8(vReg dst, vmem mem) %{
%}
// Store Vector (64 bits)
-instruct storeV8(vReg src, vmem mem) %{
+instruct storeV8(vReg src, vmem8 mem) %{
predicate(n->as_StoreVector()->memory_size() == 8);
match(Set mem (StoreVector mem src));
format %{ "storeV8 $mem, $src\t# vector (64 bits)" %}
@@ -399,7 +399,7 @@ instruct storeV8(vReg src, vmem mem) %{
%}
// Load Vector (128 bits)
-instruct loadV16(vReg dst, vmem mem) %{
+instruct loadV16(vReg dst, vmem16 mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
format %{ "loadV16 $dst, $mem\t# vector (128 bits)" %}
@@ -408,7 +408,7 @@ instruct loadV16(vReg dst, vmem mem) %{
%}
// Store Vector (128 bits)
-instruct storeV16(vReg src, vmem mem) %{
+instruct storeV16(vReg src, vmem16 mem) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
format %{ "storeV16 $mem, $src\t# vector (128 bits)" %}
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 020a75b51fa8f..99708e9ef317d 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -338,7 +338,7 @@ dnl VECTOR_LOAD_STORE($1, $2, $3, $4, $5 )
dnl VECTOR_LOAD_STORE(type, nbytes, arg_name, nbits, size)
define(`VECTOR_LOAD_STORE', `
// ifelse(load, $1, Load, Store) Vector ($4 bits)
-instruct $1V$2(vReg $3, vmem mem) %{
+instruct $1V$2(vReg $3, vmem$2 mem) %{
predicate(`n->as_'ifelse(load, $1, Load, Store)Vector()->memory_size() == $2);
match(Set ifelse(load, $1, dst (LoadVector mem), mem (StoreVector mem src)));
format %{ "$1V$2 ifelse(load, $1, `$dst, $mem', `$mem, $src')\t# vector ($4 bits)" %}
diff --git a/src/hotspot/cpu/aarch64/ad_encode.m4 b/src/hotspot/cpu/aarch64/ad_encode.m4
index e3d8ea661b60a..008dbd2c9369c 100644
--- a/src/hotspot/cpu/aarch64/ad_encode.m4
+++ b/src/hotspot/cpu/aarch64/ad_encode.m4
@@ -34,7 +34,7 @@ define(access, `
define(load,`
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl
+ enc_class aarch64_enc_$2($1 dst, memory$5 mem) %{dnl
access(dst,$2,$3,$4,$5)')dnl
load(iRegI,ldrsbw,,,1)
load(iRegI,ldrsb,,,1)
@@ -53,12 +53,12 @@ load(vRegD,ldrd,Float,,8)
define(STORE,`
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_$2($1 src, memory mem) %{dnl
+ enc_class aarch64_enc_$2($1 src, memory$5 mem) %{dnl
access(src,$2,$3,$4,$5)')dnl
define(STORE0,`
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_$2`'0(memory mem) %{
+ enc_class aarch64_enc_$2`'0(memory$4 mem) %{
choose(masm,zr,$2,$mem->opcode(),
as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp,$4)')dnl
STORE(iRegI,strb,,,1)
@@ -82,7 +82,7 @@ STORE(vRegD,strd,Float,,8)
// This encoding class is generated automatically from ad_encode.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
- enc_class aarch64_enc_strb0_ordered(memory mem) %{
+ enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
__ membar(Assembler::StoreStore);
loadStore(masm, &MacroAssembler::strb, zr, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 5e116d82761ac..1385366d8793b 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -1168,8 +1168,8 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
- __ ldrb(rscratch1, Address(op->klass()->as_register(),
- InstanceKlass::init_state_offset()));
+ __ lea(rscratch1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset()));
+ __ ldarb(rscratch1, rscratch1);
__ cmpw(rscratch1, InstanceKlass::fully_initialized);
add_debug_info_for_null_check_here(op->stub()->info());
__ br(Assembler::NE, *op->stub()->entry());
diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
index dabafb9288b83..4bd509880f29c 100644
--- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
@@ -64,31 +64,4 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) {
__ emit_int32(0); // nmethod guard value
}
-int C2HandleAnonOMOwnerStub::max_size() const {
- // Max size of stub has been determined by testing with 0, in which case
- // C2CodeStubList::emit() will throw an assertion and report the actual size that
- // is needed.
- return 24;
-}
-
-void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) {
- __ bind(entry());
- Register mon = monitor();
- Register t = tmp();
- assert(t != noreg, "need tmp register");
-
- // Fix owner to be the current thread.
- __ str(rthread, Address(mon, ObjectMonitor::owner_offset()));
-
- // Pop owner object from lock-stack.
- __ ldrw(t, Address(rthread, JavaThread::lock_stack_top_offset()));
- __ subw(t, t, oopSize);
-#ifdef ASSERT
- __ str(zr, Address(rthread, t));
-#endif
- __ strw(t, Address(rthread, JavaThread::lock_stack_top_offset()));
-
- __ b(continuation());
-}
-
#undef __
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index b4c12ecd4a849..62831ee72ba05 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
Register oop = objectReg;
Register box = boxReg;
Register disp_hdr = tmpReg;
+ Register owner_addr = tmpReg;
Register tmp = tmp2Reg;
Label cont;
Label object_has_monitor;
Label count, no_count;
+ Label unlocked;
assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
assert_different_registers(oop, box, tmp, disp_hdr);
@@ -204,14 +206,40 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
b(cont);
bind(notRecursive);
+
+ // Compute owner address.
+ lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ stlr(zr, owner_addr);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset()));
- ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
- orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
- cmp(rscratch1, zr); // Sets flags for result
- cbnz(rscratch1, cont);
- // need a release store here
- lea(tmp, Address(tmp, ObjectMonitor::owner_offset()));
- stlr(zr, tmp); // set unowned
+ ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset()));
+ orr(rscratch1, rscratch1, tmpReg);
+ cmp(rscratch1, zr);
+ br(Assembler::EQ, cont); // If so we are done.
+
+ // Check if there is a successor.
+ ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset()));
+ cmp(rscratch1, zr);
+ br(Assembler::NE, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ cmp(zr, rthread); // Set Flag to NE => slow path
+ b(cont);
+
+ bind(unlocked);
+ cmp(zr, zr); // Set Flag to EQ => fast path
+
+ // Intentional fall-through
bind(cont);
// flag == EQ indicates success
@@ -498,33 +526,41 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
bind(not_recursive);
- Label release;
const Register t2_owner_addr = t2;
// Compute owner address.
lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
+ stlr(zr, t2_owner_addr);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
// Check if the entry lists are empty.
ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
orr(rscratch1, rscratch1, t3_t);
cmp(rscratch1, zr);
- br(Assembler::EQ, release);
+ br(Assembler::EQ, unlocked); // If so we are done.
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- str(rthread, Address(t2_owner_addr));
- b(slow_path);
+ // Check if there is a successor.
+ ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset()));
+ cmp(rscratch1, zr);
+ br(Assembler::NE, unlocked); // If so we are done.
- bind(release);
- // Set owner to null.
- // Release to satisfy the JMM
- stlr(zr, t2_owner_addr);
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ cmp(zr, rthread); // Set Flag to NE => slow path
+ b(slow_path);
}
bind(unlocked);
decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
+ cmp(zr, zr); // Set Flags to EQ => fast path
#ifdef ASSERT
// Check that unlocked label is reached with Flags == EQ.
diff --git a/src/hotspot/cpu/aarch64/cas.m4 b/src/hotspot/cpu/aarch64/cas.m4
index f8aac0c4939fa..7e13e153db18a 100644
--- a/src/hotspot/cpu/aarch64/cas.m4
+++ b/src/hotspot/cpu/aarch64/cas.m4
@@ -45,7 +45,9 @@ define(`CAS_INSN',
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
+ $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
+ $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
@@ -122,7 +124,9 @@ define(`CAS_INSN3',
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
+ $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
+ $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
index d02038b6e9193..b978c350ce131 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
@@ -38,7 +38,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -95,6 +98,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ pop(saved_regs, sp);
}
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp1, const Register temp2) {
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address)
+ __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(temp1, temp1, wordSize); // temp1 := next index
+ __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address
+ __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value
+}
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ ldrw(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ ldrb(tmp1, in_progress);
+ }
+}
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ cbz(pre_val, done);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ b(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -115,43 +166,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert_different_registers(obj, pre_val, tmp1, tmp2);
assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ ldrw(tmp1, in_progress);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ ldrb(tmp1, in_progress);
- }
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ cbzw(tmp1, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ cbz(pre_val, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ ldr(tmp1, index); // tmp := *index_adr
- __ cbz(tmp1, runtime); // tmp == 0?
- // If yes, goto runtime
-
- __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
- __ str(tmp1, index); // *index_adr := tmp
- __ ldr(tmp2, buffer);
- __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ str(pre_val, Address(tmp1, 0));
- __ b(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
@@ -182,6 +200,50 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ // Does store cross heap regions?
+ __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ cbz(tmp1, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ cbz(new_val, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+ __ load_byte_map_base(tmp2); // tmp2 := card table base address
+ __ add(tmp1, tmp1, tmp2); // tmp1 := card address
+ __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
+ __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(Assembler::StoreLoad); // StoreLoad membar
+ __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
+ __ cbzw(tmp2, done);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp1, tmp2, rscratch1);
+ __ b(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -194,70 +256,116 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
&& tmp2 != noreg, "expecting a register");
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
-
Label done;
Label runtime;
- // Does store cross heap regions?
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
+ __ br(Assembler::EQ, done);
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
- __ eor(tmp1, store_addr, new_val);
- __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ cbz(tmp1, done);
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr);
+ __ push(saved, sp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
+ __ pop(saved, sp);
- // crosses regions, storing null?
+ __ bind(done);
+}
- __ cbz(new_val, done);
+#if defined(COMPILER2)
- // storing region crossing non-null, is card already dirty?
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, rthread);
+ __ mov(rscratch1, runtime_path);
+ __ blr(rscratch1);
+}
- const Register card_addr = tmp1;
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == rthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- __ lsr(card_addr, store_addr, CardTable::card_shift());
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
- // get the address of the card
- __ load_byte_map_base(tmp2);
- __ add(card_addr, card_addr, tmp2);
- __ ldrb(tmp2, Address(card_addr));
- __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());
- __ br(Assembler::EQ, done);
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ cbnzw(tmp1, *stub->entry());
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+ __ bind(*stub->continuation());
+}
- __ membar(Assembler::StoreLoad);
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
- __ ldrb(tmp2, Address(card_addr));
- __ cbzw(tmp2, done);
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ b(*stub->continuation());
+}
- __ strb(zr, Address(card_addr));
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
+ rscratch1);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
+ && tmp2 != noreg, "expecting a register");
- __ ldr(rscratch1, queue_index);
- __ cbz(rscratch1, runtime);
- __ sub(rscratch1, rscratch1, wordSize);
- __ str(rscratch1, queue_index);
+ stub->initialize_registers(thread, tmp1, tmp2);
- __ ldr(tmp2, buffer);
- __ str(card_addr, Address(tmp2, rscratch1));
- __ b(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ br(Assembler::NE, *stub->entry());
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop(saved, sp);
+ __ bind(*stub->continuation());
+}
- __ bind(done);
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ assert(stub->tmp3() == noreg, "not needed in this platform");
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ b(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2) {
bool on_oop = is_reference_type(type);
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
index 7b4bc8cdc49de..4baa18cb94544 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp
@@ -33,6 +33,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -69,6 +71,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2);
};
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
new file mode 100644
index 0000000000000..081a67d68807b
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad
@@ -0,0 +1,680 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_aarch64.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
+}
+
+%}
+
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+// This section is generated from g1_aarch64.m4
+
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "str $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ str($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StorePVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "stlr $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stlr($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "strw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ strw($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreNVolatile(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "stlrw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stlrw($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(INSN_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "strw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ strw($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreNVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "stlrw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ stlrw($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ false /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ true /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ false /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ true /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ false /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ true /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchg $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchgw $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()). The same holds for g1LoadN.
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldr $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrw $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ ldrw($dst$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4
new file mode 100644
index 0000000000000..8fb1f7e8e428b
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4
@@ -0,0 +1,384 @@
+dnl Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+dnl
+dnl This code is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License version 2 only, as
+dnl published by the Free Software Foundation.
+dnl
+dnl This code is distributed in the hope that it will be useful, but WITHOUT
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl version 2 for more details (a copy is included in the LICENSE file that
+dnl accompanied this code).
+dnl
+dnl You should have received a copy of the GNU General Public License version
+dnl 2 along with this work; if not, write to the Free Software Foundation,
+dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+dnl
+dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+dnl or visit www.oracle.com if you need additional information or have any
+dnl questions.
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+// This section is generated from g1_aarch64.m4
+
+define(`STOREP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreP$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,`needs_releasing_store(n)',`!needs_releasing_store(n)') && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "$2 $src, $mem\t# ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ $2($src$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+STOREP_INSN(,str)
+STOREP_INSN(Volatile,stlr)
+dnl
+define(`STOREN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreN$1(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,`needs_releasing_store(n)',`!needs_releasing_store(n)') && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "$2 $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ $2($src$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+STOREN_INSN(,strw)
+STOREN_INSN(Volatile,stlrw)
+dnl
+define(`ENCODESTOREN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1EncodePAndStoreN$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Volatile,`needs_releasing_store(n)',`!needs_releasing_store(n)') && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST));
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "$2 $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ $2($tmp1$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem));
+%}')dnl
+ENCODESTOREN_INSN(,strw)
+ENCODESTOREN_INSN(Volatile,stlrw)
+dnl
+define(`CAEP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeP$1(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CAEP_INSN(,,false)
+CAEP_INSN(Acq,_acq,true)
+dnl
+define(`CAEN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndExchangeN$1(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ $3 /* acquire */, true /* release */, false /* weak */, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CAEN_INSN(,,false)
+CAEN_INSN(Acq,_acq,true)
+dnl
+define(`CASP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapP$1(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (ptr)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
+ $3 /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CASP_INSN(,,false)
+CASP_INSN(Acq,_acq,true)
+dnl
+define(`CASN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1CompareAndSwapN$1(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "cset $res, EQ" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word,
+ $3 /* acquire */, true /* release */, false /* weak */, noreg);
+ __ cset($res$$Register, Assembler::EQ);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+CASN_INSN(,,false)
+CASN_INSN(Acq,_acq,true)
+dnl
+define(`XCHGP_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetP$1(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "atomic_xchg$2 $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}')dnl
+XCHGP_INSN(,,atomic_xchg)
+XCHGP_INSN(Acq,_acq,atomic_xchgal)
+dnl
+define(`XCHGN_INSN',
+`
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1GetAndSetN$1(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && ifelse($1,Acq,`needs_acquiring_load_exclusive(n)',`!needs_acquiring_load_exclusive(n)') && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST));
+ format %{ "$2 $preval, $newval, [$mem]" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ $3($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}')dnl
+XCHGN_INSN(,atomic_xchgw,atomic_xchgw)
+XCHGN_INSN(Acq,atomic_xchgw_acq,atomic_xchgalw)
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()). The same holds for g1LoadN.
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldr $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ldr($dst$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrw $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ ldrw($dst$$Register, $mem$$Register);
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+// END This section of the file is automatically generated. Do not edit --------------
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index 06f4382015603..84d06dbcc7bfd 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -67,9 +67,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
__ push(saved_regs, sp);
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
}
__ pop(saved_regs, sp);
__ bind(done);
@@ -164,9 +164,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
if (expand_call) {
assert(pre_val != c_rarg1, "smashed arg");
- __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
}
__ pop(saved, sp);
@@ -698,7 +698,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ bind(runtime);
__ push_call_clobbered_registers();
__ load_parameter(0, pre_val);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
__ pop_call_clobbered_registers();
__ bind(done);
diff --git a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
index 5e690a8e47b94..6e401724baa82 100644
--- a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad
@@ -51,7 +51,7 @@ static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node,
%}
// Load Pointer
-instruct xLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
+instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !ZGenerational && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() != 0));
diff --git a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
index cd834969e1a4f..fcec3ae64fde8 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zAddress_aarch64.cpp
@@ -93,7 +93,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
index 466e77a4460d0..3f1898b6742e1 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
@@ -1189,6 +1189,8 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
__ lea(rscratch1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
} else if (stub->is_atomic()) {
__ lea(rscratch1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
+ } else if (stub->is_nokeepalive()) {
+ __ lea(rscratch1, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
} else {
__ lea(rscratch1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
}
@@ -1307,11 +1309,11 @@ Label* ZLoadBarrierStubC2Aarch64::entry() {
return ZBarrierStubC2::entry();
}
-ZStoreBarrierStubC2Aarch64::ZStoreBarrierStubC2Aarch64(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic)
- : ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic), _deferred_emit(false) {}
+ZStoreBarrierStubC2Aarch64::ZStoreBarrierStubC2Aarch64(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive)
+ : ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic, is_nokeepalive), _deferred_emit(false) {}
-ZStoreBarrierStubC2Aarch64* ZStoreBarrierStubC2Aarch64::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic) {
- ZStoreBarrierStubC2Aarch64* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2Aarch64(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic);
+ZStoreBarrierStubC2Aarch64* ZStoreBarrierStubC2Aarch64::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive) {
+ ZStoreBarrierStubC2Aarch64* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2Aarch64(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic, is_nokeepalive);
register_stub(stub);
return stub;
}
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
index 2f716140ed19d..ad3a171c10370 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
@@ -280,10 +280,10 @@ class ZStoreBarrierStubC2Aarch64 : public ZStoreBarrierStubC2 {
private:
bool _deferred_emit;
- ZStoreBarrierStubC2Aarch64(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic);
+ ZStoreBarrierStubC2Aarch64(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive);
public:
- static ZStoreBarrierStubC2Aarch64* create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic);
+ static ZStoreBarrierStubC2Aarch64* create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive);
virtual void emit_code(MacroAssembler& masm);
};
diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
index 1510b42bfe97d..088f92a01573e 100644
--- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad
@@ -91,7 +91,8 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
z_color(masm, node, rnew_zpointer, rnew_zaddress);
} else {
bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
- ZStoreBarrierStubC2Aarch64* const stub = ZStoreBarrierStubC2Aarch64::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
+ bool is_nokeepalive = (node->barrier_data() & ZBarrierNoKeepalive) != 0;
+ ZStoreBarrierStubC2Aarch64* const stub = ZStoreBarrierStubC2Aarch64::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic, is_nokeepalive);
ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
}
@@ -100,7 +101,7 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
%}
// Load Pointer
-instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
+instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && ZGenerational && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 08b69b34a9462..9835fb5aca159 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -1838,7 +1838,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_f
L_slow_path = &L_fallthrough;
}
// Fast path check: class is fully initialized
- ldrb(scratch, Address(klass, InstanceKlass::init_state_offset()));
+ lea(scratch, Address(klass, InstanceKlass::init_state_offset()));
+ ldarb(scratch, scratch);
subs(zr, scratch, InstanceKlass::fully_initialized);
br(Assembler::EQ, *L_fast_path);
@@ -2967,7 +2968,7 @@ void MacroAssembler::verify_heapbase(const char* msg) {
if (CheckCompressedOops) {
Label ok;
push(1 << rscratch1->encoding(), sp); // cmpptr trashes rscratch1
- cmpptr(rheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ cmpptr(rheapbase, ExternalAddress(CompressedOops::base_addr()));
br(Assembler::EQ, ok);
stop(msg);
bind(ok);
@@ -3133,9 +3134,9 @@ void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
if (Universe::is_fully_initialized()) {
- mov(rheapbase, CompressedOops::ptrs_base());
+ mov(rheapbase, CompressedOops::base());
} else {
- lea(rheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ lea(rheapbase, ExternalAddress(CompressedOops::base_addr()));
ldr(rheapbase, Address(rheapbase));
}
}
@@ -5010,8 +5011,10 @@ void MacroAssembler::decode_heap_oop(Register d, Register s) {
verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
if (CompressedOops::base() == nullptr) {
- if (CompressedOops::shift() != 0 || d != s) {
+ if (CompressedOops::shift() != 0) {
lsl(d, s, CompressedOops::shift());
+ } else if (d != s) {
+ mov(d, s);
}
} else {
Label done;
diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp
index 68800d04d69ba..aa6a9d14ff176 100644
--- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -27,6 +27,7 @@
#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.inline.hpp"
#include "classfile/vmClasses.hpp"
+#include "compiler/disassembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "memory/allocation.inline.hpp"
@@ -36,7 +37,7 @@
#include "runtime/frame.inline.hpp"
#include "runtime/stubRoutines.hpp"
-#define __ _masm->
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 3117c75149854..52996f4c4a503 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -49,6 +49,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/formatBuffer.hpp"
@@ -2233,7 +2234,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index b3513a586de35..31116e006f025 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -3417,15 +3417,15 @@ class StubGenerator: public StubCodeGenerator {
Register rscratch3 = r10;
Register rscratch4 = r11;
- __ andw(rscratch3, r2, r4);
- __ bicw(rscratch4, r3, r4);
reg_cache.extract_u32(rscratch1, k);
__ movw(rscratch2, t);
- __ orrw(rscratch3, rscratch3, rscratch4);
__ addw(rscratch4, r1, rscratch2);
__ addw(rscratch4, rscratch4, rscratch1);
- __ addw(rscratch3, rscratch3, rscratch4);
- __ rorw(rscratch2, rscratch3, 32 - s);
+ __ bicw(rscratch2, r3, r4);
+ __ andw(rscratch3, r2, r4);
+ __ addw(rscratch2, rscratch2, rscratch4);
+ __ addw(rscratch2, rscratch2, rscratch3);
+ __ rorw(rscratch2, rscratch2, 32 - s);
__ addw(r1, rscratch2, r2);
}
@@ -7320,6 +7320,28 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // load Method* target of MethodHandle
+ // j_rarg0 = jobject receiver
+ // rmethod = result
+ address generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, rscratch1, rscratch2);
+ // Load target method from receiver
+ __ load_heap_oop(rmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1, rscratch2);
+ __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1, rscratch2);
+ __ load_heap_oop(rmethod, Address(rmethod, java_lang_invoke_MemberName::method_offset()), rscratch1, rscratch2);
+ __ access_load_at(T_ADDRESS, IN_HEAP, rmethod,
+ Address(rmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ ret(lr);
+
+ return start;
+ }
+
#undef __
#define __ masm->
@@ -8241,6 +8263,7 @@ class StubGenerator: public StubCodeGenerator {
#endif
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated
}
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index 38d48b86f23b0..9894841e933d8 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/javaClasses.hpp"
+#include "compiler/disassembler.hpp"
#include "compiler/compiler_globals.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
@@ -67,13 +68,7 @@
// Max size with JVMTI
int TemplateInterpreter::InterpreterCodeSize = 200 * 1024;
-#define __ _masm->
-
-//-----------------------------------------------------------------------------
-
-extern "C" void entry(CodeBuffer*);
-
-//-----------------------------------------------------------------------------
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
address entry = __ pc();
@@ -2004,13 +1999,21 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
address& vep) {
assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
Label L;
- aep = __ pc(); __ push_ptr(); __ b(L);
- fep = __ pc(); __ push_f(); __ b(L);
- dep = __ pc(); __ push_d(); __ b(L);
- lep = __ pc(); __ push_l(); __ b(L);
- bep = cep = sep =
- iep = __ pc(); __ push_i();
- vep = __ pc();
+ aep = __ pc(); // atos entry point
+ __ push_ptr();
+ __ b(L);
+ fep = __ pc(); // ftos entry point
+ __ push_f();
+ __ b(L);
+ dep = __ pc(); // dtos entry point
+ __ push_d();
+ __ b(L);
+ lep = __ pc(); // ltos entry point
+ __ push_l();
+ __ b(L);
+ bep = cep = sep = iep = __ pc(); // [bcsi]tos entry point
+ __ push_i();
+ vep = __ pc(); // vtos entry point
__ bind(L);
generate_and_dispatch(t);
}
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index 25eb339bfce71..48ff356f9a558 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
@@ -49,7 +50,7 @@
#include "runtime/synchronizer.hpp"
#include "utilities/powerOfTwo.hpp"
-#define __ _masm->
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
// Address computation: local variables
diff --git a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
index 28ec07815be5c..517fccb2d1aa5 100644
--- a/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/upcallLinker_aarch64.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -222,7 +223,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ lea(c_rarg0, Address(sp, frame_data_offset));
- __ movptr(c_rarg1, (intptr_t)receiver);
__ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry));
__ blr(rscratch1);
__ mov(rthread, r0);
@@ -238,12 +238,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, rthread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(rmethod, entry);
- __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (intptr_t)receiver);
+ __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target()), rscratch1); // puts target Method* in rmethod
+ __ block_comment("} load target ");
__ push_cont_fastpath(rthread);
@@ -318,7 +316,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 2c7de0a58a204..bfca986f350cb 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -1003,10 +1003,6 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return nullptr;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return nullptr;
-}
-
// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
return false;
@@ -3890,6 +3886,7 @@ instruct loadRange(iRegI dst, memoryI mem) %{
instruct loadP(iRegP dst, memoryP mem) %{
+ predicate(!(UseG1GC && n->as_Load()->barrier_data() != 0));
match(Set dst (LoadP mem));
ins_cost(MEMORY_REF_COST);
size(4);
@@ -4225,18 +4222,6 @@ instruct storeB(memoryB mem, store_RegI src) %{
ins_pipe(istore_mem_reg);
%}
-instruct storeCM(memoryB mem, store_RegI src) %{
- match(Set mem (StoreCM mem src));
- ins_cost(MEMORY_REF_COST);
-
- size(4);
- format %{ "STRB $src,$mem\t! CMS card-mark byte" %}
- ins_encode %{
- __ strb($src$$Register, $mem$$Address);
- %}
- ins_pipe(istore_mem_reg);
-%}
-
// Store Char/Short
@@ -4356,6 +4341,7 @@ instruct movSP(store_ptr_RegP dst, SPRegP src) %{
instruct storeP(memoryP mem, store_ptr_RegP src) %{
+ predicate(!(UseG1GC && n->as_Store()->barrier_data() != 0));
match(Set mem (StoreP mem src));
ins_cost(MEMORY_REF_COST);
size(4);
@@ -5390,6 +5376,7 @@ instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI re
%}
instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{
+ predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0));
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
effect( KILL ccr, TEMP tmp);
size(28);
@@ -5659,6 +5646,7 @@ instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr)
%}
instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{
+ predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0));
match(Set res (GetAndSetP mem newval));
effect(KILL ccr, TEMP tmp, TEMP res);
size(16);
diff --git a/src/hotspot/cpu/arm/assembler_arm_32.hpp b/src/hotspot/cpu/arm/assembler_arm_32.hpp
index dd04ad1ab3a3c..e53eefac097ef 100644
--- a/src/hotspot/cpu/arm/assembler_arm_32.hpp
+++ b/src/hotspot/cpu/arm/assembler_arm_32.hpp
@@ -119,8 +119,9 @@ class RegisterSet {
}
friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) {
- assert((set1._encoding & set2._encoding) == 0,
- "encoding constraint");
+// Disjointness assert disabled: callers may legitimately combine overlapping register sets.
+// assert((set1._encoding & set2._encoding) == 0,
+// "encoding constraint");
return RegisterSet(set1._encoding | set2._encoding);
}
@@ -142,6 +143,11 @@ class RegisterSet {
}
return count;
}
+
+ static RegisterSet from(RegSet set) {
+ assert(set.size(), "RegSet must not be empty");
+ return RegisterSet(set.bits());
+ }
};
#if R9_IS_SCRATCHED
@@ -157,6 +163,10 @@ class FloatRegisterSet {
public:
+ FloatRegisterSet() {
+ _encoding = 0;
+ }
+
FloatRegisterSet(FloatRegister reg) {
if (reg->hi_bit() == 0) {
_encoding = reg->hi_bits() << 12 | reg->lo_bit() << 22 | 1;
@@ -185,6 +195,15 @@ class FloatRegisterSet {
return (_encoding & 0xFFFFFF00) | ((_encoding & 0xFF) << 1);
}
+ static FloatRegisterSet from(FloatRegSet set) {
+ assert(set.size(), "FloatRegSet must not be empty");
+ // the vector load/store instructions operate on a set of consecutive registers.
+ // for the sake of simplicity, write all registers between the first and last in the set
+ size_t range = (*set.rbegin())->encoding() - (*set.begin())->encoding() + 1;
+    // push_float stores float registers by pairs
+ return FloatRegisterSet(*set.begin(), (range+1)/2);
+ }
+
};
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index bb6a93e6f8da7..b14e6f0b4ca0c 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -948,6 +948,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
Register tmp = op->tmp1()->as_register();
__ ldrb(tmp, Address(op->klass()->as_register(), InstanceKlass::init_state_offset()));
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
add_debug_info_for_null_check_here(op->stub()->info());
__ cmp(tmp, InstanceKlass::fully_initialized);
__ b(*op->stub()->entry(), ne);
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
index 3c5e29aa8710f..56ae7707fbf38 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp
@@ -39,8 +39,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
-
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
#ifdef PRODUCT
@@ -106,70 +108,87 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
#endif // !R9_IS_SCRATCHED
}
-// G1 pre-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-// If store_addr != noreg, then previous value is loaded from [store_addr];
-// in such case store_addr and new_val registers are preserved;
-// otherwise pre_val register is preserved.
-void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
- Register store_addr,
- Register new_val,
- Register pre_val,
- Register tmp1,
- Register tmp2) {
- Label done;
- Label runtime;
-
- if (store_addr != noreg) {
- assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
- } else {
- assert (new_val == noreg, "should be");
- assert_different_registers(pre_val, tmp1, tmp2, noreg);
- }
-
- Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp1, const Register temp2) {
+ assert_different_registers(value, temp1, temp2);
+ // Can we store original value in the thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address)
+ __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(temp1, temp1, wordSize); // temp1 := next index
+ __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address
+ // Record the previous value
+ __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value
+ }
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
// Is marking active?
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
__ ldrb(tmp1, in_progress);
- __ cbz(tmp1, done);
+}
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
// Do we need to load the previous value?
- if (store_addr != noreg) {
- __ load_heap_oop(pre_val, Address(store_addr, 0));
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0));
}
// Is the previous value null?
__ cbz(pre_val, done);
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ b(done);
+}
- __ ldr(tmp1, index); // tmp1 := *index_adr
- __ ldr(tmp2, buffer);
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3, LR).
+// If obj != noreg, then previous value is loaded from [obj];
+// in such case obj and pre_val registers are preserved;
+// otherwise pre_val register is preserved.
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2) {
+ Label done;
+ Label runtime;
- __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
- __ b(runtime, lt); // If negative, goto runtime
+ assert_different_registers(obj, pre_val, tmp1, tmp2, noreg);
- __ str(tmp1, index); // *index_adr := tmp1
+ generate_pre_barrier_fast_path(masm, Rthread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
+ __ cbz(tmp1, done);
- // Record the previous value
- __ str(pre_val, Address(tmp2, tmp1));
- __ b(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, Rthread, tmp1, tmp2, done, runtime);
__ bind(runtime);
// save the live input values
- if (store_addr != noreg) {
- // avoid raw_push to support any ordering of store_addr and new_val
- __ push(RegisterSet(store_addr) | RegisterSet(new_val));
- } else {
- __ push(pre_val);
+ RegisterSet set = RegisterSet(pre_val) | RegisterSet(R0, R3) | RegisterSet(R12);
+ // save the live input values
+ if (obj != noreg) {
+ // avoid raw_push to support any ordering of store_addr and pre_val
+ set = set | RegisterSet(obj);
}
+ __ push(set);
+
if (pre_val != R0) {
__ mov(R0, pre_val);
}
@@ -177,33 +196,17 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1);
- if (store_addr != noreg) {
- __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
- } else {
- __ pop(pre_val);
- }
-
+ __ pop(set);
__ bind(done);
}
-// G1 post-barrier.
-// Blows all volatile registers R0-R3, Rtemp, LR).
-void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
- Register store_addr,
- Register new_val,
- Register tmp1,
- Register tmp2,
- Register tmp3) {
-
- Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
- Label done;
- Label runtime;
-
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val);
@@ -211,22 +214,31 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ b(done, eq);
// crosses regions, storing null?
-
- __ cbz(new_val, done);
-
+ if (new_val_may_be_null) {
+ __ cbz(new_val, done);
+ }
// storing region crossing non-null, is card already dirty?
const Register card_addr = tmp1;
- __ mov_address(tmp2, (address)ct->byte_map_base());
+ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set());
+ __ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
__ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
__ ldrb(tmp2, Address(card_addr));
__ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
- __ b(done, eq);
+}
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ const Register tmp3,
+ Label& done,
+ Label& runtime) {
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
-
assert(CardTable::dirty_card_val() == 0, "adjust this code");
+ // card_addr is loaded by generate_post_barrier_fast_path
+ const Register card_addr = tmp1;
__ ldrb(tmp2, Address(card_addr));
__ cbz(tmp2, done);
@@ -234,29 +246,139 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
// dirty card and log.
__ strb(__ zero_register(tmp2), Address(card_addr));
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, card_addr, tmp2, tmp3);
+ __ b(done);
+}
- __ ldr(tmp2, queue_index);
- __ ldr(tmp3, buffer);
- __ subs(tmp2, tmp2, wordSize);
- __ b(runtime, lt); // go to runtime if now negative
-
- __ str(tmp2, queue_index);
+// G1 post-barrier.
+// Blows all volatile registers (R0-R3, LR).
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+ Label done;
+ Label runtime;
- __ str(card_addr, Address(tmp3, tmp2));
- __ b(done);
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
+ // card_addr and card are loaded by generate_post_barrier_fast_path
+ const Register card = tmp2;
+ const Register card_addr = tmp1;
+ __ b(done, eq);
+ generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
__ bind(runtime);
+ RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
+ __ push(set);
+
if (card_addr != R0) {
__ mov(R0, card_addr);
}
__ mov(R1, Rthread);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
+ __ pop(set);
+
__ bind(done);
}
+#if defined(COMPILER2)
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path, Register tmp1) {
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, Rthread);
+ __ call_VM_leaf(runtime_path, R0, R1);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == Rthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
+
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
+
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ cbnz(tmp1, *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
+
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), tmp1);
+ __ b(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == Rthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+
+ stub->initialize_registers(thread, tmp1, tmp2, tmp3);
+
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ b(*stub->entry(), ne);
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ Register tmp3 = stub->tmp3();
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2);
+ __ b(*stub->continuation());
+}
+
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
bool on_oop = type == T_OBJECT || type == T_ARRAY;
@@ -268,7 +390,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
if (on_oop && on_reference) {
// Generate the G1 pre-barrier code to log the value of
// the referent field in an SATB buffer.
- g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2);
+ g1_write_barrier_pre(masm, noreg, dst, tmp1, tmp2);
}
}
@@ -295,7 +417,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
}
if (needs_pre_barrier) {
- g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3);
+ g1_write_barrier_pre(masm, store_addr, tmp3 /*pre_val*/, tmp1, tmp2);
}
if (is_null) {
diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
index 52932faa3e4de..aefde19142e40 100644
--- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp
@@ -33,6 +33,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -43,7 +45,6 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void g1_write_barrier_pre(MacroAssembler* masm,
Register store_addr,
- Register new_val,
Register pre_val,
Register tmp1,
Register tmp2);
@@ -70,6 +71,29 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
};
#endif // CPU_ARM_GC_G1_G1BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
new file mode 100644
index 0000000000000..8a0a9e1aa531a
--- /dev/null
+++ b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad
@@ -0,0 +1,201 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_arm.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ Register tmp3) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegP tmp1, iRegP tmp2, iRegP tmp3, flagsReg icc)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL icc);
+ ins_cost(2 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "sd $src, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ str($src$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(istore_mem_reg);
+%}
+
+instruct g1CompareAndSwapP(iRegI res, indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP oldval, flagsReg ccr )
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(KILL ccr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "loop: \n\t"
+ "LDREX $tmp1, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t"
+ "CMP $tmp1, $oldval\n\t"
+ "STREX.eq $tmp1, $newval, $mem\n\t"
+ "MOV.ne $tmp1, 0 \n\t"
+ "EORS.eq $tmp1,$tmp1, 1 \n\t"
+ "B.eq loop \n\t"
+ "MOV $res, $tmp1" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ Label loop;
+ __ bind(loop);
+ __ ldrex($tmp1$$Register,$mem$$Address);
+ __ cmp($tmp1$$Register, $oldval$$Register);
+ __ strex($tmp1$$Register, $newval$$Register, $mem$$Address, eq);
+ __ mov($tmp1$$Register, 0, ne);
+ __ eors($tmp1$$Register, $tmp1$$Register, 1, eq);
+ __ b(loop, eq);
+ __ mov($res$$Register, $tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP preval, flagsReg ccr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(KILL ccr, TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+ ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST));
+ format %{ "loop: \n\t"
+ "LDREX $preval, $mem\n\t"
+ "STREX $tmp1, $newval, $mem\n\t"
+ "CMP $tmp1, 0 \n\t"
+ "B.ne loop \n\t" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ Label loop;
+ __ bind(loop);
+ __ ldrex($preval$$Register,$mem$$Address);
+ __ strex($tmp1$$Register, $newval$$Register, $mem$$Address);
+ __ cmp($tmp1$$Register, 0);
+ __ b(loop, ne);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ $tmp3$$Register /* tmp3 */);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
+instruct g1LoadP(iRegP dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg icc)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL icc);
+ ins_cost(MEMORY_REF_COST + BRANCH_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ ldr($dst$$Register, Address($mem$$Register));
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_mem);
+%}
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
index ea19730673cb6..c13a259a1b960 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
@@ -31,6 +31,10 @@
#include "runtime/javaThread.hpp"
#include "runtime/stubRoutines.hpp"
+#ifdef COMPILER2
+#include "gc/shared/c2/barrierSetC2.hpp"
+#endif // COMPILER2
+
#define __ masm->
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -206,7 +210,57 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
#ifdef COMPILER2
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
- Unimplemented(); // This must be implemented to support late barrier expansion.
+ if (!OptoReg::is_reg(opto_reg)) {
+ return OptoReg::Bad;
+ }
+
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (!vm_reg->is_valid()){
+ // skip APSR and FPSCR
+ return OptoReg::Bad;
+ }
+
+ return opto_reg;
}
+void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
+ // Record registers that needs to be saved/restored
+ RegMaskIterator rmi(stub->preserve_set());
+ while (rmi.has_next()) {
+ const OptoReg::Name opto_reg = rmi.next();
+ if (OptoReg::is_reg(opto_reg)) {
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_Register()) {
+ gp_regs += RegSet::of(vm_reg->as_Register());
+ } else if (vm_reg->is_FloatRegister()) {
+ fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
+ } else {
+ fatal("Unknown register type");
+ }
+ }
+ }
+ // Remove C-ABI SOE registers that will be updated
+ gp_regs -= RegSet::range(R4, R11) + RegSet::of(R13, R15);
+
+ // Remove C-ABI SOE fp registers
+ fp_regs -= FloatRegSet::range(S16, S31);
+}
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
+ : masm(masm),
+ gp_regs(),
+ fp_regs() {
+ // Figure out what registers to save/restore
+ initialize(stub);
+
+ // Save registers
+ if (gp_regs.size() > 0) __ push(RegisterSet::from(gp_regs));
+ if (fp_regs.size() > 0) __ fpush(FloatRegisterSet::from(fp_regs));
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+ // Restore registers
+ if (fp_regs.size() > 0) __ fpop(FloatRegisterSet::from(fp_regs));
+ if (gp_regs.size() > 0) __ pop(RegisterSet::from(gp_regs));
+}
#endif // COMPILER2
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
index 60021390ea26f..054d172f46340 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
@@ -31,7 +31,9 @@
#ifdef COMPILER2
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
+class BarrierStubC2;
class Node;
#endif // COMPILER2
@@ -69,4 +71,26 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
#endif // COMPILER2
};
+#ifdef COMPILER2
+// This class saves and restores the registers that need to be preserved across
+// the runtime call represented by a given C2 barrier stub. Use as follows:
+// {
+// SaveLiveRegisters save(masm, stub);
+// ..
+// __ bl(...);
+// ..
+// }
+class SaveLiveRegisters {
+private:
+ MacroAssembler* const masm;
+ RegSet gp_regs;
+ FloatRegSet fp_regs;
+
+public:
+ void initialize(BarrierStubC2* stub);
+ SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub);
+ ~SaveLiveRegisters();
+};
+
+#endif // COMPILER2
#endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp
index 9f486d2a62586..d8961fd293578 100644
--- a/src/hotspot/cpu/arm/register_arm.hpp
+++ b/src/hotspot/cpu/arm/register_arm.hpp
@@ -303,6 +303,31 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
static const int max_fpr;
};
+typedef AbstractRegSet<Register> RegSet;
+typedef AbstractRegSet<FloatRegister> FloatRegSet;
+
+template <>
+inline Register AbstractRegSet<Register>::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::first() {
+ uint32_t first = _bitset & -_bitset;
+ return first ? as_FloatRegister(exact_log2(first)) : fnoreg;
+}
+
+template <>
+inline FloatRegister AbstractRegSet<FloatRegister>::last() {
+ if (_bitset == 0) { return fnoreg; }
+ int last = max_size() - 1 - count_leading_zeros(_bitset);
+ return as_FloatRegister(last);
+}
+
+
+
class VFPSystemRegisterImpl;
typedef VFPSystemRegisterImpl* VFPSystemRegister;
class VFPSystemRegisterImpl : public AbstractRegisterImpl {
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 7648e5c5d9260..7c1f3aafe7d52 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -38,6 +38,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/powerOfTwo.hpp"
diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
index 679f07a028e2c..ec9d237e50da0 100644
--- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
@@ -175,6 +175,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
break;
case Interpreter::java_lang_math_fmaD:
case Interpreter::java_lang_math_fmaF:
+ case Interpreter::java_lang_math_tanh:
// TODO: Implement intrinsic
break;
default:
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index 80519fd89f426..0974ff1f9a9c3 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -3974,6 +3974,7 @@ void TemplateTable::_new() {
// make sure klass is initialized
// make sure klass is fully initialized
__ ldrb(Rtemp, Address(Rklass, InstanceKlass::init_state_offset()));
+ __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp);
__ cmp(Rtemp, InstanceKlass::fully_initialized);
__ b(slow_case, ne);
diff --git a/src/hotspot/cpu/arm/upcallLinker_arm.cpp b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
index c7645f4a03351..696b2001e6b7b 100644
--- a/src/hotspot/cpu/arm/upcallLinker_arm.cpp
+++ b/src/hotspot/cpu/arm/upcallLinker_arm.cpp
@@ -25,7 +25,7 @@
#include "prims/upcallLinker.hpp"
#include "utilities/debug.hpp"
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 42934dc7c3179..684c06614a97a 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -2274,6 +2274,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
}
__ lbz(op->tmp1()->as_register(),
in_bytes(InstanceKlass::init_state_offset()), op->klass()->as_register());
+ // acquire barrier included in membar_storestore() which follows the allocation immediately.
__ cmpwi(CCR0, op->tmp1()->as_register(), InstanceKlass::fully_initialized);
__ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *op->stub()->entry());
}
diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index c05e97a4e9aa3..83fad376d292a 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -92,7 +92,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(Roop, Rmark, Rscratch, slow_int);
+ lightweight_lock(Rbox, Roop, Rmark, Rscratch, slow_int);
} else if (LockingMode == LM_LEGACY) {
// ... and mark it unlocked.
ori(Rmark, Rmark, markWord::unlocked_value);
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
index cc69c0abe361f..1147c3b42b25f 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
@@ -39,12 +39,12 @@
void C2_MacroAssembler::fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
Register tmp1, Register tmp2, Register tmp3) {
- compiler_fast_lock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+ compiler_fast_lock_lightweight_object(flag, obj, box, tmp1, tmp2, tmp3);
}
void C2_MacroAssembler::fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
Register tmp1, Register tmp2, Register tmp3) {
- compiler_fast_unlock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+ compiler_fast_unlock_lightweight_object(flag, obj, box, tmp1, tmp2, tmp3);
}
// Intrinsics for CompactStrings
diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp
index 4c1ffeb0d768e..eb16af5e9db1b 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.cpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.cpp
@@ -117,9 +117,9 @@ bool frame::safe_for_sender(JavaThread *thread) {
return false;
}
- common_abi* sender_abi = (common_abi*) fp;
+ volatile common_abi* sender_abi = (common_abi*) fp; // May get updated concurrently by deoptimization!
intptr_t* sender_sp = (intptr_t*) fp;
- address sender_pc = (address) sender_abi->lr;;
+ address sender_pc = (address) sender_abi->lr;
if (Continuation::is_return_barrier_entry(sender_pc)) {
// If our sender_pc is the return barrier, then our "real" sender is the continuation entry
@@ -134,9 +134,18 @@ bool frame::safe_for_sender(JavaThread *thread) {
return false;
}
+ intptr_t* unextended_sender_sp = is_interpreted_frame() ? interpreter_frame_sender_sp() : sender_sp;
+
+ // If the sender is a deoptimized nmethod we need to check if the original pc is valid.
+ nmethod* sender_nm = sender_blob->as_nmethod_or_null();
+ if (sender_nm != nullptr && sender_nm->is_deopt_pc(sender_pc)) {
+ address orig_pc = *(address*)((address)unextended_sender_sp + sender_nm->orig_pc_offset());
+ if (!sender_nm->insts_contains_inclusive(orig_pc)) return false;
+ }
+
// It should be safe to construct the sender though it might not be valid.
- frame sender(sender_sp, sender_pc, nullptr /* unextended_sp */, nullptr /* fp */, sender_blob);
+ frame sender(sender_sp, sender_pc, unextended_sender_sp, nullptr /* fp */, sender_blob);
// Do we have a valid fp?
address sender_fp = (address) sender.fp();
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
index 7d230d301c22b..39693bdf925bf 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp
@@ -41,10 +41,20 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
+static void generate_marking_inactive_test(MacroAssembler* masm) {
+ int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbz(R0, active_offset, R16_thread); // R0 := *(mark queue active address)
+ __ cmpwi(CCR0, R0, 0);
+}
+
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register from, Register to, Register count,
Register preserve1, Register preserve2) {
@@ -58,13 +68,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
Label filtered;
// Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- }
- __ cmpdi(CCR0, R0, 0);
+ generate_marking_inactive_test(masm);
__ beq(CCR0, filtered);
__ save_LR(R0);
@@ -109,35 +113,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ restore_LR(R0);
}
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register value, const Register temp) {
+ assert_different_registers(value, temp);
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ld(temp, in_bytes(index_offset), R16_thread); // temp := *(index address)
+ __ cmpdi(CCR0, temp, 0); // jump to runtime if index == 0 (full buffer)
+ __ beq(CCR0, runtime);
+ // The buffer is not full, store value into it.
+ __ ld(R0, in_bytes(buffer_offset), R16_thread); // R0 := buffer address
+ __ addi(temp, temp, -wordSize); // temp := next index
+ __ std(temp, in_bytes(index_offset), R16_thread); // *(index address) := next index
+ __ stdx(value, temp, R0); // *(buffer address + next index) := value
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators,
Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
Register tmp1, Register tmp2,
MacroAssembler::PreservationLevel preservation_level) {
+ assert_different_registers(pre_val, tmp1, tmp2);
+
bool not_null = (decorators & IS_NOT_NULL) != 0,
preloaded = obj == noreg;
Register nv_save = noreg;
- if (preloaded) {
+ // Determine necessary runtime invocation preservation measures
+ const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
+ const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
+ const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
+ int nbytes_save = 0;
+
+ if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
// We are not loading the previous value so make
// sure that we don't trash the value in pre_val
// with the code below.
- assert_different_registers(pre_val, tmp1, tmp2);
- if (pre_val->is_volatile()) {
- nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
- assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
- }
+ nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
+ assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
}
Label runtime, filtered;
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread);
- }
- __ cmpdi(CCR0, tmp1, 0);
+ generate_marking_inactive_test(masm);
__ beq(CCR0, filtered);
// Do we need to load the previous value?
@@ -175,28 +192,12 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
// Can we store original value in the thread's buffer?
// Is index == 0?
// (The index field is typed as size_t.)
- const Register Rbuffer = tmp1, Rindex = tmp2;
-
- __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
- __ cmpdi(CCR0, Rindex, 0);
- __ beq(CCR0, runtime); // If index == 0, goto runtime.
- __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread);
-
- __ addi(Rindex, Rindex, -wordSize); // Decrement index.
- __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread);
-
- // Record the previous value.
- __ stdx(pre_val, Rbuffer, Rindex);
+ generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime, pre_val, tmp1);
__ b(filtered);
__ bind(runtime);
- // Determine necessary runtime invocation preservation measures
- const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
- const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
- const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
- int nbytes_save = 0;
-
// May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
if (needs_frame) {
if (preserve_gp_registers) {
@@ -210,11 +211,11 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ push_frame_reg_args(nbytes_save, tmp2);
}
- if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+ if (nv_save != noreg) {
__ mr(nv_save, pre_val); // Save pre_val across C call if it was preloaded.
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
- if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
+ if (nv_save != noreg) {
__ mr(pre_val, nv_save); // restore
}
@@ -230,6 +231,26 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ bind(filtered);
}
+static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
+ __ xorr(R0, store_addr, new_val); // tmp1 := store address ^ new value
+ __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+}
+
+static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+ __ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
+ __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+ __ lbzx(R0, tmp1, tmp2); // tmp1 := card address
+ __ cmpwi(CCR0, R0, (int)G1CardTable::g1_young_card_val());
+ return Address(tmp1, tmp2); // return card address
+}
+
+static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
+ __ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
+ __ lbzx(R0, card_addr.base(), card_addr.index()); // tmp2 := card
+ __ cmpwi(CCR0, R0, (int)G1CardTable::dirty_card_val()); // tmp2 := card == dirty_card_val?
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
Register tmp1, Register tmp2, Register tmp3,
@@ -241,9 +262,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
 CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
- // Does store cross heap regions?
- __ xorr(tmp1, store_addr, new_val);
- __ srdi_(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
+ generate_region_crossing_test(masm, store_addr, new_val);
__ beq(CCR0, filtered);
// Crosses regions, storing null?
@@ -257,43 +276,22 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
__ beq(CCR0, filtered);
}
- // Storing region crossing non-null, is card already dirty?
- const Register Rcard_addr = tmp1;
- Register Rbase = tmp2;
- __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3);
-
- __ srdi(Rcard_addr, store_addr, CardTable::card_shift());
-
- // Get the address of the card.
- __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr);
- __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val());
+ Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
__ beq(CCR0, filtered);
- __ membar(Assembler::StoreLoad);
- __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); // Reload after membar.
- __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val());
+ generate_card_dirty_test(masm, card_addr);
__ beq(CCR0, filtered);
- // Storing a region crossing, non-null oop, card is clean.
- // Dirty card and log.
- __ li(tmp3, (int)G1CardTable::dirty_card_val());
- //release(); // G1: oops are allowed to get visible after dirty marking.
- __ stbx(tmp3, Rbase, Rcard_addr);
-
- __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued.
- Rbase = noreg; // end of lifetime
+ __ li(R0, (int)G1CardTable::dirty_card_val());
+ __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
- const Register Rqueue_index = tmp2,
- Rqueue_buf = tmp3;
- __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
- __ cmpdi(CCR0, Rqueue_index, 0);
- __ beq(CCR0, runtime); // index == 0 then jump to runtime
- __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread);
+ Register Rcard_addr = tmp3;
+ __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
- __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index
- __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread);
-
- __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime, Rcard_addr, tmp1);
__ b(filtered);
__ bind(runtime);
@@ -392,6 +390,142 @@ void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value
__ bind(done);
}
+#ifdef COMPILER2
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+ SaveLiveRegisters save_registers(masm, stub);
+ __ call_VM_leaf(runtime_path, arg, R16_thread);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert_different_registers(obj, tmp1, tmp2, R0);
+ assert_different_registers(pre_val, tmp1, R0);
+ assert(!UseCompressedOops || tmp2 != noreg, "tmp2 needed with CompressedOops");
+
+ stub->initialize_registers(obj, pre_val, R16_thread, tmp1, tmp2);
+
+ generate_marking_inactive_test(masm);
+ __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register tmp1 = stub->tmp1();
+
+ __ bind(*stub->entry());
+
+ if (obj != noreg) {
+ // Note: C2 currently doesn't use implicit null checks with barriers.
+ // Otherwise, obj could be null and the following instruction would raise a SIGSEGV.
+ if (UseCompressedOops) {
+ __ lwz(pre_val, 0, obj);
+ } else {
+ __ ld(pre_val, 0, obj);
+ }
+ }
+ __ cmpdi(CCR0, pre_val, 0);
+ __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());
+
+ Register pre_val_decoded = pre_val;
+ if (UseCompressedOops) {
+ pre_val_decoded = __ decode_heap_oop_not_null(stub->tmp2(), pre_val);
+ }
+
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime, pre_val_decoded, tmp1);
+ __ b(*stub->continuation());
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val_decoded, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ b(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub,
+ bool decode_new_val) {
+ assert_different_registers(store_addr, new_val, tmp1, R0);
+ assert_different_registers(store_addr, tmp1, tmp2, R0);
+
+ stub->initialize_registers(R16_thread, tmp1, tmp2);
+
+ bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ Register new_val_decoded = new_val;
+
+ if (decode_new_val) {
+ assert(UseCompressedOops, "or should not be here");
+ if (null_check_required && CompressedOops::base() != nullptr) {
+ // We prefer doing the null check after the region crossing check.
+ // Only compressed oop modes with base != null require a null check here.
+ __ cmpwi(CCR0, new_val, 0);
+ __ beq(CCR0, *stub->continuation());
+ null_check_required = false;
+ }
+ new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
+ }
+
+ generate_region_crossing_test(masm, store_addr, new_val_decoded);
+ __ beq(CCR0, *stub->continuation());
+
+ // crosses regions, storing null?
+ if (null_check_required) {
+ __ cmpdi(CCR0, new_val_decoded, 0);
+ __ beq(CCR0, *stub->continuation());
+ }
+
+ Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
+ assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub");
+ __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Address card_addr(stub->tmp1(), stub->tmp2()); // See above.
+
+ __ bind(*stub->entry());
+
+ generate_card_dirty_test(masm, card_addr);
+ __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation());
+
+ __ li(R0, (int)G1CardTable::dirty_card_val());
+ __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
+
+ Register Rcard_addr = stub->tmp1();
+ __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
+
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime, Rcard_addr, stub->tmp2());
+ __ b(*stub->continuation());
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ b(*stub->continuation());
+}
+
+#endif // COMPILER2
+
#ifdef COMPILER1
#undef __
@@ -470,13 +604,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ std(tmp2, -24, R1_SP);
// Is marking still active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ lwz(tmp, satb_q_active_byte_offset, R16_thread);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbz(tmp, satb_q_active_byte_offset, R16_thread);
- }
- __ cmpdi(CCR0, tmp, 0);
+ generate_marking_inactive_test(sasm);
__ beq(CCR0, marking_not_active);
__ bind(restart);
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
index d9a252ff6eaee..1c9fe8a5d106f 100644
--- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp
@@ -30,10 +30,16 @@
#include "gc/shared/modRefBarrierSetAssembler.hpp"
#include "utilities/macros.hpp"
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif
+
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -59,6 +65,25 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
MacroAssembler::PreservationLevel preservation_level);
public:
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub,
+ bool decode_new_val);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
diff --git a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
new file mode 100644
index 0000000000000..f4163242cad7b
--- /dev/null
+++ b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad
@@ -0,0 +1,684 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024 SAP SE. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_ppc.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void pre_write_barrier(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2 = noreg, // only needed with CompressedOops when pre_val needs to be preserved
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, tmp1, (tmp2 != noreg) ? tmp2 : pre_val, stub);
+}
+
+static void post_write_barrier(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2,
+ bool decode_new_val = false) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "std $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ std($src$$Register, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1StoreN(indirect mem, iRegNsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "stw $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ stw($src$$Register, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "encode_heap_oop $src\n\t"
+ "stw $mem, $src\t# ptr" %}
+ ins_encode %{
+ pre_write_barrier(masm, this,
+ $mem$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ noreg,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ Register encoded_oop = noreg;
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ encoded_oop = __ encode_heap_oop($tmp2$$Register, $src$$Register);
+ } else {
+ encoded_oop = __ encode_heap_oop_not_null($tmp2$$Register, $src$$Register);
+ }
+ __ stw(encoded_oop, 0, $mem$$Register);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $src$$Register /* new_val */,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeP(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgd $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeP_acq(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgd acq $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeN(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgw $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndExchangeN_acq(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "cmpxchgw acq $newval, $mem" %}
+ ins_encode %{
+ Label no_update;
+ __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */);
+ __ li($res$$Register, 1);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ __ bind(no_update); // weak version requires no memory barrier on failure
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ __ bind(no_update);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct weakG1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 &&
+ (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP_DEF res, TEMP tmp, KILL cr0);
+ format %{ "weak CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %}
+ ins_encode %{
+ Label no_update;
+ __ li($res$$Register, 0);
+ __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+ noreg, &no_update, true, true);
+ // Pass oldval to SATB which is the only value which can get overwritten.
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg,
+ $oldval$$Register /* pre_val */,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp$$Register,
+ $res$$Register /* temp */,
+ true /* decode_new_val */);
+ __ li($res$$Register, 1);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+ __ sync();
+ }
+ __ bind(no_update); // weak version requires no memory barrier on failure
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1GetAndSetP(iRegPdst res, indirect mem, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (GetAndSetP mem newval));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "GetAndSetP $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ __ getandsetd($res$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::cmpxchgx_hint_atomic_update());
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $res$$Register /* res */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1GetAndSetN(iRegNdst res, indirect mem, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (GetAndSetN mem newval));
+ effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0);
+ format %{ "GetAndSetN $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ __ getandsetw($res$$Register, $newval$$Register, $mem$$Register,
+ MacroAssembler::cmpxchgx_hint_atomic_update());
+ // Can be done after cmpxchg because there's no safepoint here.
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $res$$Register /* res */,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ post_write_barrier(masm, this,
+ $mem$$Register,
+ $newval$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register,
+ true /* decode_new_val */);
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ __ isync();
+ } else {
+ __ sync();
+ }
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1LoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0);
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()).
+ match(Set dst (LoadP mem));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register);
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct g1LoadN(iRegNdst dst, memoryAlg4 mem, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0)
+%{
+ predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0);
+ // This instruction does not need an acquiring counterpart because it is only
+ // used for reference loading (Reference::get()).
+ match(Set dst (LoadN mem));
+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0);
+ ins_cost(2 * MEMORY_REF_COST);
+ format %{ "lwz $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ lwz($dst$$Register, $mem$$disp, $mem$$base$$Register);
+ pre_write_barrier(masm, this,
+ noreg /* obj */,
+ $dst$$Register,
+ $tmp1$$Register,
+ $tmp2$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
index 3cb5c5a628f39..5315080721249 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
@@ -144,9 +144,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler *masm, Dec
// Invoke runtime.
address jrt_address = nullptr;
if (UseCompressedOops) {
- jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry);
+ jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
} else {
- jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry);
+ jrt_address = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
}
assert(jrt_address != nullptr, "jrt routine cannot be found");
@@ -302,7 +302,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm
}
// Invoke runtime.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, R16_thread);
// Restore to-be-preserved registers.
if (!preserve_gp_registers && preloaded_mode && pre_val->is_volatile()) {
@@ -906,7 +906,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ push_frame_reg_args(nbytes_save, R11_tmp1);
// Invoke runtime.
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), R0_pre_val, R16_thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), R0_pre_val, R16_thread);
// Restore to-be-preserved registers.
__ pop_frame();
diff --git a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
index 136fd7a8ad1cd..ddeb9adf0a9ae 100644
--- a/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/z/zAddress_ppc.cpp
@@ -90,7 +90,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
index 89ab1b1edeeb4..8a65022126e66 100644
--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
@@ -943,6 +943,8 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
__ call_VM_leaf(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr(), R3_ARG1);
} else if (stub->is_atomic()) {
__ call_VM_leaf(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr(), R3_ARG1);
+ } else if (stub->is_nokeepalive()) {
+ __ call_VM_leaf(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr(), R3_ARG1);
} else {
__ call_VM_leaf(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr(), R3_ARG1);
}
diff --git a/src/hotspot/cpu/ppc/gc/z/z_ppc.ad b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
index 017574d40ff8b..bb696a4738f40 100644
--- a/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
+++ b/src/hotspot/cpu/ppc/gc/z/z_ppc.ad
@@ -83,7 +83,8 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Register
z_color(masm, rnew_zpointer, rnew_zaddress);
} else {
bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
- ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, Address(ref_base, disp), rnew_zaddress, rnew_zpointer, is_native, is_atomic);
+ bool is_nokeepalive = (node->barrier_data() & ZBarrierNoKeepalive) != 0;
+ ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, Address(ref_base, disp), rnew_zaddress, rnew_zpointer, is_native, is_atomic, is_nokeepalive);
ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
bs_asm->store_barrier_fast(masm, ref_base, disp, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
}
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index a29e0810d52ca..aa77f0169ea1a 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -968,7 +968,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(object, header, tmp, slow_case);
+ lightweight_lock(monitor, object, header, tmp, slow_case);
b(count_locking);
} else if (LockingMode == LM_LEGACY) {
// Load markWord from object into header.
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 8449d74d8a861..a194c030a6124 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -2410,7 +2410,7 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
- Label L_fallthrough;
+ Label L_check_thread, L_fallthrough;
if (L_fast_path == nullptr) {
L_fast_path = &L_fallthrough;
} else if (L_slow_path == nullptr) {
@@ -2419,10 +2419,14 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa
// Fast path check: class is fully initialized
lbz(R0, in_bytes(InstanceKlass::init_state_offset()), klass);
+ // acquire by cmp-branch-isync if fully_initialized
cmpwi(CCR0, R0, InstanceKlass::fully_initialized);
- beq(CCR0, *L_fast_path);
+ bne(CCR0, L_check_thread);
+ isync();
+ b(*L_fast_path);
// Fast path check: current thread is initializer thread
+ bind(L_check_thread);
ld(R0, in_bytes(InstanceKlass::init_thread_offset()), klass);
cmpd(CCR0, thread, R0);
if (L_slow_path == &L_fallthrough) {
@@ -2715,13 +2719,34 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
b(success);
bind(notRecursive);
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ release();
+ li(temp, 0);
+ std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
orr(temp, temp, displaced_header); // Will be 0 if both are 0.
cmpdi(flag, temp, 0);
- bne(flag, failure);
- release();
- std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+ beq(flag, success); // If so we are done.
+
+ // Check if there is a successor.
+ ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header);
+ cmpdi(flag, temp, 0);
+ bne(flag, success); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+
+ crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path
+ b(failure);
// flag == EQ indicates success, decrement held monitor count
// flag == NE indicates failure
@@ -2730,9 +2755,9 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
bind(failure);
}
-void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
- Register tmp2, Register tmp3) {
- assert_different_registers(obj, tmp1, tmp2, tmp3);
+void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register box,
+ Register tmp1, Register tmp2, Register tmp3) {
+ assert_different_registers(obj, box, tmp1, tmp2, tmp3);
assert(flag == CCR0, "bad condition register");
// Handle inflated monitor.
@@ -2742,11 +2767,17 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Finish fast lock unsuccessfully. MUST branch to with flag == EQ
Label slow_path;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ li(tmp1, 0);
+ std(tmp1, in_bytes(BasicObjectLock::lock_offset()) + BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ }
+
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp1, obj);
lbz(tmp1, in_bytes(Klass::misc_flags_offset()), tmp1);
- testbitdi(flag, R0, tmp1, exact_log2(KlassFlags::_misc_is_value_based_class));
- bne(flag, slow_path);
+ testbitdi(CCR0, R0, tmp1, exact_log2(KlassFlags::_misc_is_value_based_class));
+ bne(CCR0, slow_path);
}
const Register mark = tmp1;
@@ -2761,8 +2792,8 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Check if lock-stack is full.
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
- cmplwi(flag, top, LockStack::end_offset() - 1);
- bgt(flag, slow_path);
+ cmplwi(CCR0, top, LockStack::end_offset() - 1);
+ bgt(CCR0, slow_path);
// The underflow check is elided. The recursive check will always fail
// when the lock stack is empty because of the _bad_oop_sentinel field.
@@ -2770,19 +2801,19 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
// Check if recursive.
subi(t, top, oopSize);
ldx(t, R16_thread, t);
- cmpd(flag, obj, t);
- beq(flag, push);
+ cmpd(CCR0, obj, t);
+ beq(CCR0, push);
// Check for monitor (0b10) or locked (0b00).
ld(mark, oopDesc::mark_offset_in_bytes(), obj);
andi_(t, mark, markWord::lock_mask_in_place);
- cmpldi(flag, t, markWord::unlocked_value);
- bgt(flag, inflated);
- bne(flag, slow_path);
+ cmpldi(CCR0, t, markWord::unlocked_value);
+ bgt(CCR0, inflated);
+ bne(CCR0, slow_path);
// Not inflated.
- // Try to lock. Transition lock bits 0b00 => 0b01
+ // Try to lock. Transition lock bits 0b01 => 0b00
assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow_path, MacroAssembler::MemBarAcq);
@@ -2797,38 +2828,84 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
{ // Handle inflated monitor.
bind(inflated);
+ // mark contains the tagged ObjectMonitor*.
+ const uintptr_t monitor_tag = markWord::monitor_value;
+ const Register monitor = mark;
+ const Register owner_addr = tmp2;
+ Label monitor_locked;
+
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register tagged_monitor = mark;
- const uintptr_t monitor_tag = markWord::monitor_value;
- const Register owner_addr = tmp2;
+ // Compute owner address.
+ addi(owner_addr, mark, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
+ } else {
+ Label monitor_found;
+ Register cache_addr = tmp2;
+
+ // Load cache address
+ addi(cache_addr, R16_thread, in_bytes(JavaThread::om_cache_oops_offset()));
+
+ const int num_unrolled = 2;
+ for (int i = 0; i < num_unrolled; i++) {
+ ld(tmp3, 0, cache_addr);
+ cmpd(CCR0, tmp3, obj);
+ beq(CCR0, monitor_found);
+ addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
+ }
+
+ Label loop;
+
+ // Search for obj in cache.
+ bind(loop);
+
+ // Check for match.
+ ld(tmp3, 0, cache_addr);
+ cmpd(CCR0, tmp3, obj);
+ beq(CCR0, monitor_found);
+
+ // Search until null encountered, guaranteed _null_sentinel at end.
+ addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
+ cmpdi(CCR1, tmp3, 0);
+ bne(CCR1, loop);
+ // Cache Miss, CCR0.NE set from cmp above
+ b(slow_path);
+
+ bind(monitor_found);
+ ld(monitor, in_bytes(OMCache::oop_to_monitor_difference()), cache_addr);
// Compute owner address.
- addi(owner_addr, tagged_monitor, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
-
- // CAS owner (null => current thread).
- cmpxchgd(/*flag=*/flag,
- /*current_value=*/t,
- /*compare_value=*/(intptr_t)0,
- /*exchange_value=*/R16_thread,
- /*where=*/owner_addr,
- MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
- MacroAssembler::cmpxchgx_hint_acquire_lock());
- beq(flag, locked);
-
- // Check if recursive.
- cmpd(flag, t, R16_thread);
- bne(flag, slow_path);
-
- // Recursive.
+ addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset()));
+ }
+
+ // CAS owner (null => current thread).
+ cmpxchgd(/*flag=*/CCR0,
+ /*current_value=*/t,
+ /*compare_value=*/(intptr_t)0,
+ /*exchange_value=*/R16_thread,
+ /*where=*/owner_addr,
+ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+ MacroAssembler::cmpxchgx_hint_acquire_lock());
+ beq(CCR0, monitor_locked);
+
+ // Check if recursive.
+ cmpd(CCR0, t, R16_thread);
+ bne(CCR0, slow_path);
+
+ // Recursive.
+ if (!UseObjectMonitorTable) {
+ assert_different_registers(tmp1, owner_addr);
ld(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
addi(tmp1, tmp1, 1);
std(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
} else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- crxor(flag, Assembler::equal, flag, Assembler::equal);
- b(slow_path);
+ assert_different_registers(tmp2, monitor);
+ ld(tmp2, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ addi(tmp2, tmp2, 1);
+ std(tmp2, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ }
+
+ bind(monitor_locked);
+ if (UseObjectMonitorTable) {
+ std(monitor, BasicLock::object_monitor_cache_offset_in_bytes(), box);
}
}
@@ -2838,21 +2915,21 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister fla
#ifdef ASSERT
// Check that locked label is reached with flag == EQ.
Label flag_correct;
- beq(flag, flag_correct);
+ beq(CCR0, flag_correct);
stop("Fast Lock Flag != EQ");
#endif
bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag == NE.
- bne(flag, flag_correct);
+ bne(CCR0, flag_correct);
stop("Fast Lock Flag != NE");
bind(flag_correct);
#endif
// C2 uses the value of flag (NE vs EQ) to determine the continuation.
}
-void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
- Register tmp2, Register tmp3) {
+void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register box,
+ Register tmp1, Register tmp2, Register tmp3) {
assert_different_registers(obj, tmp1, tmp2, tmp3);
assert(flag == CCR0, "bad condition register");
@@ -2874,9 +2951,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
subi(top, top, oopSize);
ldx(t, R16_thread, top);
- cmpd(flag, obj, t);
+ cmpd(CCR0, obj, t);
// Top of lock stack was not obj. Must be monitor.
- bne(flag, inflated_load_monitor);
+ bne(CCR0, inflated_load_monitor);
// Pop lock-stack.
DEBUG_ONLY(li(t, 0);)
@@ -2889,8 +2966,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
// Check if recursive.
subi(t, top, oopSize);
ldx(t, R16_thread, t);
- cmpd(flag, obj, t);
- beq(flag, unlocked);
+ cmpd(CCR0, obj, t);
+ beq(CCR0, unlocked);
// Not recursive.
@@ -2941,62 +3018,74 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
cmplwi(CCR0, top, in_bytes(JavaThread::lock_stack_base_offset()));
blt(CCR0, check_done);
ldx(t, R16_thread, top);
- cmpd(flag, obj, t);
- bne(flag, inflated);
+ cmpd(CCR0, obj, t);
+ bne(CCR0, inflated);
stop("Fast Unlock lock on stack");
bind(check_done);
#endif
- if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register monitor = mark;
- const uintptr_t monitor_tag = markWord::monitor_value;
+ // mark contains the tagged ObjectMonitor*.
+ const Register monitor = mark;
+ const uintptr_t monitor_tag = markWord::monitor_value;
+ if (!UseObjectMonitorTable) {
// Untag the monitor.
subi(monitor, mark, monitor_tag);
+ } else {
+ ld(monitor, BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ // null check with Flags == NE, no valid pointer below alignof(ObjectMonitor*)
+ cmpldi(CCR0, monitor, checked_cast<uint8_t>(alignof(ObjectMonitor*)));
+ blt(CCR0, slow_path);
+ }
- const Register recursions = tmp2;
- Label not_recursive;
-
- // Check if recursive.
- ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
- addic_(recursions, recursions, -1);
- blt(CCR0, not_recursive);
+ const Register recursions = tmp2;
+ Label not_recursive;
- // Recursive unlock.
- std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
- crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
- b(unlocked);
+ // Check if recursive.
+ ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ addic_(recursions, recursions, -1);
+ blt(CCR0, not_recursive);
- bind(not_recursive);
+ // Recursive unlock.
+ std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+ crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
+ b(unlocked);
- Label release_;
- const Register t2 = tmp2;
+ bind(not_recursive);
- // Check if the entry lists are empty.
- ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
- ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
- orr(t, t, t2);
- cmpdi(flag, t, 0);
- beq(flag, release_);
+ Label set_eq_unlocked;
+ const Register t2 = tmp2;
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
- b(slow_path);
+ // Set owner to null.
+ // Release to satisfy the JMM
+ release();
+ li(t, 0);
+ std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
+ ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
+ ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
+ orr(t, t, t2);
+ cmpdi(CCR0, t, 0);
+ beq(CCR0, unlocked); // If so we are done.
+
+ // Check if there is a successor.
+ ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor);
+ cmpdi(CCR0, t, 0);
+ bne(CCR0, set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+
+ crxor(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = NE => slow path
+ b(slow_path);
- bind(release_);
- // Set owner to null.
- release();
- // t contains 0
- std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
- } else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- crxor(flag, Assembler::equal, flag, Assembler::equal);
- b(slow_path);
- }
+ bind(set_eq_unlocked);
+ crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = EQ => fast path
}
bind(unlocked);
@@ -3005,13 +3094,13 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
#ifdef ASSERT
// Check that unlocked label is reached with flag == EQ.
Label flag_correct;
- beq(flag, flag_correct);
+ beq(CCR0, flag_correct);
stop("Fast Lock Flag != EQ");
#endif
bind(slow_path);
#ifdef ASSERT
// Check that slow_path label is reached with flag == NE.
- bne(flag, flag_correct);
+ bne(CCR0, flag_correct);
stop("Fast Lock Flag != NE");
bind(flag_correct);
#endif
@@ -4640,15 +4729,21 @@ void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj,
//
// - obj: the object to be locked
// - t1, t2: temporary register
-void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Label& slow) {
+void MacroAssembler::lightweight_lock(Register box, Register obj, Register t1, Register t2, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
- assert_different_registers(obj, t1, t2);
+ assert_different_registers(box, obj, t1, t2);
Label push;
const Register top = t1;
const Register mark = t2;
const Register t = R0;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ li(t, 0);
+ std(t, in_bytes(BasicObjectLock::lock_offset()) + BasicLock::object_monitor_cache_offset_in_bytes(), box);
+ }
+
// Check if the lock-stack is full.
lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
cmplwi(CCR0, top, LockStack::end_offset());
@@ -4669,7 +4764,7 @@ void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, La
andi_(t, t, markWord::lock_mask_in_place);
bne(CCR0, slow);
- // Try to lock. Transition lock bits 0b00 => 0b01
+ // Try to lock. Transition lock bits 0b01 => 0b00
atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow, MacroAssembler::MemBarAcq);
bind(push);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index 03ad37a4fb04a..224e7bff99541 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -654,7 +654,7 @@ class MacroAssembler: public Assembler {
void inc_held_monitor_count(Register tmp);
void dec_held_monitor_count(Register tmp);
void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
- void lightweight_lock(Register obj, Register t1, Register t2, Label& slow);
+ void lightweight_lock(Register box, Register obj, Register t1, Register t2, Label& slow);
void lightweight_unlock(Register obj, Register t1, Label& slow);
// allocation (for C1)
@@ -675,11 +675,11 @@ class MacroAssembler: public Assembler {
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3);
- void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
- Register tmp2, Register tmp3);
+ void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3);
- void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
- Register tmp2, Register tmp3);
+ void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register box,
+ Register tmp1, Register tmp2, Register tmp3);
// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index e7e066ebcc6d3..f74dde0f97e6e 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -1000,6 +1000,10 @@ int MachNode::compute_padding(int current_offset) const {
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
@@ -2150,10 +2154,6 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return nullptr;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return nullptr;
-}
-
// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
return false;
@@ -5407,7 +5407,7 @@ instruct loadRange(iRegIdst dst, memory mem) %{
// Load Compressed Pointer
instruct loadN(iRegNdst dst, memory mem) %{
match(Set dst (LoadN mem));
- predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
+ predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// load compressed ptr" %}
@@ -5419,6 +5419,7 @@ instruct loadN(iRegNdst dst, memory mem) %{
// Load Compressed Pointer acquire.
instruct loadN_ac(iRegNdst dst, memory mem) %{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(3*MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t"
@@ -5432,7 +5433,7 @@ instruct loadN_ac(iRegNdst dst, memory mem) %{
// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
match(Set dst (DecodeN (LoadN mem)));
- predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
+ predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0 && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
@@ -6423,6 +6424,7 @@ instruct reinterpretX(vecX dst) %{
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
format %{ "STW $src, $dst \t// compressed oop" %}
@@ -6476,23 +6478,6 @@ instruct storeD(memory mem, regD src) %{
ins_pipe(pipe_class_memory);
%}
-//----------Store Instructions With Zeros--------------------------------------
-
-instruct storeCM(memory mem, immI_0 zero) %{
- match(Set mem (StoreCM mem zero));
- ins_cost(MEMORY_REF_COST);
-
- format %{ "STB #0, $mem \t// CMS card-mark byte store" %}
- size(8);
- ins_encode %{
- __ li(R0, 0);
- // No release barrier: Oops are allowed to get visible after marking.
- guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
- __ stb(R0, $mem$$disp, $mem$$base$$Register);
- %}
- ins_pipe(pipe_class_memory);
-%}
-
// Convert oop pointer into compressed form.
// Nodes for postalloc expand.
@@ -6598,7 +6583,7 @@ instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
match(Set dst (EncodeP src));
predicate(CompressedOops::shift() != 0 &&
- CompressedOops::base() ==0);
+ CompressedOops::base() == nullptr);
format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != nullptr" %}
size(4);
@@ -6695,7 +6680,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
CompressedOops::shift() != 0 &&
- CompressedOops::base() != 0);
+ CompressedOops::base() != nullptr);
ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
effect(TEMP crx);
@@ -6707,7 +6692,7 @@ instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
match(Set dst (DecodeN src));
predicate(CompressedOops::shift() != 0 &&
- CompressedOops::base() == 0);
+ CompressedOops::base() == nullptr);
format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %}
size(4);
@@ -6825,7 +6810,7 @@ instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
CompressedOops::shift() != 0 &&
- CompressedOops::base() != 0);
+ CompressedOops::base() != nullptr);
ins_cost(2 * DEFAULT_COST);
format %{ "DecodeN $dst, $src \t// $src != nullptr, postalloc expanded" %}
@@ -7477,6 +7462,7 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc
instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7676,7 +7662,7 @@ instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr,
instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+ predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7690,7 +7676,7 @@ instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iReg
instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+ predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
ins_encode %{
@@ -7939,7 +7925,7 @@ instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr,
instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+ predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
@@ -7953,7 +7939,7 @@ instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iReg
instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
- predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+ predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
ins_encode %{
@@ -8262,6 +8248,7 @@ instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr
instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
match(Set res (GetAndSetN mem_ptr src));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(TEMP_DEF res, TEMP cr0);
format %{ "GetAndSetN $res, $mem_ptr, $src" %}
ins_encode %{
@@ -12106,10 +12093,10 @@ instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp
ins_pipe(pipe_class_compare);
%}
-instruct cmpFastLockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+instruct cmpFastLockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR1 cr1) %{
predicate(LockingMode == LM_LIGHTWEIGHT);
match(Set crx (FastLock oop box));
- effect(TEMP tmp1, TEMP tmp2);
+ effect(TEMP tmp1, TEMP tmp2, KILL cr1);
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
ins_encode %{
diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp
index 302d49884fae3..b7ba4f053b5d6 100644
--- a/src/hotspot/cpu/ppc/register_ppc.hpp
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp
@@ -27,6 +27,7 @@
#define CPU_PPC_REGISTER_PPC_HPP
#include "asm/register.hpp"
+#include "utilities/count_trailing_zeros.hpp"
// forward declaration
class VMRegImpl;
@@ -555,4 +556,12 @@ constexpr Register R29_TOC = R29;
constexpr Register R11_scratch1 = R11;
constexpr Register R12_scratch2 = R12;
+template <>
+inline Register AbstractRegSet::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
+typedef AbstractRegSet RegSet;
+
#endif // CPU_PPC_REGISTER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 5cf5f7cf73e03..aa8ae6070b6a6 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -2399,7 +2399,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for locking.
if (LockingMode == LM_LIGHTWEIGHT) {
// fast_lock kills r_temp_1, r_temp_2, r_temp_3.
- __ compiler_fast_lock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+ __ compiler_fast_lock_lightweight_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
} else {
// fast_lock kills r_temp_1, r_temp_2, r_temp_3.
__ compiler_fast_lock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
@@ -2605,7 +2605,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for unlocking.
if (LockingMode == LM_LIGHTWEIGHT) {
- __ compiler_fast_unlock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+ __ compiler_fast_unlock_lightweight_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
} else {
__ compiler_fast_unlock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
}
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index ee3f1911e2082..206c161287fa2 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -4587,6 +4587,30 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
return start;
}
+ // load Method* target of MethodHandle
+ // R3_ARG1 = jobject receiver
+ // R19_method = result Method*
+ address generate_upcall_stub_load_target() {
+
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(R3_ARG1, R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS);
+ // Load target method from receiver
+ __ load_heap_oop(R19_method, java_lang_invoke_MethodHandle::form_offset(), R3_ARG1,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ load_heap_oop(R19_method, java_lang_invoke_LambdaForm::vmentry_offset(), R19_method,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ load_heap_oop(R19_method, java_lang_invoke_MemberName::method_offset(), R19_method,
+ R22_tmp2, R23_tmp3, MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS, IS_NOT_NULL);
+ __ ld(R19_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset(), R19_method);
+ __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread); // just in case callee is deoptimized
+
+ __ blr();
+
+ return start;
+ }
+
// Initialization
void generate_initial_stubs() {
// Generates all stubs and initializes the entry points
@@ -4651,6 +4675,7 @@ address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void generate_compiler_stubs() {
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 03dca2aeb9b7b..cf3dd4cbd34c0 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -1078,6 +1078,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_sin : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break;
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
+ case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
diff --git a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
index b60fd4f16d163..635bab900d157 100644
--- a/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
+++ b/src/hotspot/cpu/ppc/upcallLinker_ppc.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -118,7 +119,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16; // arg save & restore + move
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -221,7 +222,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry), R0);
__ addi(R3_ARG1, R1_SP, frame_data_offset);
- __ load_const_optimized(R4_ARG2, (intptr_t)receiver, R0);
__ call_c(call_target_address);
__ mr(R16_thread, R3_RET);
__ block_comment("} on_entry");
@@ -236,12 +236,12 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(callerSP), frame::native_abi_minframe_size, frame::jit_out_preserve_size);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(R3_ARG1);
- __ block_comment("} receiver ");
-
- __ load_const_optimized(R19_method, (intptr_t)entry);
- __ std(R19_method, in_bytes(JavaThread::callee_target_offset()), R16_thread);
+ __ block_comment("{ load target ");
+ __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target(), R0);
+ __ load_const_optimized(R3_ARG1, (intptr_t)receiver, R0);
+ __ mtctr(call_target_address);
+ __ bctrl(); // loads target Method* into R19_method
+ __ block_comment("} load target ");
__ push_cont_fastpath();
@@ -326,7 +326,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 98ab86bf72eb6..ad3d18fa39268 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -46,8 +46,10 @@
class Argument {
public:
enum {
- n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
- n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
+ // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
+ n_vector_register_parameters_c = 16, // v8, v9, ... v23
n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
@@ -143,6 +145,10 @@ constexpr Register x19_sender_sp = x19; // Sender's SP while in interpreter
constexpr Register t0 = x5;
constexpr Register t1 = x6;
constexpr Register t2 = x7;
+constexpr Register t3 = x28;
+constexpr Register t4 = x29;
+constexpr Register t5 = x30;
+constexpr Register t6 = x31;
const Register g_INTArgReg[Argument::n_int_register_parameters_c] = {
c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
@@ -705,6 +711,16 @@ class Assembler : public AbstractAssembler {
emit(insn);
}
+ void fencei() {
+ unsigned insn = 0;
+ patch((address)&insn, 6, 0, 0b0001111); // opcode
+ patch((address)&insn, 11, 7, 0b00000); // rd
+ patch((address)&insn, 14, 12, 0b001); // func
+ patch((address)&insn, 19, 15, 0b00000); // rs1
+ patch((address)&insn, 31, 20, 0b000000000000); // fm
+ emit(insn);
+ }
+
#define INSN(NAME, op, funct3, funct7) \
void NAME() { \
unsigned insn = 0; \
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
index 940706b0a7376..828f70e4decee 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -980,6 +980,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
__ lbu(t0, Address(op->klass()->as_register(),
InstanceKlass::init_state_offset()));
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(t1, (u1)InstanceKlass::fully_initialized);
add_debug_info_for_null_check_here(op->stub()->info());
__ bne(t0, t1, *op->stub()->entry(), /* is_far */ true);
diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp
index 7995750aba96b..db18525b89c76 100644
--- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp
@@ -71,32 +71,4 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) {
__ emit_int32(0); // nmethod guard value
}
-int C2HandleAnonOMOwnerStub::max_size() const {
- // Max size of stub has been determined by testing with 0 without using RISC-V compressed
- // instruction-set extension, in which case C2CodeStubList::emit() will throw an assertion
- // and report the actual size that is needed.
- return 20 DEBUG_ONLY(+8);
-}
-
-void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) {
- __ bind(entry());
- Register mon = monitor();
- Register t = tmp();
- assert(t != noreg, "need tmp register");
-
- // Fix owner to be the current thread.
- __ sd(xthread, Address(mon, ObjectMonitor::owner_offset()));
-
- // Pop owner object from lock-stack.
- __ lwu(t, Address(xthread, JavaThread::lock_stack_top_offset()));
- __ subw(t, t, oopSize);
-#ifdef ASSERT
- __ add(t0, xthread, t);
- __ sd(zr, Address(t0, 0));
-#endif
- __ sw(t, Address(xthread, JavaThread::lock_stack_top_offset()));
-
- __ j(continuation());
-}
-
#undef __
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index e2c9b9dd609e0..75f87e35adf41 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
Register oop = objectReg;
Register box = boxReg;
Register disp_hdr = tmp1Reg;
+ Register owner_addr = tmp1Reg;
Register tmp = tmp2Reg;
Label object_has_monitor;
// Finish fast lock successfully. MUST branch to with flag == 0
@@ -222,15 +223,33 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
j(unlocked);
bind(notRecursive);
- ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
- ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
- orr(t0, t0, disp_hdr); // Will be 0 if both are 0.
- bnez(t0, slow_path);
+ // Compute owner address.
+ la(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
- // need a release store here
- la(tmp, Address(tmp, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
- sd(zr, Address(tmp)); // set unowned
+ sd(zr, Address(owner_addr));
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
+ // Check if the entry lists are empty.
+ ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
+ ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset()));
+ orr(t0, t0, tmp1Reg);
+ beqz(t0, unlocked); // If so we are done.
+
+ // Check if there is a successor.
+ ld(t0, Address(tmp, ObjectMonitor::succ_offset()));
+ bnez(t0, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ mv(flag, 1);
+ j(slow_path);
bind(unlocked);
mv(flag, zr);
@@ -534,28 +553,35 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box,
bind(not_recursive);
- Label release;
const Register tmp2_owner_addr = tmp2;
// Compute owner address.
la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
+ // Set owner to null.
+ // Release to satisfy the JMM
+ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ sd(zr, Address(tmp2_owner_addr));
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
+
// Check if the entry lists are empty.
ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
orr(t0, t0, tmp3_t);
- beqz(t0, release);
+ beqz(t0, unlocked); // If so we are done.
- // The owner may be anonymous and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- sd(xthread, Address(tmp2_owner_addr));
- j(slow_path);
+ // Check if there is a successor.
+ ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset()));
+ bnez(tmp3_t, unlocked); // If so we are done.
- bind(release);
- // Set owner to null.
- membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
- sd(zr, Address(tmp2_owner_addr));
+ // Save the monitor pointer in the current thread, so we can try
+ // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+ sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ mv(flag, 1);
+ j(slow_path);
}
bind(unlocked);
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
index 062f80290626f..7036c44d99dc9 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -96,6 +99,55 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ pop_reg(saved_regs, sp);
}
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register tmp1, const Register tmp2) {
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address)
+ __ beqz(tmp1, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ sub(tmp1, tmp1, wordSize); // tmp1 := next index
+ __ sd(tmp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index
+ __ ld(tmp2, Address(thread, in_bytes(buffer_offset))); // tmp2 := buffer address
+ __ add(tmp2, tmp2, tmp1);
+ __ sd(value, Address(tmp2)); // *(buffer address + next index) := value
+}
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ lwu(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ lbu(tmp1, in_progress);
+ }
+}
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ beqz(pre_val, done, true);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp1, tmp2);
+ __ j(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -116,43 +168,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert_different_registers(obj, pre_val, tmp1, tmp2);
assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
- __ lwu(tmp1, in_progress);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ lbu(tmp1, in_progress);
- }
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ beqz(tmp1, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ beqz(pre_val, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ ld(tmp1, index); // tmp := *index_adr
- __ beqz(tmp1, runtime); // tmp == 0?
- // If yes, goto runtime
-
- __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
- __ sd(tmp1, index); // *index_adr := tmp
- __ ld(tmp2, buffer);
- __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ sd(pre_val, Address(tmp1, 0));
- __ j(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
@@ -171,6 +190,49 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ // Does store cross heap regions?
+ __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ beqz(tmp1, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ beqz(new_val, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
+ __ load_byte_map_base(tmp2); // tmp2 := card table base address
+ __ add(tmp1, tmp1, tmp2); // tmp1 := card address
+ __ lbu(tmp2, Address(tmp1)); // tmp2 := card
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(MacroAssembler::StoreLoad); // StoreLoad membar
+ __ lbu(tmp2, Address(tmp1)); // tmp2 := card
+ __ beqz(tmp2, done, true);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp1, tmp2, t0);
+ __ j(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -179,73 +241,119 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register tmp2) {
assert(thread == xthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
- assert(store_addr != noreg && new_val != noreg && tmp1 != noreg &&
- tmp2 != noreg, "expecting a register");
-
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
+ "expecting a register");
Label done;
Label runtime;
- // Does store cross heap regions?
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done (tmp2 holds the card value)
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
+ __ beq(tmp2, t0, done); // card == young_card_val?
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
- __ xorr(tmp1, store_addr, new_val);
- __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
- __ beqz(tmp1, done);
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr);
+ __ push_reg(saved, sp);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
+ __ pop_reg(saved, sp);
- // crosses regions, storing null?
+ __ bind(done);
+}
- __ beqz(new_val, done);
+#if defined(COMPILER2)
- // storing region crossing non-null, is card already dirty?
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mv(c_rarg0, arg);
+ }
+ __ mv(c_rarg1, xthread);
+ __ mv(t0, runtime_path);
+ __ jalr(t0);
+}
- const Register card_addr = tmp1;
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* stub) {
+ assert(thread == xthread, "must be");
+ assert_different_registers(obj, pre_val, tmp1, tmp2);
+ assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
- __ srli(card_addr, store_addr, CardTable::card_shift());
+ stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
- // get the address of the card
- __ load_byte_map_base(tmp2);
- __ add(card_addr, card_addr, tmp2);
- __ lbu(tmp2, Address(card_addr));
- __ mv(t0, (int)G1CardTable::g1_young_card_val());
- __ beq(tmp2, t0, done);
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ bnez(tmp1, *stub->entry(), true);
- assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+ __ bind(*stub->continuation());
+}
- __ membar(MacroAssembler::StoreLoad);
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+ Register tmp2 = stub->tmp2();
- __ lbu(tmp2, Address(card_addr));
- __ beqz(tmp2, done);
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ j(*stub->continuation());
+}
- __ sb(zr, Address(card_addr));
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ assert(thread == xthread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
+ "expecting a register");
- __ ld(t0, queue_index);
- __ beqz(t0, runtime);
- __ sub(t0, t0, wordSize);
- __ sd(t0, queue_index);
+ stub->initialize_registers(thread, tmp1, tmp2);
- __ ld(tmp2, buffer);
- __ add(t0, tmp2, t0);
- __ sd(card_addr, Address(t0, 0));
- __ j(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path) (tmp2 holds the card value)
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
+ __ bne(tmp2, t0, *stub->entry(), true);
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr);
- __ push_reg(saved, sp);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_reg(saved, sp);
+ __ bind(*stub->continuation());
+}
- __ bind(done);
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ j(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2) {
bool on_oop = is_reference_type(type);
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
index 96568994079dd..c7bee2ef6f3a8 100644
--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -72,6 +74,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
#endif
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif
+
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp2);
};
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
new file mode 100644
index 0000000000000..7a525323021dd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad
@@ -0,0 +1,564 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_riscv.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ Register tmp2,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, xthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
+}
+
+%}
+
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(STORE_COST);
+ format %{ "sd $src, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ sd($src$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(STORE_COST);
+ format %{ "sw $src, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ __ sw($src$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(STORE_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "sw $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp1$$Register, $src$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register);
+ }
+ __ sw($tmp1$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(istore_reg_mem);
+%}
+
+instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP and its Acq variant.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t"
+ "mv $res, $res == $oldval" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ assert_different_registers($newval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+ /*result as bool*/ true);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchg $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetP mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $preval$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(2 * VOLATILE_REF_COST);
+ format %{ "atomic_xchgwu $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgwu($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
+ match(Set preval (GetAndSetN mem newval));
+ effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "atomic_xchgwu_acq $preval, $newval, [$mem]" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */,
+ RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */);
+ __ atomic_xchgalwu($preval$$Register, $newval$$Register, $mem$$Register);
+ __ decode_heap_oop($tmp1$$Register, $newval$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_serial);
+%}
+
+instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(LOAD_COST + BRANCH_COST);
+ format %{ "ld $dst, $mem\t# ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ ld($dst$$Register, Address($mem$$Register));
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(iload_reg_mem);
+%}
+
+instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(LOAD_COST + BRANCH_COST);
+ format %{ "lwu $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ guarantee($mem$$disp == 0, "impossible encoding");
+ __ lwu($dst$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ decode_heap_oop($tmp1$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ }
+ %}
+ ins_pipe(iload_reg_mem);
+%}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index 9a79a92327723..cc73d14a756f2 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -70,10 +70,10 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
__ push_reg(saved_regs, sp);
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
src, dst, count);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
}
__ pop_reg(saved_regs, sp);
__ bind(done);
@@ -165,9 +165,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
// expand_call should be passed true.
if (expand_call) {
assert(pre_val != c_rarg1, "smashed arg");
- __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
}
__ pop_reg(saved, sp);
@@ -645,7 +645,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ bind(runtime);
__ push_call_clobbered_registers();
__ load_parameter(0, pre_val);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), pre_val, thread);
__ pop_call_clobbered_registers();
__ bind(done);
diff --git a/src/hotspot/cpu/riscv/gc/x/x_riscv.ad b/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
index ef02f301c6aeb..b93b7066425b9 100644
--- a/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/x/x_riscv.ad
@@ -52,11 +52,11 @@ static void x_load_barrier_slow_path(MacroAssembler* masm, const MachNode* node,
%}
// Load Pointer
-instruct xLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
+instruct xLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !ZGenerational && (n->as_Load()->barrier_data() != 0));
- effect(TEMP dst, TEMP tmp);
+ effect(TEMP dst, TEMP tmp, KILL cr);
ins_cost(4 * DEFAULT_COST);
@@ -71,11 +71,11 @@ instruct xLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
ins_pipe(iload_reg_mem);
%}
-instruct xCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
+instruct xCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && !ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == XLoadBarrierStrong);
- effect(TEMP_DEF res, TEMP tmp);
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -105,11 +105,11 @@ instruct xCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
ins_pipe(pipe_slow);
%}
-instruct xCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
+instruct xCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && !ZGenerational && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == XLoadBarrierStrong));
- effect(TEMP_DEF res, TEMP tmp);
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -139,10 +139,10 @@ instruct xCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
ins_pipe(pipe_slow);
%}
-instruct xCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
+instruct xCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && !ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == XLoadBarrierStrong);
- effect(TEMP_DEF res, TEMP tmp);
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -167,10 +167,10 @@ instruct xCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
ins_pipe(pipe_slow);
%}
-instruct xCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
+instruct xCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && !ZGenerational && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == XLoadBarrierStrong);
- effect(TEMP_DEF res, TEMP tmp);
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -195,10 +195,10 @@ instruct xCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
ins_pipe(pipe_slow);
%}
-instruct xGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
+instruct xGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && !ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP_DEF prev, TEMP tmp);
+ effect(TEMP_DEF prev, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -212,10 +212,10 @@ instruct xGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
ins_pipe(pipe_serial);
%}
-instruct xGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
+instruct xGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && !ZGenerational && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0));
- effect(TEMP_DEF prev, TEMP tmp);
+ effect(TEMP_DEF prev, TEMP tmp, KILL cr);
ins_cost(VOLATILE_REF_COST);
diff --git a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
index ef13676b02ed8..df111723d56b6 100644
--- a/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zAddress_riscv.cpp
@@ -92,7 +92,7 @@ static size_t probe_valid_max_address_bit() {
}
size_t ZPlatformAddressOffsetBits() {
- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
+ static const size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1;
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
const size_t min_address_offset_bits = max_address_offset_bits - 2;
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
index 8fbeaa45371d1..4a82bd9c2d09a 100644
--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
@@ -636,8 +636,20 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
ShouldNotReachHere();
}
- // A full fence is generated before icache_flush by default in invalidate_word
- ICache::invalidate_range(addr, bytes);
+ // If we are using UseCtxFencei no ICache invalidation is needed here.
+  // Instead every hart will perform a fence.i either by a Java thread
+ // (due to patching epoch will take it to slow path),
+ // or by the kernel when a Java thread is moved to a hart.
+  // The instruction stream changes must only happen before the disarm of
+  // the nmethod barrier, where the disarm has a leading full two-way fence.
+ // If this is performed during a safepoint, all Java threads will emit a fence.i
+ // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
+ if (!UseCtxFencei) {
+ // ICache invalidation is a serialization point.
+ // The above patching of instructions happens before the invalidation.
+    // Hence it has a leading full two-way fence (wr, wr).
+ ICache::invalidate_range(addr, bytes);
+ }
}
#ifdef COMPILER2
@@ -749,6 +761,8 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
} else if (stub->is_atomic()) {
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
+ } else if (stub->is_nokeepalive()) {
+ __ la(t0, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
} else {
__ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
}
diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
index 4c94e504475ee..24669f45eb4d2 100644
--- a/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv.ad
@@ -82,7 +82,8 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
z_color(masm, node, rnew_zpointer, rnew_zaddress, tmp);
} else {
bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
- ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
+ bool is_nokeepalive = (node->barrier_data() & ZBarrierNoKeepalive) != 0;
+ ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic, is_nokeepalive);
ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, tmp, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
}
@@ -90,11 +91,11 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
%}
// Load Pointer
-instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
+instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && ZGenerational && n->as_Load()->barrier_data() != 0);
- effect(TEMP dst, TEMP tmp);
+ effect(TEMP dst, TEMP tmp, KILL cr);
ins_cost(4 * DEFAULT_COST);
@@ -110,11 +111,11 @@ instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp)
%}
// Store Pointer
-instruct zStoreP(memory mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2)
+instruct zStoreP(memory mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr)
%{
predicate(UseZGC && ZGenerational && n->as_Store()->barrier_data() != 0);
match(Set mem (StoreP mem src));
- effect(TEMP tmp1, TEMP tmp2);
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
ins_cost(125); // XXX
format %{ "sd $mem, $src\t# ptr" %}
@@ -127,11 +128,11 @@ instruct zStoreP(memory mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2)
%}
instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval,
- iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1) %{
+ iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res);
+ effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -150,11 +151,11 @@ instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newva
%}
instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval,
- iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1) %{
+ iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && ZGenerational && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res);
+ effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -173,10 +174,10 @@ instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne
%}
instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval,
- iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1) %{
+ iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res);
+ effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -195,10 +196,10 @@ instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP n
%}
instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval,
- iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1) %{
+ iRegPNoSp oldval_tmp, iRegPNoSp newval_tmp, iRegPNoSp tmp1, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && ZGenerational && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res);
+ effect(TEMP oldval_tmp, TEMP newval_tmp, TEMP tmp1, TEMP_DEF res, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -216,10 +217,10 @@ instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iReg
ins_pipe(pipe_slow);
%}
-instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && ZGenerational && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP_DEF prev, TEMP tmp);
+ effect(TEMP_DEF prev, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
@@ -234,10 +235,10 @@ instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
ins_pipe(pipe_serial);
%}
-instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp) %{
+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, iRegPNoSp tmp, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && ZGenerational && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0);
- effect(TEMP_DEF prev, TEMP tmp);
+ effect(TEMP_DEF prev, TEMP tmp, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
index c2585f2d1618d..dd31de14704ab 100644
--- a/src/hotspot/cpu/riscv/globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -122,6 +122,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
"Use RVV instructions for left/right shift of BigInteger") \
product(bool, UseTrampolines, false, EXPERIMENTAL, \
- "Far calls uses jal to trampoline.")
+ "Far calls uses jal to trampoline.") \
+ product(bool, UseCtxFencei, false, EXPERIMENTAL, \
+ "Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")
#endif // CPU_RISCV_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index cbca980288984..46701b6ede387 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -493,6 +493,7 @@ void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_
// Fast path check: class is fully initialized
lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
sub(tmp, tmp, InstanceKlass::fully_initialized);
beqz(tmp, *L_fast_path);
@@ -1454,6 +1455,105 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1,
xorr(crc, crc, tmp2);
}
+
+#ifdef COMPILER2
+// This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
+// To make it, following steps are taken:
+// 1. in zcrc32.c, modify N to 16 and related code,
+// 2. re-generate the tables needed, we use tables of (N == 16, W == 4)
+// 3. finally vectorize the code (original implementation in zcrc32.c is just scalar code).
+// New tables for vector version is after table3.
+void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register len,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+ Register table0, Register table3) {
+ assert_different_registers(t1, crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp5, table0, table3);
+ const int N = 16, W = 4;
+ const int64_t single_table_size = 256;
+ const Register blks = tmp2;
+ const Register tmpTable = tmp3, tableN16 = tmp4;
+ const VectorRegister vcrc = v4, vword = v8, vtmp = v12;
+ Label VectorLoop;
+ Label LastBlock;
+
+ add(tableN16, table3, 1*single_table_size*sizeof(juint), tmp1);
+ mv(tmp5, 0xff);
+
+ if (MaxVectorSize == 16) {
+ vsetivli(zr, N, Assembler::e32, Assembler::m4, Assembler::ma, Assembler::ta);
+ } else if (MaxVectorSize == 32) {
+ vsetivli(zr, N, Assembler::e32, Assembler::m2, Assembler::ma, Assembler::ta);
+ } else {
+ assert(MaxVectorSize > 32, "sanity");
+ vsetivli(zr, N, Assembler::e32, Assembler::m1, Assembler::ma, Assembler::ta);
+ }
+
+ vmv_v_x(vcrc, zr);
+ vmv_s_x(vcrc, crc);
+
+ // multiple of 64
+ srli(blks, len, 6);
+ slli(t1, blks, 6);
+ sub(len, len, t1);
+ sub(blks, blks, 1);
+ blez(blks, LastBlock);
+
+ bind(VectorLoop);
+ {
+ mv(tmpTable, tableN16);
+
+ vle32_v(vword, buf);
+ vxor_vv(vword, vword, vcrc);
+
+ addi(buf, buf, N*4);
+
+ vand_vx(vtmp, vword, tmp5);
+ vsll_vi(vtmp, vtmp, 2);
+ vluxei32_v(vcrc, tmpTable, vtmp);
+
+ mv(tmp1, 1);
+ for (int k = 1; k < W; k++) {
+ addi(tmpTable, tmpTable, single_table_size*4);
+
+ slli(t1, tmp1, 3);
+ vsrl_vx(vtmp, vword, t1);
+
+ vand_vx(vtmp, vtmp, tmp5);
+ vsll_vi(vtmp, vtmp, 2);
+ vluxei32_v(vtmp, tmpTable, vtmp);
+
+ vxor_vv(vcrc, vcrc, vtmp);
+
+ addi(tmp1, tmp1, 1);
+ }
+
+ sub(blks, blks, 1);
+ bgtz(blks, VectorLoop);
+ }
+
+ bind(LastBlock);
+ {
+ vle32_v(vtmp, buf);
+ vxor_vv(vcrc, vcrc, vtmp);
+ mv(crc, zr);
+ for (int i = 0; i < N; i++) {
+ vmv_x_s(tmp2, vcrc);
+ // in vmv_x_s, the value is sign-extended to SEW bits, but we need zero-extended here.
+ zext_w(tmp2, tmp2);
+ vslidedown_vi(vcrc, vcrc, 1);
+ xorr(crc, crc, tmp2);
+ for (int j = 0; j < W; j++) {
+ andr(t1, crc, tmp5);
+ shadd(t1, t1, table0, tmp1, 2);
+ lwu(t1, Address(t1, 0));
+ srli(tmp2, crc, 8);
+ xorr(crc, tmp2, t1);
+ }
+ }
+ addi(buf, buf, N*4);
+ }
+}
+#endif // COMPILER2
+
/**
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
@@ -1465,33 +1565,41 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
Register table0, Register table1, Register table2, Register table3,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
- Label L_by16_loop, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
+ Label L_vector_entry,
+ L_unroll_loop,
+ L_by4_loop_entry, L_by4_loop,
+ L_by1_loop, L_exit;
+ const int64_t single_table_size = 256;
const int64_t unroll = 16;
const int64_t unroll_words = unroll*wordSize;
mv(tmp5, right_32_bits);
- subw(len, len, unroll_words);
andn(crc, tmp5, crc);
const ExternalAddress table_addr = StubRoutines::crc_table_addr();
la(table0, table_addr);
- add(table1, table0, 1*256*sizeof(juint), tmp1);
- add(table2, table0, 2*256*sizeof(juint), tmp1);
- add(table3, table2, 1*256*sizeof(juint), tmp1);
+ add(table1, table0, 1*single_table_size*sizeof(juint), tmp1);
+ add(table2, table0, 2*single_table_size*sizeof(juint), tmp1);
+ add(table3, table2, 1*single_table_size*sizeof(juint), tmp1);
- bge(len, zr, L_unroll_loop_entry);
- addiw(len, len, unroll_words-4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- bgt(len, zr, L_by1_loop);
- j(L_exit);
+#ifdef COMPILER2
+ if (UseRVV) {
+ const int64_t tmp_limit = MaxVectorSize >= 32 ? unroll_words*3 : unroll_words*5;
+ mv(tmp1, tmp_limit);
+ bge(len, tmp1, L_vector_entry);
+ }
+#endif // COMPILER2
+
+ mv(tmp1, unroll_words);
+ blt(len, tmp1, L_by4_loop_entry);
+
+ const Register loop_buf_end = tmp3;
align(CodeEntryAlignment);
- bind(L_unroll_loop_entry);
- const Register buf_end = tmp3;
- add(buf_end, buf, len); // buf_end will be used as endpoint for loop below
+ // Entry for L_unroll_loop
+ add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
andi(len, len, unroll_words-1); // len = (len % unroll_words)
- sub(len, len, unroll_words); // Length after all iterations
+ sub(loop_buf_end, loop_buf_end, len);
bind(L_unroll_loop);
for (int i = 0; i < unroll; i++) {
ld(tmp1, Address(buf, i*wordSize));
@@ -1500,44 +1608,52 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
}
addi(buf, buf, unroll_words);
- ble(buf, buf_end, L_unroll_loop);
- addiw(len, len, unroll_words-4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- bgt(len, zr, L_by1_loop);
- j(L_exit);
-
+ blt(buf, loop_buf_end, L_unroll_loop);
+
+ bind(L_by4_loop_entry);
+ mv(tmp1, 4);
+ blt(len, tmp1, L_by1_loop);
+ add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
+ andi(len, len, 3);
+ sub(loop_buf_end, loop_buf_end, len);
bind(L_by4_loop);
lwu(tmp1, Address(buf));
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
- subw(len, len, 4);
addi(buf, buf, 4);
- bge(len, zr, L_by4_loop);
- addiw(len, len, 4);
- ble(len, zr, L_exit);
+ blt(buf, loop_buf_end, L_by4_loop);
bind(L_by1_loop);
+ beqz(len, L_exit);
+
subw(len, len, 1);
lwu(tmp1, Address(buf));
andi(tmp2, tmp1, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
+ beqz(len, L_exit);
subw(len, len, 1);
srli(tmp2, tmp1, 8);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
+ beqz(len, L_exit);
subw(len, len, 1);
srli(tmp2, tmp1, 16);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
- ble(len, zr, L_exit);
- srli(tmp2, tmp1, 24);
- andi(tmp2, tmp2, right_8_bits);
- update_byte_crc32(crc, tmp2, table0);
+#ifdef COMPILER2
+ // put vector code here, otherwise "offset is too large" error occurs.
+ if (UseRVV) {
+ // only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`.
+ j(L_exit);
+
+ bind(L_vector_entry);
+ vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);
+
+ bgtz(len, L_by4_loop_entry);
+ }
+#endif // COMPILER2
bind(L_exit);
andn(crc, tmp5, crc);
@@ -1853,9 +1969,9 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
void MacroAssembler::reinit_heapbase() {
if (UseCompressedOops) {
if (Universe::is_fully_initialized()) {
- mv(xheapbase, CompressedOops::ptrs_base());
+ mv(xheapbase, CompressedOops::base());
} else {
- ExternalAddress target(CompressedOops::ptrs_base_addr());
+ ExternalAddress target(CompressedOops::base_addr());
relocate(target.rspec(), [&] {
int32_t offset;
la(xheapbase, target.target(), offset);
@@ -1968,23 +2084,11 @@ void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register
}
void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
- if (is_simm12(-decrement)) {
- addi(Rd, Rn, -decrement);
- } else {
- assert_different_registers(Rn, temp);
- li(temp, decrement);
- sub(Rd, Rn, temp);
- }
+ add(Rd, Rn, -decrement, temp);
}
void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
- if (is_simm12(-decrement)) {
- addiw(Rd, Rn, -decrement);
- } else {
- assert_different_registers(Rn, temp);
- li(temp, decrement);
- subw(Rd, Rn, temp);
- }
+ addw(Rd, Rn, -decrement, temp);
}
void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
@@ -2844,7 +2948,7 @@ int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
return idivq_offset;
}
-// Look up the method for a megamorpic invkkeinterface call.
+// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by .
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
@@ -2859,9 +2963,9 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
assert_different_registers(recv_klass, intf_klass, scan_tmp);
assert_different_registers(method_result, intf_klass, scan_tmp);
assert(recv_klass != method_result || !return_method,
- "recv_klass can be destroyed when mehtid isn't needed");
+ "recv_klass can be destroyed when method isn't needed");
assert(itable_index.is_constant() || itable_index.as_register() == method_result,
- "caller must be same register for non-constant itable index as for method");
+ "caller must use same register for non-constant itable index as for method");
// Compute start of first itableOffsetEntry (which is at the end of the vtable).
int vtable_base = in_bytes(Klass::vtable_start_offset());
@@ -3054,6 +3158,13 @@ void MacroAssembler::membar(uint32_t order_constraint) {
}
}
+void MacroAssembler::cmodx_fence() {
+ BLOCK_COMMENT("cmodx fence");
+ if (VM_Version::supports_fencei_barrier()) {
+ Assembler::fencei();
+ }
+}
+
// Form an address from base + offset in Rd. Rd my or may not
// actually be used: you must use the Address that is returned. It
// is up to you to ensure that the shift provided matches the size
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index c3161beea117d..fd174f241eb0b 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -431,6 +431,8 @@ class MacroAssembler: public Assembler {
}
}
+ void cmodx_fence();
+
void pause() {
Assembler::fence(w, 0);
}
@@ -1321,6 +1323,10 @@ class MacroAssembler: public Assembler {
void update_byte_crc32(Register crc, Register val, Register table);
#ifdef COMPILER2
+ void vector_update_crc32(Register crc, Register buf, Register len,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+ Register table0, Register table3);
+
void mul_add(Register out, Register in, Register offset,
Register len, Register k, Register tmp);
void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
@@ -1350,7 +1356,7 @@ class MacroAssembler: public Assembler {
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
Register tmp5, Register tmp6, Register product_hi);
-#endif
+#endif // COMPILER2
void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
index deeb771d83bb8..f638db9f0bfe4 100644
--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -28,6 +28,7 @@
#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.inline.hpp"
#include "classfile/vmClasses.hpp"
+#include "compiler/disassembler.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "memory/allocation.inline.hpp"
@@ -37,7 +38,7 @@
#include "runtime/frame.inline.hpp"
#include "runtime/stubRoutines.hpp"
-#define __ _masm->
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@@ -444,7 +445,6 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
__ far_jump(RuntimeAddress(SharedRuntime::throw_IncompatibleClassChangeError_entry()));
}
}
-
}
#ifndef PRODUCT
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
index d0903c96e2271..18b4302c7e68e 100644
--- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
@@ -55,7 +55,21 @@ void Relocation::pd_set_data_value(address x, bool verify_only) {
bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
break;
}
- ICache::invalidate_range(addr(), bytes);
+
+ // If we are using UseCtxFencei no ICache invalidation is needed here.
+  // Instead every hart will perform a fence.i either by a Java thread
+ // (due to patching epoch will take it to slow path),
+ // or by the kernel when a Java thread is moved to a hart.
+  // The instruction stream changes must only happen before the disarm of
+  // the nmethod barrier, where the disarm has a leading full two-way fence.
+ // If this is performed during a safepoint, all Java threads will emit a fence.i
+ // before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
+ if (!UseCtxFencei) {
+ // ICache invalidation is a serialization point.
+ // The above patching of instructions happens before the invalidation.
+    // Hence it has a leading full two-way fence (wr, wr).
+ ICache::invalidate_range(addr(), bytes);
+ }
}
address Relocation::pd_call_destination(address orig_addr) {
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 05f55fd0da7af..54d1f1c05736d 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1966,18 +1966,18 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return &_VMASK_REG_mask;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return new TypeVectMask(elemTy, length);
-}
-
// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
- return false;
+ return EnableVectorSupport && UseVectorStubs;
}
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
- Unimplemented();
- return OptoRegPair(0, 0);
+ assert(EnableVectorSupport && UseVectorStubs, "sanity");
+ assert(ideal_reg == Op_VecA, "sanity");
+ // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
+ int lo = V8_num;
+ int hi = V8_K_num;
+ return OptoRegPair(hi, lo);
}
// Is this branch offset short enough that a short branch can be used?
@@ -2224,7 +2224,8 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
assert_cond(m != nullptr);
if (is_vshift_con_pattern(n, m) || // ShiftV src (ShiftCntV con)
is_vector_bitwise_not_pattern(n, m) ||
- is_vector_scalar_bitwise_pattern(n, m)) {
+ is_vector_scalar_bitwise_pattern(n, m) ||
+ is_encode_and_store_pattern(n, m)) {
mstack.push(m, Visit);
return true;
}
@@ -4785,6 +4786,7 @@ instruct loadP(iRegPNoSp dst, memory mem)
// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
+ predicate(n->as_Load()->barrier_data() == 0);
match(Set dst (LoadN mem));
ins_cost(LOAD_COST);
@@ -5033,41 +5035,6 @@ instruct loadConD0(fRegD dst, immD0 con) %{
ins_pipe(fp_load_constant_d);
%}
-// Store Instructions
-// Store CMS card-mark Immediate
-instruct storeimmCM0(immI0 zero, memory mem)
-%{
- match(Set mem (StoreCM mem zero));
-
- ins_cost(STORE_COST);
- format %{ "storestore (elided)\n\t"
- "sb zr, $mem\t# byte, #@storeimmCM0" %}
-
- ins_encode %{
- __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
- %}
-
- ins_pipe(istore_mem);
-%}
-
-// Store CMS card-mark Immediate with intervening StoreStore
-// needed when using CMS with no conditional card marking
-instruct storeimmCM0_ordered(immI0 zero, memory mem)
-%{
- match(Set mem (StoreCM mem zero));
-
- ins_cost(ALU_COST + STORE_COST);
- format %{ "membar(StoreStore)\n\t"
- "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %}
-
- ins_encode %{
- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
- __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
- %}
-
- ins_pipe(istore_mem);
-%}
-
// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
@@ -5220,6 +5187,7 @@ instruct storeimmP0(immP0 zero, memory mem)
// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(STORE_COST);
@@ -5234,6 +5202,7 @@ instruct storeN(iRegN src, memory mem)
instruct storeImmN0(immN0 zero, memory mem)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem zero));
ins_cost(STORE_COST);
@@ -5424,6 +5393,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
@@ -5545,7 +5515,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
@@ -5653,6 +5623,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
@@ -5786,7 +5757,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
@@ -5914,6 +5885,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
@@ -6045,7 +6017,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
@@ -6117,6 +6089,8 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
%{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
+
match(Set prev (GetAndSetN mem newv));
ins_cost(ALU_COST);
@@ -6182,7 +6156,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
%{
- predicate(needs_acquiring_load_reserved(n));
+ predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetN mem newv));
@@ -10066,6 +10040,23 @@ instruct CallLeafDirect(method meth, rFlagsReg cr)
ins_pipe(pipe_class_call);
%}
+// Call Runtime Instruction without safepoint and with vector arguments
+
+instruct CallLeafDirectVector(method meth, rFlagsReg cr)
+%{
+ match(CallLeafVector);
+
+ effect(USE meth, KILL cr);
+
+ ins_cost(BRANCH_COST);
+
+ format %{ "CALL, runtime leaf vector $meth" %}
+
+ ins_encode(riscv_enc_java_to_runtime(meth));
+
+ ins_pipe(pipe_class_call);
+%}
+
// Call Runtime Instruction
instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 54947f6bf9a19..510c0ff5d4646 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -4895,11 +4895,10 @@ instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{
effect(TEMP_DEF dst);
format %{ "gather_loadS $dst, $mem, $idx" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this));
- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base),
as_VectorRegister($dst$$reg));
%}
@@ -4929,11 +4928,10 @@ instruct gather_loadS_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, v
effect(TEMP_DEF dst, TEMP tmp);
format %{ "gather_loadS_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg),
as_VectorRegister($dst$$reg));
__ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base),
@@ -4969,11 +4967,10 @@ instruct scatter_storeS(indirect mem, vReg src, vReg idx, vReg tmp) %{
effect(TEMP tmp);
format %{ "scatter_storeS $mem, $idx, $src\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this, $src);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this, $src));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base),
as_VectorRegister($tmp$$reg));
%}
@@ -5003,11 +5000,10 @@ instruct scatter_storeS_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0,
effect(TEMP tmp);
format %{ "scatter_storeS_masked $mem, $idx, $src, $v0\t# KILL $tmp" %}
ins_encode %{
- __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg));
BasicType bt = Matcher::vector_element_basic_type(this, $src);
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
__ vsetvli_helper(bt, Matcher::vector_length(this, $src));
- __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew);
+ __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg), (int)sew);
__ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base),
as_VectorRegister($tmp$$reg), Assembler::v0_t);
%}
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 879fd92272279..2b629fcfcb293 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -666,7 +666,20 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
uint num_bits,
uint total_args_passed) {
- Unimplemented();
+ assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
+ assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
+
+ // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
+ static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
+ v8, v9, v10, v11, v12, v13, v14, v15,
+ v16, v17, v18, v19, v20, v21, v22, v23
+ };
+
+ const int next_reg_val = 3;
+ for (uint i = 0; i < total_args_passed; i++) {
+ VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
+ regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
+ }
return 0;
}
@@ -2110,7 +2123,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 8792dea7de5eb..bdb92e0b835f4 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -2428,6 +2428,14 @@ class StubGenerator: public StubCodeGenerator {
__ la(t1, ExternalAddress(bs_asm->patching_epoch_addr()));
__ lwu(t1, t1);
__ sw(t1, thread_epoch_addr);
+ // There are two ways this can work:
+ // - The writer did system icache shootdown after the instruction stream update.
+ // Hence do nothing.
+ // - The writer trusts us to make sure our icache is in sync before entering.
+ // Hence use cmodx fence (fence.i, may change).
+ if (UseCtxFencei) {
+ __ cmodx_fence();
+ }
__ membar(__ LoadLoad);
}
@@ -4474,7 +4482,7 @@ class StubGenerator: public StubCodeGenerator {
RegSet reg_cache_saved_regs = RegSet::of(x24, x25, x26, x27); // s8, s9, s10, s11
RegSet reg_cache_regs;
reg_cache_regs += reg_cache_saved_regs;
- reg_cache_regs += RegSet::of(x28, x29, x30, x31); // t3, t4, t5, t6
+ reg_cache_regs += RegSet::of(t3, t4, t5, t6);
BufRegCache reg_cache(_masm, reg_cache_regs);
RegSet saved_regs;
@@ -5331,7 +5339,7 @@ class StubGenerator: public StubCodeGenerator {
* NOTE: each field will occupy a single vector register group
*/
void base64_vector_decode_round(Register src, Register dst, Register codec,
- Register size, Register stepSrc, Register stepDst, Register failedIdx, Register minusOne,
+ Register size, Register stepSrc, Register stepDst, Register failedIdx,
VectorRegister inputV1, VectorRegister inputV2, VectorRegister inputV3, VectorRegister inputV4,
VectorRegister idxV1, VectorRegister idxV2, VectorRegister idxV3, VectorRegister idxV4,
VectorRegister outputV1, VectorRegister outputV2, VectorRegister outputV3,
@@ -5358,8 +5366,11 @@ class StubGenerator: public StubCodeGenerator {
__ vor_vv(outputV1, outputV1, outputV2);
__ vmseq_vi(v0, outputV1, -1);
__ vfirst_m(failedIdx, v0);
- Label NoFailure;
- __ beq(failedIdx, minusOne, NoFailure);
+ Label NoFailure, FailureAtIdx0;
+ // valid value can only be -1 when < 0
+ __ bltz(failedIdx, NoFailure);
+ // when the first data (at index 0) fails, no need to process data anymore
+ __ beqz(failedIdx, FailureAtIdx0);
__ vsetvli(x0, failedIdx, Assembler::e8, lmul, Assembler::mu, Assembler::tu);
__ slli(stepDst, failedIdx, 1);
__ add(stepDst, failedIdx, stepDst);
@@ -5382,6 +5393,7 @@ class StubGenerator: public StubCodeGenerator {
// dst = dst + register_group_len_bytes * 3
__ add(dst, dst, stepDst);
+ __ BIND(FailureAtIdx0);
}
/**
@@ -5450,8 +5462,8 @@ class StubGenerator: public StubCodeGenerator {
Register isMIME = c_rarg6;
Register codec = c_rarg7;
- Register dstBackup = x31;
- Register length = x28; // t3, total length of src data in bytes
+ Register dstBackup = t6;
+ Register length = t3; // total length of src data in bytes
Label ProcessData, Exit;
Label ProcessScalar, ScalarLoop;
@@ -5486,10 +5498,8 @@ class StubGenerator: public StubCodeGenerator {
Register stepSrcM1 = send;
Register stepSrcM2 = doff;
Register stepDst = isURL;
- Register size = x29; // t4
- Register minusOne = x30; // t5
+ Register size = t4;
- __ mv(minusOne, -1);
__ mv(size, MaxVectorSize * 2);
__ mv(stepSrcM1, MaxVectorSize * 4);
__ slli(stepSrcM2, stepSrcM1, 1);
@@ -5501,7 +5511,7 @@ class StubGenerator: public StubCodeGenerator {
// Assembler::m2
__ BIND(ProcessM2);
base64_vector_decode_round(src, dst, codec,
- size, stepSrcM2, stepDst, failedIdx, minusOne,
+ size, stepSrcM2, stepDst, failedIdx,
v2, v4, v6, v8, // inputs
v10, v12, v14, v16, // indexes
v18, v20, v22, // outputs
@@ -5509,7 +5519,8 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, length, stepSrcM2);
// error check
- __ bne(failedIdx, minusOne, Exit);
+ // valid value of failedIdx can only be -1 when < 0
+ __ bgez(failedIdx, Exit);
__ bge(length, stepSrcM2, ProcessM2);
@@ -5521,7 +5532,7 @@ class StubGenerator: public StubCodeGenerator {
__ srli(size, size, 1);
__ srli(stepDst, stepDst, 1);
base64_vector_decode_round(src, dst, codec,
- size, stepSrcM1, stepDst, failedIdx, minusOne,
+ size, stepSrcM1, stepDst, failedIdx,
v1, v2, v3, v4, // inputs
v5, v6, v7, v8, // indexes
v9, v10, v11, // outputs
@@ -5529,7 +5540,8 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, length, stepSrcM1);
// error check
- __ bne(failedIdx, minusOne, Exit);
+ // valid value of failedIdx can only be -1 when < 0
+ __ bgez(failedIdx, Exit);
__ BIND(ProcessScalar);
__ beqz(length, Exit);
@@ -5538,7 +5550,7 @@ class StubGenerator: public StubCodeGenerator {
// scalar version
{
Register byte0 = soff, byte1 = send, byte2 = doff, byte3 = isURL;
- Register combined32Bits = x29; // t5
+ Register combined32Bits = t4;
// encoded: [byte0[5:0] : byte1[5:0] : byte2[5:0]] : byte3[5:0]] =>
// plain: [byte0[5:0]+byte1[5:4] : byte1[3:0]+byte2[5:2] : byte2[1:0]+byte3[5:0]]
@@ -5696,10 +5708,10 @@ class StubGenerator: public StubCodeGenerator {
Register nmax = c_rarg4;
Register base = c_rarg5;
Register count = c_rarg6;
- Register temp0 = x28; // t3
- Register temp1 = x29; // t4
- Register temp2 = x30; // t5
- Register temp3 = x31; // t6
+ Register temp0 = t3;
+ Register temp1 = t4;
+ Register temp2 = t5;
+ Register temp3 = t6;
VectorRegister vzero = v31;
VectorRegister vbytes = v8; // group: v8, v9, v10, v11
@@ -6059,6 +6071,58 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}
+ void generate_vector_math_stubs() {
+ if (!UseRVV) {
+ log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
+ return;
+ }
+
+ // Get native vector math stub routine addresses
+ void* libsleef = nullptr;
+ char ebuf[1024];
+ char dll_name[JVM_MAXPATHLEN];
+ if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
+ libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
+ }
+ if (libsleef == nullptr) {
+ log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
+ return;
+ }
+
+ // Method naming convention
+ //   All the methods are named as <op><type>_<precision><vlen>
+ //
+ //   Where:
+ //     <op>        is the operation name, e.g. sin, cos
+ //     <type>      is to indicate float/double
+ //                 "fx/dx" for vector float/double operation
+ //     <precision> is the precision level
+ //                 "u10/u05" represents 1.0/0.5 ULP error bounds
+ //                 We use "u10" for all operations by default
+ //                 But for those functions that do not have u10 support, we use "u05" instead
+ //     <vlen>      rvv, indicates riscv vector extension
+ //
+ //   e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
+ //
+ log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
+
+ for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+ int vop = VectorSupport::VECTOR_OP_MATH_START + op;
+ if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
+ continue;
+ }
+
+ // The native library does not support u10 level of "hypot".
+ const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
+
+ snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
+ StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
+
+ snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
+ StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
+ }
+ }
+
#endif // COMPILER2
/**
@@ -6080,26 +6144,17 @@ static const int64_t right_3_bits = right_n_bits(3);
address start = __ pc();
+ // input parameters
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
const Register len = c_rarg2; // length
- const Register table0 = c_rarg3; // crc_table address
- const Register table1 = c_rarg4;
- const Register table2 = c_rarg5;
- const Register table3 = c_rarg6;
-
- const Register tmp1 = c_rarg7;
- const Register tmp2 = t2;
- const Register tmp3 = x28; // t3
- const Register tmp4 = x29; // t4
- const Register tmp5 = x30; // t5
- const Register tmp6 = x31; // t6
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
- __ kernel_crc32(crc, buf, len, table0, table1, table2,
- table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+ __ kernel_crc32(crc, buf, len,
+ c_rarg3, c_rarg4, c_rarg5, c_rarg6, // tmp's for tables
+ c_rarg7, t2, t3, t4, t5, t6); // misc tmps
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret();
@@ -6121,6 +6176,29 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}
+ // load Method* target of MethodHandle
+ // j_rarg0 = jobject receiver
+ // xmethod = Method* result
+ address generate_upcall_stub_load_target() {
+
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, t0, t1);
+ // Load target method from receiver
+ __ load_heap_oop(xmethod, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), t0, t1);
+ __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_LambdaForm::vmentry_offset()), t0, t1);
+ __ load_heap_oop(xmethod, Address(xmethod, java_lang_invoke_MemberName::method_offset()), t0, t1);
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod,
+ Address(xmethod, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ ret();
+
+ return start;
+ }
+
#undef __
// Initialization
@@ -6186,6 +6264,7 @@ static const int64_t right_3_bits = right_n_bits(3);
#endif // COMPILER2
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
StubRoutines::riscv::set_completed();
}
@@ -6264,6 +6343,8 @@ static const int64_t right_3_bits = right_n_bits(3);
generate_string_indexof_stubs();
+ generate_vector_math_stubs();
+
#endif // COMPILER2
}
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
index 05bdeaf757078..6d5492b86b3c3 100644
--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
@@ -276,4 +276,219 @@ ATTRIBUTE_ALIGNED(4096) juint StubRoutines::riscv::_crc_table[] =
0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
0xde0506f1UL,
+
+ // Tables for vector version
+ // This improvement (vectorization) is based on java.base/share/native/libzip/zlib/zcrc32.c.
+ // To make it, the following steps are taken:
+ // To make it, following steps are taken:
+ // 1. in zcrc32.c, modify N to 16 and related code,
+ // 2. re-generate the tables needed, we use tables of (N == 16, W == 4)
+ // 3. finally vectorize the code (original implementation in zcrc32.c is just scalar code).
+ 0x00000000, 0x8f352d95, 0xc51b5d6b, 0x4a2e70fe, 0x5147bc97,
+ 0xde729102, 0x945ce1fc, 0x1b69cc69, 0xa28f792e, 0x2dba54bb,
+ 0x67942445, 0xe8a109d0, 0xf3c8c5b9, 0x7cfde82c, 0x36d398d2,
+ 0xb9e6b547, 0x9e6ff41d, 0x115ad988, 0x5b74a976, 0xd44184e3,
+ 0xcf28488a, 0x401d651f, 0x0a3315e1, 0x85063874, 0x3ce08d33,
+ 0xb3d5a0a6, 0xf9fbd058, 0x76cefdcd, 0x6da731a4, 0xe2921c31,
+ 0xa8bc6ccf, 0x2789415a, 0xe7aeee7b, 0x689bc3ee, 0x22b5b310,
+ 0xad809e85, 0xb6e952ec, 0x39dc7f79, 0x73f20f87, 0xfcc72212,
+ 0x45219755, 0xca14bac0, 0x803aca3e, 0x0f0fe7ab, 0x14662bc2,
+ 0x9b530657, 0xd17d76a9, 0x5e485b3c, 0x79c11a66, 0xf6f437f3,
+ 0xbcda470d, 0x33ef6a98, 0x2886a6f1, 0xa7b38b64, 0xed9dfb9a,
+ 0x62a8d60f, 0xdb4e6348, 0x547b4edd, 0x1e553e23, 0x916013b6,
+ 0x8a09dfdf, 0x053cf24a, 0x4f1282b4, 0xc027af21, 0x142cdab7,
+ 0x9b19f722, 0xd13787dc, 0x5e02aa49, 0x456b6620, 0xca5e4bb5,
+ 0x80703b4b, 0x0f4516de, 0xb6a3a399, 0x39968e0c, 0x73b8fef2,
+ 0xfc8dd367, 0xe7e41f0e, 0x68d1329b, 0x22ff4265, 0xadca6ff0,
+ 0x8a432eaa, 0x0576033f, 0x4f5873c1, 0xc06d5e54, 0xdb04923d,
+ 0x5431bfa8, 0x1e1fcf56, 0x912ae2c3, 0x28cc5784, 0xa7f97a11,
+ 0xedd70aef, 0x62e2277a, 0x798beb13, 0xf6bec686, 0xbc90b678,
+ 0x33a59bed, 0xf38234cc, 0x7cb71959, 0x369969a7, 0xb9ac4432,
+ 0xa2c5885b, 0x2df0a5ce, 0x67ded530, 0xe8ebf8a5, 0x510d4de2,
+ 0xde386077, 0x94161089, 0x1b233d1c, 0x004af175, 0x8f7fdce0,
+ 0xc551ac1e, 0x4a64818b, 0x6dedc0d1, 0xe2d8ed44, 0xa8f69dba,
+ 0x27c3b02f, 0x3caa7c46, 0xb39f51d3, 0xf9b1212d, 0x76840cb8,
+ 0xcf62b9ff, 0x4057946a, 0x0a79e494, 0x854cc901, 0x9e250568,
+ 0x111028fd, 0x5b3e5803, 0xd40b7596, 0x2859b56e, 0xa76c98fb,
+ 0xed42e805, 0x6277c590, 0x791e09f9, 0xf62b246c, 0xbc055492,
+ 0x33307907, 0x8ad6cc40, 0x05e3e1d5, 0x4fcd912b, 0xc0f8bcbe,
+ 0xdb9170d7, 0x54a45d42, 0x1e8a2dbc, 0x91bf0029, 0xb6364173,
+ 0x39036ce6, 0x732d1c18, 0xfc18318d, 0xe771fde4, 0x6844d071,
+ 0x226aa08f, 0xad5f8d1a, 0x14b9385d, 0x9b8c15c8, 0xd1a26536,
+ 0x5e9748a3, 0x45fe84ca, 0xcacba95f, 0x80e5d9a1, 0x0fd0f434,
+ 0xcff75b15, 0x40c27680, 0x0aec067e, 0x85d92beb, 0x9eb0e782,
+ 0x1185ca17, 0x5babbae9, 0xd49e977c, 0x6d78223b, 0xe24d0fae,
+ 0xa8637f50, 0x275652c5, 0x3c3f9eac, 0xb30ab339, 0xf924c3c7,
+ 0x7611ee52, 0x5198af08, 0xdead829d, 0x9483f263, 0x1bb6dff6,
+ 0x00df139f, 0x8fea3e0a, 0xc5c44ef4, 0x4af16361, 0xf317d626,
+ 0x7c22fbb3, 0x360c8b4d, 0xb939a6d8, 0xa2506ab1, 0x2d654724,
+ 0x674b37da, 0xe87e1a4f, 0x3c756fd9, 0xb340424c, 0xf96e32b2,
+ 0x765b1f27, 0x6d32d34e, 0xe207fedb, 0xa8298e25, 0x271ca3b0,
+ 0x9efa16f7, 0x11cf3b62, 0x5be14b9c, 0xd4d46609, 0xcfbdaa60,
+ 0x408887f5, 0x0aa6f70b, 0x8593da9e, 0xa21a9bc4, 0x2d2fb651,
+ 0x6701c6af, 0xe834eb3a, 0xf35d2753, 0x7c680ac6, 0x36467a38,
+ 0xb97357ad, 0x0095e2ea, 0x8fa0cf7f, 0xc58ebf81, 0x4abb9214,
+ 0x51d25e7d, 0xdee773e8, 0x94c90316, 0x1bfc2e83, 0xdbdb81a2,
+ 0x54eeac37, 0x1ec0dcc9, 0x91f5f15c, 0x8a9c3d35, 0x05a910a0,
+ 0x4f87605e, 0xc0b24dcb, 0x7954f88c, 0xf661d519, 0xbc4fa5e7,
+ 0x337a8872, 0x2813441b, 0xa726698e, 0xed081970, 0x623d34e5,
+ 0x45b475bf, 0xca81582a, 0x80af28d4, 0x0f9a0541, 0x14f3c928,
+ 0x9bc6e4bd, 0xd1e89443, 0x5eddb9d6, 0xe73b0c91, 0x680e2104,
+ 0x222051fa, 0xad157c6f, 0xb67cb006, 0x39499d93, 0x7367ed6d,
+ 0xfc52c0f8,
+ 0x00000000, 0x50b36adc, 0xa166d5b8, 0xf1d5bf64, 0x99bcad31,
+ 0xc90fc7ed, 0x38da7889, 0x68691255, 0xe8085c23, 0xb8bb36ff,
+ 0x496e899b, 0x19dde347, 0x71b4f112, 0x21079bce, 0xd0d224aa,
+ 0x80614e76, 0x0b61be07, 0x5bd2d4db, 0xaa076bbf, 0xfab40163,
+ 0x92dd1336, 0xc26e79ea, 0x33bbc68e, 0x6308ac52, 0xe369e224,
+ 0xb3da88f8, 0x420f379c, 0x12bc5d40, 0x7ad54f15, 0x2a6625c9,
+ 0xdbb39aad, 0x8b00f071, 0x16c37c0e, 0x467016d2, 0xb7a5a9b6,
+ 0xe716c36a, 0x8f7fd13f, 0xdfccbbe3, 0x2e190487, 0x7eaa6e5b,
+ 0xfecb202d, 0xae784af1, 0x5fadf595, 0x0f1e9f49, 0x67778d1c,
+ 0x37c4e7c0, 0xc61158a4, 0x96a23278, 0x1da2c209, 0x4d11a8d5,
+ 0xbcc417b1, 0xec777d6d, 0x841e6f38, 0xd4ad05e4, 0x2578ba80,
+ 0x75cbd05c, 0xf5aa9e2a, 0xa519f4f6, 0x54cc4b92, 0x047f214e,
+ 0x6c16331b, 0x3ca559c7, 0xcd70e6a3, 0x9dc38c7f, 0x2d86f81c,
+ 0x7d3592c0, 0x8ce02da4, 0xdc534778, 0xb43a552d, 0xe4893ff1,
+ 0x155c8095, 0x45efea49, 0xc58ea43f, 0x953dcee3, 0x64e87187,
+ 0x345b1b5b, 0x5c32090e, 0x0c8163d2, 0xfd54dcb6, 0xade7b66a,
+ 0x26e7461b, 0x76542cc7, 0x878193a3, 0xd732f97f, 0xbf5beb2a,
+ 0xefe881f6, 0x1e3d3e92, 0x4e8e544e, 0xceef1a38, 0x9e5c70e4,
+ 0x6f89cf80, 0x3f3aa55c, 0x5753b709, 0x07e0ddd5, 0xf63562b1,
+ 0xa686086d, 0x3b458412, 0x6bf6eece, 0x9a2351aa, 0xca903b76,
+ 0xa2f92923, 0xf24a43ff, 0x039ffc9b, 0x532c9647, 0xd34dd831,
+ 0x83feb2ed, 0x722b0d89, 0x22986755, 0x4af17500, 0x1a421fdc,
+ 0xeb97a0b8, 0xbb24ca64, 0x30243a15, 0x609750c9, 0x9142efad,
+ 0xc1f18571, 0xa9989724, 0xf92bfdf8, 0x08fe429c, 0x584d2840,
+ 0xd82c6636, 0x889f0cea, 0x794ab38e, 0x29f9d952, 0x4190cb07,
+ 0x1123a1db, 0xe0f61ebf, 0xb0457463, 0x5b0df038, 0x0bbe9ae4,
+ 0xfa6b2580, 0xaad84f5c, 0xc2b15d09, 0x920237d5, 0x63d788b1,
+ 0x3364e26d, 0xb305ac1b, 0xe3b6c6c7, 0x126379a3, 0x42d0137f,
+ 0x2ab9012a, 0x7a0a6bf6, 0x8bdfd492, 0xdb6cbe4e, 0x506c4e3f,
+ 0x00df24e3, 0xf10a9b87, 0xa1b9f15b, 0xc9d0e30e, 0x996389d2,
+ 0x68b636b6, 0x38055c6a, 0xb864121c, 0xe8d778c0, 0x1902c7a4,
+ 0x49b1ad78, 0x21d8bf2d, 0x716bd5f1, 0x80be6a95, 0xd00d0049,
+ 0x4dce8c36, 0x1d7de6ea, 0xeca8598e, 0xbc1b3352, 0xd4722107,
+ 0x84c14bdb, 0x7514f4bf, 0x25a79e63, 0xa5c6d015, 0xf575bac9,
+ 0x04a005ad, 0x54136f71, 0x3c7a7d24, 0x6cc917f8, 0x9d1ca89c,
+ 0xcdafc240, 0x46af3231, 0x161c58ed, 0xe7c9e789, 0xb77a8d55,
+ 0xdf139f00, 0x8fa0f5dc, 0x7e754ab8, 0x2ec62064, 0xaea76e12,
+ 0xfe1404ce, 0x0fc1bbaa, 0x5f72d176, 0x371bc323, 0x67a8a9ff,
+ 0x967d169b, 0xc6ce7c47, 0x768b0824, 0x263862f8, 0xd7eddd9c,
+ 0x875eb740, 0xef37a515, 0xbf84cfc9, 0x4e5170ad, 0x1ee21a71,
+ 0x9e835407, 0xce303edb, 0x3fe581bf, 0x6f56eb63, 0x073ff936,
+ 0x578c93ea, 0xa6592c8e, 0xf6ea4652, 0x7deab623, 0x2d59dcff,
+ 0xdc8c639b, 0x8c3f0947, 0xe4561b12, 0xb4e571ce, 0x4530ceaa,
+ 0x1583a476, 0x95e2ea00, 0xc55180dc, 0x34843fb8, 0x64375564,
+ 0x0c5e4731, 0x5ced2ded, 0xad389289, 0xfd8bf855, 0x6048742a,
+ 0x30fb1ef6, 0xc12ea192, 0x919dcb4e, 0xf9f4d91b, 0xa947b3c7,
+ 0x58920ca3, 0x0821667f, 0x88402809, 0xd8f342d5, 0x2926fdb1,
+ 0x7995976d, 0x11fc8538, 0x414fefe4, 0xb09a5080, 0xe0293a5c,
+ 0x6b29ca2d, 0x3b9aa0f1, 0xca4f1f95, 0x9afc7549, 0xf295671c,
+ 0xa2260dc0, 0x53f3b2a4, 0x0340d878, 0x8321960e, 0xd392fcd2,
+ 0x224743b6, 0x72f4296a, 0x1a9d3b3f, 0x4a2e51e3, 0xbbfbee87,
+ 0xeb48845b,
+ 0x00000000, 0xb61be070, 0xb746c6a1, 0x015d26d1, 0xb5fc8b03,
+ 0x03e76b73, 0x02ba4da2, 0xb4a1add2, 0xb0881047, 0x0693f037,
+ 0x07ced6e6, 0xb1d53696, 0x05749b44, 0xb36f7b34, 0xb2325de5,
+ 0x0429bd95, 0xba6126cf, 0x0c7ac6bf, 0x0d27e06e, 0xbb3c001e,
+ 0x0f9dadcc, 0xb9864dbc, 0xb8db6b6d, 0x0ec08b1d, 0x0ae93688,
+ 0xbcf2d6f8, 0xbdaff029, 0x0bb41059, 0xbf15bd8b, 0x090e5dfb,
+ 0x08537b2a, 0xbe489b5a, 0xafb34bdf, 0x19a8abaf, 0x18f58d7e,
+ 0xaeee6d0e, 0x1a4fc0dc, 0xac5420ac, 0xad09067d, 0x1b12e60d,
+ 0x1f3b5b98, 0xa920bbe8, 0xa87d9d39, 0x1e667d49, 0xaac7d09b,
+ 0x1cdc30eb, 0x1d81163a, 0xab9af64a, 0x15d26d10, 0xa3c98d60,
+ 0xa294abb1, 0x148f4bc1, 0xa02ee613, 0x16350663, 0x176820b2,
+ 0xa173c0c2, 0xa55a7d57, 0x13419d27, 0x121cbbf6, 0xa4075b86,
+ 0x10a6f654, 0xa6bd1624, 0xa7e030f5, 0x11fbd085, 0x841791ff,
+ 0x320c718f, 0x3351575e, 0x854ab72e, 0x31eb1afc, 0x87f0fa8c,
+ 0x86addc5d, 0x30b63c2d, 0x349f81b8, 0x828461c8, 0x83d94719,
+ 0x35c2a769, 0x81630abb, 0x3778eacb, 0x3625cc1a, 0x803e2c6a,
+ 0x3e76b730, 0x886d5740, 0x89307191, 0x3f2b91e1, 0x8b8a3c33,
+ 0x3d91dc43, 0x3cccfa92, 0x8ad71ae2, 0x8efea777, 0x38e54707,
+ 0x39b861d6, 0x8fa381a6, 0x3b022c74, 0x8d19cc04, 0x8c44ead5,
+ 0x3a5f0aa5, 0x2ba4da20, 0x9dbf3a50, 0x9ce21c81, 0x2af9fcf1,
+ 0x9e585123, 0x2843b153, 0x291e9782, 0x9f0577f2, 0x9b2cca67,
+ 0x2d372a17, 0x2c6a0cc6, 0x9a71ecb6, 0x2ed04164, 0x98cba114,
+ 0x999687c5, 0x2f8d67b5, 0x91c5fcef, 0x27de1c9f, 0x26833a4e,
+ 0x9098da3e, 0x243977ec, 0x9222979c, 0x937fb14d, 0x2564513d,
+ 0x214deca8, 0x97560cd8, 0x960b2a09, 0x2010ca79, 0x94b167ab,
+ 0x22aa87db, 0x23f7a10a, 0x95ec417a, 0xd35e25bf, 0x6545c5cf,
+ 0x6418e31e, 0xd203036e, 0x66a2aebc, 0xd0b94ecc, 0xd1e4681d,
+ 0x67ff886d, 0x63d635f8, 0xd5cdd588, 0xd490f359, 0x628b1329,
+ 0xd62abefb, 0x60315e8b, 0x616c785a, 0xd777982a, 0x693f0370,
+ 0xdf24e300, 0xde79c5d1, 0x686225a1, 0xdcc38873, 0x6ad86803,
+ 0x6b854ed2, 0xdd9eaea2, 0xd9b71337, 0x6facf347, 0x6ef1d596,
+ 0xd8ea35e6, 0x6c4b9834, 0xda507844, 0xdb0d5e95, 0x6d16bee5,
+ 0x7ced6e60, 0xcaf68e10, 0xcbaba8c1, 0x7db048b1, 0xc911e563,
+ 0x7f0a0513, 0x7e5723c2, 0xc84cc3b2, 0xcc657e27, 0x7a7e9e57,
+ 0x7b23b886, 0xcd3858f6, 0x7999f524, 0xcf821554, 0xcedf3385,
+ 0x78c4d3f5, 0xc68c48af, 0x7097a8df, 0x71ca8e0e, 0xc7d16e7e,
+ 0x7370c3ac, 0xc56b23dc, 0xc436050d, 0x722de57d, 0x760458e8,
+ 0xc01fb898, 0xc1429e49, 0x77597e39, 0xc3f8d3eb, 0x75e3339b,
+ 0x74be154a, 0xc2a5f53a, 0x5749b440, 0xe1525430, 0xe00f72e1,
+ 0x56149291, 0xe2b53f43, 0x54aedf33, 0x55f3f9e2, 0xe3e81992,
+ 0xe7c1a407, 0x51da4477, 0x508762a6, 0xe69c82d6, 0x523d2f04,
+ 0xe426cf74, 0xe57be9a5, 0x536009d5, 0xed28928f, 0x5b3372ff,
+ 0x5a6e542e, 0xec75b45e, 0x58d4198c, 0xeecff9fc, 0xef92df2d,
+ 0x59893f5d, 0x5da082c8, 0xebbb62b8, 0xeae64469, 0x5cfda419,
+ 0xe85c09cb, 0x5e47e9bb, 0x5f1acf6a, 0xe9012f1a, 0xf8faff9f,
+ 0x4ee11fef, 0x4fbc393e, 0xf9a7d94e, 0x4d06749c, 0xfb1d94ec,
+ 0xfa40b23d, 0x4c5b524d, 0x4872efd8, 0xfe690fa8, 0xff342979,
+ 0x492fc909, 0xfd8e64db, 0x4b9584ab, 0x4ac8a27a, 0xfcd3420a,
+ 0x429bd950, 0xf4803920, 0xf5dd1ff1, 0x43c6ff81, 0xf7675253,
+ 0x417cb223, 0x402194f2, 0xf63a7482, 0xf213c917, 0x44082967,
+ 0x45550fb6, 0xf34eefc6, 0x47ef4214, 0xf1f4a264, 0xf0a984b5,
+ 0x46b264c5,
+ 0x00000000, 0x7dcd4d3f, 0xfb9a9a7e, 0x8657d741, 0x2c4432bd,
+ 0x51897f82, 0xd7dea8c3, 0xaa13e5fc, 0x5888657a, 0x25452845,
+ 0xa312ff04, 0xdedfb23b, 0x74cc57c7, 0x09011af8, 0x8f56cdb9,
+ 0xf29b8086, 0xb110caf4, 0xccdd87cb, 0x4a8a508a, 0x37471db5,
+ 0x9d54f849, 0xe099b576, 0x66ce6237, 0x1b032f08, 0xe998af8e,
+ 0x9455e2b1, 0x120235f0, 0x6fcf78cf, 0xc5dc9d33, 0xb811d00c,
+ 0x3e46074d, 0x438b4a72, 0xb95093a9, 0xc49dde96, 0x42ca09d7,
+ 0x3f0744e8, 0x9514a114, 0xe8d9ec2b, 0x6e8e3b6a, 0x13437655,
+ 0xe1d8f6d3, 0x9c15bbec, 0x1a426cad, 0x678f2192, 0xcd9cc46e,
+ 0xb0518951, 0x36065e10, 0x4bcb132f, 0x0840595d, 0x758d1462,
+ 0xf3dac323, 0x8e178e1c, 0x24046be0, 0x59c926df, 0xdf9ef19e,
+ 0xa253bca1, 0x50c83c27, 0x2d057118, 0xab52a659, 0xd69feb66,
+ 0x7c8c0e9a, 0x014143a5, 0x871694e4, 0xfadbd9db, 0xa9d02113,
+ 0xd41d6c2c, 0x524abb6d, 0x2f87f652, 0x859413ae, 0xf8595e91,
+ 0x7e0e89d0, 0x03c3c4ef, 0xf1584469, 0x8c950956, 0x0ac2de17,
+ 0x770f9328, 0xdd1c76d4, 0xa0d13beb, 0x2686ecaa, 0x5b4ba195,
+ 0x18c0ebe7, 0x650da6d8, 0xe35a7199, 0x9e973ca6, 0x3484d95a,
+ 0x49499465, 0xcf1e4324, 0xb2d30e1b, 0x40488e9d, 0x3d85c3a2,
+ 0xbbd214e3, 0xc61f59dc, 0x6c0cbc20, 0x11c1f11f, 0x9796265e,
+ 0xea5b6b61, 0x1080b2ba, 0x6d4dff85, 0xeb1a28c4, 0x96d765fb,
+ 0x3cc48007, 0x4109cd38, 0xc75e1a79, 0xba935746, 0x4808d7c0,
+ 0x35c59aff, 0xb3924dbe, 0xce5f0081, 0x644ce57d, 0x1981a842,
+ 0x9fd67f03, 0xe21b323c, 0xa190784e, 0xdc5d3571, 0x5a0ae230,
+ 0x27c7af0f, 0x8dd44af3, 0xf01907cc, 0x764ed08d, 0x0b839db2,
+ 0xf9181d34, 0x84d5500b, 0x0282874a, 0x7f4fca75, 0xd55c2f89,
+ 0xa89162b6, 0x2ec6b5f7, 0x530bf8c8, 0x88d14467, 0xf51c0958,
+ 0x734bde19, 0x0e869326, 0xa49576da, 0xd9583be5, 0x5f0feca4,
+ 0x22c2a19b, 0xd059211d, 0xad946c22, 0x2bc3bb63, 0x560ef65c,
+ 0xfc1d13a0, 0x81d05e9f, 0x078789de, 0x7a4ac4e1, 0x39c18e93,
+ 0x440cc3ac, 0xc25b14ed, 0xbf9659d2, 0x1585bc2e, 0x6848f111,
+ 0xee1f2650, 0x93d26b6f, 0x6149ebe9, 0x1c84a6d6, 0x9ad37197,
+ 0xe71e3ca8, 0x4d0dd954, 0x30c0946b, 0xb697432a, 0xcb5a0e15,
+ 0x3181d7ce, 0x4c4c9af1, 0xca1b4db0, 0xb7d6008f, 0x1dc5e573,
+ 0x6008a84c, 0xe65f7f0d, 0x9b923232, 0x6909b2b4, 0x14c4ff8b,
+ 0x929328ca, 0xef5e65f5, 0x454d8009, 0x3880cd36, 0xbed71a77,
+ 0xc31a5748, 0x80911d3a, 0xfd5c5005, 0x7b0b8744, 0x06c6ca7b,
+ 0xacd52f87, 0xd11862b8, 0x574fb5f9, 0x2a82f8c6, 0xd8197840,
+ 0xa5d4357f, 0x2383e23e, 0x5e4eaf01, 0xf45d4afd, 0x899007c2,
+ 0x0fc7d083, 0x720a9dbc, 0x21016574, 0x5ccc284b, 0xda9bff0a,
+ 0xa756b235, 0x0d4557c9, 0x70881af6, 0xf6dfcdb7, 0x8b128088,
+ 0x7989000e, 0x04444d31, 0x82139a70, 0xffded74f, 0x55cd32b3,
+ 0x28007f8c, 0xae57a8cd, 0xd39ae5f2, 0x9011af80, 0xeddce2bf,
+ 0x6b8b35fe, 0x164678c1, 0xbc559d3d, 0xc198d002, 0x47cf0743,
+ 0x3a024a7c, 0xc899cafa, 0xb55487c5, 0x33035084, 0x4ece1dbb,
+ 0xe4ddf847, 0x9910b578, 0x1f476239, 0x628a2f06, 0x9851f6dd,
+ 0xe59cbbe2, 0x63cb6ca3, 0x1e06219c, 0xb415c460, 0xc9d8895f,
+ 0x4f8f5e1e, 0x32421321, 0xc0d993a7, 0xbd14de98, 0x3b4309d9,
+ 0x468e44e6, 0xec9da11a, 0x9150ec25, 0x17073b64, 0x6aca765b,
+ 0x29413c29, 0x548c7116, 0xd2dba657, 0xaf16eb68, 0x05050e94,
+ 0x78c843ab, 0xfe9f94ea, 0x8352d9d5, 0x71c95953, 0x0c04146c,
+ 0x8a53c32d, 0xf79e8e12, 0x5d8d6bee, 0x204026d1, 0xa617f190,
+ 0xdbdabcaf
};
diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
index 1f32488777d57..7c811aa3a0c26 100644
--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -27,6 +27,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/javaClasses.hpp"
+#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/bytecodeTracer.hpp"
@@ -70,7 +71,7 @@
// Max size with JVMTI
int TemplateInterpreter::InterpreterCodeSize = 256 * 1024;
-#define __ _masm->
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
//-----------------------------------------------------------------------------
@@ -1748,13 +1749,21 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
address& vep) {
assert(t != nullptr && t->is_valid() && t->tos_in() == vtos, "illegal template");
Label L;
- aep = __ pc(); __ push_ptr(); __ j(L);
- fep = __ pc(); __ push_f(); __ j(L);
- dep = __ pc(); __ push_d(); __ j(L);
- lep = __ pc(); __ push_l(); __ j(L);
- bep = cep = sep =
- iep = __ pc(); __ push_i();
- vep = __ pc();
+ aep = __ pc(); // atos entry point
+ __ push_ptr();
+ __ j(L);
+ fep = __ pc(); // ftos entry point
+ __ push_f();
+ __ j(L);
+ dep = __ pc(); // dtos entry point
+ __ push_d();
+ __ j(L);
+ lep = __ pc(); // ltos entry point
+ __ push_l();
+ __ j(L);
+ bep = cep = sep = iep = __ pc(); // [bcsi]tos entry point
+ __ push_i();
+ vep = __ pc(); // vtos entry point
__ bind(L);
generate_and_dispatch(t);
}
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
index 078f54adc3682..2fede262057ce 100644
--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
@@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/tlab_globals.hpp"
@@ -49,7 +50,7 @@
#include "runtime/synchronizer.hpp"
#include "utilities/powerOfTwo.hpp"
-#define __ _masm->
+#define __ Disassembler::hook(__FILE__, __LINE__, _masm)->
// Address computation: local variables
@@ -178,7 +179,6 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
__ la(temp_reg, Address(temp_reg, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp_reg, Address(temp_reg, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(bc_reg, bc);
@@ -320,7 +320,6 @@ void TemplateTable::ldc(LdcType type) {
// get type
__ addi(x13, x11, tags_offset);
__ add(x13, x10, x13);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x13, Address(x13, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2189,7 +2188,6 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no,
break;
}
// Load-acquire the bytecode to match store-release in InterpreterRuntime
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2241,7 +2239,6 @@ void TemplateTable::resolve_cache_and_index_for_field(int byte_no,
__ la(temp, Address(Rcache, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
- __ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(t0, (int) code); // have we resolved this bytecode?
@@ -2403,7 +2400,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
Label resolved;
__ load_resolved_indy_entry(cache, index);
- __ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -2418,7 +2414,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
__ call_VM(noreg, entry, method);
// Update registers with resolved info
__ load_resolved_indy_entry(cache, index);
- __ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
@@ -3533,7 +3528,6 @@ void TemplateTable::_new() {
const int tags_offset = Array::base_offset_in_bytes();
__ add(t0, x10, x13);
__ la(t0, Address(t0, tags_offset));
- __ membar(MacroAssembler::AnyAny);
__ lbu(t0, t0);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t1, t0, (u1)JVM_CONSTANT_Class);
@@ -3651,7 +3645,6 @@ void TemplateTable::checkcast() {
// See if bytecode has already been quicked
__ add(t0, x13, Array::base_offset_in_bytes());
__ add(x11, t0, x9);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
@@ -3707,7 +3700,6 @@ void TemplateTable::instanceof() {
// See if bytecode has already been quicked
__ add(t0, x13, Array<u1>::base_offset_in_bytes());
__ add(x11, t0, x9);
- __ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
diff --git a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
index 383f332f8fd94..55160be99d0d8 100644
--- a/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
+++ b/src/hotspot/cpu/riscv/upcallLinker_riscv.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -117,7 +118,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -223,7 +224,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ la(c_rarg0, Address(sp, frame_data_offset));
- __ movptr(c_rarg1, (address) receiver);
__ rt_call(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry));
__ mv(xthread, x10);
__ reinit_heapbase();
@@ -260,12 +260,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, as_VMStorage(shuffle_reg), abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, xthread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(xmethod, entry);
- __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (address) receiver);
+ __ far_call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // loads Method* into xmethod
+ __ block_comment("} load target ");
__ push_cont_fastpath(xthread);
@@ -338,7 +336,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char *name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
index bd4bfe86d9bf7..8fdde0094f40d 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -285,6 +285,7 @@ class VM_Version : public Abstract_VM_Version {
// RISCV64 supports fast class initialization checks
static bool supports_fast_class_init_checks() { return true; }
+ static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); }
};
#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index d288f4a893d0a..8990cf1663dd5 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -2350,6 +2350,7 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr de
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
// Make sure klass is initialized & doesn't have finalizer.
+ // init_state needs acquire, but S390 is TSO, and so we are already good.
const int state_offset = in_bytes(InstanceKlass::init_state_offset());
Register iklass = op->klass()->as_register();
add_debug_info_for_null_check_here(op->stub()->info());
diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
index f3fa19ddb31e0..f6dd20db3d67f 100644
--- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
@@ -79,7 +79,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()");
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(Roop, Rmark, tmp, slow_case);
+ lightweight_lock(Rbox, Roop, Rmark, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
NearLabel done;
diff --git a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
index 3641d82dabea9..025ef4c8915cd 100644
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
@@ -34,12 +34,12 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
- compiler_fast_lock_lightweight_object(obj, temp1, temp2);
+ compiler_fast_lock_lightweight_object(obj, box, temp1, temp2);
}
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
- compiler_fast_unlock_lightweight_object(obj, temp1, temp2);
+ compiler_fast_unlock_lightweight_object(obj, box, temp1, temp2);
}
//------------------------------------------------------
diff --git a/src/hotspot/cpu/s390/downcallLinker_s390.cpp b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
index 383a32448745c..85ddc5bf18548 100644
--- a/src/hotspot/cpu/s390/downcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
@@ -36,8 +36,8 @@
#define __ _masm->
-static const int native_invoker_code_base_size = 512;
-static const int native_invoker_size_per_args = 8;
+static const int native_invoker_code_base_size = 384;
+static const int native_invoker_size_per_args = 12;
RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature,
int num_args,
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
index 37631298920ca..544c82d34a769 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2023 SAP SE. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,11 +42,47 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
-#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BLOCK_COMMENT(str) __ block_comment(str)
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp1) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ load_and_test_int(tmp1, in_progress);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ load_and_test_byte(tmp1, in_progress);
+ }
+}
+
+static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register Z_thread, const Register value, const Register temp) {
+ BLOCK_COMMENT("generate_queue_test_and_insertion {");
+
+ assert_different_registers(temp, value);
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+
+ __ load_and_test_long(temp, Address(Z_thread, in_bytes(index_offset))); // temp := *(index address)
+ __ branch_optimized(Assembler::bcondEqual, runtime); // jump to runtime if index == 0 (full buffer)
+
+ // The buffer is not full, store value into it.
+ __ add2reg(temp, -wordSize); // temp := next index
+ __ z_stg(temp, in_bytes(index_offset), Z_thread); // *(index address) := next index
+
+ __ z_ag(temp, Address(Z_thread, in_bytes(buffer_offset))); // temp := buffer address + next index
+ __ z_stg(value, 0, temp); // *(buffer address + next index) := value
+ BLOCK_COMMENT("} generate_queue_test_and_insertion");
+}
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count) {
@@ -59,13 +95,8 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
assert_different_registers(addr, Z_R0_scratch); // would be destroyed by push_frame()
assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame()
Register Rtmp1 = Z_R0_scratch;
- const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
- }
+
+ generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1);
__ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); // Creates frame.
@@ -100,6 +131,181 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
}
}
+#if defined(COMPILER2)
+
+#undef __
+#define __ masm->
+
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register pre_val, const address runtime_path) {
+ BLOCK_COMMENT("generate_c2_barrier_runtime_call {");
+ SaveLiveRegisters save_registers(masm, stub);
+ __ call_VM_leaf(runtime_path, pre_val, Z_thread);
+ BLOCK_COMMENT("} generate_c2_barrier_runtime_call");
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ G1PreBarrierStubC2* stub) {
+
+ BLOCK_COMMENT("g1_write_barrier_pre_c2 {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(obj, pre_val, tmp1);
+ assert(pre_val != noreg && tmp1 != noreg, "expecting a register");
+
+ stub->initialize_registers(obj, pre_val, thread, tmp1, noreg);
+
+ generate_pre_barrier_fast_path(masm, thread, tmp1);
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); // Activity indicator is zero, so there is no marking going on currently.
+
+ __ bind(*stub->continuation());
+
+ BLOCK_COMMENT("} g1_write_barrier_pre_c2");
+}
+
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+
+ BLOCK_COMMENT("generate_c2_pre_barrier_stub {");
+
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1();
+
+ __ bind(*stub->entry());
+
+ BLOCK_COMMENT("generate_pre_val_not_null_test {");
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj), noreg, noreg, AS_RAW);
+ }
+ __ z_ltgr(pre_val, pre_val);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_pre_val_not_null_test");
+
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ Z_thread, pre_val, tmp1);
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ __ bind(runtime);
+
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+ BLOCK_COMMENT("g1_write_barrier_post_c2 {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch);
+
+ assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
+
+ stub->initialize_registers(thread, tmp1, tmp2);
+
+ BLOCK_COMMENT("generate_region_crossing_test {");
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_xgrk(tmp1, store_addr, new_val);
+ } else {
+ __ z_lgr(tmp1, store_addr);
+ __ z_xgr(tmp1, new_val);
+ }
+ __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_region_crossing_test");
+
+ // crosses regions, storing null?
+ if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ z_ltgr(new_val, new_val);
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ }
+
+ BLOCK_COMMENT("generate_card_young_test {");
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+ // calculate address of card
+ __ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base.
+ __ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table.
+ __ z_algr(tmp1, tmp2); // Explicit calculation needed for cli.
+
+ // Filter young.
+ __ z_cli(0, tmp1, G1CardTable::g1_young_card_val());
+
+ BLOCK_COMMENT("} generate_card_young_test");
+
+ // From here on, tmp1 holds the card address.
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
+
+ __ bind(*stub->continuation());
+
+ BLOCK_COMMENT("} g1_write_barrier_post_c2");
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+
+ BLOCK_COMMENT("generate_c2_post_barrier_stub {");
+
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+
+ Register thread = stub->thread();
+ Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
+ Register tmp2 = stub->tmp2();
+ Register Rcard_addr = tmp1;
+
+ __ bind(*stub->entry());
+
+ BLOCK_COMMENT("generate_card_clean_test {");
+ __ z_sync(); // Required to support concurrent cleaning.
+ __ z_cli(0, Rcard_addr, 0); // Reload after membar.
+ __ branch_optimized(Assembler::bcondEqual, *stub->continuation());
+ BLOCK_COMMENT("} generate_card_clean_test");
+
+ BLOCK_COMMENT("generate_dirty_card {");
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0);
+ __ z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
+ BLOCK_COMMENT("} generate_dirty_card");
+
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ Z_thread, tmp1, tmp2);
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ __ bind(runtime);
+
+ generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+
+ __ branch_optimized(Assembler::bcondAlways, *stub->continuation());
+
+ BLOCK_COMMENT("} generate_c2_post_barrier_stub");
+}
+
+#endif //COMPILER2
+
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null) {
bool on_oop = is_reference_type(type);
@@ -136,9 +342,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
const Register Robj = obj ? obj->base() : noreg,
Roff = obj ? obj->index() : noreg;
- const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
- const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp must be Z_R0!!
assert_different_registers(Robj, Z_R0_scratch); // Used for addressing. Furthermore, push_frame destroys Z_R0!!
assert_different_registers(Rval, Z_R0_scratch); // push_frame destroys Z_R0!!
@@ -147,14 +350,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
BLOCK_COMMENT("g1_write_barrier_pre {");
- // Is marking active?
- // Note: value is loaded for test purposes only. No further use here.
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset));
- } else {
- guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset));
- }
+ generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1);
__ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
assert(Rpre_val != noreg, "must have a real register");
@@ -194,24 +390,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
// We can store the original value in the thread's buffer
// only if index > 0. Otherwise, we need runtime to handle.
// (The index field is typed as size_t.)
- Register Rbuffer = Rtmp1, Rindex = Rtmp2;
- assert_different_registers(Rbuffer, Rindex, Rpre_val);
-
- __ z_lg(Rbuffer, buffer_offset, Z_thread);
- __ load_and_test_long(Rindex, Address(Z_thread, index_offset));
- __ z_bre(callRuntime); // If index == 0, goto runtime.
-
- __ add2reg(Rindex, -wordSize); // Decrement index.
- __ z_stg(Rindex, index_offset, Z_thread);
-
- // Record the previous value.
- __ z_stg(Rpre_val, 0, Rbuffer, Rindex);
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ callRuntime,
+ Z_thread, Rpre_val, Rtmp2);
__ z_bru(filtered); // We are done.
- Rbuffer = noreg; // end of life
- Rindex = noreg; // end of life
-
__ bind(callRuntime);
// Save some registers (inputs and result) over runtime call
@@ -326,23 +512,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
Register Rcard_addr_x = Rcard_addr;
Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
- Register Rqueue_buf = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1;
- const int qidx_off = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
- const int qbuf_off = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
- if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) {
+ if (Rcard_addr == Rqueue_index) {
Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0.
}
__ lgr_if_needed(Rcard_addr_x, Rcard_addr);
- __ load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off));
- __ z_bre(callRuntime); // Index == 0 then jump to runtime.
-
- __ z_lg(Rqueue_buf, qbuf_off, Z_thread);
-
- __ add2reg(Rqueue_index, -wordSize); // Decrement index.
- __ z_stg(Rqueue_index, qidx_off, Z_thread);
-
- __ z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card.
+ generate_queue_test_and_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ callRuntime,
+ Z_thread, Rcard_addr_x, Rqueue_index);
__ z_bru(filtered);
__ bind(callRuntime);
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
index cc1d51d2fa13e..0f0bdd8b83cfd 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -62,7 +64,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
-#endif
+#endif // COMPILER1
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif // COMPILER2
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null = nullptr);
diff --git a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
new file mode 100644
index 0000000000000..31f60c4aeff0b
--- /dev/null
+++ b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad
@@ -0,0 +1,457 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright 2024 IBM Corporation. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_s390.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp1,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Z_thread, tmp1, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub);
+}
+
+%} // source
+
+// store pointer
+instruct g1StoreP(indirect dst, memoryRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set dst (StoreP dst src));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "STG $src,$dst\t # ptr" %}
+ ins_encode %{
+ __ block_comment("g1StoreP {");
+ write_barrier_pre(masm, this,
+ $dst$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($dst$$Register, $src$$Register) /* preserve */);
+
+ __ z_stg($src$$Register, Address($dst$$Register));
+
+ write_barrier_post(masm, this,
+ $dst$$Register, /* store_addr */
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1StoreP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Store Compressed Pointer
+instruct g1StoreN(indirect mem, iRegN_P2N src, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "STY $src,$mem\t # (cOop)" %}
+ ins_encode %{
+ __ block_comment("g1StoreN {");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+
+ __ z_sty($src$$Register, Address($mem$$Register));
+
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ oop_decoder($tmp1$$Register, $src$$Register, true /* maybe_null */);
+ } else {
+ __ oop_decoder($tmp1$$Register, $src$$Register, false /* maybe_null */);
+ }
+ }
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ __ block_comment("} g1StoreN");
+ %}
+
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndSwapN(indirect mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+ __ block_comment("g1compareAndSwapN {");
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+ Register Rres = reg_to_register_object($res$$reg);
+
+ write_barrier_pre(masm, this,
+ Raddr /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of(Raddr, Rcomp, Rnew) /* preserve */,
+ RegSet::of(Rres) /* no_preserve */);
+
+ __ z_cs(Rcomp, Rnew, 0, Raddr);
+
+ assert_different_registers(Rres, Raddr);
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label done;
+ __ load_const_optimized(Rres, 0L); // false (failed)
+ __ z_brne(done); // Assume true to be the common case.
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ bind(done);
+ }
+
+ __ oop_decoder($tmp3$$Register, Rnew, true /* maybe_null */);
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ $tmp3$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1compareAndSwapN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndExchangeN(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegN res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeN mem_ptr (Binary oldval newval)));
+ effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndExchangeN $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+ __ block_comment("g1CompareAndExchangeN {");
+ write_barrier_pre(masm, this,
+ $mem_ptr$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+
+ Register Rres = reg_to_register_object($res$$reg);
+ assert_different_registers(Rres, Raddr);
+
+ __ z_lgr(Rres, Rcomp); // previous contents
+ __ z_csy(Rres, Rnew, 0, Raddr); // Try to store new value.
+
+ __ oop_decoder($tmp1$$Register, Rnew, true /* maybe_null */);
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndExchangeN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load narrow oop
+instruct g1LoadN(iRegN dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "LoadN $dst,$mem\t # (cOop)" %}
+ ins_encode %{
+ __ block_comment("g1LoadN {");
+ __ z_llgf($dst$$Register, Address($mem$$Register));
+ if ((barrier_data() & G1C2BarrierPre) != 0) {
+ __ oop_decoder($tmp1$$Register, $dst$$Register, true);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register );
+ }
+ __ block_comment("} g1LoadN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1GetAndSetN(indirect mem, iRegN dst, iRegI tmp, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set dst (GetAndSetN mem dst));
+ effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); // USE_DEF dst by match rule.
+ format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
+ ins_encode %{
+ __ block_comment("g1GetAndSetN {");
+ assert_different_registers($mem$$Register, $dst$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $dst$$Register) /* preserve */);
+
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_llgf(Rtmp, Address($mem$$Register)); // current contents
+ __ bind(retry);
+ // Calculate incremented value.
+ __ z_csy(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+
+ __ oop_decoder($tmp1$$Register, $dst$$Register, true /* maybe_null */);
+
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+
+ __ block_comment("} g1GetAndSetN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndSwapP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ __ block_comment("g1CompareAndSwapP {");
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+
+ Register Rcomp = reg_to_register_object($oldval$$reg);
+ Register Rnew = reg_to_register_object($newval$$reg);
+ Register Raddr = reg_to_register_object($mem_ptr$$reg);
+ Register Rres = reg_to_register_object($res$$reg);
+
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ Rcomp /* pre_val */,
+ $tmp1$$Register /* tmp1 */,
+ RegSet::of(Raddr, Rcomp, Rnew) /* preserve */,
+ RegSet::of(Rres) /* no_preserve */);
+
+ __ z_csg(Rcomp, Rnew, 0, Raddr);
+
+ if (VM_Version::has_LoadStoreConditional()) {
+ __ load_const_optimized(Z_R0_scratch, 0L); // false (failed)
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual);
+ } else {
+ Label done;
+ __ load_const_optimized(Rres, 0L); // false (failed)
+ __ z_brne(done); // Assume true to be the common case.
+ __ load_const_optimized(Rres, 1L); // true (succeed)
+ __ bind(done);
+ }
+
+ write_barrier_post(masm, this,
+ Raddr /* store_addr */,
+ Rnew /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndSwapP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1CompareAndExchangeP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegP res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndExchangeP mem_ptr (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr);
+ format %{ "$res = CompareAndExchangeP $oldval,$newval,$mem_ptr" %}
+ ins_encode %{
+ __ block_comment("g1CompareAndExchangeP {");
+ assert_different_registers($oldval$$Register, $mem_ptr$$Register);
+ assert_different_registers($newval$$Register, $mem_ptr$$Register);
+
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+
+ __ z_lgr($res$$Register, $oldval$$Register); // previous content
+
+ __ z_csg($oldval$$Register, $newval$$Register, 0, $mem_ptr$$reg);
+
+ write_barrier_post(masm, this,
+ $mem_ptr$$Register /* store_addr */,
+ $newval$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1CompareAndExchangeP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+// Load Pointer
+instruct g1LoadP(iRegP dst, memory mem, iRegL tmp1, flagsReg cr) %{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp1, KILL cr);
+ ins_cost(MEMORY_REF_COST);
+ format %{ "LG $dst,$mem\t # ptr" %}
+ ins_encode %{
+ __ block_comment("g1LoadP {");
+ __ z_lg($dst$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp1$$Register );
+ __ block_comment("} g1LoadP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1GetAndSetP(indirect mem, iRegP dst, iRegL tmp, iRegL tmp1, iRegL tmp2, flagsReg cr) %{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set dst (GetAndSetP mem dst));
+ effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2); // USE_DEF dst by match rule.
+ format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
+ ins_encode %{
+ __ block_comment("g1GetAndSetP {");
+
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp$$Register /* pre_val (as a temporary register) */,
+ $tmp1$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $dst$$Register) /* preserve */);
+
+ __ z_lgr($tmp1$$Register, $dst$$Register);
+ Register Rdst = reg_to_register_object($dst$$reg);
+ Register Rtmp = reg_to_register_object($tmp$$reg);
+ guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF");
+ Label retry;
+
+ // Iterate until swap succeeds.
+ __ z_lg(Rtmp, Address($mem$$Register)); // current contents
+ __ bind(retry);
+ // Calculate incremented value.
+ __ z_csg(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value.
+ __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value.
+
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp$$Register /* tmp2 */);
+ __ block_comment("} g1GetAndSetP");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
+instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr);
+ // ins_cost(INSN_COST);
+ format %{ "encode_heap_oop $tmp1, $src\n\t"
+ "st $tmp1, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ block_comment("g1EncodePAndStoreN {");
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp1 */,
+ RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ oop_encoder($tmp1$$Register, $src$$Register, true /* maybe_null */);
+ } else {
+ __ oop_encoder($tmp1$$Register, $src$$Register, false /* maybe_null */);
+ }
+ __ z_st($tmp1$$Register, Address($mem$$Register));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp1$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ __ block_comment("} g1EncodePAndStoreN");
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
index d3457916bc9d5..d826b4a06f336 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
@@ -33,6 +33,9 @@
#include "runtime/jniHandles.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
+#ifdef COMPILER2
+#include "gc/shared/c2/barrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -105,16 +108,60 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
}
}
+// Generic implementation. GCs can provide an optimized one.
void BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
- NearLabel Ldone;
- __ z_ltgr(tmp1, value);
- __ z_bre(Ldone); // Use null result as-is.
- __ z_nill(value, ~JNIHandles::tag_mask);
- __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+ assert_different_registers(value, tmp1, tmp2);
+ NearLabel done, weak_tag, verify, tagged;
+ __ z_ltgr(value, value);
+ __ z_bre(done); // Use null result as-is.
+
+ __ z_tmll(value, JNIHandles::tag_mask);
+ __ z_btrue(tagged); // not zero
+
+ // Resolve Local handle
+ __ access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, Address(value, 0), value, tmp1, tmp2);
+ __ z_bru(verify);
+
+ __ bind(tagged);
+ __ testbit(value, exact_log2(JNIHandles::TypeTag::weak_global)); // test for weak tag
+ __ z_btrue(weak_tag);
+
+ // resolve global handle
+ __ access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2);
+ __ z_bru(verify);
+
+ __ bind(weak_tag);
+ // resolve jweak.
+ __ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
+ Address(value, -JNIHandles::TypeTag::weak_global), value, tmp1, tmp2);
+ __ bind(verify);
+ __ verify_oop(value, FILE_AND_LINE);
+ __ bind(done);
+}
+
+// Generic implementation. GCs can provide an optimized one.
+void BarrierSetAssembler::resolve_global_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+ assert_different_registers(value, tmp1, tmp2);
+ NearLabel done;
+
+ __ z_ltgr(value, value);
+ __ z_bre(done); // use null as-is.
+#ifdef ASSERT
+ {
+ NearLabel valid_global_tag;
+ __ testbit(value, exact_log2(JNIHandles::TypeTag::global)); // test for global tag
+ __ z_btrue(valid_global_tag);
+ __ stop("non global jobject using resolve_global_jobject");
+ __ bind(valid_global_tag);
+ }
+#endif // ASSERT
+
+ // Resolve global handle
+ __ access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2);
__ verify_oop(value, FILE_AND_LINE);
- __ bind(Ldone);
+ __ bind(done);
}
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
@@ -150,8 +197,93 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
#ifdef COMPILER2
-OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
- Unimplemented(); // This must be implemented to support late barrier expansion.
+OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) const {
+ if (!OptoReg::is_reg(opto_reg)) {
+ return OptoReg::Bad;
+ }
+
+ VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if ((vm_reg->is_Register() || vm_reg ->is_FloatRegister()) && (opto_reg & 1) != 0) {
+ return OptoReg::Bad;
+ }
+
+ return opto_reg;
+}
+
+#undef __
+#define __ _masm->
+
+SaveLiveRegisters::SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub)
+ : _masm(masm), _reg_mask(stub->preserve_set()) {
+
+ const int register_save_size = iterate_over_register_mask(ACTION_COUNT_ONLY) * BytesPerWord;
+
+ _frame_size = align_up(register_save_size, frame::alignment_in_bytes) + frame::z_abi_160_size; // FIXME: this could be restricted to argument only
+
+ __ save_return_pc();
+ __ push_frame(_frame_size, Z_R14); // FIXME: check if Z_R1_scaratch can do a job here;
+
+ __ z_lg(Z_R14, _z_common_abi(return_pc) + _frame_size, Z_SP);
+
+ iterate_over_register_mask(ACTION_SAVE, _frame_size);
+}
+
+SaveLiveRegisters::~SaveLiveRegisters() {
+ iterate_over_register_mask(ACTION_RESTORE, _frame_size);
+
+ __ pop_frame();
+
+ __ restore_return_pc();
+}
+
+int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int offset) {
+ int reg_save_index = 0;
+ RegMaskIterator live_regs_iterator(_reg_mask);
+
+ while(live_regs_iterator.has_next()) {
+ const OptoReg::Name opto_reg = live_regs_iterator.next();
+
+ // Filter out stack slots (spilled registers, i.e., stack-allocated registers).
+ if (!OptoReg::is_reg(opto_reg)) {
+ continue;
+ }
+
+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
+ if (vm_reg->is_Register()) {
+ Register std_reg = vm_reg->as_Register();
+
+ if (std_reg->encoding() >= Z_R2->encoding() && std_reg->encoding() <= Z_R15->encoding()) {
+ reg_save_index++;
+
+ if (action == ACTION_SAVE) {
+ __ z_stg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else if (action == ACTION_RESTORE) {
+ __ z_lg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else {
+ assert(action == ACTION_COUNT_ONLY, "Sanity");
+ }
+ }
+ } else if (vm_reg->is_FloatRegister()) {
+ FloatRegister fp_reg = vm_reg->as_FloatRegister();
+ if (fp_reg->encoding() >= Z_F0->encoding() && fp_reg->encoding() <= Z_F15->encoding()
+ && fp_reg->encoding() != Z_F1->encoding()) {
+ reg_save_index++;
+
+ if (action == ACTION_SAVE) {
+ __ z_std(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else if (action == ACTION_RESTORE) {
+ __ z_ld(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP);
+ } else {
+ assert(action == ACTION_COUNT_ONLY, "Sanity");
+ }
+ }
+ } else if (false /* vm_reg->is_VectorRegister() */){
+ fatal("Vector register support is not there yet!");
+ } else {
+ fatal("Register type is not known");
+ }
+ }
+ return reg_save_index;
}
#endif // COMPILER2
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
index f83bbb864ea47..fb61adc55b500 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
@@ -32,7 +32,9 @@
#ifdef COMPILER2
#include "code/vmreg.hpp"
#include "opto/optoreg.hpp"
+#include "opto/regmask.hpp"
+class BarrierStubC2;
class Node;
#endif // COMPILER2
@@ -51,6 +53,7 @@ class BarrierSetAssembler: public CHeapObj {
const Address& addr, Register val, Register tmp1, Register tmp2, Register tmp3);
virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
+ virtual void resolve_global_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
@@ -61,8 +64,42 @@ class BarrierSetAssembler: public CHeapObj {
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
- OptoReg::Name opto_reg);
+ OptoReg::Name opto_reg) const;
#endif // COMPILER2
};
+#ifdef COMPILER2
+
+// This class saves and restores the registers that need to be preserved across
+// the runtime call represented by a given C2 barrier stub. Use as follows:
+// {
+// SaveLiveRegisters save(masm, stub);
+// ..
+// __ call_VM_leaf(...);
+// ..
+// }
+
+class SaveLiveRegisters {
+ MacroAssembler* _masm;
+ RegMask _reg_mask;
+ Register _result_reg;
+ int _frame_size;
+
+ public:
+ SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub);
+
+ ~SaveLiveRegisters();
+
+ private:
+ enum IterationAction : int {
+ ACTION_SAVE,
+ ACTION_RESTORE,
+ ACTION_COUNT_ONLY
+ };
+
+ int iterate_over_register_mask(IterationAction action, int offset = 0);
+};
+
+#endif // COMPILER2
+
#endif // CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP
diff --git a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
index fd21dd85e1195..f44a72c27abc1 100644
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2019 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "runtime/jniHandles.hpp"
#define __ masm->
@@ -58,3 +59,16 @@ void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet deco
BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
}
}
+
+void ModRefBarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2) {
+ NearLabel done;
+
+ __ z_ltgr(value, value);
+ __ z_bre(done); // use null as-is.
+
+ __ z_nill(value, ~JNIHandles::tag_mask);
+ __ z_lg(value, 0, value); // Resolve (untagged) jobject.
+
+ __ verify_oop(value, FILE_AND_LINE);
+ __ bind(done);
+}
diff --git a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
index 865638477cd7a..7f53d033780c1 100644
--- a/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/modRefBarrierSetAssembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,8 @@ class ModRefBarrierSetAssembler: public BarrierSetAssembler {
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
const Address& dst, Register val, Register tmp1, Register tmp2, Register tmp3);
+
+ virtual void resolve_jobject(MacroAssembler* masm, Register value, Register tmp1, Register tmp2);
};
#endif // CPU_S390_GC_SHARED_MODREFBARRIERSETASSEMBLER_S390_HPP
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index e56beaa9f569c..d00b6c3e2cc2e 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -1012,7 +1012,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
}
if (LockingMode == LM_LIGHTWEIGHT) {
- lightweight_lock(object, header, tmp, slow_case);
+ lightweight_lock(monitor, object, header, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
// Load markWord from object into header.
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index 50de705cd9f0c..6bfe5125959ad 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -2127,8 +2127,9 @@ unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
// Pop current C frame.
void MacroAssembler::pop_frame() {
- BLOCK_COMMENT("pop_frame:");
+ BLOCK_COMMENT("pop_frame {");
Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
+ BLOCK_COMMENT("} pop_frame");
}
// Pop current C frame and restore return PC register (Z_R14).
@@ -3458,7 +3459,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa
L_slow_path = &L_fallthrough;
}
- // Fast path check: class is fully initialized
+ // Fast path check: class is fully initialized.
+ // init_state needs acquire, but S390 is TSO, and so we are already good.
z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
z_bre(*L_fast_path);
@@ -3655,12 +3657,38 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
bind(not_recursive);
+ NearLabel check_succ, set_eq_unlocked;
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ z_release();
+ z_lghi(temp, 0);
+ z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+ // We need a full fence after clearing owner to avoid stranding.
+ z_fence();
+
+ // Check if the entry lists are empty.
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- z_brne(done);
+ z_brne(check_succ);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- z_brne(done);
- z_release();
- z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+ z_bre(done); // If so we are done.
+
+ bind(check_succ);
+
+ // Check if there is a successor.
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)));
+ z_brne(set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ z_xilf(currentHeader, markWord::monitor_value);
+ z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ z_ltgr(oop, oop); // Set flag = NE
+ z_bru(done);
+
+ bind(set_eq_unlocked);
+ z_cr(temp, temp); // Set flag = EQ
bind(done);
@@ -3674,6 +3702,11 @@ void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp
bs->resolve_jobject(this, value, tmp1, tmp2);
}
+void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->resolve_global_jobject(this, value, tmp1, tmp2);
+}
+
// Last_Java_sp must comply to the rules in frame_s390.hpp.
void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
BLOCK_COMMENT("set_last_Java_frame {");
@@ -6002,10 +6035,10 @@ SkipIfEqual::~SkipIfEqual() {
// - obj: the object to be locked, contents preserved.
// - temp1, temp2: temporary registers, contents destroyed.
// Note: make sure Z_R1 is not manipulated here when C2 compiler is in play
-void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register temp2, Label& slow) {
+void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
- assert_different_registers(obj, temp1, temp2);
+ assert_different_registers(basic_lock, obj, temp1, temp2);
Label push;
const Register top = temp1;
@@ -6017,6 +6050,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register tem
// instruction emitted as it is part of C1's null check semantics.
z_lg(mark, Address(obj, mark_offset));
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())));
+ z_mvghi(om_cache_addr, 0);
+ }
// First we need to check if the lock-stack has room for pushing the object reference.
z_lgf(top, Address(Z_thread, ls_top_offset));
@@ -6140,8 +6178,8 @@ void MacroAssembler::lightweight_unlock(Register obj, Register temp1, Register t
bind(unlocked);
}
-void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
- assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+ assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
NearLabel inflated;
@@ -6150,6 +6188,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// Finish fast lock unsuccessfully. MUST branch to with flag == EQ
NearLabel slow_path;
+ if (UseObjectMonitorTable) {
+ // Clear cache in case fast locking succeeds.
+ z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
+ }
+
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp1, obj);
z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
@@ -6214,33 +6257,77 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
{ // Handle inflated monitor.
bind(inflated);
+ const Register tmp1_monitor = tmp1;
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register tagged_monitor = mark;
- const Register zero = tmp2;
-
- // Try to CAS m->owner from null to current thread.
- // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
- // Otherwise, register zero is filled with the current owner.
- z_lghi(zero, 0);
- z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), tagged_monitor);
- z_bre(locked);
-
- // Check if recursive.
- z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
- z_brne(slow_path);
-
- // Recursive
- z_agsi(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
- z_cgr(zero, zero);
- // z_bru(locked);
- // Uncomment above line in the future, for now jump address is right next to us.
+ assert(tmp1_monitor == mark, "should be the same here");
} else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- z_ltgr(obj, obj);
+ NearLabel monitor_found;
+
+ // load cache address
+ z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));
+
+ const int num_unrolled = 2;
+ for (int i = 0; i < num_unrolled; i++) {
+ z_cg(obj, Address(tmp1));
+ z_bre(monitor_found);
+ add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+ }
+
+ NearLabel loop;
+ // Search for obj in cache
+
+ bind(loop);
+
+ // check for match.
+ z_cg(obj, Address(tmp1));
+ z_bre(monitor_found);
+
+ // search until null encountered, guaranteed _null_sentinel at end.
+ add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+ z_cghsi(0, tmp1, 0);
+ z_brne(loop); // if not EQ to 0, go for another loop
+
+ // we reached to the end, cache miss
+ z_ltgr(obj, obj); // set CC to NE
z_bru(slow_path);
+
+ // cache hit
+ bind(monitor_found);
+ z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
}
+ NearLabel monitor_locked;
+ // lock the monitor
+
+ // mark contains the tagged ObjectMonitor*.
+ const Register tagged_monitor = mark;
+ const Register zero = tmp2;
+
+ const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value));
+ const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
+ const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
+
+
+ // Try to CAS m->owner from null to current thread.
+ // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+ // Otherwise, register zero is filled with the current owner.
+ z_lghi(zero, 0);
+ z_csg(zero, Z_thread, owner_address);
+ z_bre(monitor_locked);
+
+ // Check if recursive.
+ z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
+ z_brne(slow_path);
+
+ // Recursive
+ z_agsi(recursions_address, 1ll);
+
+ bind(monitor_locked);
+ if (UseObjectMonitorTable) {
+ // Cache the monitor for unlock
+ z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+ }
+ // set the CC now
+ z_cgr(obj, obj);
}
BLOCK_COMMENT("} handle_inflated_monitor_lightweight_locking");
@@ -6265,11 +6352,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// C2 uses the value of flag (NE vs EQ) to determine the continuation.
}
-void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
- assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+ assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
- NearLabel inflated, inflated_load_monitor;
+ NearLabel inflated, inflated_load_mark;
// Finish fast unlock successfully. MUST reach to with flag == EQ.
NearLabel unlocked;
// Finish fast unlock unsuccessfully. MUST branch to with flag == NE.
@@ -6289,7 +6376,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
z_aghi(top, -oopSize);
z_cg(obj, Address(Z_thread, top));
- branch_optimized(bcondNotEqual, inflated_load_monitor);
+ branch_optimized(bcondNotEqual, inflated_load_mark);
// Pop lock-stack.
#ifdef ASSERT
@@ -6310,6 +6397,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
// Not recursive
// Check for monitor (0b10).
+ // Because we got here by popping (meaning we pushed in locked)
+ // there will be no monitor in the box. So we need to push back the obj
+ // so that the runtime can fix any potential anonymous owner.
z_lg(mark, Address(obj, mark_offset));
z_tmll(mark, markWord::monitor_value);
if (!UseObjectMonitorTable) {
@@ -6348,7 +6438,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
{ // Handle inflated monitor.
- bind(inflated_load_monitor);
+ bind(inflated_load_mark);
z_lg(mark, Address(obj, mark_offset));
@@ -6373,49 +6463,77 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
bind(check_done);
#endif // ASSERT
+ const Register tmp1_monitor = tmp1;
+
if (!UseObjectMonitorTable) {
- // mark contains the tagged ObjectMonitor*.
- const Register monitor = mark;
+ assert(tmp1_monitor == mark, "should be the same here");
+ } else {
+ // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
+ z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+ // null check with ZF == 0, no valid pointer below alignof(ObjectMonitor*)
+ z_cghi(tmp1_monitor, alignof(ObjectMonitor*));
- NearLabel not_recursive;
- const Register recursions = tmp2;
+ z_brl(slow_path);
+ }
- // Check if recursive.
- load_and_test_long(recursions, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- z_bre(not_recursive); // if 0 then jump, it's not recursive locking
+ // mark contains the tagged ObjectMonitor*.
+ const Register monitor = mark;
- // Recursive unlock
- z_agsi(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll);
- z_cgr(monitor, monitor); // set the CC to EQUAL
- z_bru(unlocked);
+ const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value));
+ const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
+ const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+ const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
+ const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
+ const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
- bind(not_recursive);
+ NearLabel not_recursive;
+ const Register recursions = tmp2;
- NearLabel not_ok;
- // Check if the entry lists are empty.
- load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- z_brne(not_ok);
- load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- z_brne(not_ok);
+ // Check if recursive.
+ load_and_test_long(recursions, recursions_address);
+ z_bre(not_recursive); // if 0 then jump, it's not recursive locking
- z_release();
- z_stg(tmp2 /*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
+ // Recursive unlock
+ z_agsi(recursions_address, -1ll);
+ z_cgr(monitor, monitor); // set the CC to EQUAL
+ z_bru(unlocked);
- z_bru(unlocked); // CC = EQ here
+ bind(not_recursive);
- bind(not_ok);
+ NearLabel check_succ, set_eq_unlocked;
- // The owner may be anonymous, and we removed the last obj entry in
- // the lock-stack. This loses the information about the owner.
- // Write the thread to the owner field so the runtime knows the owner.
- z_stg(Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
- z_bru(slow_path); // CC = NE here
- } else {
- // OMCache lookup not supported yet. Take the slowpath.
- // Set flag to NE
- z_ltgr(obj, obj);
- z_bru(slow_path);
+ // Set owner to null.
+ // Release to satisfy the JMM
+ z_release();
+ z_lghi(tmp2, 0);
+ z_stg(tmp2 /*=0*/, owner_address);
+ // We need a full fence after clearing owner to avoid stranding.
+ z_fence();
+
+ // Check if the entry lists are empty.
+ load_and_test_long(tmp2, EntryList_address);
+ z_brne(check_succ);
+ load_and_test_long(tmp2, cxq_address);
+ z_bre(unlocked); // If so we are done.
+
+ bind(check_succ);
+
+ // Check if there is a successor.
+ load_and_test_long(tmp2, succ_address);
+ z_brne(set_eq_unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ if (!UseObjectMonitorTable) {
+ z_xilf(monitor, markWord::monitor_value);
}
+ z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+ z_ltgr(obj, obj); // Set flag = NE
+ z_bru(slow_path);
+
+ bind(set_eq_unlocked);
+ z_cr(tmp2, tmp2); // Set flag = EQ
}
bind(unlocked);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index 90210eb28c3ad..5d3a4c2994091 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -752,12 +752,13 @@ class MacroAssembler: public Assembler {
void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2);
void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2);
- void lightweight_lock(Register obj, Register tmp1, Register tmp2, Label& slow);
+ void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Label& slow);
void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Label& slow);
- void compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2);
- void compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2);
+ void compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
+ void compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
void resolve_jobject(Register value, Register tmp1, Register tmp2);
+ void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
// Support for last Java frame (but use call_VM instead where possible).
private:
@@ -819,7 +820,6 @@ class MacroAssembler: public Assembler {
void compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybenull);
// Access heap oop, handle encoding and GC barriers.
- private:
void access_store_at(BasicType type, DecoratorSet decorators,
const Address& addr, Register val,
Register tmp1, Register tmp2, Register tmp3);
diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp
index 6c6cae3c58fc3..d8b1ae68f6f50 100644
--- a/src/hotspot/cpu/s390/matcher_s390.hpp
+++ b/src/hotspot/cpu/s390/matcher_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, 2022 SAP SE. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -63,11 +63,16 @@
return true;
}
- // Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
- static int long_cmove_cost() { return ConditionalMoveLimit; }
+ // Use conditional move (CMOVL)
+ static int long_cmove_cost() {
+ // z196/z11 or later hardware support conditional moves
+ return VM_Version::has_LoadStoreConditional() ? 0 : ConditionalMoveLimit;
+ }
- // Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
- static int float_cmove_cost() { return ConditionalMoveLimit; }
+ static int float_cmove_cost() {
+ // z196/z11 or later hardware support conditional moves
+ return VM_Version::has_LoadStoreConditional() ? 0 : ConditionalMoveLimit;
+ }
// Set this as clone_shift_expressions.
static bool narrow_oop_use_complex_address() {
diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp
index 931e899257e92..18af232e56970 100644
--- a/src/hotspot/cpu/s390/register_s390.hpp
+++ b/src/hotspot/cpu/s390/register_s390.hpp
@@ -448,4 +448,12 @@ constexpr Register Z_R0_scratch = Z_R0;
constexpr Register Z_R1_scratch = Z_R1;
constexpr FloatRegister Z_fscratch_1 = Z_F1;
typedef AbstractRegSet<Register> RegSet;
+
+template <>
inline Register AbstractRegSet<Register>::first() {
+ if (_bitset == 0) { return noreg; }
+ return as_Register(count_trailing_zeros(_bitset));
+}
+
#endif // CPU_S390_REGISTER_S390_HPP
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 4de1a4e7b7f35..8b897033aa55d 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1477,10 +1477,6 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return nullptr;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return nullptr;
-}
-
// Vector calling convention not yet implemented.
bool Matcher::supports_vector_calling_convention(void) {
return false;
@@ -1644,6 +1640,10 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
@@ -3913,6 +3913,7 @@ instruct loadL_unaligned(iRegL dst, memory mem) %{
// Load Pointer
instruct loadP(iRegP dst, memory mem) %{
match(Set dst (LoadP mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LG $dst,$mem\t # ptr" %}
@@ -3924,6 +3925,7 @@ instruct loadP(iRegP dst, memory mem) %{
// LoadP + CastP2L
instruct castP2X_loadP(iRegL dst, memory mem) %{
match(Set dst (CastP2X (LoadP mem)));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LG $dst,$mem\t # ptr + p2x" %}
@@ -4220,28 +4222,6 @@ instruct storeB(memory mem, iRegI src) %{
ins_pipe(pipe_class_dummy);
%}
-instruct storeCM(memory mem, immI_0 src) %{
- match(Set mem (StoreCM mem src));
- ins_cost(MEMORY_REF_COST);
- // TODO: s390 port size(VARIABLE_SIZE);
- format %{ "STC(Y) $src,$mem\t # CMS card-mark byte (must be 0!)" %}
- ins_encode %{
- guarantee($mem$$index$$Register != Z_R0, "content will not be used.");
- if ($mem$$index$$Register != noreg) {
- // Can't use clear_mem --> load const zero and store character.
- __ load_const_optimized(Z_R0_scratch, (long)0);
- if (Immediate::is_uimm12($mem$$disp)) {
- __ z_stc(Z_R0_scratch, $mem$$Address);
- } else {
- __ z_stcy(Z_R0_scratch, $mem$$Address);
- }
- } else {
- __ clear_mem(Address($mem$$Address), 1);
- }
- %}
- ins_pipe(pipe_class_dummy);
-%}
-
// CHAR/SHORT
// Store Char/Short
@@ -4286,6 +4266,7 @@ instruct storeL(memory mem, iRegL src) %{
// Store Pointer
instruct storeP(memory dst, memoryRegP src) %{
match(Set dst (StoreP dst src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "STG $src,$dst\t # ptr" %}
@@ -4388,6 +4369,7 @@ instruct memInitL(memoryRS mem, immL16 src) %{
// Move Immediate to 8-byte memory.
instruct memInitP(memoryRS mem, immP16 src) %{
match(Set mem (StoreP mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(6);
format %{ "MVGHI $mem,$src\t # direct mem init 8" %}
@@ -4417,6 +4399,7 @@ instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
// Load narrow oop
instruct loadN(iRegN dst, memory mem) %{
match(Set dst (LoadN mem));
+ predicate(n->as_Load()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "LoadN $dst,$mem\t # (cOop)" %}
@@ -4480,7 +4463,7 @@ instruct loadConNKlass(iRegN dst, immNKlass src) %{
instruct decodeLoadN(iRegP dst, memory mem) %{
match(Set dst (DecodeN (LoadN mem)));
- predicate(false && (CompressedOops::base()==nullptr)&&(CompressedOops::shift()==0));
+ predicate(false && (CompressedOops::base()==nullptr) && (CompressedOops::shift()==0));
ins_cost(MEMORY_REF_COST);
size(Z_DISP3_SIZE);
format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %}
@@ -4628,7 +4611,7 @@ instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
match(Set dst (EncodeP src));
effect(KILL cr);
predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
- (CompressedOops::base() == 0 ||
+ (CompressedOops::base() == nullptr ||
CompressedOops::base_disjoint() ||
!ExpandLoadingBaseEncode));
ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
@@ -4651,7 +4634,7 @@ instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
match(Set dst (EncodeP src));
effect(KILL cr);
predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
- (CompressedOops::base() == 0 ||
+ (CompressedOops::base() == nullptr ||
CompressedOops::base_disjoint() ||
!ExpandLoadingBaseEncode_NN));
ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
@@ -4735,6 +4718,7 @@ instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
// Store Compressed Pointer
instruct storeN(memory mem, iRegN_P2N src) %{
match(Set mem (StoreN mem src));
+ predicate(n->as_Store()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(Z_DISP_SIZE);
format %{ "ST $src,$mem\t # (cOop)" %}
@@ -5146,6 +5130,7 @@ instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRe
instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{
match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(USE mem_ptr, USE_KILL oldval, KILL cr);
size(18);
format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %}
@@ -5156,6 +5141,7 @@ instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval,
instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{
match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(USE mem_ptr, USE_KILL oldval, KILL cr);
size(16);
format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %}
@@ -5443,6 +5429,7 @@ instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{
%}
instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set dst (GetAndSetN mem dst));
effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %}
@@ -5452,6 +5439,7 @@ instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{
instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{
match(Set dst (GetAndSetP mem dst));
+ predicate(n->as_LoadStore()->barrier_data() == 0);
effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %}
ins_encode(z_enc_SwapL(mem, dst, tmp));
@@ -5926,7 +5914,7 @@ instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con)
instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
match(Set mem (StoreP mem (AddP (LoadP mem) src)));
effect(KILL cr);
- predicate(VM_Version::has_MemWithImmALUOps());
+ predicate(VM_Version::has_MemWithImmALUOps() && n->as_LoadStore()->barrier_data() == 0);
ins_cost(MEMORY_REF_COST);
size(6);
format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %}
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 9954c78ce1efa..468610b588e91 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -43,6 +43,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
@@ -1713,7 +1714,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for locking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_lock kills r_temp_1, r_temp_2.
- __ compiler_fast_lock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+ __ compiler_fast_lock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_lock kills r_temp_1, r_temp_2.
__ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
@@ -1917,7 +1918,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for unlocking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_unlock kills r_tmp1, r_tmp2.
- __ compiler_fast_unlock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+ __ compiler_fast_unlock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_unlock kills r_tmp1, r_tmp2.
__ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
index d878731cca51f..dd9ed4c95462b 100644
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
@@ -3053,6 +3053,29 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // load Method* target of MethodHandle
+ // Z_ARG1 = jobject receiver
+ // Z_method = Method* result
+ address generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(Z_ARG1, Z_tmp_1, Z_tmp_2);
+ // Load target method from receiver
+ __ load_heap_oop(Z_method, Address(Z_ARG1, java_lang_invoke_MethodHandle::form_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_LambdaForm::vmentry_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_MemberName::method_offset()),
+ noreg, noreg, IS_NOT_NULL);
+ __ z_lg(Z_method, Address(Z_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset()));
+ __ z_stg(Z_method, Address(Z_thread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
+
+ __ z_br(Z_R14);
+
+ return start;
+ }
+
void generate_initial_stubs() {
// Generates all stubs and initializes the entry points.
@@ -3110,6 +3133,7 @@ class StubGenerator: public StubCodeGenerator {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void generate_compiler_stubs() {
diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
index c16e444904563..2c2e8ed9e3b3a 100644
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
@@ -1224,6 +1224,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_sin : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); break;
case Interpreter::java_lang_math_cos : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); break;
case Interpreter::java_lang_math_tan : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); break;
+ case Interpreter::java_lang_math_tanh : /* run interpreted */ break;
case Interpreter::java_lang_math_abs : /* run interpreted */ break;
case Interpreter::java_lang_math_sqrt : /* runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); not available */ break;
case Interpreter::java_lang_math_log : runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); break;
diff --git a/src/hotspot/cpu/s390/upcallLinker_s390.cpp b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
index 734b4e89c7cb2..8baad40a519a4 100644
--- a/src/hotspot/cpu/s390/upcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/upcallLinker_s390.cpp
@@ -23,6 +23,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "classfile/javaClasses.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/upcallLinker.hpp"
@@ -116,7 +117,7 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
static const int upcall_stub_code_base_size = 1024;
static const int upcall_stub_size_per_arg = 16; // arg save & restore + move
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -206,7 +207,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("on_entry {");
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::on_entry));
__ z_aghik(Z_ARG1, Z_SP, frame_data_offset);
- __ load_const_optimized(Z_ARG2, (intptr_t)receiver);
__ call(call_target_address);
__ z_lgr(Z_thread, Z_RET);
__ block_comment("} on_entry");
@@ -216,12 +216,11 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, frame::z_jit_out_preserve_size);
__ block_comment("} argument_shuffle");
- __ block_comment("receiver {");
- __ get_vm_result(Z_ARG1);
- __ block_comment("} receiver");
-
- __ load_const_optimized(Z_method, (intptr_t)entry);
- __ z_stg(Z_method, Address(Z_thread, in_bytes(JavaThread::callee_target_offset())));
+ __ block_comment("load_target {");
+ __ load_const_optimized(Z_ARG1, (intptr_t)receiver);
+ __ load_const_optimized(call_target_address, StubRoutines::upcall_stub_load_target());
+ __ call(call_target_address); // load target Method* into Z_method
+ __ block_comment("} load_target");
__ z_lg(call_target_address, Address(Z_method, in_bytes(Method::from_compiled_offset())));
__ call(call_target_address);
@@ -274,7 +273,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index 352cfc0018848..c1679cd111f5a 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -1919,6 +1919,11 @@ void Assembler::cmpb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::cmpb(Register dst, int imm8) {
+ prefix(dst);
+ emit_arith_b(0x80, 0xF8, dst, imm8);
+}
+
void Assembler::cmpl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -8048,6 +8053,14 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) {
emit_int16(0x54, (0xC0 | encode));
}
+void Assembler::andnpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted();
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x55, (0xC0 | encode));
+}
+
void Assembler::andps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -9659,6 +9672,15 @@ void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01);
}
+void Assembler::evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len) {
+ assert(VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x38, (0xC0 | encode), imm8 & 0x03);
+}
+
// vinsertf forms
@@ -11723,6 +11745,21 @@ void Assembler::vbroadcastf128(XMMRegister dst, Address src, int vector_len) {
emit_operand(dst, src, 0);
}
+void Assembler::evbroadcastf64x2(XMMRegister dst, Address src, int vector_len) {
+ assert(VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ assert(dst != xnoreg, "sanity");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x1A);
+ emit_operand(dst, src, 0);
+}
+
+
// gpr source broadcast forms
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 7f4790e05665e..eace7bb9cc169 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1239,6 +1239,7 @@ class Assembler : public AbstractAssembler {
void cmpb(Address dst, int imm8);
void cmpb(Address dst, Register reg);
void cmpb(Register reg, Address dst);
+ void cmpb(Register reg, int imm8);
void cmpl(Address dst, int32_t imm32);
void cmpl(Register dst, int32_t imm32);
@@ -2631,6 +2632,7 @@ class Assembler : public AbstractAssembler {
// Bitwise Logical AND of Packed Floating-Point Values
void andpd(XMMRegister dst, XMMRegister src);
+ void andnpd(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2985,6 +2987,7 @@ class Assembler : public AbstractAssembler {
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+ void evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len);
// vinsertf forms
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
@@ -3034,6 +3037,7 @@ class Assembler : public AbstractAssembler {
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
+ void evbroadcastf64x2(XMMRegister dst, Address src, int vector_len);
// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index c3444d5a5abce..6d9812c11ae6e 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -1578,6 +1578,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
if (op->init_check()) {
add_debug_info_for_null_check_here(op->stub()->info());
+ // init_state needs acquire, but x86 is TSO, and so we are already good.
__ cmpb(Address(op->klass()->as_register(),
InstanceKlass::init_state_offset()),
InstanceKlass::fully_initialized);
diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
index ff237d16d2216..36e2021138f2e 100644
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -807,7 +807,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
- x->id() == vmIntrinsics::_dlog10) {
+ x->id() == vmIntrinsics::_dlog10
+#ifdef _LP64
+ || x->id() == vmIntrinsics::_dtanh
+#endif
+ ) {
do_LibmIntrinsic(x);
return;
}
@@ -989,11 +993,17 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != nullptr) {
- __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
+ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
+ case vmIntrinsics::_dtanh:
+ assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found");
+ if (StubRoutines::dtanh() != nullptr) {
+ __ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args());
+ }
+ break;
default: ShouldNotReachHere();
}
#endif // _LP64
diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
index 1990488d8a0df..44f897529e7ce 100644
--- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
@@ -80,8 +80,6 @@ int C2FastUnlockLightweightStub::max_size() const {
void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
assert(_t == rax, "must be");
- Label restore_held_monitor_count_and_slow_path;
-
{ // Restore lock-stack and handle the unlock in runtime.
__ bind(_push_and_slow_path);
@@ -91,61 +89,9 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
__ movptr(Address(_thread, _t), _obj);
#endif
__ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize);
- }
-
- { // Restore held monitor count and slow path.
-
- __ bind(restore_held_monitor_count_and_slow_path);
- __ bind(_slow_path);
- // Restore held monitor count.
- __ increment(Address(_thread, JavaThread::held_monitor_count_offset()));
- // increment will always result in ZF = 0 (no overflows).
+ // addl will always result in ZF = 0 (no overflows).
__ jmp(slow_path_continuation());
}
-
- { // Handle monitor medium path.
-
- __ bind(_check_successor);
-
- Label fix_zf_and_unlocked;
- const Register monitor = _mark;
-
-#ifndef _LP64
- __ jmpb(restore_held_monitor_count_and_slow_path);
-#else // _LP64
- const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
- const Address succ_address(monitor, ObjectMonitor::succ_offset() - monitor_tag);
- const Address owner_address(monitor, ObjectMonitor::owner_offset() - monitor_tag);
-
- // successor null check.
- __ cmpptr(succ_address, NULL_WORD);
- __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path);
-
- // Release lock.
- __ movptr(owner_address, NULL_WORD);
-
- // Fence.
- // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
- __ lock(); __ addl(Address(rsp, 0), 0);
-
- // Recheck successor.
- __ cmpptr(succ_address, NULL_WORD);
- // Observed a successor after the release -> fence we have handed off the monitor
- __ jccb(Assembler::notEqual, fix_zf_and_unlocked);
-
- // Try to relock, if it fails the monitor has been handed over
- // TODO: Caveat, this may fail due to deflation, which does
- // not handle the monitor handoff. Currently only works
- // due to the responsible thread.
- __ xorptr(rax, rax);
- __ lock(); __ cmpxchgptr(_thread, owner_address);
- __ jccb (Assembler::equal, restore_held_monitor_count_and_slow_path);
-#endif
-
- __ bind(fix_zf_and_unlocked);
- __ xorl(rax, rax);
- __ jmp(unlocked_continuation());
- }
}
#undef __
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index c2801a791cb5a..aba5344b7e434 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -459,87 +459,43 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
// IA32's memory-model is SPO, so STs are ordered with respect to
// each other and there's no need for an explicit barrier (fence).
// See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-#ifndef _LP64
- // Note that we could employ various encoding schemes to reduce
- // the number of loads below (currently 4) to just 2 or 3.
- // Refer to the comments in synchronizer.cpp.
- // In practice the chain of fetches doesn't seem to impact performance, however.
- xorptr(boxReg, boxReg);
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
-#else // _LP64
- // It's inflated
- Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath;
+ Label LSuccess, LNotRecursive;
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
jccb(Assembler::equal, LNotRecursive);
// Recursive inflated unlock
- decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jmpb(LSuccess);
bind(LNotRecursive);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- jccb (Assembler::notZero, CheckSucc);
- // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
+
+ // Set owner to null.
+ // Release to satisfy the JMM
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
- // Try to avoid passing control into the slow_path ...
- bind (CheckSucc);
+ // Check if the entry lists are empty.
+ movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ jccb(Assembler::zero, LSuccess); // If so we are done.
- // The following optional optimization can be elided if necessary
- // Effectively: if (succ == null) goto slow path
- // The code reduces the window for a race, however,
- // and thus benefits performance.
+ // Check if there is a successor.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
- jccb (Assembler::zero, LGoSlowPath);
-
- xorptr(boxReg, boxReg);
- // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+ jccb(Assembler::notZero, LSuccess); // If so we are done.
- // Memory barrier/fence
- // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
- // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
- // This is faster on Nehalem and AMD Shanghai/Barcelona.
- // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
- // We might also restructure (ST Owner=0;barrier;LD _Succ) to
- // (mov box,0; xchgq box, &m->Owner; LD _succ) .
- lock(); addl(Address(rsp, 0), 0);
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ andptr(tmpReg, ~(int32_t)markWord::monitor_value);
+#ifndef _LP64
+ get_thread(boxReg);
+ movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#else // _LP64
+ movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#endif
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
- jccb (Assembler::notZero, LSuccess);
-
- // Rare inopportune interleaving - race.
- // The successor vanished in the small window above.
- // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
- // We need to ensure progress and succession.
- // Try to reacquire the lock.
- // If that fails then the new owner is responsible for succession and this
- // thread needs to take no further action and can exit via the fast path (success).
- // If the re-acquire succeeds then pass control into the slow path.
- // As implemented, this latter mode is horrible because we generated more
- // coherence traffic on the lock *and* artificially extended the critical section
- // length while by virtue of passing control into the slow path.
-
- // box is really RAX -- the following CMPXCHG depends on that binding
- // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
- lock();
- cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- // There's no successor so we tried to regrab the lock.
- // If that didn't work, then another thread grabbed the
- // lock so we're done (and exit was a success).
- jccb (Assembler::notEqual, LSuccess);
- // Intentional fall-through into slow path
-
- bind (LGoSlowPath);
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
@@ -547,7 +503,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
-#endif
if (LockingMode == LM_LEGACY) {
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
@@ -744,10 +699,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
// Handle inflated monitor.
Label inflated, inflated_check_lock_stack;
// Finish fast unlock successfully. MUST jump with ZF == 1
- Label unlocked;
-
- // Assume success.
- decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+ Label unlocked, slow_path;
const Register mark = t;
const Register monitor = t;
@@ -763,8 +715,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
}
Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
- Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
- Label& slow_path = stub == nullptr ? dummy : stub->slow_path();
{ // Lightweight Unlock
@@ -839,6 +789,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+ const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
@@ -846,27 +797,42 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
// Check if recursive.
cmpptr(recursions_address, 0);
- jccb(Assembler::notEqual, recursive);
+ jccb(Assembler::notZero, recursive);
+
+ // Set owner to null.
+ // Release to satisfy the JMM
+ movptr(owner_address, NULL_WORD);
+ // We need a full fence after clearing owner to avoid stranding.
+ // StoreLoad achieves this.
+ membar(StoreLoad);
// Check if the entry lists are empty.
movptr(reg_rax, cxq_address);
orptr(reg_rax, EntryList_address);
- jcc(Assembler::notZero, check_successor);
+ jccb(Assembler::zero, unlocked); // If so we are done.
- // Release lock.
- movptr(owner_address, NULL_WORD);
- jmpb(unlocked);
+ // Check if there is a successor.
+ cmpptr(succ_address, NULL_WORD);
+ jccb(Assembler::notZero, unlocked); // If so we are done.
+
+ // Save the monitor pointer in the current thread, so we can try to
+ // reacquire the lock in SharedRuntime::monitor_exit_helper().
+ if (!UseObjectMonitorTable) {
+ andptr(monitor, ~(int32_t)markWord::monitor_value);
+ }
+ movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), monitor);
+
+ orl(t, 1); // Fast Unlock ZF = 0
+ jmpb(slow_path);
// Recursive unlock.
bind(recursive);
decrement(recursions_address);
- xorl(t, t);
}
bind(unlocked);
- if (stub != nullptr) {
- bind(stub->unlocked_continuation());
- }
+ decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+ xorl(t, t); // Fast Unlock ZF = 1
#ifdef ASSERT
// Check that unlocked label is reached with ZF set.
@@ -875,6 +841,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
stop("Fast Unlock ZF != 1");
#endif
+ bind(slow_path);
if (stub != nullptr) {
bind(stub->slow_path_continuation());
}
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index b52be627776b8..b6be4012519a0 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -38,7 +38,10 @@
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
-#endif
+#endif // COMPILER1
+#ifdef COMPILER2
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#endif // COMPILER2
#define __ masm->
@@ -160,6 +163,56 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
}
}
+static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
+ const Register thread, const Register value, const Register temp) {
+ // This code assumes that buffer index is pointer sized.
+ STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
+ // Can we store a value in the given thread's buffer?
+ // (The index field is typed as size_t.)
+ __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address)
+ __ testptr(temp, temp); // index == 0?
+ __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer)
+ // The buffer is not full, store value into it.
+ __ subptr(temp, wordSize); // temp := next index
+ __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index
+ __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index
+ __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value
+}
+
+static void generate_pre_barrier_fast_path(MacroAssembler* masm,
+ const Register thread) {
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+ // Is marking active?
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ cmpl(in_progress, 0);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ cmpb(in_progress, 0);
+ }
+}
+
+static void generate_pre_barrier_slow_path(MacroAssembler* masm,
+ const Register obj,
+ const Register pre_val,
+ const Register thread,
+ const Register tmp,
+ Label& done,
+ Label& runtime) {
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+ // Is the previous value null?
+ __ cmpptr(pre_val, NULL_WORD);
+ __ jcc(Assembler::equal, done);
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::satb_mark_queue_index_offset(),
+ G1ThreadLocalData::satb_mark_queue_buffer_offset(),
+ runtime,
+ thread, pre_val, tmp);
+ __ jmp(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -185,43 +238,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
assert(pre_val != rax, "check this code");
}
- Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
- Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
-
- // Is marking active?
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
- __ cmpl(in_progress, 0);
- } else {
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
- __ cmpb(in_progress, 0);
- }
- __ jcc(Assembler::equal, done);
-
- // Do we need to load the previous value?
- if (obj != noreg) {
- __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
- }
-
- // Is the previous value null?
- __ cmpptr(pre_val, NULL_WORD);
+ generate_pre_barrier_fast_path(masm, thread);
+ // If marking is not active (*(mark queue active address) == 0), jump to done
__ jcc(Assembler::equal, done);
-
- // Can we store original value in the thread's buffer?
- // Is index == 0?
- // (The index field is typed as size_t.)
-
- __ movptr(tmp, index); // tmp := *index_adr
- __ cmpptr(tmp, 0); // tmp == 0?
- __ jcc(Assembler::equal, runtime); // If yes, goto runtime
-
- __ subptr(tmp, wordSize); // tmp := tmp - wordSize
- __ movptr(index, tmp); // *index_adr := tmp
- __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
-
- // Record the previous value
- __ movptr(Address(tmp, 0), pre_val);
- __ jmp(done);
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);
__ bind(runtime);
@@ -263,6 +283,54 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ bind(done);
}
+static void generate_post_barrier_fast_path(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register tmp,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+ CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+ // Does store cross heap regions?
+ __ movptr(tmp, store_addr); // tmp := store address
+ __ xorptr(tmp, new_val); // tmp := store address ^ new value
+ __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
+ __ jcc(Assembler::equal, done);
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ cmpptr(new_val, NULL_WORD); // new value == null?
+ __ jcc(Assembler::equal, done);
+ }
+ // Storing region crossing non-null, is card young?
+ __ movptr(tmp, store_addr); // tmp := store address
+ __ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base
+ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+ // a valid address and therefore is not properly handled by the relocation code.
+ __ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address
+ __ addptr(tmp, tmp2); // tmp := card address
+ __ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
+}
+
+static void generate_post_barrier_slow_path(MacroAssembler* masm,
+ const Register thread,
+ const Register tmp,
+ const Register tmp2,
+ Label& done,
+ Label& runtime) {
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar
+ __ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val?
+ __ jcc(Assembler::equal, done);
+ // Storing a region crossing, non-null oop, card is clean.
+ // Dirty card and log.
+ __ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
+ generate_queue_insertion(masm,
+ G1ThreadLocalData::dirty_card_queue_index_offset(),
+ G1ThreadLocalData::dirty_card_queue_buffer_offset(),
+ runtime,
+ thread, tmp, tmp2);
+ __ jmp(done);
+}
+
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
@@ -273,74 +341,125 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
assert(thread == r15_thread, "must be");
#endif // _LP64
- Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
- Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
-
- CardTableBarrierSet* ct =
- barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
-
Label done;
Label runtime;
- // Does store cross heap regions?
-
- __ movptr(tmp, store_addr);
- __ xorptr(tmp, new_val);
- __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes);
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */);
+ // If card is young, jump to done
__ jcc(Assembler::equal, done);
+ generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime);
- // crosses regions, storing null?
+ __ bind(runtime);
+ // save the live input values
+ RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
+ __ push_set(saved);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread);
+ __ pop_set(saved);
- __ cmpptr(new_val, NULL_WORD);
- __ jcc(Assembler::equal, done);
+ __ bind(done);
+}
- // storing region crossing non-null, is card already dirty?
+#if defined(COMPILER2)
- const Register card_addr = tmp;
- const Register cardtable = tmp2;
+static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
+#ifdef _LP64
+ SaveLiveRegisters save_registers(masm, stub);
+ if (c_rarg0 != arg) {
+ __ mov(c_rarg0, arg);
+ }
+ __ mov(c_rarg1, r15_thread);
+ // rax is a caller-saved, non-argument-passing register, so it does not
+ // interfere with c_rarg0 or c_rarg1. If it contained any live value before
+ // entering this stub, it is saved at this point, and restored after the
+ // call. If it did not contain any live value, it is free to be used. In
+ // either case, it is safe to use it here as a call scratch register.
+ __ call(RuntimeAddress(runtime_path), rax);
+#else
+ Unimplemented();
+#endif // _LP64
+}
- __ movptr(card_addr, store_addr);
- __ shrptr(card_addr, CardTable::card_shift());
- // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
- // a valid address and therefore is not properly handled by the relocation code.
- __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
- __ addptr(card_addr, cardtable);
+void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ G1PreBarrierStubC2* stub) {
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+ assert(pre_val != noreg, "check this code");
+ if (obj != noreg) {
+ assert_different_registers(obj, pre_val, tmp);
+ }
- __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
- __ jcc(Assembler::equal, done);
+ stub->initialize_registers(obj, pre_val, thread, tmp);
- __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
- __ cmpb(Address(card_addr, 0), G1CardTable::dirty_card_val());
- __ jcc(Assembler::equal, done);
+ generate_pre_barrier_fast_path(masm, thread);
+ // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
+ __ jcc(Assembler::notEqual, *stub->entry());
+ __ bind(*stub->continuation());
+}
- // storing a region crossing, non-null oop, card is clean.
- // dirty card and log.
+void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register obj = stub->obj();
+ Register pre_val = stub->pre_val();
+ Register thread = stub->thread();
+ Register tmp = stub->tmp1();
+ assert(stub->tmp2() == noreg, "not needed in this platform");
- __ movb(Address(card_addr, 0), G1CardTable::dirty_card_val());
+ __ bind(*stub->entry());
+ generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
- // The code below assumes that buffer index is pointer sized.
- STATIC_ASSERT(in_bytes(G1DirtyCardQueue::byte_width_of_index()) == sizeof(intptr_t));
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
+ __ jmp(*stub->continuation());
+}
- __ movptr(tmp2, queue_index);
- __ testptr(tmp2, tmp2);
- __ jcc(Assembler::zero, runtime);
- __ subptr(tmp2, wordSize);
- __ movptr(queue_index, tmp2);
- __ addptr(tmp2, buffer);
- __ movptr(Address(tmp2, 0), card_addr);
- __ jmp(done);
+void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2,
+ G1PostBarrierStubC2* stub) {
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
- __ bind(runtime);
- // save the live input values
- RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread));
- __ push_set(saved);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
- __ pop_set(saved);
+ stub->initialize_registers(thread, tmp, tmp2);
- __ bind(done);
+ bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
+ generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null);
+ // If card is not young, jump to stub (slow path)
+ __ jcc(Assembler::notEqual, *stub->entry());
+
+ __ bind(*stub->continuation());
+}
+
+void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const {
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ Label runtime;
+ Register thread = stub->thread();
+ Register tmp = stub->tmp1(); // tmp holds the card address.
+ Register tmp2 = stub->tmp2();
+ assert(stub->tmp3() == noreg, "not needed in this platform");
+
+ __ bind(*stub->entry());
+ generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime);
+
+ __ bind(runtime);
+ generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
+ __ jmp(*stub->continuation());
}
+#endif // COMPILER2
+
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
bool in_heap = (decorators & IN_HEAP) != 0;
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
index a5695f5657a4a..4dbb1efd885ea 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp
@@ -32,6 +32,9 @@ class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
+class G1BarrierStubC2;
+class G1PreBarrierStubC2;
+class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@@ -65,6 +68,26 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1, Register tmp_thread);
+
+#ifdef COMPILER2
+ void g1_write_barrier_pre_c2(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp,
+ G1PreBarrierStubC2* c2_stub);
+ void generate_c2_pre_barrier_stub(MacroAssembler* masm,
+ G1PreBarrierStubC2* stub) const;
+ void g1_write_barrier_post_c2(MacroAssembler* masm,
+ Register store_addr,
+ Register new_val,
+ Register thread,
+ Register tmp,
+ Register tmp2,
+ G1PostBarrierStubC2* c2_stub);
+ void generate_c2_post_barrier_stub(MacroAssembler* masm,
+ G1PostBarrierStubC2* stub) const;
+#endif // COMPILER2
};
#endif // CPU_X86_GC_G1_G1BARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
new file mode 100644
index 0000000000000..8c1559f90f46d
--- /dev/null
+++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad
@@ -0,0 +1,371 @@
+//
+// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_x86.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+ const MachNode* node,
+ Register obj,
+ Register pre_val,
+ Register tmp,
+ RegSet preserve = RegSet(),
+ RegSet no_preserve = RegSet()) {
+ if (!G1PreBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+ for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+ stub->preserve(*reg);
+ }
+ for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+ stub->dont_preserve(*reg);
+ }
+ g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, r15_thread, tmp, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+ const MachNode* node,
+ Register store_addr,
+ Register new_val,
+ Register tmp1,
+ Register tmp2) {
+ if (!G1PostBarrierStubC2::needs_barrier(node)) {
+ return;
+ }
+ Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+ G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+ g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, r15_thread, tmp1, tmp2, stub);
+}
+
+%}
+
+instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreP mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movq $mem, $src\t# ptr" %}
+ ins_encode %{
+ // Materialize the store address internally (as opposed to defining 'mem' as
+ // an indirect memory operand) to reduce the overhead of LCM when processing
+ // large basic blocks with many stores. Such basic blocks arise, for
+ // instance, from static initializations of large String arrays.
+ // The same holds for g1StoreN and g1EncodePAndStoreN.
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movq(Address($tmp1$$Register, 0), $src$$Register);
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem src));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movl $mem, $src\t# ptr" %}
+ ins_encode %{
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movl(Address($tmp1$$Register, 0), $src$$Register);
+ if ((barrier_data() & G1C2BarrierPost) != 0) {
+ __ movl($tmp2$$Register, $src$$Register);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ decode_heap_oop($tmp2$$Register);
+ } else {
+ __ decode_heap_oop_not_null($tmp2$$Register);
+ }
+ }
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $tmp2$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Store()->barrier_data() != 0);
+ match(Set mem (StoreN mem (EncodeP src)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "encode_heap_oop $src\n\t"
+ "movl $mem, $src\t# ptr" %}
+ ins_encode %{
+ __ lea($tmp1$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ $tmp1$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($tmp1$$Register, $src$$Register) /* preserve */);
+ __ movq($tmp2$$Register, $src$$Register);
+ if ((barrier_data() & G1C2BarrierPostNotNull) == 0) {
+ __ encode_heap_oop($tmp2$$Register);
+ } else {
+ __ encode_heap_oop_not_null($tmp2$$Register);
+ }
+ __ movl(Address($tmp1$$Register, 0), $tmp2$$Register);
+ write_barrier_post(masm, this,
+ $tmp1$$Register /* store_addr */,
+ $src$$Register /* new_val */,
+ $tmp3$$Register /* tmp1 */,
+ $tmp2$$Register /* tmp2 */);
+ %}
+ ins_pipe(ialu_mem_reg);
+%}
+
+instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ // Pass $oldval to the pre-barrier (instead of loading from $mem), because
+ // $oldval is the only value that can be overwritten.
+ // The same holds for g1CompareAndSwapP.
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set oldval (CompareAndExchangeN mem (Binary oldval newval)));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0));
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem\n\t"
+ "sete $res\n\t"
+ "movzbl $res, $res" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $oldval$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0));
+ __ setb(Assembler::equal, $res$$Register);
+ __ movzbl($res$$Register, $res$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+ effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr);
+ format %{ "lock\n\t"
+ "cmpxchgq $newval, $mem\n\t"
+ "sete $res\n\t"
+ "movzbl $res, $res" %}
+ ins_encode %{
+ assert_different_registers($oldval$$Register, $mem$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */,
+ RegSet::of($res$$Register) /* no_preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ lock();
+ __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0));
+ __ setb(Assembler::equal, $res$$Register);
+ __ movzbl($res$$Register, $res$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set newval (GetAndSetP mem newval));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "xchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ __ movq($tmp1$$Register, $newval$$Register);
+ __ xchgq($newval$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0);
+ match(Set newval (GetAndSetN mem newval));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+ format %{ "xchgq $newval, $mem" %}
+ ins_encode %{
+ assert_different_registers($mem$$Register, $newval$$Register);
+ write_barrier_pre(masm, this,
+ $mem$$Register /* obj */,
+ $tmp2$$Register /* pre_val */,
+ $tmp3$$Register /* tmp */,
+ RegSet::of($mem$$Register, $newval$$Register) /* preserve */);
+ __ movl($tmp1$$Register, $newval$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ __ xchgl($newval$$Register, Address($mem$$Register, 0));
+ write_barrier_post(masm, this,
+ $mem$$Register /* store_addr */,
+ $tmp1$$Register /* new_val */,
+ $tmp2$$Register /* tmp1 */,
+ $tmp3$$Register /* tmp2 */);
+ %}
+ ins_pipe(pipe_cmpxchg);
+%}
+
+instruct g1LoadP(rRegP dst, memory mem, rRegP tmp, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadP mem));
+ effect(TEMP dst, TEMP tmp, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movq $dst, $mem\t# ptr" %}
+ ins_encode %{
+ __ movq($dst$$Register, $mem$$Address);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $dst$$Register /* pre_val */,
+ $tmp$$Register /* tmp */);
+ %}
+ ins_pipe(ialu_reg_mem); // XXX
+%}
+
+instruct g1LoadN(rRegN dst, memory mem, rRegP tmp1, rRegP tmp2, rFlagsReg cr)
+%{
+ predicate(UseG1GC && n->as_Load()->barrier_data() != 0);
+ match(Set dst (LoadN mem));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr);
+ ins_cost(125); // XXX
+ format %{ "movl $dst, $mem\t# compressed ptr" %}
+ ins_encode %{
+ __ movl($dst$$Register, $mem$$Address);
+ __ movl($tmp1$$Register, $dst$$Register);
+ __ decode_heap_oop($tmp1$$Register);
+ write_barrier_pre(masm, this,
+ noreg /* obj */,
+ $tmp1$$Register /* pre_val */,
+ $tmp2$$Register /* tmp */);
+ %}
+ ins_pipe(ialu_reg_mem); // XXX
+%}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 47078dff90738..a7682fe0c3879 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -163,12 +163,12 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
assert(dst == rsi, "expected");
assert(count == rdx, "expected");
if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
src, dst, count);
} else
#endif
{
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop),
src, dst, count);
}
@@ -296,9 +296,9 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
__ push(thread);
__ push(pre_val);
#endif
- __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
+ __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), 2);
} else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
}
NOT_LP64( __ pop(thread); )
@@ -925,7 +925,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
// load the pre-value
__ load_parameter(0, rcx);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), rcx, thread);
__ restore_live_registers(true);
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
index f5f0d6c884198..bc51a2b446848 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -636,7 +636,7 @@ void ZBarrierSetAssembler::copy_load_at(MacroAssembler* masm,
// Remove metadata bits so that the store side (vectorized or non-vectorized) can
// inject the store-good color with an or instruction.
- __ andq(dst, _zpointer_address_mask);
+ __ andq(dst, ZPointerAddressMask);
if ((decorators & ARRAYCOPY_CHECKCAST) != 0) {
// The checkcast arraycopy needs to be able to dereference the oops in order to perform a typechecks.
@@ -1260,6 +1260,8 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
__ call(RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
} else if (stub->is_atomic()) {
__ call(RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
+ } else if (stub->is_nokeepalive()) {
+ __ call(RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
} else {
__ call(RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
}
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
index 7c3716ba0da9f..5fbc7ea1be16e 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
@@ -64,7 +64,7 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
GrowableArrayCHeap _store_good_relocations;
public:
- static const int32_t _zpointer_address_mask = 0xFFFF0000;
+ static const int32_t ZPointerAddressMask = 0xFFFF0000;
ZBarrierSetAssembler();
diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
index 30220c0629e5e..f55ad70e8616e 100644
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
@@ -91,7 +91,8 @@ static void z_store_barrier(MacroAssembler* masm, const MachNode* node, Address
}
} else {
bool is_native = (node->barrier_data() & ZBarrierNative) != 0;
- ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic);
+ bool is_nokeepalive = (node->barrier_data() & ZBarrierNoKeepalive) != 0;
+ ZStoreBarrierStubC2* const stub = ZStoreBarrierStubC2::create(node, ref_addr, rnew_zaddress, rnew_zpointer, is_native, is_atomic, is_nokeepalive);
ZBarrierSetAssembler* bs_asm = ZBarrierSet::assembler();
bs_asm->store_barrier_fast(masm, ref_addr, rnew_zaddress, rnew_zpointer, true /* in_nmethod */, is_atomic, *stub->entry(), *stub->continuation());
}
@@ -141,7 +142,7 @@ instruct zLoadPNullCheck(rFlagsReg cr, memory op, immP0 zero)
ins_encode %{
// A null pointer will have all address bits 0. This mask sign extends
// all address bits, so we can test if the address is 0.
- __ testq($op$$Address, ZBarrierSetAssembler::_zpointer_address_mask);
+ __ testq($op$$Address, ZBarrierSetAssembler::ZPointerAddressMask);
%}
ins_pipe(ialu_cr_reg_imm);
%}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index d634c1e575799..018258a012e57 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -5084,7 +5084,8 @@ void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fa
L_slow_path = &L_fallthrough;
}
- // Fast path check: class is fully initialized
+ // Fast path check: class is fully initialized.
+ // init_state needs acquire, but x86 is TSO, and so we are already good.
cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
jcc(Assembler::equal, *L_fast_path);
@@ -5756,7 +5757,7 @@ void MacroAssembler::verify_heapbase(const char* msg) {
assert (Universe::heap() != nullptr, "java heap should be initialized");
if (CheckCompressedOops) {
Label ok;
- ExternalAddress src2(CompressedOops::ptrs_base_addr());
+ ExternalAddress src2(CompressedOops::base_addr());
const bool is_src2_reachable = reachable(src2);
if (!is_src2_reachable) {
push(rscratch1); // cmpptr trashes rscratch1
@@ -6047,10 +6048,10 @@ void MacroAssembler::reinit_heapbase() {
if (CompressedOops::base() == nullptr) {
MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
} else {
- mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
+ mov64(r12_heapbase, (int64_t)CompressedOops::base());
}
} else {
- movptr(r12_heapbase, ExternalAddress(CompressedOops::ptrs_base_addr()));
+ movptr(r12_heapbase, ExternalAddress(CompressedOops::base_addr()));
}
}
}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
index 439c17b10d37a..09d379a4296d4 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
@@ -81,8 +81,8 @@ void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address
notl(rsi); \
andl(rdi, r2); \
andl(rsi, r3); \
- orl(rsi, rdi); \
addl(r1, rsi); \
+ addl(r1, rdi); \
roll(r1, s); \
addl(r1, r2);
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index 4bd91f640fca7..174e2e0277903 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -2674,7 +2674,7 @@ void SharedRuntime::generate_deopt_blob() {
int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
- if (EnableJVMCI && UseJVMCICompiler) {
+ if (UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 2bc4a0a9cba94..ee6311c25f6fe 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
@@ -3573,6 +3574,9 @@ void StubGenerator::generate_libm_stubs() {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
StubRoutines::_dtan = generate_libmTan(); // from stubGenerator_x86_64_tan.cpp
}
+ if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) {
+ StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp
+ }
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp
}
@@ -3793,6 +3797,28 @@ address StubGenerator::generate_upcall_stub_exception_handler() {
return start;
}
+// load Method* target of MethodHandle
+// j_rarg0 = jobject receiver
+// rbx = result
+address StubGenerator::generate_upcall_stub_load_target() {
+ StubCodeMark mark(this, "StubRoutines", "upcall_stub_load_target");
+ address start = __ pc();
+
+ __ resolve_global_jobject(j_rarg0, r15_thread, rscratch1);
+ // Load target method from receiver
+ __ load_heap_oop(rbx, Address(j_rarg0, java_lang_invoke_MethodHandle::form_offset()), rscratch1);
+ __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_LambdaForm::vmentry_offset()), rscratch1);
+ __ load_heap_oop(rbx, Address(rbx, java_lang_invoke_MemberName::method_offset()), rscratch1);
+ __ access_load_at(T_ADDRESS, IN_HEAP, rbx,
+ Address(rbx, java_lang_invoke_ResolvedMethodName::vmtarget_offset()),
+ noreg, noreg);
+ __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
+
+ __ ret(0);
+
+ return start;
+}
+
address StubGenerator::generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table");
@@ -3952,6 +3978,7 @@ void StubGenerator::generate_final_stubs() {
}
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
+ StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target();
}
void StubGenerator::generate_compiler_stubs() {
@@ -4157,41 +4184,41 @@ void StubGenerator::generate_compiler_stubs() {
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
if (UseAVX > 2) {
- for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
- int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+ for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+ int vop = VectorSupport::VECTOR_OP_MATH_START + op;
if ((!VM_Version::supports_avx512dq()) &&
(vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
continue;
}
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
}
}
const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
- for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
- int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+ for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+ int vop = VectorSupport::VECTOR_OP_MATH_START + op;
if (vop == VectorSupport::VECTOR_OP_POW) {
continue;
}
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
- snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+ snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
}
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index d65c681585d6d..7280e9fbe957e 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -376,11 +376,22 @@ class StubGenerator: public StubCodeGenerator {
void roundDec(XMMRegister key, int rnum);
void lastroundDec(XMMRegister key, int rnum);
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
- void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
- void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
- XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
- XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
- bool final_reduction, int index, XMMRegister counter_inc_mask);
+ void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH,
+ XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK,
+ bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
+ bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset);
+ void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl);
+ void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
+ XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset);
+ void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
+ int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction);
+ void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
+ Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
+ int in_disp, int displacement, int hashkey_offset);
+ void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key,
+ Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
// AVX2 AES-GCM related functions
void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
@@ -546,6 +557,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_libmSin();
address generate_libmCos();
address generate_libmTan();
+ address generate_libmTanh();
address generate_libmExp();
address generate_libmPow();
address generate_libmLog();
@@ -608,6 +620,7 @@ class StubGenerator: public StubCodeGenerator {
// shared exception handler for FFM upcall stubs
address generate_upcall_stub_exception_handler();
+ address generate_upcall_stub_load_target();
// Specialized stub implementations for UseSecondarySupersTable.
address generate_lookup_secondary_supers_table_stub(u1 super_klass_index);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
index 9744169498c8b..f14d368c376e1 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2019, 2023, Intel Corporation. All rights reserved.
+* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -172,6 +172,38 @@ static address ghash_polynomial_two_one_addr() {
return (address)GHASH_POLYNOMIAL_TWO_ONE;
}
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_4444[] = {
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+};
+static address counter_mask_addbe_4444_addr() {
+ return (address)COUNTER_MASK_ADDBE_4444;
+}
+
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADDBE_1234[] = {
+ 0x0000000000000000ULL, 0x0100000000000000ULL,
+ 0x0000000000000000ULL, 0x0200000000000000ULL,
+ 0x0000000000000000ULL, 0x0300000000000000ULL,
+ 0x0000000000000000ULL, 0x0400000000000000ULL,
+};
+static address counter_mask_addbe_1234_addr() {
+ return (address)COUNTER_MASK_ADDBE_1234;
+}
+
+// This mask is used for incrementing counter value
+ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ADD_1234[] = {
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
+ 0x0000000000000002ULL, 0x0000000000000000ULL,
+ 0x0000000000000003ULL, 0x0000000000000000ULL,
+ 0x0000000000000004ULL, 0x0000000000000000ULL,
+};
+static address counter_mask_add_1234_addr() {
+ return (address)COUNTER_MASK_ADD_1234;
+}
// AES intrinsic stubs
@@ -209,10 +241,10 @@ void StubGenerator::generate_aes_stubs() {
// len = rdx (c_rarg1) | rdi (c_rarg1)
// ct = r8 (c_rarg2) | rdx (c_rarg2)
// out = r9 (c_rarg3) | rcx (c_rarg3)
-// key = r10 | r8 (c_rarg4)
-// state = r13 | r9 (c_rarg5)
-// subkeyHtbl = r14 | r11
-// counter = rsi | r12
+// key = rsi | r8 (c_rarg4)
+// state = rdi | r9 (c_rarg5)
+// subkeyHtbl = r10 | r10
+// counter = r11 | r11
//
// Output:
// rax - number of processed bytes
@@ -230,31 +262,31 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
const Register key = c_rarg4;
const Register state = c_rarg5;
const Address subkeyH_mem(rbp, 2 * wordSize);
- const Register subkeyHtbl = r11;
- const Register avx512_subkeyHtbl = r13;
+ const Register subkeyHtbl = r10;
+ const Register avx512_subkeyHtbl = r12;
const Address counter_mem(rbp, 3 * wordSize);
- const Register counter = r12;
+ const Register counter = r11;
#else
const Address key_mem(rbp, 6 * wordSize);
- const Register key = r10;
+ const Register key = rsi;
const Address state_mem(rbp, 7 * wordSize);
- const Register state = r13;
+ const Register state = rdi;
const Address subkeyH_mem(rbp, 8 * wordSize);
- const Register subkeyHtbl = r14;
+ const Register subkeyHtbl = r10;
const Register avx512_subkeyHtbl = r12;
const Address counter_mem(rbp, 9 * wordSize);
- const Register counter = rsi;
+ const Register counter = r11;
#endif
__ enter();
// Save state before entering routine
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
- __ push(rbx);
+ __ push(r12);//holds pointer to avx512_subkeyHtbl
+ __ push(r14);//holds CTR_CHECK value to check for overflow
+ __ push(r15);//holds number of rounds
+ __ push(rbx);//scratch register
#ifdef _WIN64
// on win64, fill len_reg from stack position
__ push(rsi);
+ __ push(rdi);
__ movptr(key, key_mem);
__ movptr(state, state_mem);
#endif
@@ -262,24 +294,24 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
__ movptr(counter, counter_mem);
// Align stack
__ andq(rsp, -64);
- __ subptr(rsp, 96 * longSize); // Create space on the stack for htbl entries
+ __ subptr(rsp, 200 * longSize); // Create space on the stack for 64 htbl entries and 8 zmm AES entries
__ movptr(avx512_subkeyHtbl, rsp);
- aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
+ aesgcm_avx512(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
__ vzeroupper();
// Restore state before leaving routine
#ifdef _WIN64
__ lea(rsp, Address(rbp, -6 * wordSize));
+ __ pop(rdi);
__ pop(rsi);
#else
- __ lea(rsp, Address(rbp, -5 * wordSize));
+ __ lea(rsp, Address(rbp, -4 * wordSize));
#endif
__ pop(rbx);
__ pop(r15);
__ pop(r14);
- __ pop(r13);
__ pop(r12);
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -2708,87 +2740,100 @@ void StubGenerator::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
__ vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit);
}
-void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl, Register rscratch) {
+// Holds 64 Htbl entries: 32 HashKey powers and 32 HashKeyK values (derived from HashKey)
+void StubGenerator::generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl) {
const XMMRegister HK = xmm6;
- const XMMRegister ZT5 = xmm4;
- const XMMRegister ZT7 = xmm7;
- const XMMRegister ZT8 = xmm8;
-
- Label GFMUL_AVX512;
+ const XMMRegister ZT1 = xmm0, ZT2 = xmm1, ZT3 = xmm2, ZT4 = xmm3;
+ const XMMRegister ZT5 = xmm4, ZT6 = xmm5, ZT7 = xmm7, ZT8 = xmm8;
+ const XMMRegister ZT10 = xmm10, ZT11 = xmm11, ZT12 = xmm12;
__ movdqu(HK, Address(htbl, 0));
- __ movdqu(xmm10, ExternalAddress(ghash_long_swap_mask_addr()), rscratch);
- __ vpshufb(HK, HK, xmm10, Assembler::AVX_128bit);
-
- __ movdqu(xmm11, ExternalAddress(ghash_polynomial_addr()), rscratch);
- __ movdqu(xmm12, ExternalAddress(ghash_polynomial_two_one_addr()), rscratch);
+ __ movdqu(ZT10, ExternalAddress(ghash_long_swap_mask_addr()), r15);
+ __ vpshufb(HK, HK, ZT10, Assembler::AVX_128bit);
+ __ movdqu(ZT11, ExternalAddress(ghash_polynomial_addr()), r15);
+ __ movdqu(ZT12, ExternalAddress(ghash_polynomial_two_one_addr()), r15);
// Compute H ^ 2 from the input subkeyH
- __ movdqu(xmm2, xmm6);
- __ vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit);
- __ vpsrlq(xmm2, xmm2, 63, Assembler::AVX_128bit);
- __ movdqu(xmm1, xmm2);
- __ vpslldq(xmm2, xmm2, 8, Assembler::AVX_128bit);
- __ vpsrldq(xmm1, xmm1, 8, Assembler::AVX_128bit);
- __ vpor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
+ __ movdqu(ZT3, HK);
+ __ vpsllq(HK, HK, 1, Assembler::AVX_128bit);
+ __ vpsrlq(ZT3, ZT3, 63, Assembler::AVX_128bit);
+ __ movdqu(ZT2, ZT3);
+ __ vpslldq(ZT3, ZT3, 8, Assembler::AVX_128bit);
+ __ vpsrldq(ZT2, ZT2, 8, Assembler::AVX_128bit);
+ __ vpor(HK, HK, ZT3, Assembler::AVX_128bit);
+ __ vpshufd(ZT3, ZT2, 0x24, Assembler::AVX_128bit);
+ __ vpcmpeqd(ZT3, ZT3, ZT12, Assembler::AVX_128bit);
+ __ vpand(ZT3, ZT3, ZT11, Assembler::AVX_128bit);
+ __ vpxor(HK, HK, ZT3, Assembler::AVX_128bit);
+ __ movdqu(Address(avx512_htbl, 16 * 31), HK); // H ^ 2
- __ vpshufd(xmm2, xmm1, 0x24, Assembler::AVX_128bit);
- __ vpcmpeqd(xmm2, xmm2, xmm12, Assembler::AVX_128bit);
- __ vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit);
- __ vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
- __ movdqu(Address(avx512_htbl, 16 * 47), xmm6); // H ^ 2
- // Compute the remaining three powers of H using XMM registers and all following powers using ZMM
__ movdqu(ZT5, HK);
- __ vinserti32x4(ZT7, ZT7, HK, 3);
+ __ evinserti64x2(ZT7, ZT7, HK, 3, Assembler::AVX_512bit);
+ //calculate HashKey ^ 2 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 46), ZT5); // H ^ 2 * 2
- __ vinserti32x4(ZT7, ZT7, ZT5, 2);
+ __ movdqu(Address(avx512_htbl, 16 * 30), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 2, Assembler::AVX_512bit);
+ //calculate HashKey ^ 3 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 45), ZT5); // H ^ 2 * 3
- __ vinserti32x4(ZT7, ZT7, ZT5, 1);
+ __ movdqu(Address(avx512_htbl, 16 * 29), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 1, Assembler::AVX_512bit);
+ //calculate HashKey ^ 4 << 1 mod poly
gfmul_avx512(ZT5, HK);
- __ movdqu(Address(avx512_htbl, 16 * 44), ZT5); // H ^ 2 * 4
- __ vinserti32x4(ZT7, ZT7, ZT5, 0);
-
- __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit);
- __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 40), ZT7, Assembler::AVX_512bit);
- __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 36), ZT8, Assembler::AVX_512bit);
+ __ movdqu(Address(avx512_htbl, 16 * 28), ZT5);
+ __ evinserti64x2(ZT7, ZT7, ZT5, 0, Assembler::AVX_512bit);
+  // ZT5 and ZT7 to be cleared (hash key)
+ //calculate HashKeyK = HashKey x POLY
+ __ evmovdquq(xmm11, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, r15);
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * 60), ZT1, Assembler::AVX_512bit);
+  //**ZT1 and ZT2 to be cleared (hash key)
+
+ //switch to 4x128 - bit computations now
+ __ evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit); //;; broadcast HashKey ^ 4 across all ZT5
+ __ evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);//; save HashKey ^ 4 to HashKey ^ 1 in ZT8
+ //**ZT8 to be cleared(hash key)
+
+ //calculate HashKey ^ 5 << 1 mod poly, HashKey ^ 6 << 1 mod poly, ... HashKey ^ 8 << 1 mod poly
gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 32), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 28), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 20), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 16), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 12), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 8), ZT7, Assembler::AVX_512bit);
- gfmul_avx512(ZT8, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 4), ZT8, Assembler::AVX_512bit);
- gfmul_avx512(ZT7, ZT5);
- __ evmovdquq(Address(avx512_htbl, 16 * 0), ZT7, Assembler::AVX_512bit);
- __ ret(0);
-}
-
-#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \
-__ evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \
-__ vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \
-__ evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \
-__ evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \
-__ vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \
-__ evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \
-__ vpslldq(out, out, 4, Assembler::AVX_512bit); \
-__ vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \
+ __ evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);//; HashKey ^ 8 to HashKey ^ 5 in ZT7 now
+
+ //calculate HashKeyX = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * 56), ZT1, Assembler::AVX_512bit);
+
+ __ evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);//;; broadcast HashKey ^ 8 across all ZT5
+
+ for (int i = 20, j = 52; i > 0;) {
+ gfmul_avx512(ZT8, ZT5);
+ __ evmovdquq(Address(avx512_htbl, 16 * i), ZT8, Assembler::AVX_512bit);
+ //calculate HashKeyK = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT8, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT8, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+ i -= 4;
+ j -= 4;
+ //compute HashKey ^ (8 + n), HashKey ^ (7 + n), ... HashKey ^ (5 + n)
+ gfmul_avx512(ZT7, ZT5);
+ __ evmovdquq(Address(avx512_htbl, 16 * i), ZT7, Assembler::AVX_512bit);
+
+ //calculate HashKeyK = HashKey x POLY
+ __ evpclmulqdq(ZT1, ZT7, xmm11, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZT2, ZT7, 78, Assembler::AVX_512bit);
+ __ evpxorq(ZT1, ZT1, ZT2, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_htbl, 16 * j), ZT1, Assembler::AVX_512bit);
+
+ i -= 4;
+ j -= 4;
+ }
+ }
#define vhpxori4x128(reg, tmp) \
__ vextracti64x4(tmp, reg, 1); \
@@ -2820,21 +2865,17 @@ __ evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::
__ evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \
__ evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \
-#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \
-__ evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \
-__ evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \
-
-#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \
-__ vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \
-__ vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \
-__ evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \
+#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey2, hkey1) \
+__ evpclmulqdq(dst00, ghdata, hkey2, 0x00, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst01, ghdata, hkey2, 0x10, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst10, ghdata, hkey1, 0x01, Assembler::AVX_512bit); \
+__ evpclmulqdq(dst11, ghdata, hkey1, 0x11, Assembler::AVX_512bit); \
+
+#define shuffle(dst0, dst1, dst2, dst3, src0, src1, src2, src3, shufmask) \
+__ vpshufb(dst0, src0, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst1, src1, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst2, src2, shufmask, Assembler::AVX_512bit); \
+__ vpshufb(dst3, src3, shufmask, Assembler::AVX_512bit); \
#define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
__ evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \
@@ -2848,211 +2889,462 @@ __ vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \
__ vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \
__ vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \
-void StubGenerator::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx,
- Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds,
- Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) {
- Label AES_192, AES_256, LAST_AES_RND;
+//schoolbook multiply of 16 blocks (8 x 16 bytes)
+//it is assumed that the data read is already shuffled
+void StubGenerator::ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
+ Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
+ int in_disp, int displacement, int hashkey_offset) {
const XMMRegister ZTMP0 = xmm0;
const XMMRegister ZTMP1 = xmm3;
const XMMRegister ZTMP2 = xmm4;
const XMMRegister ZTMP3 = xmm5;
+ const XMMRegister ZTMP4 = xmm6;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
const XMMRegister ZTMP7 = xmm11;
const XMMRegister ZTMP8 = xmm12;
const XMMRegister ZTMP9 = xmm13;
- const XMMRegister ZTMP10 = xmm15;
- const XMMRegister ZTMP11 = xmm16;
- const XMMRegister ZTMP12 = xmm17;
+ const XMMRegister ZTMPA = xmm26;
+ const XMMRegister ZTMPB = xmm23;
+ const XMMRegister GH = xmm24;
+ const XMMRegister GL = xmm25;
+ const int hkey_gap = 16 * 32;
+
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp), Assembler::AVX_512bit);
+ }
- const XMMRegister ZTMP13 = xmm19;
- const XMMRegister ZTMP14 = xmm20;
- const XMMRegister ZTMP15 = xmm21;
- const XMMRegister ZTMP16 = xmm30;
- const XMMRegister ZTMP17 = xmm31;
- const XMMRegister ZTMP18 = xmm1;
- const XMMRegister ZTMP19 = xmm2;
- const XMMRegister ZTMP20 = xmm8;
- const XMMRegister ZTMP21 = xmm22;
- const XMMRegister ZTMP22 = xmm23;
+ if (start_ghash) {
+ __ evpxorq(ZTMP9, ZTMP9, HASH, Assembler::AVX_512bit);
+ }
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 0 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8);
+
+ //ghash blocks 4 - 7
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 64), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);;
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 1 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ if (start_ghash) {
+ __ evpxorq(GL, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ evpxorq(GH, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+ } else { //mid, end, end_reduce
+ __ vpternlogq(GL, 0x96, ZTMP0, ZTMP2, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ vpternlogq(GH, 0x96, ZTMP1, ZTMP3, Assembler::AVX_512bit);//T1 = THH + TLH
+ }
+ //ghash blocks 8 - 11
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 128), Assembler::AVX_512bit);
+ }
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 2 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ }
+
+ carrylessMultiply(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ __ vpternlogq(GL, 0x96, ZTMP6, ZTMP4, Assembler::AVX_512bit);//T2 = THL + TLL
+ __ vpternlogq(GH, 0x96, ZTMP7, ZTMP5, Assembler::AVX_512bit);//T1 = THH + TLH
+ //ghash blocks 12 - 15
+ if (uload_shuffle) {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit);
+ __ vpshufb(ZTMP9, ZTMP9, SHUFM, Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP9, Address(subkeyHtbl, in_offset * 16 + in_disp + 192), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(ZTMP8, Address(subkeyHtbl, hashkey_offset + displacement + 3 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(ZTMPA, Address(subkeyHtbl, hashkey_offset + displacement + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ }
+ carrylessMultiply(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP9, ZTMPA, ZTMP8);
+
+ //update sums
+ xorGHASH(GL, GH, GL, GH, ZTMP0, ZTMP2, ZTMP1, ZTMP3, ZTMP6, ZTMP4, ZTMP7, ZTMP5);
+
+ if (do_reduction) {
+ //new reduction
+ __ evmovdquq(ZTMPB, ExternalAddress(ghash_polynomial_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evpclmulqdq(HASH, GL, ZTMPB, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(ZTMP0, GL, 78, Assembler::AVX_512bit);
+ __ vpternlogq(HASH, 0x96, GH, ZTMP0, Assembler::AVX_512bit);
+ if (do_hxor) {
+ vhpxori4x128(HASH, ZTMP0);
+ }
+ }
+}
- // Pre increment counters
- __ vpaddd(ZTMP0, ctr_blockx, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP1, ZTMP0, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP2, ZTMP1, counter_inc_mask, Assembler::AVX_512bit);
- __ vpaddd(ZTMP3, ZTMP2, counter_inc_mask, Assembler::AVX_512bit);
- // Save counter value
- __ evmovdquq(ctr_blockx, ZTMP3, Assembler::AVX_512bit);
-
- // Reuse ZTMP17 / ZTMP18 for loading AES Keys
- // Pre-load AES round keys
- ev_load_key(ZTMP17, key, 0, xmm29);
- ev_load_key(ZTMP18, key, 1 * 16, xmm29);
-
- // ZTMP19 & ZTMP20 used for loading hash key
- // Pre-load hash key
- __ evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- // Load data for computing ghash
- __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
-
- // Xor cipher block 0 with input ghash, if available
- if (ghash_input) {
- __ evpxorq(ZTMP21, ZTMP21, aad_hashx, Assembler::AVX_512bit);
+//Stitched GHASH of 16 blocks(with reduction) with encryption of 0 blocks
+void StubGenerator::gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
+ int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction) {
+  //there are 0 blocks to cipher, so there are only 16 blocks for ghash and reduction
+ ghash16_avx512(start_ghash, do_reduction, false, false, true, in, pos, subkeyHtbl, HASH, SHUFM, ghashin_offset, 0, 0, hashkey_offset);
+}
+
+//Main GCM macro stitching cipher with GHASH
+//encrypts 16 blocks at a time
+//ghash the 16 previously encrypted ciphertext blocks
+void StubGenerator::ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR_BE, XMMRegister GHASH_IN,
+ XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHFMSK,
+ bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
+ bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset) {
+ const XMMRegister B00_03 = xmm0;
+ const XMMRegister B04_07 = xmm3;
+ const XMMRegister B08_11 = xmm4;
+ const XMMRegister B12_15 = xmm5;
+ const XMMRegister THH1 = xmm6;
+ const XMMRegister THL1 = xmm7;
+ const XMMRegister TLH1 = xmm10;
+ const XMMRegister TLL1 = xmm11, THH2 = xmm12, THL2 = xmm13, TLH2 = xmm15;
+ const XMMRegister TLL2 = xmm16, THH3 = xmm17, THL3 = xmm19, TLH3 = xmm20;
+ const XMMRegister TLL3 = xmm21, DATA1 = xmm17, DATA2 = xmm19, DATA3 = xmm20, DATA4 = xmm21;
+ const XMMRegister AESKEY1 = xmm30, AESKEY2 = xmm31;
+ const XMMRegister GHKEY1 = xmm1, GHKEY2 = xmm18, GHDAT1 = xmm8, GHDAT2 = xmm22;
+ const XMMRegister ZT = xmm23, TO_REDUCE_L = xmm25, TO_REDUCE_H = xmm24;
+ const int hkey_gap = 16 * 32;
+
+ Label blocks_overflow, blocks_ok, skip_shuffle, cont, aes_256, aes_192, last_aes_rnd;
+
+ __ cmpb(CTR_CHECK, (256 - 16));
+ __ jcc(Assembler::aboveEqual, blocks_overflow);
+ __ vpaddd(B00_03, CTR_BE, ADDBE_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit);
+ __ jmp(blocks_ok);
+ __ bind(blocks_overflow);
+ __ vpshufb(CTR_BE, CTR_BE, SHFMSK, Assembler::AVX_512bit);
+ __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ vpaddd(B00_03, CTR_BE, ADD_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK);
+
+ __ bind(blocks_ok);
+
+  //pre-load constants
+ ev_load_key(AESKEY1, key, 0, rbx);
+ if (!no_ghash_in) {
+ __ evpxorq(GHDAT1, GHASH_IN, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * ghashin_offset), Assembler::AVX_512bit);
+ }
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 0 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 0 * 64), Assembler::AVX_512bit);
+ }
+
+ //save counter for the next round
+ //increment counter overflow check register
+ __ evshufi64x2(CTR_BE, B12_15, B12_15, 255, Assembler::AVX_512bit);
+ __ addb(CTR_CHECK, 16);
+
+  //pre-load constants
+ ev_load_key(AESKEY2, key, 1 * 16, rbx);
+ __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset +4)), Assembler::AVX_512bit);
+
+ //stitch AES rounds with GHASH
+ //AES round 0
+ __ evpxorq(B00_03, B00_03, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, AESKEY1, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, AESKEY1, Assembler::AVX_512bit);
+ ev_load_key(AESKEY1, key, 2 * 16, rbx);
+
+ //GHASH 4 blocks(15 to 12)
+ carrylessMultiply(TLL1, TLH1, THL1, THH1, GHDAT1, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 1 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 1 * 64), Assembler::AVX_512bit);
+ }
+
+ __ evmovdquq(GHDAT1, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 8)), Assembler::AVX_512bit);
+
+ //AES round 1
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ ev_load_key(AESKEY2, key, 3 * 16, rbx);
+
+ //GHASH 4 blocks(11 to 8)
+ carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 2 * 64 ), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 2 * 64), Assembler::AVX_512bit);
}
- // Load data for computing ghash
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // stitch AES rounds with GHASH
- // AES round 0, xmm24 has shuffle mask
- shuffleExorRnd1Key(ZTMP0, ZTMP1, ZTMP2, ZTMP3, xmm24, ZTMP17);
- // Reuse ZTMP17 / ZTMP18 for loading remaining AES Keys
- ev_load_key(ZTMP17, key, 2 * 16, xmm29);
- // GHASH 4 blocks
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19);
- // Load the next hkey and Ghash data
- __ evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
-
- // AES round 1
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 3 * 16, xmm29);
-
- // GHASH 4 blocks(11 to 8)
- carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
- // Load the next hkey and GDATA
- __ evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
-
- // AES round 2
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 4 * 16, xmm29);
-
- // GHASH 4 blocks(7 to 4)
- carrylessMultiply(ZTMP14, ZTMP16, ZTMP15, ZTMP13, ZTMP21, ZTMP19);
- // AES rounds 3
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 5 * 16, xmm29);
-
- // Gather(XOR) GHASH for 12 blocks
- xorGHASH(ZTMP5, ZTMP6, ZTMP8, ZTMP7, ZTMP9, ZTMP13, ZTMP10, ZTMP14, ZTMP12, ZTMP16, ZTMP11, ZTMP15);
-
- // AES rounds 4
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 6 * 16, xmm29);
-
- // load plain / cipher text(recycle registers)
- loadData(in, pos, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
-
- // AES rounds 5
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 7 * 16, xmm29);
- // GHASH 4 blocks(3 to 0)
- carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
-
- // AES round 6
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 8 * 16, xmm29);
-
- // gather GHASH in ZTMP6(low) and ZTMP5(high)
- if (first_time_reduction) {
- __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
- __ evpxorq(xmm25, ZTMP7, ZTMP11, Assembler::AVX_512bit);
- __ evpxorq(xmm27, ZTMP5, ZTMP9, Assembler::AVX_512bit);
- __ evpxorq(xmm26, ZTMP6, ZTMP10, Assembler::AVX_512bit);
- } else if (!first_time_reduction && !final_reduction) {
- xorGHASH(ZTMP7, xmm25, xmm27, xmm26, ZTMP8, ZTMP12, ZTMP7, ZTMP11, ZTMP5, ZTMP9, ZTMP6, ZTMP10);
+ __ evmovdquq(GHDAT2, Address(avx512_subkeyHtbl, 16 * (ghashin_offset + 12)), Assembler::AVX_512bit);
+
+ //AES round 2
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 4 * 16, rbx);
+
+ //GHASH 4 blocks(7 to 4)
+ carrylessMultiply(TLL3, TLH3, THL3, THH3, GHDAT1, GHKEY2, GHKEY1);
+
+ if (hk_broadcast) {
+ __ evbroadcastf64x2(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit);
+ __ evbroadcastf64x2(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit);
+ } else {
+ __ evmovdquq(GHKEY1, Address(avx512_subkeyHtbl, hashkey_offset + 3 * 64), Assembler::AVX_512bit);
+ __ evmovdquq(GHKEY2, Address(avx512_subkeyHtbl, hashkey_offset + hkey_gap + 3 * 64), Assembler::AVX_512bit);
}
- if (final_reduction) {
- // Phase one: Add mid products together
- // Also load polynomial constant for reduction
- __ vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
- __ vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit);
- __ vpsrldq(ZTMP11, ZTMP7, 8, Assembler::AVX_512bit);
- __ vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit);
- __ evmovdquq(ZTMP12, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ //AES rounds 3
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 5 * 16, rbx);
+
+ //Gather(XOR) GHASH for 12 blocks
+ xorGHASH(TLL1, TLH1, THL1, THH1, TLL2, TLL3, TLH2, TLH3, THL2, THL3, THH2, THH3);
+
+ //AES rounds 4
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 6 * 16, rbx);
+
+ //load plain / cipher text(recycle GH3xx registers)
+ loadData(in, pos, DATA1, DATA2, DATA3, DATA4);
+
+ //AES rounds 5
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 7 * 16, rbx);
+
+ //GHASH 4 blocks(3 to 0)
+ carrylessMultiply(TLL2, TLH2, THL2, THH2, GHDAT2, GHKEY2, GHKEY1);
+
+ //AES round 6
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 8 * 16, rbx);
+
+ //gather GHASH in TO_REDUCE_H / L
+ if (is_hash_start) {
+ __ evpxorq(TO_REDUCE_L, TLL2, THL2, Assembler::AVX_512bit);
+ __ evpxorq(TO_REDUCE_H, THH2, TLH2, Assembler::AVX_512bit);
+ __ vpternlogq(TO_REDUCE_L, 0x96, TLL1, THL1, Assembler::AVX_512bit);
+ __ vpternlogq(TO_REDUCE_H, 0x96, THH1, TLH1, Assembler::AVX_512bit);
+ } else {
+ //not the first round so sums need to be updated
+ xorGHASH(TO_REDUCE_L, TO_REDUCE_H, TO_REDUCE_L, TO_REDUCE_H, TLL2, THL2, THH2, TLH2, TLL1, THL1, THH1, TLH1);
}
- // AES round 7
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 9 * 16, xmm29);
- if (final_reduction) {
- __ vpternlogq(ZTMP5, 0x96, ZTMP9, ZTMP11, Assembler::AVX_512bit);
- __ evpxorq(ZTMP5, ZTMP5, xmm27, Assembler::AVX_512bit);
- __ vpternlogq(ZTMP6, 0x96, ZTMP10, ZTMP7, Assembler::AVX_512bit);
- __ evpxorq(ZTMP6, ZTMP6, xmm26, Assembler::AVX_512bit);
+
+ //AES round 7
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 9 * 16, rbx);
+
+ //new reduction
+ if (do_hash_reduction) {
+ __ evmovdquq(ZT, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evpclmulqdq(THH1, TO_REDUCE_L, ZT, 0x10, Assembler::AVX_512bit);
+ __ vpshufd(TO_REDUCE_L, TO_REDUCE_L, 78, Assembler::AVX_512bit);
+ __ vpternlogq(THH1, 0x96, TO_REDUCE_H, TO_REDUCE_L, Assembler::AVX_512bit);
}
- // AES round 8
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 10 * 16, xmm29);
-
- // Horizontal xor of low and high 4*128
- if (final_reduction) {
- vhpxori4x128(ZTMP5, ZTMP9);
- vhpxori4x128(ZTMP6, ZTMP10);
+
+ //AES round 8
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 10 * 16, rbx);
+
+  //horizontal xor of 4 reduced hashes
+ if (do_hash_hxor) {
+ vhpxori4x128(THH1, TLL1);
}
- // AES round 9
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // First phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP10, ZTMP12, ZTMP6, 0x01, Assembler::AVX_128bit);
- __ vpslldq(ZTMP10, ZTMP10, 8, Assembler::AVX_128bit);
- __ evpxorq(ZTMP10, ZTMP6, ZTMP10, Assembler::AVX_128bit);
+
+ //AES round 9
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY2, key, 11 * 16, rbx);
+ //AES rounds up to 11 (AES192) or 13 (AES256)
+ //AES128 is done
+ __ cmpl(NROUNDS, 52);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 12 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+ __ cmpl(NROUNDS, 60);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_256);
+ ev_load_key(AESKEY2, key, 13 * 16, rbx);
+ roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(AESKEY1, key, 14 * 16, rbx);
+ roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
+
+ __ bind(last_aes_rnd);
+ //the last AES round
+ lastroundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
+  //AESKEY1 and AESKEY2 contain AES round keys
+
+ //XOR against plain / cipher text
+ xorBeforeStore(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4);
+
+ //store cipher / plain text
+ storeData(out, pos, B00_03, B04_07, B08_11, B12_15);
+  //**B00_03, B04_07, B08_11, B12_15 may contain sensitive data
+
+ //shuffle cipher text blocks for GHASH computation
+ __ cmpptr(ct, out);
+ __ jcc(Assembler::notEqual, skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHFMSK);
+ __ jmp(cont);
+ __ bind(skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, DATA1, DATA2, DATA3, DATA4, SHFMSK);
+
+  //**B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text
+ __ bind(cont);
+ //store shuffled cipher text for ghashing
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * aesout_offset), B00_03, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 4)), B04_07, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 8)), B08_11, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (aesout_offset + 12)), B12_15, Assembler::AVX_512bit);
+}
+
+
+//Encrypt / decrypt the initial 16 blocks
+void StubGenerator::initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
+ Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
+ XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset) {
+ const XMMRegister B00_03 = xmm7;
+ const XMMRegister B04_07 = xmm10;
+ const XMMRegister B08_11 = xmm11;
+ const XMMRegister B12_15 = xmm12;
+ const XMMRegister T0 = xmm0;
+ const XMMRegister T1 = xmm3;
+ const XMMRegister T2 = xmm4;
+ const XMMRegister T3 = xmm5;
+ const XMMRegister T4 = xmm6;
+ const XMMRegister T5 = xmm30;
+
+ Label next_16_overflow, next_16_ok, cont, skip_shuffle, aes_256, aes_192, last_aes_rnd;
+ //prepare counter blocks
+ __ cmpb(CTR_CHECK, (256 - 16));
+ __ jcc(Assembler::aboveEqual, next_16_overflow);
+ __ vpaddd(B00_03, CTR, ADDBE_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, ADDBE_4x4, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, ADDBE_4x4, Assembler::AVX_512bit);
+ __ jmp(next_16_ok);
+ __ bind(next_16_overflow);
+ __ vpshufb(CTR, CTR, SHUF_MASK, Assembler::AVX_512bit);
+ __ evmovdquq(B12_15, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx);
+ __ vpaddd(B00_03, CTR, ADD_1234, Assembler::AVX_512bit);
+ __ vpaddd(B04_07, B00_03, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B08_11, B04_07, B12_15, Assembler::AVX_512bit);
+ __ vpaddd(B12_15, B08_11, B12_15, Assembler::AVX_512bit);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK);
+ __ bind(next_16_ok);
+ __ evshufi64x2(CTR, B12_15, B12_15, 255, Assembler::AVX_512bit);
+ __ addb(CTR_CHECK, 16);
+
+ //load 16 blocks of data
+ loadData(in, pos, T0, T1, T2, T3);
+
+ //move to AES encryption rounds
+ __ movdqu(T5, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
+ ev_load_key(T4, key, 0, T5);
+ __ evpxorq(B00_03, B00_03, T4, Assembler::AVX_512bit);
+ __ evpxorq(B04_07, B04_07, T4, Assembler::AVX_512bit);
+ __ evpxorq(B08_11, B08_11, T4, Assembler::AVX_512bit);
+ __ evpxorq(B12_15, B12_15, T4, Assembler::AVX_512bit);
+
+ for (int i = 1; i < 10; i++) {
+ ev_load_key(T4, key, i * 16, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
}
+
+ ev_load_key(T4, key, 10 * 16, T5);
__ cmpl(rounds, 52);
- __ jcc(Assembler::greaterEqual, AES_192);
- __ jmp(LAST_AES_RND);
- // AES rounds up to 11 (AES192) or 13 (AES256)
- __ bind(AES_192);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 11 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 12 * 16, xmm29);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_192);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 11, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 12, T5);
__ cmpl(rounds, 60);
- __ jcc(Assembler::aboveEqual, AES_256);
- __ jmp(LAST_AES_RND);
-
- __ bind(AES_256);
- roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP18, key, 13 * 16, xmm29);
- roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- ev_load_key(ZTMP17, key, 14 * 16, xmm29);
-
- __ bind(LAST_AES_RND);
- // Second phase of reduction
- if (final_reduction) {
- __ evpclmulqdq(ZTMP9, ZTMP12, ZTMP10, 0x00, Assembler::AVX_128bit);
- __ vpsrldq(ZTMP9, ZTMP9, 4, Assembler::AVX_128bit); // Shift-R 1-DW to obtain 2-DWs shift-R
- __ evpclmulqdq(ZTMP11, ZTMP12, ZTMP10, 0x10, Assembler::AVX_128bit);
- __ vpslldq(ZTMP11, ZTMP11, 4, Assembler::AVX_128bit); // Shift-L 1-DW for result
- // ZTMP5 = ZTMP5 X ZTMP11 X ZTMP9
- __ vpternlogq(ZTMP5, 0x96, ZTMP11, ZTMP9, Assembler::AVX_128bit);
- }
- // Last AES round
- lastroundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // XOR against plain / cipher text
- xorBeforeStore(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
- // store cipher / plain text
- storeData(out, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
+ __ jcc(Assembler::less, last_aes_rnd);
+ __ bind(aes_256);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 13, T5);
+ roundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+ ev_load_key(T4, key, 16 * 14, T5);
+
+ __ bind(last_aes_rnd);
+ lastroundEncode(T4, B00_03, B04_07, B08_11, B12_15);
+
+ //xor against text
+ xorBeforeStore(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3);
+
+ //store
+ storeData(out, pos, B00_03, B04_07, B08_11, B12_15);
+
+ __ cmpptr(ct, out);
+ __ jcc(Assembler::equal, skip_shuffle);
+ //decryption - cipher text needs to go to GHASH phase
+ shuffle(B00_03, B04_07, B08_11, B12_15, T0, T1, T2, T3, SHUF_MASK);
+ __ jmp(cont);
+ __ bind(skip_shuffle);
+ shuffle(B00_03, B04_07, B08_11, B12_15, B00_03, B04_07, B08_11, B12_15, SHUF_MASK);
+
+ //B00_03, B04_07, B08_11, B12_15 overwritten with shuffled cipher text
+ __ bind(cont);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * stack_offset), B00_03, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 4)), B04_07, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 8)), B08_11, Assembler::AVX_512bit);
+ __ evmovdquq(Address(avx512_subkeyHtbl, 16 * (stack_offset + 12)), B12_15, Assembler::AVX_512bit);
}
-void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
- Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) {
- Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32,
- AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16;
- const XMMRegister CTR_BLOCKx = xmm9;
+void StubGenerator::aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key, Register state,
+ Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) {
+ Label ENC_DEC_DONE, MESG_BELOW_32_BLKS, NO_BIG_BLKS, ENCRYPT_BIG_BLKS_NO_HXOR,
+ ENCRYPT_BIG_NBLKS, ENCRYPT_16_BLKS, ENCRYPT_N_GHASH_32_N_BLKS, GHASH_DONE;
+ const XMMRegister CTR_BLOCKx = xmm2;
const XMMRegister AAD_HASHx = xmm14;
- const Register pos = rax;
- const Register rounds = r15;
- const Register ghash_pos = NOT_WIN64( r14) WIN64_ONLY( r11 );
const XMMRegister ZTMP0 = xmm0;
- const XMMRegister ZTMP1 = xmm3;
- const XMMRegister ZTMP2 = xmm4;
- const XMMRegister ZTMP3 = xmm5;
+ const XMMRegister ZTMP1 = xmm3; //**sensitive
+ const XMMRegister ZTMP2 = xmm4; //**sensitive(small data)
+ const XMMRegister ZTMP3 = xmm5; //**sensitive(small data)
const XMMRegister ZTMP4 = xmm6;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
@@ -3066,235 +3358,170 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
const XMMRegister ZTMP14 = xmm20;
const XMMRegister ZTMP15 = xmm21;
const XMMRegister ZTMP16 = xmm30;
- const XMMRegister COUNTER_INC_MASK = xmm18;
-
- __ movl(pos, 0); // Total length processed
- // Min data size processed = 768 bytes
- __ cmpl(len, 768);
- __ jcc(Assembler::less, ENC_DEC_DONE);
+ const XMMRegister ZTMP17 = xmm31;
+ const XMMRegister ZTMP18 = xmm1;
+ const XMMRegister ZTMP19 = xmm18;
+ const XMMRegister ZTMP20 = xmm8;
+ const XMMRegister ZTMP21 = xmm22;
+ const XMMRegister ZTMP22 = xmm23;
+ const XMMRegister ZTMP23 = xmm26;
+ const XMMRegister GH = xmm24;
+ const XMMRegister GL = xmm25;
+ const XMMRegister SHUF_MASK = xmm29;
+ const XMMRegister ADDBE_4x4 = xmm27;
+ const XMMRegister ADDBE_1234 = xmm28;
+ const XMMRegister ADD_1234 = xmm9;
+ const KRegister MASKREG = k1;
+ const Register pos = rax;
+ const Register rounds = r15;
+ const Register CTR_CHECK = r14;
- // Generate 48 constants for htbl
- __ call(GENERATE_HTBL_48_BLKS, relocInfo::none);
- int index = 0; // Index for choosing subkeyHtbl entry
- __ movl(ghash_pos, 0); // Pointer for ghash read and store operations
+ const int stack_offset = 64;
+ const int ghashin_offset = 64;
+ const int aesout_offset = 64;
+ const int hashkey_offset = 0;
+ const int hashkey_gap = 16 * 32;
+ const int HashKey_32 = 0;
+ const int HashKey_16 = 16 * 16;
- // Move initial counter value and STATE value into variables
+ __ movl(pos, 0);
+ __ cmpl(len, 256);
+ __ jcc(Assembler::lessEqual, ENC_DEC_DONE);
+
+ /* Structure of the Htbl is as follows:
+   * Where entries 0 - 31 hold the 32 Hashkeys and entries 32 - 63 hold the 32 HashKeyKs (derived from the Hashkeys)
+ * Rest 8 entries are for storing CTR values post AES rounds
+ * ----------------------------------------------------------------------------------------
+ Hashkey32 -> 16 * 0
+ Hashkey31 -> 16 * 1
+ Hashkey30 -> 16 * 2
+ ........
+ Hashkey1 -> 16 * 31
+ ---------------------
+     HashkeyK32 -> 16 * 32
+ HashkeyK31 -> 16 * 33
+ .........
+ HashkeyK1 -> 16 * 63
+ ---------------------
+ 1st set of AES Entries
+ B00_03 -> 16 * 64
+ B04_07 -> 16 * 68
+ B08_11 -> 16 * 72
+     B12_15 -> 16 * 76
+ ---------------------
+ 2nd set of AES Entries
+     B00_03 -> 16 * 80
+     B04_07 -> 16 * 84
+     B08_11 -> 16 * 88
+     B12_15 -> 16 * 92
+ ---------------------*/
+ generateHtbl_32_blocks_avx512(subkeyHtbl, avx512_subkeyHtbl);
+
+ //Move initial counter value and STATE value into variables
__ movdqu(CTR_BLOCKx, Address(counter, 0));
__ movdqu(AAD_HASHx, Address(state, 0));
- // Load lswap mask for ghash
+
+ //Load lswap mask for ghash
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
- // Shuffle input state using lswap mask
+ //Shuffle input state using lswap mask
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
// Compute #rounds for AES based on the length of the key array
__ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- // Broadcast counter value to 512 bit register
+ __ evmovdquq(ADDBE_4x4, ExternalAddress(counter_mask_addbe_4444_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADDBE_1234, ExternalAddress(counter_mask_addbe_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(SHUF_MASK, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+ __ evmovdquq(ADD_1234, ExternalAddress(counter_mask_add_1234_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
+
+ //Shuffle counter, subtract 1 from the pre-incremented counter value and broadcast counter value to 512 bit register
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
+ __ vpsubd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit);
__ evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, Assembler::AVX_512bit);
- // Load counter shuffle mask
- __ evmovdquq(xmm24, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- // Shuffle counter
- __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
-
- // Load mask for incrementing counter
- __ evmovdquq(COUNTER_INC_MASK, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- // Pre-increment counter
- __ vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
-
- // Begin 32 blocks of AES processing
- __ bind(AES_32_BLOCKS);
- // Save incremented counter before overwriting it with AES data
- __ evmovdquq(CTR_BLOCKx, ZTMP8, Assembler::AVX_512bit);
-
- // Move 256 bytes of data
- loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- // Load key shuffle mask
- __ movdqu(xmm29, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
- // Load 0th AES round key
- ev_load_key(ZTMP4, key, 0, xmm29);
- // AES-ROUND0, xmm24 has the shuffle mask
- shuffleExorRnd1Key(ZTMP5, ZTMP6, ZTMP7, ZTMP8, xmm24, ZTMP4);
-
- for (int j = 1; j < 10; j++) {
- ev_load_key(ZTMP4, key, j * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- }
- ev_load_key(ZTMP4, key, 10 * 16, xmm29);
- // AES rounds up to 11 (AES192) or 13 (AES256)
- __ cmpl(rounds, 52);
- __ jcc(Assembler::greaterEqual, AES_192);
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ jmp(STORE_CT);
-
- __ bind(AES_192);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 11 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ cmpl(rounds, 60);
- __ jcc(Assembler::aboveEqual, AES_256);
- ev_load_key(ZTMP4, key, 12 * 16, xmm29);
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- __ jmp(STORE_CT);
-
- __ bind(AES_256);
- ev_load_key(ZTMP4, key, 12 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 13 * 16, xmm29);
- roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- ev_load_key(ZTMP4, key, 14 * 16, xmm29);
- // Last AES round
- lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
-
- __ bind(STORE_CT);
- // Xor the encrypted key with PT to obtain CT
- xorBeforeStore(ZTMP5, ZTMP6, ZTMP7, ZTMP8, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
- storeData(out, pos, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
- // 16 blocks encryption completed
- __ addl(pos, 256);
- __ cmpl(pos, 512);
- __ jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL);
- __ vpaddd(ZTMP5, CTR_BLOCKx, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
- __ jmp(AES_32_BLOCKS);
-
- __ bind(GHASH_AES_PARALLEL);
- // Ghash16_encrypt16_parallel takes place in the order with three reduction values:
- // 1) First time -> cipher xor input ghash
- // 2) No reduction -> accumulate multiplication values
- // 3) Final reduction post 48 blocks -> new ghash value is computed for the next round
- // Reduction value = first time
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
-
- // At this point we have processed 768 bytes of AES and 256 bytes of GHASH.
- // If the remaining length is less than 768, process remaining 512 bytes of ghash in GHASH_LAST_32 code
- __ subl(len, 768);
- __ cmpl(len, 768);
- __ jcc(Assembler::less, GHASH_LAST_32);
-
- // AES 16 blocks and GHASH 16 blocks in parallel
- // For multiples of 48 blocks we will do ghash16_encrypt16 interleaved multiple times
- // Reduction value = no reduction means that the carryless multiplication values are accumulated for further calculations
- // Each call uses 4 subkeyHtbl values, so increment the index by 4.
- __ bind(GHASH_16_AES_16);
- // Reduction value = no reduction
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
- // Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- // Calculated ghash value needs to be __ moved to AAD_HASHX so that we can restart the ghash16-aes16 pipeline
- __ movdqu(AAD_HASHx, ZTMP5);
- index = 0; // Reset subkeyHtbl index
-
- // Restart the pipeline
- // Reduction value = first time
- ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
- __ addl(pos, 256);
- __ addl(ghash_pos, 256);
- index += 4;
-
- __ subl(len, 768);
- __ cmpl(len, 768);
- __ jcc(Assembler::greaterEqual, GHASH_16_AES_16);
-
- // GHASH last 32 blocks processed here
- // GHASH products accumulated in ZMM27, ZMM25 and ZMM26 during GHASH16-AES16 operation is used
- __ bind(GHASH_LAST_32);
- // Use rbx as a pointer to the htbl; For last 32 blocks of GHASH, use key# 4-11 entry in subkeyHtbl
- __ movl(rbx, 256);
- // Load cipher blocks
- __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
- __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
- // Load ghash keys
- __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
-
- // Ghash blocks 0 - 3
- carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15);
- // Ghash blocks 4 - 7
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP14, ZTMP16);
-
- __ vpternlogq(ZTMP1, 0x96, ZTMP5, xmm27, Assembler::AVX_512bit); // ZTMP1 = ZTMP1 + ZTMP5 + zmm27
- __ vpternlogq(ZTMP2, 0x96, ZTMP6, xmm26, Assembler::AVX_512bit); // ZTMP2 = ZTMP2 + ZTMP6 + zmm26
- __ vpternlogq(ZTMP3, 0x96, ZTMP7, xmm25, Assembler::AVX_512bit); // ZTMP3 = ZTMP3 + ZTMP7 + zmm25
- __ evpxorq(ZTMP4, ZTMP4, ZTMP8, Assembler::AVX_512bit); // ZTMP4 = ZTMP4 + ZTMP8
-
- __ addl(ghash_pos, 128);
- __ addl(rbx, 128);
-
- // Ghash remaining blocks
- __ bind(LOOP);
- __ cmpl(ghash_pos, pos);
- __ jcc(Assembler::aboveEqual, ACCUMULATE);
- // Load next cipher blocks and corresponding ghash keys
- __ evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
- __ vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
- __ vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
- __ evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
- __ evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
-
- // ghash blocks 0 - 3
- carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15);
-
- // ghash blocks 4 - 7
- carrylessMultiply(ZTMP10, ZTMP11, ZTMP12, ZTMP9, ZTMP14, ZTMP16);
-
- // update sums
- // ZTMP1 = ZTMP1 + ZTMP5 + ZTMP9
- // ZTMP2 = ZTMP2 + ZTMP6 + ZTMP10
- // ZTMP3 = ZTMP3 + ZTMP7 xor ZTMP11
- // ZTMP4 = ZTMP4 + ZTMP8 xor ZTMP12
- xorGHASH(ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, ZTMP9, ZTMP6, ZTMP10, ZTMP7, ZTMP11, ZTMP8, ZTMP12);
- __ addl(ghash_pos, 128);
- __ addl(rbx, 128);
- __ jmp(LOOP);
- // Integrate ZTMP3/ZTMP4 into ZTMP1 and ZTMP2
- __ bind(ACCUMULATE);
- __ evpxorq(ZTMP3, ZTMP3, ZTMP4, Assembler::AVX_512bit);
- __ vpsrldq(ZTMP7, ZTMP3, 8, Assembler::AVX_512bit);
- __ vpslldq(ZTMP8, ZTMP3, 8, Assembler::AVX_512bit);
- __ evpxorq(ZTMP1, ZTMP1, ZTMP7, Assembler::AVX_512bit);
- __ evpxorq(ZTMP2, ZTMP2, ZTMP8, Assembler::AVX_512bit);
-
- // Add ZTMP1 and ZTMP2 128 - bit words horizontally
- vhpxori4x128(ZTMP1, ZTMP11);
- vhpxori4x128(ZTMP2, ZTMP12);
- // Load reduction polynomial and compute final reduction
- __ evmovdquq(ZTMP15, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
- vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4);
-
- // Pre-increment counter for next operation
- __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, xmm18, Assembler::AVX_128bit);
- // Shuffle counter and save the updated value
- __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
+ __ movdl(CTR_CHECK, CTR_BLOCKx);
+ __ andl(CTR_CHECK, 255);
+
+ // Reshuffle counter
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_512bit);
+
+ initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset);
+ __ addl(pos, 16 * 16);
+ __ cmpl(len, 32 * 16);
+ __ jcc(Assembler::below, MESG_BELOW_32_BLKS);
+
+ initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, stack_offset + 16);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+
+ __ cmpl(len, 32 * 16);
+ __ jcc(Assembler::below, NO_BIG_BLKS);
+
+ __ bind(ENCRYPT_BIG_BLKS_NO_HXOR);
+ __ cmpl(len, 2 * 32 * 16);
+ __ jcc(Assembler::below, ENCRYPT_BIG_NBLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ true, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+ __ addl(pos, 16 * 16);
+
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ true, false, true, false, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16);
+ __ evmovdquq(AAD_HASHx, ZTMP4, Assembler::AVX_512bit);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+ __ jmp(ENCRYPT_BIG_BLKS_NO_HXOR);
+
+ __ bind(ENCRYPT_BIG_NBLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+ __ addl(pos, 16 * 16);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, false, true, true, true, ghashin_offset + 16, aesout_offset + 16, HashKey_16);
+
+ __ movdqu(AAD_HASHx, ZTMP4);
+ __ addl(pos, 16 * 16);
+ __ subl(len, 32 * 16);
+
+ __ bind(NO_BIG_BLKS);
+ __ cmpl(len, 16 * 16);
+ __ jcc(Assembler::aboveEqual, ENCRYPT_16_BLKS);
+
+ __ bind(ENCRYPT_N_GHASH_32_N_BLKS);
+ ghash16_avx512(true, false, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 0, 0, HashKey_32);
+ gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset + 16, HashKey_16, false, true);
+ __ jmp(GHASH_DONE);
+
+ __ bind(ENCRYPT_16_BLKS);
+ ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
+ false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
+
+ ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
+
+ __ bind(MESG_BELOW_32_BLKS);
+ __ subl(len, 16 * 16);
+ __ addl(pos, 16 * 16);
+ gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);
+
+ __ bind(GHASH_DONE);
+ //Pre-increment counter for next operation, make sure that counter value is incremented on the LSB
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
+ __ vpaddd(CTR_BLOCKx, CTR_BLOCKx, ADD_1234, Assembler::AVX_128bit);
+ __ vpshufb(CTR_BLOCKx, CTR_BLOCKx, SHUF_MASK, Assembler::AVX_128bit);
__ movdqu(Address(counter, 0), CTR_BLOCKx);
- // Load ghash lswap mask
+ //Load ghash lswap mask
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
- // Shuffle ghash using lbswap_mask and store it
+ //Shuffle ghash using lbswap_mask and store it
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
__ movdqu(Address(state, 0), AAD_HASHx);
- __ jmp(ENC_DEC_DONE);
- __ bind(GENERATE_HTBL_48_BLKS);
- generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl, rbx /*rscratch*/);
+ //Zero out sensitive data
+ __ evpxorq(ZTMP21, ZTMP21, ZTMP21, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP0, ZTMP0, ZTMP0, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP1, ZTMP1, ZTMP1, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP2, ZTMP2, ZTMP2, Assembler::AVX_512bit);
+ __ evpxorq(ZTMP3, ZTMP3, ZTMP3, Assembler::AVX_512bit);
__ bind(ENC_DEC_DONE);
- __ movq(rax, pos);
}
//Implements data * hashkey mod (128, 127, 126, 121, 0)
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
index 2056fa057654e..5a9b084841376 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2019, 2021, Intel Corporation. All rights reserved.
+* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -57,7 +57,10 @@ address StubGenerator::ghash_byte_swap_mask_addr() {
// Polynomial x^128+x^127+x^126+x^121+1
ATTRIBUTE_ALIGNED(16) static const uint64_t GHASH_POLYNOMIAL[] = {
- 0x0000000000000001UL, 0xC200000000000000UL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL,
+ 0x0000000000000001ULL, 0xC200000000000000ULL
};
address StubGenerator::ghash_polynomial_addr() {
return (address)GHASH_POLYNOMIAL;
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
new file mode 100644
index 0000000000000..92ac78e15cba9
--- /dev/null
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
@@ -0,0 +1,502 @@
+/*
+* Copyright (c) 2024, Intel Corporation. All rights reserved.
+* Intel Math Library (LIBM) Source Code
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "macroAssembler_x86.hpp"
+#include "stubGenerator_x86_64.hpp"
+
+/******************************************************************************/
+// ALGORITHM DESCRIPTION
+// ---------------------
+//
+// tanh(x)=(exp(x)-exp(-x))/(exp(x)+exp(-x))=(1-exp(-2*x))/(1+exp(-2*x))
+//
+// Let |x|=xH+xL (upper 26 bits, lower 27 bits)
+// log2(e) rounded to 26 bits (high part) plus a double precision low part is
+// L2EH+L2EL (upper 26, lower 53 bits)
+//
+// Let xH*L2EH=k+f+r`, where (k+f)*2^8*2=int(xH*L2EH*2^9),
+// f=0.b1 b2 ... b8, k integer
+// 2^{-f} is approximated as Tn[f]+Dn[f]
+// Tn stores the high 53 bits, Dn stores (2^{-f}-Tn[f]) rounded to double precision
+//
+// r=r`+xL*L2EH+|x|*L2EL, |r|<2^{-9}+2^{-14},
+// for |x| in [23/64,3*2^7)
+// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
+//
+// For |x| in [2^{-4},2^5):
+// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
+// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
+// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
+// 1+T0+D0+p*(T0+D0)=KH+KL, where
+// KH=(1+T0+c1*r*T0)_high (leading 17 bits)
+// KL=T0_low+D0+(c1*r*T0)_low+c1*r*D0+(c2*r^2+..c5*r^5)*T0
+// eps ~ (R*KH-1)+R*KL
+// 1/(1+T0+D0+p*(T0+D0)) ~ R-R*eps
+// The result is approximated as (1-T0-D0-(T0+D0)*p)*(R-R*eps)
+// 1-T0-D0-(T0+D0)*p=-((KH-2)+KL)
+// The result is formed as
+// (KH-2)*R+(-(KH-2)*R*eps+(KL*R-KL*R*eps)), with the correct sign
+// set at the end
+//
+// For |x| in [2^{-64},2^{-4}):
+// A Taylor series expansion is used (x+p3*x^3+..+p13*x^{13})
+//
+// For |x|<2^{-64}: x is returned
+//
+// For |x|>=2^32: return +/-1
+//
+// Special cases:
+// tanh(NaN) = quiet NaN, and raise invalid exception
+// tanh(INF) = that INF
+// tanh(+/-0) = +/-0
+//
+/******************************************************************************/
+
+ATTRIBUTE_ALIGNED(4) static const juint _HALFMASK[] =
+{
+ 4160749568, 2147483647
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _ONEMASK[] =
+{
+ 0, 1072693248
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _TWOMASK[] =
+{
+ 0, 1073741824
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _MASK3[] =
+{
+ 0, 4294967280, 0, 4294967280
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _RMASK[] =
+{
+ 4294705152, 4294967295, 4294705152, 4294967295
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _L2E[] =
+{
+ 1610612736, 1082594631, 4166901572, 1055174155
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _Shifter[] =
+{
+ 0, 1127743488, 0, 3275227136
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _cv[] =
+{
+ 3884607281, 3168131199, 3607404735, 3190582024, 1874480759,
+ 1032041131, 4286760334, 1053736893, 4277811695, 3211144770,
+ 0, 0
+};
+
+ATTRIBUTE_ALIGNED(4) static const juint _pv[] =
+{
+ 236289503, 1064135997, 463583772, 3215696314, 1441186365,
+ 3212977891, 286331153, 1069617425, 2284589306, 1066820852,
+ 1431655765, 3218429269
+};
+
+ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] =
+{
+ 0, 1072693248, 0, 0, 1797923801, 1072687577,
+ 1950547427, 1013229059, 730821105, 1072681922, 2523232743, 1012067188,
+ 915592468, 1072676282, 352947894, 3161024371, 2174652632, 1072670657,
+ 4087714590, 1014450259, 35929225, 1072665048, 2809788041, 3159436968,
+ 2912730644, 1072659453, 3490067722, 3163405074, 2038973688, 1072653874,
+ 892941374, 1016046459, 1533953344, 1072648310, 769171851, 1015665633,
+ 1222472308, 1072642761, 1054357470, 3161021018, 929806999, 1072637227,
+ 3205336643, 1015259557, 481706282, 1072631708, 1696079173, 3162710528,
+ 3999357479, 1072626203, 2258941616, 1015924724, 2719515920, 1072620714,
+ 2760332941, 1015137933, 764307441, 1072615240, 3021057420, 3163329523,
+ 2256325230, 1072609780, 580117746, 1015317295, 2728693978, 1072604335,
+ 396109971, 3163462691, 2009970496, 1072598905, 2159039665, 3162572948,
+ 4224142467, 1072593489, 3389820386, 1015207202, 610758006, 1072588089,
+ 1965209397, 3161866232, 3884662774, 1072582702, 2158611599, 1014210185,
+ 991358482, 1072577331, 838715019, 3163157668, 351641897, 1072571974,
+ 2172261526, 3163010599, 1796832535, 1072566631, 3176955716, 3160585513,
+ 863738719, 1072561303, 1326992220, 3162613197, 1679558232, 1072555989,
+ 2390342287, 3163333970, 4076975200, 1072550689, 2029000899, 1015208535,
+ 3594158869, 1072545404, 2456521700, 3163256561, 64696965, 1072540134,
+ 1768797490, 1015816960, 1912561781, 1072534877, 3147495102, 1015678253,
+ 382305176, 1072529635, 2347622376, 3162578625, 3898795731, 1072524406,
+ 1249994144, 1011869818, 3707479175, 1072519192, 3613079303, 1014164738,
+ 3939148246, 1072513992, 3210352148, 1015274323, 135105010, 1072508807,
+ 1906148728, 3163375739, 721996136, 1072503635, 563754734, 1015371318,
+ 1242007932, 1072498477, 1132034716, 3163339831, 1532734324, 1072493333,
+ 3094216535, 3163162857, 1432208378, 1072488203, 1401068914, 3162363963,
+ 778901109, 1072483087, 2248183955, 3161268751, 3706687593, 1072477984,
+ 3521726940, 1013253067, 1464976603, 1072472896, 3507292405, 3161977534,
+ 2483480501, 1072467821, 1216371780, 1013034172, 2307442995, 1072462760,
+ 3190117721, 3162404539, 777507147, 1072457713, 4282924205, 1015187533,
+ 2029714210, 1072452679, 613660079, 1015099143, 1610600570, 1072447659,
+ 3766732298, 1015760183, 3657065772, 1072442652, 399025623, 3162957078,
+ 3716502172, 1072437659, 2303740125, 1014042725, 1631695677, 1072432680,
+ 2717633076, 3162344026, 1540824585, 1072427714, 1064017011, 3163487690,
+ 3287523847, 1072422761, 1625971539, 3157009955, 2420883922, 1072417822,
+ 2049810052, 1014119888, 3080351519, 1072412896, 3379126788, 3157218001,
+ 815859274, 1072407984, 240396590, 3163487443, 4062661092, 1072403084,
+ 1422616006, 3163255318, 4076559943, 1072398198, 2119478331, 3160758351,
+ 703710506, 1072393326, 1384660846, 1015195891, 2380618042, 1072388466,
+ 3149557219, 3163320799, 364333489, 1072383620, 3923737744, 3161421373,
+ 3092190715, 1072378786, 814012168, 3159523422, 1822067026, 1072373966,
+ 1241994956, 1015340290, 697153126, 1072369159, 1283515429, 3163283189,
+ 3861050111, 1072364364, 254893773, 3162813180, 2572866477, 1072359583,
+ 878562433, 1015521741, 977020788, 1072354815, 3065100517, 1015541563,
+ 3218338682, 1072350059, 3404164304, 3162477108, 557149882, 1072345317,
+ 3672720709, 1014537265, 1434058175, 1072340587, 251133233, 1015085769,
+ 1405169241, 1072335870, 2998539689, 3162830951, 321958744, 1072331166,
+ 3401933767, 1015794558, 2331271250, 1072326474, 812057446, 1012207446,
+ 2990417245, 1072321795, 3683467745, 3163369326, 2152073944, 1072317129,
+ 1486860576, 3163203456, 3964284211, 1072312475, 2111583915, 1015427164,
+ 3985553595, 1072307834, 4002146062, 1015834136, 2069751141, 1072303206,
+ 1562170675, 3162724681, 2366108318, 1072298590, 2867985102, 3161762254,
+ 434316067, 1072293987, 2028358766, 1013458122, 424392917, 1072289396,
+ 2749202995, 3162838718, 2191782032, 1072284817, 2960257726, 1013742662,
+ 1297350157, 1072280251, 1308022040, 3163412558, 1892288442, 1072275697,
+ 2446255666, 3162600381, 3833209506, 1072271155, 2722920684, 1013754842,
+ 2682146384, 1072266626, 2082178513, 3163363419, 2591453363, 1072262109,
+ 2132396182, 3159074198, 3418903055, 1072257604, 2527457337, 3160820604,
+ 727685349, 1072253112, 2038246809, 3162358742, 2966275557, 1072248631,
+ 2176155324, 3159842759, 1403662306, 1072244163, 2788809599, 3161671007,
+ 194117574, 1072239707, 777528612, 3163412089, 3492293770, 1072235262,
+ 2248032210, 1015386826, 2568320822, 1072230830, 2732824428, 1014352915,
+ 1577608921, 1072226410, 1875489510, 3162968394, 380978316, 1072222002,
+ 854188970, 3160462686, 3134592888, 1072217605, 4232266862, 1015991134,
+ 1110089947, 1072213221, 1451641639, 1015474673, 2759350287, 1072208848,
+ 1148526634, 1015894933, 3649726105, 1072204487, 4085036346, 1015649474,
+ 3643909174, 1072200138, 3537586109, 1014354647, 2604962541, 1072195801,
+ 2614425274, 3163539192, 396319521, 1072191476, 4172420816, 3159074632,
+ 1176749997, 1072187162, 2738998779, 3162035844, 515457527, 1072182860,
+ 836709333, 1015651226, 2571947539, 1072178569, 3558159064, 3163376669,
+ 2916157145, 1072174290, 219487565, 1015309367, 1413356050, 1072170023,
+ 1651349291, 3162668166, 2224145553, 1072165767, 3482522030, 3161489169,
+ 919555682, 1072161523, 3121969534, 1012948226, 1660913392, 1072157290,
+ 4218599604, 1015135707, 19972402, 1072153069, 3507899862, 1016009292,
+ 158781403, 1072148859, 2221464712, 3163286453, 1944781191, 1072144660,
+ 3993278767, 3161724279, 950803702, 1072140473, 1655364926, 1015237032,
+ 1339972927, 1072136297, 167908909, 1015572152, 2980802057, 1072132132,
+ 378619896, 1015773303, 1447192521, 1072127979, 1462857171, 3162514521,
+ 903334909, 1072123837, 1636462108, 1015039997, 1218806132, 1072119706,
+ 1818613052, 3162548441, 2263535754, 1072115586, 752233586, 3162639008,
+ 3907805044, 1072111477, 2257091225, 3161550407, 1727278727, 1072107380,
+ 3562710623, 1011471940, 4182873220, 1072103293, 629542646, 3161996303,
+ 2555984613, 1072099218, 2652555442, 3162552692, 1013258799, 1072095154,
+ 1748797611, 3160129082, 3721688645, 1072091100, 3069276937, 1015839401,
+ 1963711167, 1072087058, 1744767757, 3160574294, 4201977662, 1072083026,
+ 748330254, 1013594357, 1719614413, 1072079006, 330458198, 3163282740,
+ 2979960120, 1072074996, 2599109725, 1014498493, 3561793907, 1072070997,
+ 1157054053, 1011890350, 3339203574, 1072067009, 1483497780, 3162408754,
+ 2186617381, 1072063032, 2270764084, 3163272713, 4273770423, 1072059065,
+ 3383180809, 3163218901, 885834528, 1072055110, 1973258547, 3162261564,
+ 488188413, 1072051165, 3199821029, 1015564048, 2956612997, 1072047230,
+ 2118169751, 3162735553, 3872257780, 1072043306, 1253592103, 1015958334,
+ 3111574537, 1072039393, 2606161479, 3162759746, 551349105, 1072035491,
+ 3821916050, 3162106589, 363667784, 1072031599, 813753950, 1015785209,
+ 2425981843, 1072027717, 2830390851, 3163346599, 2321106615, 1072023846,
+ 2171176610, 1009535771, 4222122499, 1072019985, 1277378074, 3163256737,
+ 3712504873, 1072016135, 88491949, 1015427660, 671025100, 1072012296,
+ 3832014351, 3163022030, 3566716925, 1072008466, 1536826856, 1014142433,
+ 3689071823, 1072004647, 2321004996, 3162552716, 917841882, 1072000839,
+ 18715565, 1015659308, 3723038930, 1071997040, 378465264, 3162569582,
+ 3395129871, 1071993252, 4025345435, 3162335388, 4109806887, 1071989474,
+ 422403966, 1014469229, 1453150082, 1071985707, 498154669, 3161488062,
+ 3896463087, 1071981949, 1139797873, 3161233805, 2731501122, 1071978202,
+ 1774031855, 3162470021, 2135241198, 1071974465, 1236747871, 1013589147,
+ 1990012071, 1071970738, 3529070563, 3162813193, 2178460671, 1071967021,
+ 777878098, 3162842493, 2583551245, 1071963314, 3161094195, 1015606491,
+ 3088564500, 1071959617, 1762311517, 1015045673, 3577096743, 1071955930,
+ 2951496418, 1013793687, 3933059031, 1071952253, 2133366768, 3161531832,
+ 4040676318, 1071948586, 4090609238, 1015663458, 3784486610, 1071944929,
+ 1581883040, 3161698953, 3049340112, 1071941282, 3062915824, 1013170595,
+ 1720398391, 1071937645, 3980678963, 3163300080, 3978100823, 1071934017,
+ 3513027190, 1015845963, 1118294578, 1071930400, 2197495694, 3159909401,
+ 1617004845, 1071926792, 82804944, 1010342778, 1065662932, 1071923194,
+ 2533670915, 1014530238, 3645941911, 1071919605, 3814685081, 3161573341,
+ 654919306, 1071916027, 3232961757, 3163047469, 569847338, 1071912458,
+ 472945272, 3159290729, 3278348324, 1071908898, 3069497416, 1014750712,
+ 78413852, 1071905349, 4183226867, 3163017251, 3743175029, 1071901808,
+ 2072812490, 3162175075, 1276261410, 1071898278, 300981948, 1014684169,
+ 1156440435, 1071894757, 2351451249, 1013967056, 3272845541, 1071891245,
+ 928852419, 3163488248, 3219942644, 1071887743, 3798990616, 1015368806,
+ 887463927, 1071884251, 3596744163, 3160794166, 460407023, 1071880768,
+ 4237175092, 3163138469, 1829099622, 1071877294, 1016661181, 3163461005,
+ 589198666, 1071873830, 2664346172, 3163157962, 926591435, 1071870375,
+ 3208833762, 3162913514, 2732492859, 1071866929, 2691479646, 3162255684,
+ 1603444721, 1071863493, 1548633640, 3162201326, 1726216749, 1071860066,
+ 2466808228, 3161676405, 2992903935, 1071856648, 2218154406, 1015228193,
+ 1000925746, 1071853240, 1018491672, 3163309544, 4232894513, 1071849840,
+ 2383938684, 1014668519, 3991843581, 1071846450, 4092853457, 1014585763,
+ 171030293, 1071843070, 3526460132, 1014428778, 1253935211, 1071839698,
+ 1395382931, 3159702613, 2839424854, 1071836335, 1171596163, 1013041679,
+ 526652809, 1071832982, 4223459736, 1015879375, 2799960843, 1071829637,
+ 1423655381, 1015022151, 964107055, 1071826302, 2800439588, 3162833221,
+ 3504003472, 1071822975, 3594001060, 3157330652, 1724976915, 1071819658,
+ 420909223, 3163117379, 4112506593, 1071816349, 2947355221, 1014371048,
+ 1972484976, 1071813050, 675290301, 3161640050, 3790955393, 1071809759,
+ 2352942462, 3163180090, 874372905, 1071806478, 100263788, 1015940732,
+ 1709341917, 1071803205, 2571168217, 1014152499, 1897844341, 1071799941,
+ 1254300460, 1015275938, 1337108031, 1071796686, 3203724452, 1014677845,
+ 4219606026, 1071793439, 2434574742, 1014681548, 1853186616, 1071790202,
+ 3066496371, 1015656574, 2725843665, 1071786973, 1433917087, 1014838523,
+ 2440944790, 1071783753, 2492769774, 1014147454, 897099801, 1071780542,
+ 754756297, 1015241005, 2288159958, 1071777339, 2169144469, 1014876021,
+ 2218315341, 1071774145, 2694295388, 3163288868, 586995997, 1071770960,
+ 41662348, 3162627992, 1588871207, 1071767783, 143439582, 3162963416,
+ 828946858, 1071764615, 10642492, 1015939438, 2502433899, 1071761455,
+ 2148595913, 1015023991, 2214878420, 1071758304, 892270087, 3163116422,
+ 4162030108, 1071755161, 2763428480, 1015529349, 3949972341, 1071752027,
+ 2068408548, 1014913868, 1480023343, 1071748902, 2247196168, 1015327453,
+ 948735466, 1071745785, 3516338028, 3162574883, 2257959872, 1071742676,
+ 3802946148, 1012964927, 1014845819, 1071739576, 3117910646, 3161559105,
+ 1416741826, 1071736484, 2196380210, 1011413563, 3366293073, 1071733400,
+ 3119426314, 1014120554, 2471440686, 1071730325, 968836267, 3162214888,
+ 2930322912, 1071727258, 2599499422, 3162714047, 351405227, 1071724200,
+ 3125337328, 3159822479, 3228316108, 1071721149, 3010241991, 3158422804,
+ 2875075254, 1071718107, 4144233330, 3163333716, 3490863953, 1071715073,
+ 960797498, 3162948880, 685187902, 1071712048, 378731989, 1014843115,
+ 2952712987, 1071709030, 3293494651, 3160120301, 1608493509, 1071706021,
+ 3159622171, 3162807737, 852742562, 1071703020, 667253586, 1009793559,
+ 590962156, 1071700027, 3829346666, 3163275597, 728909815, 1071697042,
+ 383930225, 1015029468, 1172597893, 1071694065, 114433263, 1015347593,
+ 1828292879, 1071691096, 1255956747, 1015588398, 2602514713, 1071688135,
+ 2268929336, 1014354284, 3402036099, 1071685182, 405889334, 1015105656,
+ 4133881824, 1071682237, 2148155345, 3162931299, 410360776, 1071679301,
+ 1269990655, 1011975870, 728934454, 1071676372, 1413842688, 1014178612,
+ 702412510, 1071673451, 3803266087, 3162280415, 238821257, 1071670538,
+ 1469694871, 3162884987, 3541402996, 1071667632, 2759177317, 1014854626,
+ 1928746161, 1071664735, 983617676, 1014285177, 3899555717, 1071661845,
+ 427280750, 3162546972, 772914124, 1071658964, 4004372762, 1012230161,
+ 1048019041, 1071656090, 1398474845, 3160510595, 339411585, 1071653224,
+ 264588982, 3161636657, 2851812149, 1071650365, 2595802551, 1015767337,
+ 4200250559, 1071647514, 2808127345, 3161781938
+};
+
+#define __ _masm->
+
+address StubGenerator::generate_libmTanh() {
+ StubCodeMark mark(this, "StubRoutines", "libmTanh");
+ address start = __ pc();
+
+ Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
+ Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1;
+ Label B1_2, B1_4;
+
+ address HALFMASK = (address)_HALFMASK;
+ address ONEMASK = (address)_ONEMASK;
+ address TWOMASK = (address)_TWOMASK;
+ address MASK3 = (address)_MASK3;
+ address RMASK = (address)_RMASK;
+ address L2E = (address)_L2E;
+ address Shifter = (address)_Shifter;
+ address cv = (address)_cv;
+ address pv = (address)_pv;
+ address T2_neg_f = (address) _T2_neg_f;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+ __ bind(B1_2);
+ __ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
+ __ xorpd(xmm4, xmm4);
+ __ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
+ __ movsd(xmm2, ExternalAddress(L2E + 8), r11 /*rscratch*/);
+ __ movl(rax, 32768);
+ __ pinsrw(xmm4, rax, 3);
+ __ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
+ __ pextrw(rcx, xmm0, 3);
+ __ andpd(xmm3, xmm0);
+ __ andnpd(xmm4, xmm0);
+ __ pshufd(xmm5, xmm4, 68);
+ __ movl(rdx, 32768);
+ __ andl(rdx, rcx);
+ __ andl(rcx, 32767);
+ __ subl(rcx, 16304);
+ __ cmpl(rcx, 144);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1);
+ __ subsd(xmm4, xmm3);
+ __ mulsd(xmm3, xmm1);
+ __ mulsd(xmm2, xmm5);
+ __ cvtsd2siq(rax, xmm3);
+ __ movq(xmm7, xmm3);
+ __ addsd(xmm3, xmm6);
+ __ mulsd(xmm1, xmm4);
+ __ movsd(xmm4, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ subsd(xmm3, xmm6);
+ __ xorpd(xmm0, xmm0);
+ __ addsd(xmm2, xmm1);
+ __ subsd(xmm7, xmm3);
+ __ movdqu(xmm6, ExternalAddress(cv), r11 /*rscratch*/);
+ __ addsd(xmm2, xmm7);
+ __ movl(rcx, 255);
+ __ andl(rcx, rax);
+ __ addl(rcx, rcx);
+ __ lea(r8, ExternalAddress(T2_neg_f));
+ __ movdqu(xmm5, Address(r8, rcx, Address::times(8)));
+ __ shrl(rax, 4);
+ __ andl(rax, 65520);
+ __ subl(rax, 16368);
+ __ negl(rax);
+ __ pinsrw(xmm0, rax, 3);
+ __ movdqu(xmm1, ExternalAddress(cv + 16), r11 /*rscratch*/);
+ __ pshufd(xmm0, xmm0, 68);
+ __ mulpd(xmm0, xmm5);
+ __ movsd(xmm7, ExternalAddress(cv + 32), r11 /*rscratch*/);
+ __ pshufd(xmm2, xmm2, 68);
+ __ movq(xmm5, xmm4);
+ __ addsd(xmm4, xmm0);
+ __ mulpd(xmm6, xmm2);
+ __ mulsd(xmm7, xmm2);
+ __ mulpd(xmm2, xmm2);
+ __ addpd(xmm1, xmm6);
+ __ mulsd(xmm2, xmm2);
+ __ movsd(xmm3, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ mulpd(xmm1, xmm2);
+ __ pshufd(xmm6, xmm1, 78);
+ __ addsd(xmm1, xmm6);
+ __ movq(xmm6, xmm1);
+ __ addsd(xmm1, xmm7);
+ __ mulsd(xmm1, xmm0);
+ __ addsd(xmm1, xmm4);
+ __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
+ __ divsd(xmm5, xmm1);
+ __ subsd(xmm3, xmm4);
+ __ pshufd(xmm1, xmm0, 238);
+ __ addsd(xmm3, xmm0);
+ __ movq(xmm2, xmm4);
+ __ addsd(xmm3, xmm1);
+ __ mulsd(xmm1, xmm7);
+ __ mulsd(xmm7, xmm0);
+ __ addsd(xmm3, xmm1);
+ __ addsd(xmm4, xmm7);
+ __ movsd(xmm1, ExternalAddress(RMASK), r11 /*rscratch*/);
+ __ mulsd(xmm6, xmm0);
+ __ andpd(xmm4, ExternalAddress(MASK3), r11 /*rscratch*/);
+ __ addsd(xmm3, xmm6);
+ __ movq(xmm6, xmm4);
+ __ subsd(xmm2, xmm4);
+ __ addsd(xmm2, xmm7);
+ __ movsd(xmm7, ExternalAddress(ONEMASK), r11 /*rscratch*/);
+ __ andpd(xmm5, xmm1);
+ __ addsd(xmm3, xmm2);
+ __ mulsd(xmm4, xmm5);
+ __ xorpd(xmm2, xmm2);
+ __ mulsd(xmm3, xmm5);
+ __ subsd(xmm6, ExternalAddress(TWOMASK), r11 /*rscratch*/);
+ __ subsd(xmm4, xmm7);
+ __ xorl(rdx, 32768);
+ __ pinsrw(xmm2, rdx, 3);
+ __ addsd(xmm4, xmm3);
+ __ mulsd(xmm6, xmm5);
+ __ movq(xmm1, xmm3);
+ __ mulsd(xmm3, xmm4);
+ __ movq(xmm0, xmm6);
+ __ mulsd(xmm6, xmm4);
+ __ subsd(xmm1, xmm3);
+ __ subsd(xmm1, xmm6);
+ __ addsd(xmm0, xmm1);
+ __ xorpd(xmm0, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_0_0_1);
+ __ addl(rcx, 960);
+ __ cmpl(rcx, 1104);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1);
+ __ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
+ __ pshufd(xmm1, xmm0, 68);
+ __ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
+ __ mulpd(xmm1, xmm1);
+ __ movdqu(xmm4, ExternalAddress(pv + 32), r11 /*rscratch*/);
+ __ mulpd(xmm2, xmm1);
+ __ pshufd(xmm5, xmm1, 68);
+ __ addpd(xmm2, xmm3);
+ __ mulsd(xmm5, xmm5);
+ __ mulpd(xmm2, xmm1);
+ __ mulsd(xmm5, xmm5);
+ __ addpd(xmm2, xmm4);
+ __ mulpd(xmm2, xmm5);
+ __ pshufd(xmm5, xmm2, 238);
+ __ addsd(xmm2, xmm5);
+ __ mulsd(xmm2, xmm0);
+ __ addsd(xmm0, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_1_0_1);
+ __ addl(rcx, 15344);
+ __ cmpl(rcx, 16448);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1);
+ __ cmpl(rcx, 16);
+ __ jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
+ __ xorpd(xmm2, xmm2);
+ __ movl(rax, 17392);
+ __ pinsrw(xmm2, rax, 3);
+ __ mulsd(xmm2, xmm0);
+ __ addsd(xmm2, xmm0);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_3_0_1);
+ __ movq(xmm2, xmm0);
+ __ mulsd(xmm2, xmm2);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_2_0_1);
+ __ cmpl(rcx, 32752);
+ __ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1);
+ __ xorpd(xmm2, xmm2);
+ __ movl(rcx, 15344);
+ __ pinsrw(xmm2, rcx, 3);
+ __ movq(xmm3, xmm2);
+ __ mulsd(xmm2, xmm2);
+ __ addsd(xmm2, xmm3);
+
+ __ bind(L_2TAG_PACKET_5_0_1);
+ __ xorpd(xmm0, xmm0);
+ __ orl(rdx, 16368);
+ __ pinsrw(xmm0, rdx, 3);
+ __ jmp(B1_4);
+
+ __ bind(L_2TAG_PACKET_4_0_1);
+ __ movq(xmm2, xmm0);
+ __ movdl(rax, xmm0);
+ __ psrlq(xmm2, 20);
+ __ movdl(rcx, xmm2);
+ __ orl(rcx, rax);
+ __ cmpl(rcx, 0);
+ __ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1);
+ __ addsd(xmm0, xmm0);
+
+ __ bind(B1_4);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ return start;
+}
+
+#undef __
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
index ba9eb32e8c13e..75611524e3b0a 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -373,6 +373,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
// [ lo(arg) ]
// [ hi(arg) ]
//
+ if (kind == Interpreter::java_lang_math_tanh) {
+ return nullptr;
+ }
+
if (kind == Interpreter::java_lang_math_fmaD) {
if (!UseFMA) {
return nullptr; // Generate a vanilla entry
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
index 26eea4c1d6a5f..5ea2d8eba259b 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -465,6 +465,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
}
+ } else if (kind == Interpreter::java_lang_math_tanh) {
+ assert(StubRoutines::dtanh() != nullptr, "not initialized");
+ __ movdbl(xmm0, Address(rsp, wordSize));
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh())));
} else if (kind == Interpreter::java_lang_math_abs) {
assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized");
__ movdbl(xmm0, Address(rsp, wordSize));
diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp
index 5e783225fcbfc..527d961259ecc 100644
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp
@@ -4048,6 +4048,7 @@ void TemplateTable::_new() {
__ push(rcx); // save the contexts of klass for initializing the header
// make sure klass is initialized
+ // init_state needs acquire, but x86 is TSO, and so we are already good.
#ifdef _LP64
assert(VM_Version::supports_fast_class_init_checks(), "must support fast class initialization checks");
__ clinit_barrier(rcx, r15_thread, nullptr /*L_fast_path*/, &slow_case);
diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
index e5075e180d9d6..d795c751d02b5 100644
--- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
+++ b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
@@ -24,7 +24,7 @@
#include "precompiled.hpp"
#include "prims/upcallLinker.hpp"
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
index 82179f9022e92..bc261bfd93f44 100644
--- a/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
+++ b/src/hotspot/cpu/x86/upcallLinker_x86_64.cpp
@@ -23,7 +23,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
-#include "code/codeBlob.hpp"
+#include "classfile/javaClasses.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/disassembler.hpp"
@@ -169,10 +169,10 @@ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescr
__ block_comment("} restore_callee_saved_regs ");
}
-static const int upcall_stub_code_base_size = 1024;
+static const int upcall_stub_code_base_size = 1200;
static const int upcall_stub_size_per_arg = 16;
-address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
@@ -281,7 +281,6 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
__ block_comment("{ on_entry");
__ vzeroupper();
__ lea(c_rarg0, Address(rsp, frame_data_offset));
- __ movptr(c_rarg1, (intptr_t)receiver);
// stack already aligned
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, UpcallLinker::on_entry)));
__ movptr(r15_thread, rax);
@@ -297,12 +296,10 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
arg_shuffle.generate(_masm, shuffle_reg, abi._shadow_space_bytes, 0);
__ block_comment("} argument shuffle");
- __ block_comment("{ receiver ");
- __ get_vm_result(j_rarg0, r15_thread);
- __ block_comment("} receiver ");
-
- __ mov_metadata(rbx, entry);
- __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
+ __ block_comment("{ load target ");
+ __ movptr(j_rarg0, (intptr_t)receiver);
+ __ call(RuntimeAddress(StubRoutines::upcall_stub_load_target())); // puts target Method* in rbx
+ __ block_comment("} load target ");
__ push_cont_fastpath();
@@ -377,7 +374,7 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Method* entry,
#ifndef PRODUCT
stringStream ss;
- ss.print("upcall_stub_%s", entry->signature()->as_C_string());
+ ss.print("upcall_stub_%s", signature->as_C_string());
const char* name = _masm->code_string(ss.freeze());
#else // PRODUCT
const char* name = "upcall_stub";
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 2549feb8a4069..038797924a92d 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -437,6 +437,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000);
__ jcc(Assembler::notEqual, vector_save_restore);
+#ifndef PRODUCT
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
@@ -453,6 +454,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movq(Address(rsi, 8), r31);
UseAPX = save_apx;
+#endif
#endif
__ bind(vector_save_restore);
//
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 2b29dd14e4b27..c88fa1ec5ce15 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -2231,10 +2231,6 @@ const RegMask* Matcher::predicate_reg_mask(void) {
return &_VECTMASK_REG_mask;
}
-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
- return new TypeVectMask(elemTy, length);
-}
-
// Max vector size in bytes. 0 if not supported.
int Matcher::vector_width_in_bytes(BasicType bt) {
assert(is_java_primitive(bt), "only primitive type vectors");
@@ -2457,6 +2453,10 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
mstack.push(m, Visit); // m = ShiftCntV
return true;
}
+ if (is_encode_and_store_pattern(n, m)) {
+ mstack.push(m, Visit);
+ return true;
+ }
return false;
}
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
index 7aa4f6a29a116..7c9695571daec 100644
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@@ -6322,17 +6322,6 @@ instruct storeImmB(memory mem, immI8 src) %{
ins_pipe( ialu_mem_imm );
%}
-// Store CMS card-mark Immediate
-instruct storeImmCM(memory mem, immI8 src) %{
- match(Set mem (StoreCM mem src));
-
- ins_cost(150);
- format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
- opcode(0xC6); /* C6 /0 */
- ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
- ins_pipe( ialu_mem_imm );
-%}
-
// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index 1b271683bd60d..c3fa4c16e553f 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -4341,6 +4341,7 @@ instruct loadP(rRegP dst, memory mem)
// Load Compressed Pointer
instruct loadN(rRegN dst, memory mem)
%{
+ predicate(n->as_Load()->barrier_data() == 0);
match(Set dst (LoadN mem));
ins_cost(125); // XXX
@@ -5126,6 +5127,7 @@ instruct storeImmP(memory mem, immP31 src)
// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(125); // XXX
@@ -5150,7 +5152,7 @@ instruct storeNKlass(memory mem, rRegN src)
instruct storeImmN0(memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() == nullptr);
+ predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem zero));
ins_cost(125); // XXX
@@ -5163,6 +5165,7 @@ instruct storeImmN0(memory mem, immN0 zero)
instruct storeImmN(memory mem, immN src)
%{
+ predicate(n->as_Store()->barrier_data() == 0);
match(Set mem (StoreN mem src));
ins_cost(150); // XXX
@@ -5295,32 +5298,6 @@ instruct storeImmB(memory mem, immI8 src)
ins_pipe(ialu_mem_imm);
%}
-// Store CMS card-mark Immediate
-instruct storeImmCM0_reg(memory mem, immI_0 zero)
-%{
- predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
- match(Set mem (StoreCM mem zero));
-
- ins_cost(125); // XXX
- format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
- ins_encode %{
- __ movb($mem$$Address, r12);
- %}
- ins_pipe(ialu_mem_reg);
-%}
-
-instruct storeImmCM0(memory mem, immI_0 src)
-%{
- match(Set mem (StoreCM mem src));
-
- ins_cost(150); // XXX
- format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
- ins_encode %{
- __ movb($mem$$Address, $src$$constant);
- %}
- ins_pipe(ialu_mem_imm);
-%}
-
// Store Float
instruct storeF(memory mem, regF src)
%{
@@ -7162,6 +7139,7 @@ instruct compareAndSwapN(rRegI res,
memory mem_ptr,
rax_RegN oldval, rRegN newval,
rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
@@ -7249,6 +7227,7 @@ instruct compareAndExchangeN(
memory mem_ptr,
rax_RegN oldval, rRegN newval,
rFlagsReg cr) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
effect(KILL cr);
@@ -7470,6 +7449,7 @@ instruct xchgP( memory mem, rRegP newval) %{
%}
instruct xchgN( memory mem, rRegN newval) %{
+ predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set newval (GetAndSetN mem newval));
format %{ "XCHGL $newval,$mem]" %}
ins_encode %{
@@ -11659,6 +11639,7 @@ instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
+ predicate(n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN src (LoadN mem)));
format %{ "cmpl $src, $mem\t# compressed ptr" %}
@@ -11680,6 +11661,7 @@ instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
+ predicate(n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN src (LoadN mem)));
format %{ "cmpl $mem, $src\t# compressed ptr" %}
@@ -11720,7 +11702,8 @@ instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() != nullptr);
+ predicate(CompressedOops::base() != nullptr &&
+ n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN (LoadN mem) zero));
ins_cost(500); // XXX
@@ -11733,7 +11716,8 @@ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
- predicate(CompressedOops::base() == nullptr);
+ predicate(CompressedOops::base() == nullptr &&
+ n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpN (LoadN mem) zero));
format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
diff --git a/src/hotspot/cpu/zero/upcallLinker_zero.cpp b/src/hotspot/cpu/zero/upcallLinker_zero.cpp
index 6447dac86c915..408ebc328205d 100644
--- a/src/hotspot/cpu/zero/upcallLinker_zero.cpp
+++ b/src/hotspot/cpu/zero/upcallLinker_zero.cpp
@@ -24,7 +24,7 @@
#include "precompiled.hpp"
#include "prims/upcallLinker.hpp"
-address UpcallLinker::make_upcall_stub(jobject mh, Method* entry,
+address UpcallLinker::make_upcall_stub(jobject mh, Symbol* signature,
BasicType* out_sig_bt, int total_out_args,
BasicType ret_type,
jobject jabi, jobject jconv,
diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp
index 1fcf4b1086253..7312dd116468c 100644
--- a/src/hotspot/cpu/zero/vm_version_zero.cpp
+++ b/src/hotspot/cpu/zero/vm_version_zero.cpp
@@ -116,11 +116,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
- if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) {
- warning("Unsupported locking mode for this CPU.");
- FLAG_SET_DEFAULT(LockingMode, LM_LEGACY);
- }
-
// Enable error context decoding on known platforms
#if defined(IA32) || defined(AMD64) || defined(ARM) || \
defined(AARCH64) || defined(PPC) || defined(RISCV) || \
diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
index 2b53042ef1017..aab43e733964e 100644
--- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
@@ -485,26 +485,30 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
// Unlock if necessary
if (monitor) {
- BasicLock *lock = monitor->lock();
- markWord header = lock->displaced_header();
- oop rcvr = monitor->obj();
- monitor->set_obj(nullptr);
-
- bool dec_monitor_count = true;
- if (header.to_pointer() != nullptr) {
- markWord old_header = markWord::encode(lock);
- if (rcvr->cas_set_mark(header, old_header) != old_header) {
- monitor->set_obj(rcvr);
- dec_monitor_count = false;
- InterpreterRuntime::monitorexit(monitor);
+ bool success = false;
+ if (LockingMode == LM_LEGACY) {
+ BasicLock* lock = monitor->lock();
+ oop rcvr = monitor->obj();
+ monitor->set_obj(nullptr);
+ success = true;
+ markWord header = lock->displaced_header();
+ if (header.to_pointer() != nullptr) { // Check for recursive lock
+ markWord old_header = markWord::encode(lock);
+ if (rcvr->cas_set_mark(header, old_header) != old_header) {
+ monitor->set_obj(rcvr);
+ success = false;
+ }
+ }
+ if (success) {
+ THREAD->dec_held_monitor_count();
}
}
- if (dec_monitor_count) {
- THREAD->dec_held_monitor_count();
+ if (!success) {
+ InterpreterRuntime::monitorexit(monitor);
}
}
- unwind_and_return:
+ unwind_and_return:
// Unwind the current activation
thread->pop_zero_frame();
diff --git a/src/hotspot/os/aix/osThread_aix.cpp b/src/hotspot/os/aix/osThread_aix.cpp
index 4049d6b58b777..ab08a766156fe 100644
--- a/src/hotspot/os/aix/osThread_aix.cpp
+++ b/src/hotspot/os/aix/osThread_aix.cpp
@@ -23,32 +23,27 @@
*
*/
-// no precompiled headers
-
-#include "memory/allocation.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/mutexLocker.hpp"
-#include "runtime/os.hpp"
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/mutex.hpp"
#include "runtime/osThread.hpp"
-#include "runtime/safepoint.hpp"
-#include "runtime/vmThread.hpp"
-
-void OSThread::pd_initialize() {
- _thread_id = 0;
- _kernel_thread_id = 0;
- _siginfo = nullptr;
- _ucontext = nullptr;
- _expanding_stack = 0;
- _alt_sig_stack = nullptr;
- _last_cpu_times.sys = _last_cpu_times.user = 0L;
+#include <signal.h>
+OSThread::OSThread()
+ : _thread_id(0),
+ _thread_type(),
+ _kernel_thread_id(0),
+ _caller_sigmask(),
+ sr(),
+ _siginfo(nullptr),
+ _ucontext(nullptr),
+ _expanding_stack(0),
+ _alt_sig_stack(nullptr),
+ _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) {
sigemptyset(&_caller_sigmask);
-
- _startThread_lock = new Monitor(Mutex::event, "startThread_lock");
- assert(_startThread_lock != nullptr, "check");
}
-void OSThread::pd_destroy() {
+OSThread::~OSThread() {
delete _startThread_lock;
}
diff --git a/src/hotspot/os/aix/osThread_aix.hpp b/src/hotspot/os/aix/osThread_aix.hpp
index 5feb3c5799aa0..8f3799d070142 100644
--- a/src/hotspot/os/aix/osThread_aix.hpp
+++ b/src/hotspot/os/aix/osThread_aix.hpp
@@ -26,22 +26,17 @@
#ifndef OS_AIX_OSTHREAD_AIX_HPP
#define OS_AIX_OSTHREAD_AIX_HPP
- public:
- typedef pthread_t thread_id_t;
+#include "runtime/osThreadBase.hpp"
+#include "suspendResume_posix.hpp"
+#include "utilities/globalDefinitions.hpp"
- private:
- int _thread_type;
+class OSThread : public OSThreadBase {
+ friend class VMStructs;
- public:
-
- int thread_type() const {
- return _thread_type;
- }
- void set_thread_type(int type) {
- _thread_type = type;
- }
+ typedef pthread_t thread_id_t;
- private:
+ thread_id_t _thread_id;
+ int _thread_type;
// On AIX, we use the pthread id as OSThread::thread_id and keep the kernel thread id
// separately for diagnostic purposes.
@@ -54,15 +49,27 @@
sigset_t _caller_sigmask; // Caller's signal mask
public:
+ OSThread();
+ ~OSThread();
+
+ int thread_type() const {
+ return _thread_type;
+ }
+ void set_thread_type(int type) {
+ _thread_type = type;
+ }
// Methods to save/restore caller's signal mask
sigset_t caller_sigmask() const { return _caller_sigmask; }
void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; }
-#ifndef PRODUCT
- // Used for debugging, return a unique integer for each thread.
- int thread_identifier() const { return _thread_id; }
-#endif
+ thread_id_t thread_id() const {
+ return _thread_id;
+ }
+ void set_thread_id(thread_id_t id) {
+ _thread_id = id;
+ }
+
tid_t kernel_thread_id() const {
return _kernel_thread_id;
}
@@ -71,7 +78,7 @@
}
pthread_t pthread_id() const {
- // Here: same as OSThread::thread_id()
+ // Here: same as thread_id()
return _thread_id;
}
@@ -79,7 +86,6 @@
// suspension support.
// ***************************************************************
- public:
// flags that support signal based suspend/resume on Aix are in a
// separate class to avoid confusion with many flags in OSThread that
// are used by VM level suspend/resume.
@@ -125,22 +131,10 @@
return _startThread_lock;
}
- // ***************************************************************
- // Platform dependent initialization and cleanup
- // ***************************************************************
-
- private:
-
- void pd_initialize();
- void pd_destroy();
-
- public:
-
- // The last measured values of cpu timing to prevent the "stale
- // value return" bug in thread_cpu_time.
- volatile struct {
- jlong sys;
- jlong user;
- } _last_cpu_times;
+ // Printing
+ uintx thread_id_for_printing() const override {
+ return (uintx)_thread_id;
+ }
+};
#endif // OS_AIX_OSTHREAD_AIX_HPP
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index fd16a7984a6f3..63aa53f0a2350 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -2483,16 +2483,6 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
-// return current position of file pointer
-jlong os::current_file_offset(int fd) {
- return (jlong)::lseek(fd, (off_t)0, SEEK_CUR);
-}
-
-// move file pointer to the specified offset
-jlong os::seek_to_file_offset(int fd, jlong offset) {
- return (jlong)::lseek(fd, (off_t)offset, SEEK_SET);
-}
-
// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
// are used by JVM M&M and JVMTI to get user+sys or user CPU time
// of a thread.
diff --git a/src/hotspot/os/aix/vmStructs_aix.hpp b/src/hotspot/os/aix/vmStructs_aix.hpp
index 1a2f4c4bf6e21..f3bbc80e62c72 100644
--- a/src/hotspot/os/aix/vmStructs_aix.hpp
+++ b/src/hotspot/os/aix/vmStructs_aix.hpp
@@ -29,9 +29,20 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, pthread_t) \
+
+#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ \
+ /**********************/ \
+ /* Posix Thread IDs */ \
+ /**********************/ \
+ \
+ declare_unsigned_integer_type(pthread_t)
#define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
index 29825a9eab291..2e56c092a79b5 100644
--- a/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
+++ b/src/hotspot/os/bsd/gc/z/zPhysicalMemoryBacking_bsd.cpp
@@ -22,10 +22,10 @@
*/
#include "precompiled.hpp"
-#include "gc/shared/gcLogPrecious.hpp"
#include "gc/z/zAddress.inline.hpp"
#include "gc/z/zErrno.hpp"
#include "gc/z/zGlobals.hpp"
+#include "gc/z/zInitialize.hpp"
#include "gc/z/zLargePages.inline.hpp"
#include "gc/z/zPhysicalMemory.inline.hpp"
#include "gc/z/zPhysicalMemoryBacking_bsd.hpp"
@@ -82,7 +82,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
_base = (uintptr_t)os::reserve_memory(max_capacity);
if (_base == 0) {
// Failed
- log_error_pd(gc)("Failed to reserve address space for backing memory");
+ ZInitialize::error("Failed to reserve address space for backing memory");
return;
}
diff --git a/src/hotspot/os/bsd/osThread_bsd.cpp b/src/hotspot/os/bsd/osThread_bsd.cpp
index 7b9ad1f76a855..4080ea1bf297b 100644
--- a/src/hotspot/os/bsd/osThread_bsd.cpp
+++ b/src/hotspot/os/bsd/osThread_bsd.cpp
@@ -22,30 +22,32 @@
*
*/
-// no precompiled headers
-#include "memory/allocation.inline.hpp"
-#include "runtime/mutexLocker.hpp"
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/mutex.hpp"
#include "runtime/osThread.hpp"
#include <signal.h>
-void OSThread::pd_initialize() {
+OSThread::OSThread()
+ : _thread_id(
#ifdef __APPLE__
- _thread_id = 0;
+ 0
#else
- _thread_id = nullptr;
+ nullptr
#endif
- _unique_thread_id = 0;
- _pthread_id = nullptr;
- _siginfo = nullptr;
- _ucontext = nullptr;
- _expanding_stack = 0;
- _alt_sig_stack = nullptr;
-
+ ),
+ _thread_type(),
+ _pthread_id(nullptr),
+ _unique_thread_id(0),
+ _caller_sigmask(),
+ sr(),
+ _siginfo(nullptr),
+ _ucontext(nullptr),
+ _expanding_stack(0),
+ _alt_sig_stack(nullptr),
+ _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) {
sigemptyset(&_caller_sigmask);
-
- _startThread_lock = new Monitor(Mutex::event, "startThread_lock");
- assert(_startThread_lock !=nullptr, "check");
}
// Additional thread_id used to correlate threads in SA
@@ -64,6 +66,6 @@ void OSThread::set_unique_thread_id() {
#endif
}
-void OSThread::pd_destroy() {
+OSThread::~OSThread() {
delete _startThread_lock;
}
diff --git a/src/hotspot/os/bsd/osThread_bsd.hpp b/src/hotspot/os/bsd/osThread_bsd.hpp
index 11376835063c4..e54e7195f9870 100644
--- a/src/hotspot/os/bsd/osThread_bsd.hpp
+++ b/src/hotspot/os/bsd/osThread_bsd.hpp
@@ -25,19 +25,12 @@
#ifndef OS_BSD_OSTHREAD_BSD_HPP
#define OS_BSD_OSTHREAD_BSD_HPP
- private:
- int _thread_type;
+#include "runtime/osThreadBase.hpp"
+#include "suspendResume_posix.hpp"
+#include "utilities/globalDefinitions.hpp"
- public:
-
- int thread_type() const {
- return _thread_type;
- }
- void set_thread_type(int type) {
- _thread_type = type;
- }
-
- private:
+class OSThread : public OSThreadBase {
+ friend class VMStructs;
#ifdef __APPLE__
typedef thread_t thread_id_t;
@@ -45,6 +38,9 @@
typedef pid_t thread_id_t;
#endif
+ thread_id_t _thread_id;
+ int _thread_type;
+
// _pthread_id is the pthread id, which is used by library calls
// (e.g. pthread_kill).
pthread_t _pthread_id;
@@ -57,15 +53,26 @@
sigset_t _caller_sigmask; // Caller's signal mask
public:
+ OSThread();
+ ~OSThread();
+
+ int thread_type() const {
+ return _thread_type;
+ }
+ void set_thread_type(int type) {
+ _thread_type = type;
+ }
// Methods to save/restore caller's signal mask
sigset_t caller_sigmask() const { return _caller_sigmask; }
void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; }
-#ifndef PRODUCT
- // Used for debugging, return a unique integer for each thread.
- intptr_t thread_identifier() const { return (intptr_t)_pthread_id; }
-#endif
+ thread_id_t thread_id() const {
+ return _thread_id;
+ }
+ void set_thread_id(thread_id_t id) {
+ _thread_id = id;
+ }
pthread_t pthread_id() const {
return _pthread_id;
@@ -80,7 +87,6 @@
// suspension support.
// ***************************************************************
-public:
// flags that support signal based suspend/resume on Bsd are in a
// separate class to avoid confusion with many flags in OSThread that
// are used by VM level suspend/resume.
@@ -126,17 +132,9 @@
return _startThread_lock;
}
- // ***************************************************************
- // Platform dependent initialization and cleanup
- // ***************************************************************
-
-private:
-
- void pd_initialize();
- void pd_destroy();
-
-// Reconciliation History
-// osThread_solaris.hpp 1.24 99/08/27 13:11:54
-// End
+ uintx thread_id_for_printing() const override {
+ return (uintx)_thread_id;
+ }
+};
#endif // OS_BSD_OSTHREAD_BSD_HPP
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index 9ad7c35e6bdef..18818268c1f9a 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -2400,16 +2400,6 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
-// return current position of file pointer
-jlong os::current_file_offset(int fd) {
- return (jlong)::lseek(fd, (off_t)0, SEEK_CUR);
-}
-
-// move file pointer to the specified offset
-jlong os::seek_to_file_offset(int fd, jlong offset) {
- return (jlong)::lseek(fd, (off_t)offset, SEEK_SET);
-}
-
// current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool)
// are used by JVM M&M and JVMTI to get user+sys or user CPU time
// of a thread.
diff --git a/src/hotspot/os/bsd/vmStructs_bsd.hpp b/src/hotspot/os/bsd/vmStructs_bsd.hpp
index 84c1be77374d0..8c9c132e1c25c 100644
--- a/src/hotspot/os/bsd/vmStructs_bsd.hpp
+++ b/src/hotspot/os/bsd/vmStructs_bsd.hpp
@@ -31,9 +31,21 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
+ nonstatic_field(OSThread, _unique_thread_id, uint64_t)
+
+#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ \
+ /**********************/ \
+ /* Thread IDs */ \
+ /**********************/ \
+ \
+ declare_unsigned_integer_type(OSThread::thread_id_t)
#define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
index 527573644a816..56dcadd670f82 100644
--- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
+++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
@@ -64,16 +64,16 @@ class CgroupV2CpuController: public CgroupCpuController {
bool is_read_only() override {
return reader()->is_read_only();
}
- const char* subsystem_path() {
+ const char* subsystem_path() override {
return reader()->subsystem_path();
}
bool needs_hierarchy_adjustment() override {
return reader()->needs_hierarchy_adjustment();
}
- void set_subsystem_path(const char* cgroup_path) {
+ void set_subsystem_path(const char* cgroup_path) override {
reader()->set_subsystem_path(cgroup_path);
}
- const char* mount_point() { return reader()->mount_point(); }
+ const char* mount_point() override { return reader()->mount_point(); }
const char* cgroup_path() override { return reader()->cgroup_path(); }
};
@@ -97,16 +97,16 @@ class CgroupV2MemoryController final: public CgroupMemoryController {
bool is_read_only() override {
return reader()->is_read_only();
}
- const char* subsystem_path() {
+ const char* subsystem_path() override {
return reader()->subsystem_path();
}
bool needs_hierarchy_adjustment() override {
return reader()->needs_hierarchy_adjustment();
}
- void set_subsystem_path(const char* cgroup_path) {
+ void set_subsystem_path(const char* cgroup_path) override {
reader()->set_subsystem_path(cgroup_path);
}
- const char* mount_point() { return reader()->mount_point(); }
+ const char* mount_point() override { return reader()->mount_point(); }
const char* cgroup_path() override { return reader()->cgroup_path(); }
};
diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
index b80124cc34e43..b648876ac602c 100644
--- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
+++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
@@ -27,6 +27,7 @@
#include "gc/z/zArray.inline.hpp"
#include "gc/z/zErrno.hpp"
#include "gc/z/zGlobals.hpp"
+#include "gc/z/zInitialize.hpp"
#include "gc/z/zLargePages.inline.hpp"
#include "gc/z/zMountPoint_linux.hpp"
#include "gc/z/zNUMA.inline.hpp"
@@ -103,14 +104,14 @@
#define ZFILENAME_HEAP "java_heap"
// Preferred tmpfs mount points, ordered by priority
-static const char* z_preferred_tmpfs_mountpoints[] = {
+static const char* ZPreferredTmpfsMountpoints[] = {
"/dev/shm",
"/run/shm",
nullptr
};
// Preferred hugetlbfs mount points, ordered by priority
-static const char* z_preferred_hugetlbfs_mountpoints[] = {
+static const char* ZPreferredHugetlbfsMountpoints[] = {
"/dev/hugepages",
"/hugepages",
nullptr
@@ -129,6 +130,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
// Create backing file
_fd = create_fd(ZFILENAME_HEAP);
if (_fd == -1) {
+ ZInitialize::error("Failed to create heap backing file");
return;
}
@@ -136,7 +138,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
while (ftruncate(_fd, max_capacity) == -1) {
if (errno != EINTR) {
ZErrno err;
- log_error_p(gc)("Failed to truncate backing file (%s)", err.to_string());
+ ZInitialize::error("Failed to truncate backing file (%s)", err.to_string());
return;
}
}
@@ -145,7 +147,7 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
struct statfs buf;
if (fstatfs(_fd, &buf) == -1) {
ZErrno err;
- log_error_p(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string());
+ ZInitialize::error("Failed to determine filesystem type for backing file (%s)", err.to_string());
return;
}
@@ -158,39 +160,39 @@ ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity)
// Make sure the filesystem type matches requested large page type
if (ZLargePages::is_transparent() && !is_tmpfs()) {
- log_error_p(gc)("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem",
- ZFILESYSTEM_TMPFS);
+ ZInitialize::error("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem",
+ ZFILESYSTEM_TMPFS);
return;
}
if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
- log_error_p(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
- ZFILESYSTEM_TMPFS);
+ ZInitialize::error("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
+ ZFILESYSTEM_TMPFS);
return;
}
if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
- log_error_p(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
- "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
+ ZInitialize::error("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
+ "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
return;
}
if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
- log_error_p(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem",
- ZFILESYSTEM_HUGETLBFS);
+ ZInitialize::error("-XX:+UseLargePages must be enabled when using a %s filesystem",
+ ZFILESYSTEM_HUGETLBFS);
return;
}
// Make sure the filesystem block size is compatible
if (ZGranuleSize % _block_size != 0) {
- log_error_p(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")",
- _block_size);
+ ZInitialize::error("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")",
+ _block_size);
return;
}
if (is_hugetlbfs() && _block_size != ZGranuleSize) {
- log_error_p(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
- ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize);
+ ZInitialize::error("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
+ ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize);
return;
}
@@ -226,8 +228,8 @@ int ZPhysicalMemoryBacking::create_file_fd(const char* name) const {
? ZFILESYSTEM_HUGETLBFS
: ZFILESYSTEM_TMPFS;
const char** const preferred_mountpoints = ZLargePages::is_explicit()
- ? z_preferred_hugetlbfs_mountpoints
- : z_preferred_tmpfs_mountpoints;
+ ? ZPreferredHugetlbfsMountpoints
+ : ZPreferredTmpfsMountpoints;
// Find mountpoint
ZMountPoint mountpoint(filesystem, preferred_mountpoints);
diff --git a/src/hotspot/os/linux/osThread_linux.cpp b/src/hotspot/os/linux/osThread_linux.cpp
index 9c77cb32f6d1c..3dd6e3bbcd15c 100644
--- a/src/hotspot/os/linux/osThread_linux.cpp
+++ b/src/hotspot/os/linux/osThread_linux.cpp
@@ -22,27 +22,27 @@
*
*/
-// no precompiled headers
-#include "memory/allocation.inline.hpp"
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
#include "runtime/mutex.hpp"
#include "runtime/osThread.hpp"
#include <signal.h>
-void OSThread::pd_initialize() {
- _thread_id = 0;
- _pthread_id = 0;
- _siginfo = nullptr;
- _ucontext = nullptr;
- _expanding_stack = 0;
- _alt_sig_stack = nullptr;
-
+OSThread::OSThread()
+ : _thread_id(0),
+ _thread_type(),
+ _pthread_id(0),
+ _caller_sigmask(),
+ sr(),
+ _siginfo(nullptr),
+ _ucontext(nullptr),
+ _expanding_stack(0),
+ _alt_sig_stack(nullptr),
+ _startThread_lock(new Monitor(Mutex::event, "startThread_lock")) {
sigemptyset(&_caller_sigmask);
-
- _startThread_lock = new Monitor(Mutex::event, "startThread_lock");
- assert(_startThread_lock !=nullptr, "check");
}
-void OSThread::pd_destroy() {
+OSThread::~OSThread() {
delete _startThread_lock;
}
diff --git a/src/hotspot/os/linux/osThread_linux.hpp b/src/hotspot/os/linux/osThread_linux.hpp
index a849673af62db..f8dfd5a213bbb 100644
--- a/src/hotspot/os/linux/osThread_linux.hpp
+++ b/src/hotspot/os/linux/osThread_linux.hpp
@@ -24,13 +24,28 @@
#ifndef OS_LINUX_OSTHREAD_LINUX_HPP
#define OS_LINUX_OSTHREAD_LINUX_HPP
- public:
+
+#include "runtime/osThreadBase.hpp"
+#include "suspendResume_posix.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class OSThread : public OSThreadBase {
+ friend class VMStructs;
+
typedef pid_t thread_id_t;
- private:
+ thread_id_t _thread_id;
int _thread_type;
+ // _pthread_id is the pthread id, which is used by library calls
+ // (e.g. pthread_kill).
+ pthread_t _pthread_id;
+
+ sigset_t _caller_sigmask; // Caller's signal mask
+
public:
+ OSThread();
+ ~OSThread();
int thread_type() const {
return _thread_type;
@@ -39,22 +54,16 @@
_thread_type = type;
}
- // _pthread_id is the pthread id, which is used by library calls
- // (e.g. pthread_kill).
- pthread_t _pthread_id;
-
- sigset_t _caller_sigmask; // Caller's signal mask
-
- public:
-
// Methods to save/restore caller's signal mask
sigset_t caller_sigmask() const { return _caller_sigmask; }
void set_caller_sigmask(sigset_t sigmask) { _caller_sigmask = sigmask; }
-#ifndef PRODUCT
- // Used for debugging, return a unique integer for each thread.
- int thread_identifier() const { return _thread_id; }
-#endif
+ thread_id_t thread_id() const {
+ return _thread_id;
+ }
+ void set_thread_id(thread_id_t id) {
+ _thread_id = id;
+ }
pthread_t pthread_id() const {
return _pthread_id;
@@ -67,7 +76,6 @@
// suspension support.
// ***************************************************************
-public:
// flags that support signal based suspend/resume on Linux are in a
// separate class to avoid confusion with many flags in OSThread that
// are used by VM level suspend/resume.
@@ -113,17 +121,10 @@
return _startThread_lock;
}
- // ***************************************************************
- // Platform dependent initialization and cleanup
- // ***************************************************************
-
-private:
-
- void pd_initialize();
- void pd_destroy();
-
-// Reconciliation History
-// osThread_solaris.hpp 1.24 99/08/27 13:11:54
-// End
+ // Printing
+ uintx thread_id_for_printing() const override {
+ return (uintx)_thread_id;
+ }
+};
#endif // OS_LINUX_OSTHREAD_LINUX_HPP
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index d4699567733b2..c80663fec3d3a 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -817,7 +817,7 @@ static void *thread_native_entry(Thread *thread) {
OSThread* osthread = thread->osthread();
Monitor* sync = osthread->startThread_lock();
- osthread->set_thread_id(checked_cast<OSThread::thread_id_t>(os::current_thread_id()));
+ osthread->set_thread_id(checked_cast<pid_t>(os::current_thread_id()));
if (UseNUMA) {
int lgrp_id = os::numa_get_group_id();
@@ -4602,7 +4602,7 @@ static void workaround_expand_exec_shield_cs_limit() {
return; // No matter, we tried, best effort.
}
- MemTracker::record_virtual_memory_type((address)codebuf, mtInternal);
+ MemTracker::record_virtual_memory_tag((address)codebuf, mtInternal);
log_info(os)("[CS limit NX emulation work-around, exec code at: %p]", codebuf);
@@ -5053,16 +5053,6 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
-// return current position of file pointer
-jlong os::current_file_offset(int fd) {
- return (jlong)::lseek(fd, (off_t)0, SEEK_CUR);
-}
-
-// move file pointer to the specified offset
-jlong os::seek_to_file_offset(int fd, jlong offset) {
- return (jlong)::lseek(fd, (off_t)offset, SEEK_SET);
-}
-
static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
static jlong fast_cpu_time(Thread *thread) {
diff --git a/src/hotspot/os/linux/vmStructs_linux.hpp b/src/hotspot/os/linux/vmStructs_linux.hpp
index 818f6bb188fe8..3b82ac58ac697 100644
--- a/src/hotspot/os/linux/vmStructs_linux.hpp
+++ b/src/hotspot/os/linux/vmStructs_linux.hpp
@@ -31,9 +31,22 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ nonstatic_field(OSThread, _thread_id, pid_t) \
+ nonstatic_field(OSThread, _pthread_id, pthread_t)
+
+#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ \
+ /**********************/ \
+ /* Posix Thread IDs */ \
+ /**********************/ \
+ \
+ declare_integer_type(pid_t) \
+ declare_unsigned_integer_type(pthread_t)
#define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index 26bff6c8bd4e6..78fd2a678886e 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -104,49 +104,44 @@ static int clock_tics_per_sec = 100;
size_t os::_os_min_stack_allowed = PTHREAD_STACK_MIN;
// Check core dump limit and report possible place where core can be found
-void os::check_dump_limit(char* buffer, size_t bufferSize) {
+void os::check_core_dump_prerequisites(char* buffer, size_t bufferSize, bool check_only) {
if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) {
jio_snprintf(buffer, bufferSize, "CreateCoredumpOnCrash is disabled from command line");
VMError::record_coredump_status(buffer, false);
- return;
- }
-
- int n;
- struct rlimit rlim;
- bool success;
-
- char core_path[PATH_MAX];
- n = get_core_path(core_path, PATH_MAX);
-
- if (n <= 0) {
- jio_snprintf(buffer, bufferSize, "core.%d (may not exist)", current_process_id());
- success = true;
+ } else {
+ struct rlimit rlim;
+ bool success = true;
+ bool warn = true;
+ char core_path[PATH_MAX];
+ if (get_core_path(core_path, PATH_MAX) <= 0) {
+ jio_snprintf(buffer, bufferSize, "core.%d (may not exist)", current_process_id());
#ifdef LINUX
- } else if (core_path[0] == '"') { // redirect to user process
- jio_snprintf(buffer, bufferSize, "Core dumps may be processed with %s", core_path);
- success = true;
+ } else if (core_path[0] == '"') { // redirect to user process
+ jio_snprintf(buffer, bufferSize, "Core dumps may be processed with %s", core_path);
#endif
- } else if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
- jio_snprintf(buffer, bufferSize, "%s (may not exist)", core_path);
- success = true;
- } else {
- switch(rlim.rlim_cur) {
- case RLIM_INFINITY:
- jio_snprintf(buffer, bufferSize, "%s", core_path);
- success = true;
- break;
- case 0:
- jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
- success = false;
- break;
- default:
- jio_snprintf(buffer, bufferSize, "%s (max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", core_path, uint64_t(rlim.rlim_cur) / K);
- success = true;
- break;
+ } else if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
+ jio_snprintf(buffer, bufferSize, "%s (may not exist)", core_path);
+ } else {
+ switch(rlim.rlim_cur) {
+ case RLIM_INFINITY:
+ jio_snprintf(buffer, bufferSize, "%s", core_path);
+ warn = false;
+ break;
+ case 0:
+ jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
+ success = false;
+ break;
+ default:
+ jio_snprintf(buffer, bufferSize, "%s (max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", core_path, uint64_t(rlim.rlim_cur) / K);
+ break;
+ }
+ }
+ if (!check_only) {
+ VMError::record_coredump_status(buffer, success);
+ } else if (warn) {
+ warning("CreateCoredumpOnCrash specified, but %s", buffer);
}
}
-
- VMError::record_coredump_status(buffer, success);
}
bool os::committed_in_range(address start, size_t size, address& committed_start, size_t& committed_size) {
@@ -348,6 +343,16 @@ int os::create_file_for_heap(const char* dir) {
return fd;
}
+// return current position of file pointer
+jlong os::current_file_offset(int fd) {
+ return (jlong)::lseek(fd, (off_t)0, SEEK_CUR);
+}
+
+// move file pointer to the specified offset
+jlong os::seek_to_file_offset(int fd, jlong offset) {
+ return (jlong)::lseek(fd, (off_t)offset, SEEK_SET);
+}
+
// Is a (classpath) directory empty?
bool os::dir_is_empty(const char* path) {
DIR *dir = nullptr;
@@ -367,7 +372,7 @@ bool os::dir_is_empty(const char* path) {
return result;
}
-static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MEMFLAGS flag) {
+static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MemTag mem_tag) {
char * addr;
int flags = MAP_PRIVATE NOT_AIX( | MAP_NORESERVE ) | MAP_ANONYMOUS;
if (requested_addr != nullptr) {
@@ -382,7 +387,7 @@ static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MEMFLAGS
flags, -1, 0);
if (addr != MAP_FAILED) {
- MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC, flag);
+ MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC, mem_tag);
return addr;
}
return nullptr;
@@ -495,7 +500,7 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, bool exec) {
return chop_extra_memory(size, alignment, extra_base, extra_size);
}
-char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc, MEMFLAGS flag) {
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag) {
size_t extra_size = calculate_aligned_extra_size(size, alignment);
// For file mapping, we do not call os:map_memory_to_file(size,fd) since:
// - we later chop away parts of the mapping using os::release_memory and that could fail if the
@@ -503,7 +508,7 @@ char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_des
// - The memory API os::reserve_memory uses is an implementation detail. It may (and usually is)
// mmap but it also may System V shared memory which cannot be uncommitted as a whole, so
// chopping off and unmapping excess bits back and front (see below) would not work.
- char* extra_base = reserve_mmapped_memory(extra_size, nullptr, flag);
+ char* extra_base = reserve_mmapped_memory(extra_size, nullptr, mem_tag);
if (extra_base == nullptr) {
return nullptr;
}
diff --git a/src/hotspot/os/windows/globals_windows.hpp b/src/hotspot/os/windows/globals_windows.hpp
index 78cbac6e9ccc5..8f0a6261cc0db 100644
--- a/src/hotspot/os/windows/globals_windows.hpp
+++ b/src/hotspot/os/windows/globals_windows.hpp
@@ -38,6 +38,10 @@
product(bool, UseAllWindowsProcessorGroups, false, \
"Use all processor groups on supported Windows versions") \
\
+product(bool, EnableAllLargePageSizesForWindows, false, \
+ "Enable support for multiple large page sizes on " \
+ "Windows Server") \
+ \
product(bool, UseOSErrorReporting, false, \
"Let VM fatal error propagate to the OS (ie. WER on Windows)")
diff --git a/src/hotspot/os/windows/memMapPrinter_windows.cpp b/src/hotspot/os/windows/memMapPrinter_windows.cpp
new file mode 100644
index 0000000000000..eb6b24a9d139a
--- /dev/null
+++ b/src/hotspot/os/windows/memMapPrinter_windows.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Red Hat, Inc. and/or its affiliates.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "nmt/memMapPrinter.hpp"
+#include "os_windows.hpp"
+#include "runtime/vm_version.hpp"
+
+#include <winsock2.h>
+#include <windows.h>
+#include <psapi.h>
+#include <limits.h>
+
+/* maximum number of mapping records returned */
+static const int MAX_REGIONS_RETURNED = 1000000;
+
+class MappingInfo {
+public:
+ stringStream _ap_buffer;
+ stringStream _state_buffer;
+ stringStream _protect_buffer;
+ stringStream _type_buffer;
+ char _file_name[MAX_PATH];
+
+ MappingInfo() {}
+
+ void process(MEMORY_BASIC_INFORMATION& mem_info) {
+ _ap_buffer.reset();
+ _state_buffer.reset();
+ _protect_buffer.reset();
+ _type_buffer.reset();
+ get_protect_string(_ap_buffer, mem_info.AllocationProtect);
+ get_state_string(_state_buffer, mem_info);
+ get_protect_string(_protect_buffer, mem_info.Protect);
+ get_type_string(_type_buffer, mem_info);
+ _file_name[0] = 0;
+ if (mem_info.Type == MEM_IMAGE) {
+ HMODULE hModule = 0;
+ if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, static_cast<LPCSTR>(mem_info.AllocationBase), &hModule)) {
+ GetModuleFileName(hModule, _file_name, sizeof(_file_name));
+ }
+ }
+ }
+
+ void get_protect_string(outputStream& out, DWORD prot) {
+ const char read_c = prot & (PAGE_READONLY | PAGE_READWRITE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY) ? 'r' : '-';
+ const char write_c = prot & (PAGE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY) ? 'w' : '-';
+ const char execute_c = prot & (PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY) ? 'x' : '-';
+ out.print("%c%c%c", read_c, write_c, execute_c);
+ if (prot & (PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY)) {
+ out.put('c');
+ }
+ if (prot & PAGE_GUARD) {
+ out.put('g');
+ }
+ if (prot & PAGE_NOCACHE) {
+ out.put('n');
+ }
+ if (prot & PAGE_WRITECOMBINE) {
+ out.put('W');
+ }
+ const DWORD bits = PAGE_NOACCESS | PAGE_READONLY | PAGE_READWRITE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE
+ | PAGE_WRITECOPY | PAGE_EXECUTE_WRITECOPY | PAGE_EXECUTE
+ | PAGE_GUARD | PAGE_NOCACHE | PAGE_WRITECOMBINE;
+ if ((prot & bits) != prot) {
+ out.print_cr("Unknown Windows memory protection value: 0x%x unknown bits: 0x%x", prot, prot & ~bits);
+ assert(false, "Unknown Windows memory protection value: 0x%x unknown bits: 0x%x", prot, prot & ~bits);
+ }
+ }
+
+ void get_state_string(outputStream& out, MEMORY_BASIC_INFORMATION& mem_info) {
+ if (mem_info.State == MEM_COMMIT) {
+ out.put('c');
+ } else if (mem_info.State == MEM_FREE) {
+ out.put('f');
+ } else if (mem_info.State == MEM_RESERVE) {
+ out.put('r');
+ } else {
+ out.print_cr("Unknown Windows memory state value: 0x%x", mem_info.State);
+ assert(false, "Unknown Windows memory state value: 0x%x", mem_info.State);
+ }
+ }
+
+ void get_type_string(outputStream& out, MEMORY_BASIC_INFORMATION& mem_info) {
+ if (mem_info.Type == MEM_IMAGE) {
+ out.print("img");
+ } else if (mem_info.Type == MEM_MAPPED) {
+ out.print("map");
+ } else if (mem_info.Type == MEM_PRIVATE) {
+ out.print("pvt");
+ } else if (mem_info.Type == 0 && mem_info.State == MEM_FREE) {
+ out.print("---");
+ } else {
+ out.print_cr("Unknown Windows memory type 0x%x", mem_info.Type);
+ assert(false, "Unknown Windows memory type 0x%x", mem_info.Type);
+ }
+ }
+};
+
+class MappingInfoSummary {
+ unsigned _num_mappings;
+ size_t _total_region_size; // combined resident set size
+ size_t _total_committed; // combined committed size
+ class WinOsInfo : public os::win32 {
+ public:
+ static void printOsInfo(outputStream* st) {
+ st->print("OS:");
+ os::win32::print_windows_version(st);
+ os::win32::print_uptime_info(st);
+ VM_Version::print_platform_virtualization_info(st);
+ os::print_memory_info(st);
+ }
+ };
+public:
+ MappingInfoSummary() : _num_mappings(0), _total_region_size(0),
+ _total_committed(0) {}
+
+ void add_mapping(const MEMORY_BASIC_INFORMATION& mem_info, const MappingInfo& mapping_info) {
+ if (mem_info.State != MEM_FREE) {
+ _num_mappings++;
+ _total_region_size += mem_info.RegionSize;
+ _total_committed += mem_info.State == MEM_COMMIT ? mem_info.RegionSize : 0;
+ }
+ }
+
+ void print_on(const MappingPrintSession& session) const {
+ outputStream* st = session.out();
+ WinOsInfo::printOsInfo(st);
+ st->print_cr("current process reserved memory: " PROPERFMT, PROPERFMTARGS(_total_region_size));
+ st->print_cr("current process committed memory: " PROPERFMT, PROPERFMTARGS(_total_committed));
+ st->print_cr("current process region count: " PROPERFMT, PROPERFMTARGS(_num_mappings));
+ }
+};
+
+class MappingInfoPrinter {
+ const MappingPrintSession& _session;
+public:
+ MappingInfoPrinter(const MappingPrintSession& session) :
+ _session(session)
+ {}
+
+ void print_single_mapping(const MEMORY_BASIC_INFORMATION& mem_info, const MappingInfo& mapping_info) const {
+ outputStream* st = _session.out();
+#define INDENT_BY(n) \
+ if (st->fill_to(n) == 0) { \
+ st->print(" "); \
+ }
+ st->print(PTR_FORMAT "-" PTR_FORMAT, mem_info.BaseAddress, static_cast<const char*>(mem_info.BaseAddress) + mem_info.RegionSize);
+ INDENT_BY(38);
+ st->print("%12zu", mem_info.RegionSize);
+ INDENT_BY(51);
+ st->print("%s", mapping_info._protect_buffer.base());
+ INDENT_BY(57);
+ st->print("%s-%s", mapping_info._state_buffer.base(), mapping_info._type_buffer.base());
+ INDENT_BY(63);
+ st->print("%#11llx", reinterpret_cast(mem_info.BaseAddress) - reinterpret_cast(mem_info.AllocationBase));
+ INDENT_BY(72);
+ if (_session.print_nmt_info_for_region(mem_info.BaseAddress, static_cast<const char*>(mem_info.BaseAddress) + mem_info.RegionSize)) {
+ st->print(" ");
+ }
+ st->print_raw(mapping_info._file_name);
+ #undef INDENT_BY
+ st->cr();
+ }
+
+ void print_legend() const {
+ outputStream* st = _session.out();
+ st->print_cr("from, to, vsize: address range and size");
+ st->print_cr("prot: protection:");
+ st->print_cr(" rwx: read / write / execute");
+ st->print_cr(" c: copy on write");
+ st->print_cr(" g: guard");
+ st->print_cr(" n: no cache");
+ st->print_cr(" W: write combine");
+ st->print_cr("state: region state and type:");
+ st->print_cr(" state: committed / reserved");
+ st->print_cr(" type: image / mapped / private");
+ st->print_cr("file: file mapped, if mapping is not anonymous");
+ st->print_cr("vm info: VM information (requires NMT)");
+ {
+ streamIndentor si(st, 16);
+ _session.print_nmt_flag_legend();
+ }
+ }
+
+ void print_header() const {
+ outputStream* st = _session.out();
+ // 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ // 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+ // 0x00007ffb24565000-0x00007ffb24a7e000 5345280 r-- c-img 0x1155000 C:\work\jdk\build\fastdebug\jdk\bin\server\jvm.dll
+ st->print_cr("from to vsize prot state offset vminfo/file");
+ st->print_cr("===========================================================================================");
+ }
+};
+
+void MemMapPrinter::pd_print_all_mappings(const MappingPrintSession& session) {
+
+ HANDLE hProcess = GetCurrentProcess();
+
+ MappingInfoPrinter printer(session);
+ MappingInfoSummary summary;
+
+ outputStream* const st = session.out();
+
+ printer.print_legend();
+ st->cr();
+ printer.print_header();
+
+ MEMORY_BASIC_INFORMATION mem_info;
+ MappingInfo mapping_info;
+
+ int region_count = 0;
+ ::memset(&mem_info, 0, sizeof(mem_info));
+ for (char* ptr = 0; VirtualQueryEx(hProcess, ptr, &mem_info, sizeof(mem_info)) == sizeof(mem_info); ) {
+ assert(mem_info.RegionSize > 0, "RegionSize is not greater than zero");
+ if (++region_count > MAX_REGIONS_RETURNED) {
+ st->print_cr("limit of %d regions reached (results inaccurate)", region_count);
+ break;
+ }
+ mapping_info.process(mem_info);
+ if (mem_info.State != MEM_FREE) {
+ printer.print_single_mapping(mem_info, mapping_info);
+ summary.add_mapping(mem_info, mapping_info);
+ }
+ ptr += mem_info.RegionSize;
+ ::memset(&mem_info, 0, sizeof(mem_info));
+ }
+ st->cr();
+ summary.print_on(session);
+ st->cr();
+}
diff --git a/src/hotspot/os/windows/osThread_windows.cpp b/src/hotspot/os/windows/osThread_windows.cpp
index 5f369bb7aa05b..922b4b0104be4 100644
--- a/src/hotspot/os/windows/osThread_windows.cpp
+++ b/src/hotspot/os/windows/osThread_windows.cpp
@@ -22,17 +22,17 @@
*
*/
-// no precompiled headers
-#include "runtime/os.hpp"
+#include "precompiled.hpp"
#include "runtime/osThread.hpp"
-void OSThread::pd_initialize() {
- set_thread_handle(nullptr);
- set_thread_id(0);
- set_interrupt_event(nullptr);
-}
+#include <windows.h>
+
+OSThread::OSThread()
+ : _thread_id(0),
+ _thread_handle(nullptr),
+ _interrupt_event(nullptr) {}
-void OSThread::pd_destroy() {
+OSThread::~OSThread() {
if (_interrupt_event != nullptr) {
CloseHandle(_interrupt_event);
}
diff --git a/src/hotspot/os/windows/osThread_windows.hpp b/src/hotspot/os/windows/osThread_windows.hpp
index 5bd07646b1718..e54783aef1c15 100644
--- a/src/hotspot/os/windows/osThread_windows.hpp
+++ b/src/hotspot/os/windows/osThread_windows.hpp
@@ -25,17 +25,29 @@
#ifndef OS_WINDOWS_OSTHREAD_WINDOWS_HPP
#define OS_WINDOWS_OSTHREAD_WINDOWS_HPP
- typedef void* HANDLE;
- public:
+#include "runtime/osThreadBase.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class OSThread : public OSThreadBase {
+ friend class VMStructs;
+
typedef unsigned long thread_id_t;
+ typedef void* HANDLE;
+
+ thread_id_t _thread_id;
- private:
// Win32-specific thread information
HANDLE _thread_handle; // Win32 thread handle
HANDLE _interrupt_event; // Event signalled on thread interrupt for use by
// Process.waitFor().
public:
+ OSThread();
+ ~OSThread();
+
+ thread_id_t thread_id() const { return _thread_id; }
+ void set_thread_id(thread_id_t id) { _thread_id = id; }
+
// The following will only apply in the Win32 implementation, and should only
// be visible in the concrete class, not this which should be an abstract base class
HANDLE thread_handle() const { return _thread_handle; }
@@ -45,13 +57,9 @@
// This is specialized on Windows to interact with the _interrupt_event.
void set_interrupted(bool z);
-#ifndef PRODUCT
- // Used for debugging, return a unique integer for each thread.
- int thread_identifier() const { return _thread_id; }
-#endif
-
- private:
- void pd_initialize();
- void pd_destroy();
+ uintx thread_id_for_printing() const override {
+ return (uintx)_thread_id;
+ }
+};
#endif // OS_WINDOWS_OSTHREAD_WINDOWS_HPP
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index a1e0a78837f74..fe093aee116b6 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -63,6 +63,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/statSampler.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/suspendedThreadTask.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/threads.hpp"
#include "runtime/timer.hpp"
@@ -1286,38 +1287,50 @@ void os::shutdown() {
static HANDLE dumpFile = nullptr;
-// Check if dump file can be created.
-void os::check_dump_limit(char* buffer, size_t buffsz) {
- bool status = true;
+// Check if core dump is active and if a core dump file can be created
+void os::check_core_dump_prerequisites(char* buffer, size_t bufferSize, bool check_only) {
if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) {
- jio_snprintf(buffer, buffsz, "CreateCoredumpOnCrash is disabled from command line");
- status = false;
- }
-
+ jio_snprintf(buffer, bufferSize, "CreateCoredumpOnCrash is disabled from command line");
+ VMError::record_coredump_status(buffer, false);
+ } else {
+ bool success = true;
+ bool warn = true;
#ifndef ASSERT
- if (!os::win32::is_windows_server() && FLAG_IS_DEFAULT(CreateCoredumpOnCrash)) {
- jio_snprintf(buffer, buffsz, "Minidumps are not enabled by default on client versions of Windows");
- status = false;
- }
+ if (!os::win32::is_windows_server() && FLAG_IS_DEFAULT(CreateCoredumpOnCrash)) {
+ jio_snprintf(buffer, bufferSize, "Minidumps are not enabled by default on client versions of Windows");
+ success = false;
+ warn = true;
+ }
#endif
- if (status) {
- const char* cwd = get_current_directory(nullptr, 0);
- int pid = current_process_id();
- if (cwd != nullptr) {
- jio_snprintf(buffer, buffsz, "%s\\hs_err_pid%u.mdmp", cwd, pid);
- } else {
- jio_snprintf(buffer, buffsz, ".\\hs_err_pid%u.mdmp", pid);
+ if (success) {
+ if (!check_only) {
+ const char* cwd = get_current_directory(nullptr, 0);
+ int pid = current_process_id();
+ if (cwd != nullptr) {
+ jio_snprintf(buffer, bufferSize, "%s\\hs_err_pid%u.mdmp", cwd, pid);
+ } else {
+ jio_snprintf(buffer, bufferSize, ".\\hs_err_pid%u.mdmp", pid);
+ }
+
+ if (dumpFile == nullptr &&
+ (dumpFile = CreateFile(buffer, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr))
+ == INVALID_HANDLE_VALUE) {
+ jio_snprintf(buffer, bufferSize, "Failed to create minidump file (0x%x).", GetLastError());
+ success = false;
+ }
+ } else {
+ // For now on Windows, there are no more checks that we can do
+ warn = false;
+ }
}
- if (dumpFile == nullptr &&
- (dumpFile = CreateFile(buffer, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr))
- == INVALID_HANDLE_VALUE) {
- jio_snprintf(buffer, buffsz, "Failed to create minidump file (0x%x).", GetLastError());
- status = false;
+ if (!check_only) {
+ VMError::record_coredump_status(buffer, success);
+ } else if (warn) {
+ warning("CreateCoredumpOnCrash specified, but %s", buffer);
}
}
- VMError::record_coredump_status(buffer, status);
}
void os::abort(bool dump_core, void* siginfo, const void* context) {
@@ -1947,7 +1960,10 @@ void os::win32::print_windows_version(outputStream* st) {
// - 2016 GA 10/2016 build: 14393
// - 2019 GA 11/2018 build: 17763
// - 2022 GA 08/2021 build: 20348
- if (build_number > 20347) {
+ // - 2025 Preview build : 26040
+ if (build_number > 26039) {
+ st->print("Server 2025");
+ } else if (build_number > 20347) {
st->print("Server 2022");
} else if (build_number > 17762) {
st->print("Server 2019");
@@ -3123,7 +3139,7 @@ class NUMANodeListHolder {
static size_t _large_page_size = 0;
-static bool request_lock_memory_privilege() {
+bool os::win32::request_lock_memory_privilege() {
HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE,
os::current_process_id());
@@ -3307,14 +3323,14 @@ static char* allocate_pages_individually(size_t bytes, char* addr, DWORD flags,
return p_buf;
}
-static size_t large_page_init_decide_size() {
+size_t os::win32::large_page_init_decide_size() {
// print a warning if any large page related flag is specified on command line
bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) ||
!FLAG_IS_DEFAULT(LargePageSizeInBytes);
-#define WARN(msg) if (warn_on_failure) { warning(msg); }
+#define WARN(...) if (warn_on_failure) { warning(__VA_ARGS__); }
- if (!request_lock_memory_privilege()) {
+ if (!os::win32::request_lock_memory_privilege()) {
WARN("JVM cannot use large page memory because it does not have enough privilege to lock pages in memory.");
return 0;
}
@@ -3325,15 +3341,26 @@ static size_t large_page_init_decide_size() {
return 0;
}
-#if defined(IA32) || defined(AMD64)
- if (size > 4*M || LargePageSizeInBytes > 4*M) {
+#if defined(IA32)
+ if (size > 4 * M || LargePageSizeInBytes > 4 * M) {
WARN("JVM cannot use large pages bigger than 4mb.");
return 0;
}
+#elif defined(AMD64)
+ if (!EnableAllLargePageSizesForWindows) {
+ if (size > 4 * M || LargePageSizeInBytes > 4 * M) {
+ WARN("JVM cannot use large pages bigger than 4mb.");
+ return 0;
+ }
+ }
#endif
- if (LargePageSizeInBytes > 0 && LargePageSizeInBytes % size == 0) {
- size = LargePageSizeInBytes;
+ if (LargePageSizeInBytes > 0) {
+ if (LargePageSizeInBytes % size == 0) {
+ size = LargePageSizeInBytes;
+ } else {
+ WARN("The specified large page size (%d) is not a multiple of the minimum large page size (%d), defaulting to minimum page size.", LargePageSizeInBytes, size);
+ }
}
#undef WARN
@@ -3346,12 +3373,23 @@ void os::large_page_init() {
return;
}
- _large_page_size = large_page_init_decide_size();
+ _large_page_size = os::win32::large_page_init_decide_size();
const size_t default_page_size = os::vm_page_size();
if (_large_page_size > default_page_size) {
+#if !defined(IA32)
+ if (EnableAllLargePageSizesForWindows) {
+ size_t min_size = GetLargePageMinimum();
+
+ // Populate _page_sizes with large page sizes less than or equal to _large_page_size, ensuring each page size is double the size of the previous one.
+ for (size_t page_size = min_size; page_size < _large_page_size; page_size *= 2) {
+ _page_sizes.add(page_size);
+ }
+ }
+#endif
+
_page_sizes.add(_large_page_size);
}
-
+ // Set UseLargePages based on whether a large page size was successfully determined
UseLargePages = _large_page_size != 0;
}
@@ -3428,7 +3466,7 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in
// Multiple threads can race in this code but it's not possible to unmap small sections of
// virtual space to get requested alignment, like posix-like os's.
// Windows prevents multiple thread from remapping over each other so this loop is thread-safe.
-static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MEMFLAGS flag = mtNone) {
+static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc, MemTag mem_tag = mtNone) {
assert(is_aligned(alignment, os::vm_allocation_granularity()),
"Alignment must be a multiple of allocation granularity (page size)");
assert(is_aligned(size, os::vm_allocation_granularity()),
@@ -3441,8 +3479,8 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi
static const int max_attempts = 20;
for (int attempt = 0; attempt < max_attempts && aligned_base == nullptr; attempt ++) {
- char* extra_base = file_desc != -1 ? os::map_memory_to_file(extra_size, file_desc, flag) :
- os::reserve_memory(extra_size, false, flag);
+ char* extra_base = file_desc != -1 ? os::map_memory_to_file(extra_size, file_desc, mem_tag) :
+ os::reserve_memory(extra_size, false, mem_tag);
if (extra_base == nullptr) {
return nullptr;
}
@@ -3458,8 +3496,8 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi
// Attempt to map, into the just vacated space, the slightly smaller aligned area.
// Which may fail, hence the loop.
- aligned_base = file_desc != -1 ? os::attempt_map_memory_to_file_at(aligned_base, size, file_desc, flag) :
- os::attempt_reserve_memory_at(aligned_base, size, false, flag);
+ aligned_base = file_desc != -1 ? os::attempt_map_memory_to_file_at(aligned_base, size, file_desc, mem_tag) :
+ os::attempt_reserve_memory_at(aligned_base, size, false, mem_tag);
}
assert(aligned_base != nullptr,
@@ -3473,8 +3511,8 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, bool exec) {
return map_or_reserve_memory_aligned(size, alignment, -1 /* file_desc */);
}
-char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int fd, MEMFLAGS flag) {
- return map_or_reserve_memory_aligned(size, alignment, fd, flag);
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int fd, MemTag mem_tag) {
+ return map_or_reserve_memory_aligned(size, alignment, fd, mem_tag);
}
char* os::pd_reserve_memory(size_t bytes, bool exec) {
@@ -3615,7 +3653,6 @@ static char* reserve_large_pages_aligned(size_t size, size_t alignment, bool exe
char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_size, char* addr,
bool exec) {
assert(UseLargePages, "only for large pages");
- assert(page_size == os::large_page_size(), "Currently only support one large page size on Windows");
assert(is_aligned(addr, alignment), "Must be");
assert(is_aligned(addr, page_size), "Must be");
@@ -3624,11 +3661,17 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
return nullptr;
}
+ // Ensure GetLargePageMinimum() returns a valid positive value
+ size_t large_page_min = GetLargePageMinimum();
+ if (large_page_min <= 0) {
+ return nullptr;
+ }
+
// The requested alignment can be larger than the page size, for example with G1
// the alignment is bound to the heap region size. So this reservation needs to
// ensure that the requested alignment is met. When there is a requested address
// this solves it self, since it must be properly aligned already.
- if (addr == nullptr && alignment > page_size) {
+ if (addr == nullptr && alignment > large_page_min) {
return reserve_large_pages_aligned(bytes, alignment, exec);
}
@@ -4090,6 +4133,39 @@ int os::win32::_build_minor = 0;
bool os::win32::_processor_group_warning_displayed = false;
bool os::win32::_job_object_processor_group_warning_displayed = false;
+void getWindowsInstallationType(char* buffer, int bufferSize) {
+ HKEY hKey;
+ const char* subKey = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion";
+ const char* valueName = "InstallationType";
+
+ DWORD valueLength = bufferSize;
+
+ // Initialize buffer with empty string
+ buffer[0] = '\0';
+
+ // Open the registry key
+ if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, subKey, 0, KEY_READ, &hKey) != ERROR_SUCCESS) {
+ // Return empty buffer if key cannot be opened
+ return;
+ }
+
+ // Query the value
+ if (RegQueryValueExA(hKey, valueName, NULL, NULL, (LPBYTE)buffer, &valueLength) != ERROR_SUCCESS) {
+ RegCloseKey(hKey);
+ buffer[0] = '\0';
+ return;
+ }
+
+ RegCloseKey(hKey);
+}
+
+bool isNanoServer() {
+ const int BUFFER_SIZE = 256;
+ char installationType[BUFFER_SIZE];
+ getWindowsInstallationType(installationType, BUFFER_SIZE);
+ return (strcmp(installationType, "Nano Server") == 0);
+}
+
void os::win32::initialize_windows_version() {
assert(_major_version == 0, "windows version already initialized.");
@@ -4107,7 +4183,13 @@ void os::win32::initialize_windows_version() {
warning("Attempt to determine system directory failed: %s", buf_len != 0 ? error_msg_buffer : "");
return;
}
- strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret);
+
+ if (isNanoServer()) {
+ // On Windows Nanoserver the kernel32.dll is located in the forwarders subdirectory
+ strncat(kernel32_path, "\\forwarders\\kernel32.dll", MAX_PATH - ret);
+ } else {
+ strncat(kernel32_path, "\\kernel32.dll", MAX_PATH - ret);
+ }
DWORD version_size = GetFileVersionInfoSize(kernel32_path, nullptr);
if (version_size == 0) {
@@ -5923,7 +6005,7 @@ static void do_resume(HANDLE* h) {
// retrieve a suspend/resume context capable handle
// from the tid. Caller validates handle return value.
void get_thread_handle_for_extended_context(HANDLE* h,
- OSThread::thread_id_t tid) {
+ DWORD tid) {
if (h != nullptr) {
*h = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, FALSE, tid);
}
diff --git a/src/hotspot/os/windows/os_windows.hpp b/src/hotspot/os/windows/os_windows.hpp
index 3bc5ab9eef1f3..1d5237243000b 100644
--- a/src/hotspot/os/windows/os_windows.hpp
+++ b/src/hotspot/os/windows/os_windows.hpp
@@ -65,6 +65,8 @@ class os::win32 {
static void setmode_streams();
static bool is_windows_11_or_greater();
static bool is_windows_server_2022_or_greater();
+ static bool request_lock_memory_privilege();
+ static size_t large_page_init_decide_size();
static int windows_major_version() {
assert(_major_version > 0, "windows version not initialized.");
return _major_version;
diff --git a/src/hotspot/os/windows/vmStructs_windows.hpp b/src/hotspot/os/windows/vmStructs_windows.hpp
index 2550e685f16e2..93f4ea7c8111d 100644
--- a/src/hotspot/os/windows/vmStructs_windows.hpp
+++ b/src/hotspot/os/windows/vmStructs_windows.hpp
@@ -29,9 +29,18 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+#define VM_STRUCTS_OS(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+ /******************************/ \
+ /* Threads (NOTE: incomplete) */ \
+ /******************************/ \
+ \
+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
+ unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */
+
+#define VM_TYPES_OS(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+ \
+ declare_unsigned_integer_type(OSThread::thread_id_t)
#define VM_INT_CONSTANTS_OS(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp b/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp
index 157d57f8e0fa2..123cd67248f86 100644
--- a/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/vmStructs_aix_ppc.hpp
@@ -30,21 +30,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, pthread_t) \
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp
index 07b878106cfcd..c384afac7ecff 100644
--- a/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp
+++ b/src/hotspot/os_cpu/bsd_aarch64/vmStructs_bsd_aarch64.hpp
@@ -31,22 +31,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _unique_thread_id, uint64_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Thread IDs */ \
- /**********************/ \
- \
- declare_unsigned_integer_type(OSThread::thread_id_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp
index fb43541fa775a..b48ea82712ecd 100644
--- a/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/vmStructs_bsd_x86.hpp
@@ -29,22 +29,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _unique_thread_id, uint64_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Thread IDs */ \
- /**********************/ \
- \
- declare_unsigned_integer_type(OSThread::thread_id_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp
index f2ad002996b5c..3c8e9c4441477 100644
--- a/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/vmStructs_linux_aarch64.hpp
@@ -30,23 +30,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(OSThread::thread_id_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp
index 9b4bd0faf0ad9..120726bf55fcd 100644
--- a/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/vmStructs_linux_arm.hpp
@@ -29,22 +29,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(OSThread::thread_id_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp b/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp
index 9464c35977078..ae948c7303101 100644
--- a/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp
+++ b/src/hotspot/os_cpu/linux_ppc/vmStructs_linux_ppc.hpp
@@ -30,23 +30,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, pid_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(pid_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
index a7dc84770f84c..368d6c971fae0 100644
--- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
@@ -54,6 +54,24 @@ inline void OrderAccess::fence() {
}
inline void OrderAccess::cross_modify_fence_impl() {
+ // From 3 “Zifencei” Instruction-Fetch Fence, Version 2.0
+ // "RISC-V does not guarantee that stores to instruction memory will be made
+ // visible to instruction fetches on a RISC-V hart until that hart executes a
+ // FENCE.I instruction. A FENCE.I instruction ensures that a subsequent
+ // instruction fetch on a RISC-V hart will see any previous data stores
+ // already visible to the same RISC-V hart. FENCE.I does not ensure that other
+ // RISC-V harts’ instruction fetches will observe the local hart’s stores in a
+ // multiprocessor system."
+ //
+ // Hence to be able to use fence.i directly we need a kernel that supports
+ // PR_RISCV_CTX_SW_FENCEI_ON. Thus if context switch to another hart we are
+ // ensured that instruction fetch will see any previous data stores
+ //
+ // The alternative is using full system IPI (system wide icache sync) then
+ // this barrier is not strictly needed. As this is emitted in runtime slow-path
+ // we will just always emit it, typically after a safepoint.
+ guarantee(VM_Version::supports_fencei_barrier(), "Linux kernel require fence.i");
+ __asm__ volatile("fence.i" : : : "memory");
}
#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
index 6cf7683a58602..3946394c19b1f 100644
--- a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
@@ -30,23 +30,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(OSThread::thread_id_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
index 3f9f26b525ba5..a3a226502f6fc 100644
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -35,6 +35,7 @@
#include <asm/hwcap.h>
#include <ctype.h>
#include <sys/auxv.h>
+#include <sys/prctl.h>
#ifndef HWCAP_ISA_I
#define HWCAP_ISA_I nth_bit('I' - 'A')
@@ -82,6 +83,23 @@
__v; \
})
+// prctl PR_RISCV_SET_ICACHE_FLUSH_CTX is from Linux 6.9
+#ifndef PR_RISCV_SET_ICACHE_FLUSH_CTX
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+#endif
+#ifndef PR_RISCV_CTX_SW_FENCEI_ON
+#define PR_RISCV_CTX_SW_FENCEI_ON 0
+#endif
+#ifndef PR_RISCV_CTX_SW_FENCEI_OFF
+#define PR_RISCV_CTX_SW_FENCEI_OFF 1
+#endif
+#ifndef PR_RISCV_SCOPE_PER_PROCESS
+#define PR_RISCV_SCOPE_PER_PROCESS 0
+#endif
+#ifndef PR_RISCV_SCOPE_PER_THREAD
+#define PR_RISCV_SCOPE_PER_THREAD 1
+#endif
+
uint32_t VM_Version::cpu_vector_length() {
assert(ext_V.enabled(), "should not call this");
return (uint32_t)read_csr(CSR_VLENB);
@@ -102,6 +120,7 @@ void VM_Version::setup_cpu_available_features() {
if (!RiscvHwprobe::probe_features()) {
os_aux_features();
}
+
char* uarch = os_uarch_additional_features();
vendor_features();
@@ -155,6 +174,24 @@ void VM_Version::setup_cpu_available_features() {
i++;
}
+ // Linux kernel require Zifencei
+ if (!ext_Zifencei.enabled()) {
+ log_info(os, cpu)("Zifencei not found, required by Linux, enabling.");
+ ext_Zifencei.enable_feature();
+ }
+
+ if (UseCtxFencei) {
+ // Note that we can set this up only for effected threads
+ // via PR_RISCV_SCOPE_PER_THREAD, i.e. on VM attach/deattach.
+ int ret = prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
+ if (ret == 0) {
+ log_debug(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled.");
+ } else {
+ FLAG_SET_ERGO(UseCtxFencei, false);
+ log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) disabled, unsupported by kernel.");
+ }
+ }
+
_features_string = os::strdup(buf);
}
diff --git a/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp b/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp
index 0442510fa247a..a0fb5eb1a6ab9 100644
--- a/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp
+++ b/src/hotspot/os_cpu/linux_s390/vmStructs_linux_s390.hpp
@@ -30,23 +30,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, pid_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(pid_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp
index 277486549c035..8f6d365723700 100644
--- a/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp
+++ b/src/hotspot/os_cpu/linux_x86/vmStructs_linux_x86.hpp
@@ -29,23 +29,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- nonstatic_field(OSThread, _pthread_id, pthread_t)
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- /**********************/ \
- /* Posix Thread IDs */ \
- /**********************/ \
- \
- declare_integer_type(OSThread::thread_id_t) \
- declare_unsigned_integer_type(pthread_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp
index 220787823dc69..18a5588b743b9 100644
--- a/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp
+++ b/src/hotspot/os_cpu/windows_aarch64/vmStructs_windows_aarch64.hpp
@@ -29,18 +29,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- declare_unsigned_integer_type(OSThread::thread_id_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp
index 9f50a7ed9ae29..985a6a331daba 100644
--- a/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/vmStructs_windows_x86.hpp
@@ -29,18 +29,9 @@
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.
-#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
- \
- /******************************/ \
- /* Threads (NOTE: incomplete) */ \
- /******************************/ \
- \
- nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
- unchecked_nonstatic_field(OSThread, _thread_handle, sizeof(HANDLE)) /* NOTE: no type */
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
-#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
- \
- declare_unsigned_integer_type(OSThread::thread_id_t)
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
diff --git a/src/hotspot/share/adlc/adlArena.cpp b/src/hotspot/share/adlc/adlArena.cpp
index d5a1dd500fa66..ebd1f74911d57 100644
--- a/src/hotspot/share/adlc/adlArena.cpp
+++ b/src/hotspot/share/adlc/adlArena.cpp
@@ -63,8 +63,6 @@ void AdlChunk::chop() {
AdlChunk *k = this;
while( k ) {
AdlChunk *tmp = k->_next;
- // clear out this chunk (to detect allocation bugs)
- memset(k, 0xBE, k->_len);
free(k); // Free chunk (was malloc'd)
k = tmp;
}
diff --git a/src/hotspot/share/adlc/forms.cpp b/src/hotspot/share/adlc/forms.cpp
index 068d745254e3d..c34a73ea1e13f 100644
--- a/src/hotspot/share/adlc/forms.cpp
+++ b/src/hotspot/share/adlc/forms.cpp
@@ -276,7 +276,6 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreB")==0) return Form::idealB;
- if( strcmp(opType,"StoreCM")==0) return Form::idealB;
if( strcmp(opType,"StoreC")==0) return Form::idealC;
if( strcmp(opType,"StoreD")==0) return Form::idealD;
if( strcmp(opType,"StoreF")==0) return Form::idealF;
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index e7df38ff221a9..ac2d3d94153f7 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -3654,7 +3654,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
#if INCLUDE_SHENANDOAHGC
"ShenandoahCompareAndSwapN", "ShenandoahCompareAndSwapP", "ShenandoahWeakCompareAndSwapP", "ShenandoahWeakCompareAndSwapN", "ShenandoahCompareAndExchangeP", "ShenandoahCompareAndExchangeN",
#endif
- "StoreCM",
"GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
"GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
"ClearArray"
@@ -4357,7 +4356,7 @@ bool MatchRule::is_vector() const {
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
- "VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
+ "VectorRearrange", "VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastF2HF", "VectorCastHF2F",
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp
index e1c4e90d0637d..a0944c864e68f 100644
--- a/src/hotspot/share/c1/c1_Compiler.cpp
+++ b/src/hotspot/share/c1/c1_Compiler.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -167,6 +167,9 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ #if defined(AMD64)
+ case vmIntrinsics::_dtanh:
+ #endif
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp
index a2e903edc342f..02be6f8d49e4a 100644
--- a/src/hotspot/share/c1/c1_GraphBuilder.cpp
+++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp
@@ -3339,6 +3339,7 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope)
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dtan : // fall through
+ case vmIntrinsics::_dtanh : // fall through
case vmIntrinsics::_dlog : // fall through
case vmIntrinsics::_dlog10 : // fall through
case vmIntrinsics::_dexp : // fall through
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index 5d73ab5b88dba..c568caeca4b30 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
#include "c1/c1_ValueType.hpp"
#include "oops/method.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
class BlockBegin;
class BlockList;
@@ -1122,7 +1123,7 @@ class LIR_Op: public CompilationResourceObj {
}
#endif
- virtual const char * name() const PRODUCT_RETURN0;
+ virtual const char * name() const PRODUCT_RETURN_NULL;
virtual void visit(LIR_OpVisitState* state);
int id() const { return _id; }
@@ -1400,7 +1401,7 @@ class LIR_Op1: public LIR_Op {
virtual bool is_patching() { return _patch != lir_patch_none; }
virtual void emit_code(LIR_Assembler* masm);
virtual LIR_Op1* as_Op1() { return this; }
- virtual const char * name() const PRODUCT_RETURN0;
+ virtual const char * name() const PRODUCT_RETURN_NULL;
void set_in_opr(LIR_Opr opr) { _opr = opr; }
@@ -2033,8 +2034,9 @@ class LIR_OpProfileCall : public LIR_Op {
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
bool should_profile_receiver_type() const {
bool callee_is_static = _profiled_callee->is_loaded() && _profiled_callee->is_static();
+ bool callee_is_private = _profiled_callee->is_loaded() && _profiled_callee->is_private();
Bytecodes::Code bc = _profiled_method->java_code_at_bci(_profiled_bci);
- bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !_profiled_callee->can_be_statically_bound()) || bc == Bytecodes::_invokeinterface;
+ bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !callee_is_private) || bc == Bytecodes::_invokeinterface;
return C1ProfileVirtualCalls && call_is_virtual && !callee_is_static;
}
};
diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp
index 4e63736503fe0..74fdf7a5b76a3 100644
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp
@@ -2971,6 +2971,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_dsqrt: // fall through
case vmIntrinsics::_dsqrt_strict: // fall through
case vmIntrinsics::_dtan: // fall through
+ case vmIntrinsics::_dtanh: // fall through
case vmIntrinsics::_dsin : // fall through
case vmIntrinsics::_dcos : // fall through
case vmIntrinsics::_dexp : // fall through
diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp
index 5b44d5c0f1983..915f00f77c523 100644
--- a/src/hotspot/share/c1/c1_Runtime1.cpp
+++ b/src/hotspot/share/c1/c1_Runtime1.cpp
@@ -347,6 +347,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::dsin());
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
+ FUNCTION_CASE(entry, StubRoutines::dtanh());
#undef FUNCTION_CASE
diff --git a/src/hotspot/share/cds/archiveHeapLoader.cpp b/src/hotspot/share/cds/archiveHeapLoader.cpp
index 6325fb6f49d73..0e7ef08064c37 100644
--- a/src/hotspot/share/cds/archiveHeapLoader.cpp
+++ b/src/hotspot/share/cds/archiveHeapLoader.cpp
@@ -376,13 +376,12 @@ void ArchiveHeapLoader::finish_initialization() {
intptr_t bottom = is_loaded() ? _loaded_heap_bottom : _mapped_heap_bottom;
// The heap roots are stored in one or more segments that are laid out consecutively.
- // The byte size of each segment (except for the last one) is max_size.
+ // The size of each segment (except for the last one) is max_size_in_{elems,bytes}.
HeapRootSegments segments = FileMapInfo::current_info()->heap_root_segments();
- int max_size = segments.max_size_in_bytes();
- HeapShared::init_root_segment_sizes(max_size);
+ HeapShared::init_root_segment_sizes(segments.max_size_in_elems());
intptr_t first_segment_addr = bottom + segments.base_offset();
for (size_t c = 0; c < segments.count(); c++) {
- oop segment_oop = cast_to_oop(first_segment_addr + (c * max_size));
+ oop segment_oop = cast_to_oop(first_segment_addr + (c * segments.max_size_in_bytes()));
assert(segment_oop->is_objArray(), "Must be");
HeapShared::add_root_segment((objArrayOop)segment_oop);
}
diff --git a/src/hotspot/share/cds/archiveHeapWriter.cpp b/src/hotspot/share/cds/archiveHeapWriter.cpp
index d8ee7155452ff..710e693bfdb14 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.cpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.cpp
@@ -88,7 +88,6 @@ void ArchiveHeapWriter::init() {
_native_pointers = new GrowableArrayCHeap(2048);
_source_objs = new GrowableArrayCHeap(10000);
- guarantee(UseG1GC, "implementation limitation");
guarantee(MIN_GC_REGION_ALIGNMENT <= G1HeapRegion::min_region_size_in_words() * HeapWordSize, "must be");
}
}
@@ -224,6 +223,7 @@ void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeapat(root_index++));
@@ -246,14 +245,21 @@ void ArchiveHeapWriter::copy_roots_to_buffer(GrowableArrayCHeaplength(), "Post-condition: All roots are handled");
+
_heap_root_segments = segments;
}
+// The goal is to sort the objects in increasing order of:
+// - objects that have only oop pointers
+// - objects that have both native and oop pointers
+// - objects that have only native pointers
+// - objects that have no pointers
static int oop_sorting_rank(oop o) {
bool has_oop_ptr, has_native_ptr;
HeapShared::get_pointer_info(o, has_oop_ptr, has_native_ptr);
- if (!has_oop_ptr) {
+ if (has_oop_ptr) {
if (!has_native_ptr) {
return 0;
} else {
@@ -268,11 +274,6 @@ static int oop_sorting_rank(oop o) {
}
}
-// The goal is to sort the objects in increasing order of:
-// - objects that have no pointers
-// - objects that have only native pointers
-// - objects that have both native and oop pointers
-// - objects that have only oop pointers
int ArchiveHeapWriter::compare_objs_by_oop_fields(HeapObjOrder* a, HeapObjOrder* b) {
int rank_a = a->_rank;
int rank_b = b->_rank;
@@ -452,26 +453,30 @@ size_t ArchiveHeapWriter::copy_one_source_obj_to_buffer(oop src_obj) {
void ArchiveHeapWriter::set_requested_address(ArchiveHeapInfo* info) {
assert(!info->is_used(), "only set once");
- assert(UseG1GC, "must be");
- address heap_end = (address)G1CollectedHeap::heap()->reserved().end();
- log_info(cds, heap)("Heap end = %p", heap_end);
size_t heap_region_byte_size = _buffer_used;
assert(heap_region_byte_size > 0, "must archived at least one object!");
-
if (UseCompressedOops) {
- _requested_bottom = align_down(heap_end - heap_region_byte_size, G1HeapRegion::GrainBytes);
+ if (UseG1GC) {
+ address heap_end = (address)G1CollectedHeap::heap()->reserved().end();
+ log_info(cds, heap)("Heap end = %p", heap_end);
+ _requested_bottom = align_down(heap_end - heap_region_byte_size, G1HeapRegion::GrainBytes);
+ _requested_bottom = align_down(_requested_bottom, MIN_GC_REGION_ALIGNMENT);
+ assert(is_aligned(_requested_bottom, G1HeapRegion::GrainBytes), "sanity");
+ } else {
+ _requested_bottom = align_up(CompressedOops::begin(), MIN_GC_REGION_ALIGNMENT);
+ }
} else {
// We always write the objects as if the heap started at this address. This
// makes the contents of the archive heap deterministic.
//
// Note that at runtime, the heap address is selected by the OS, so the archive
// heap will not be mapped at 0x10000000, and the contents need to be patched.
- _requested_bottom = (address)NOCOOPS_REQUESTED_BASE;
+ _requested_bottom = align_up((address)NOCOOPS_REQUESTED_BASE, MIN_GC_REGION_ALIGNMENT);
}
- assert(is_aligned(_requested_bottom, G1HeapRegion::GrainBytes), "sanity");
+ assert(is_aligned(_requested_bottom, MIN_GC_REGION_ALIGNMENT), "sanity");
_requested_top = _requested_bottom + _buffer_used;
diff --git a/src/hotspot/share/cds/archiveHeapWriter.hpp b/src/hotspot/share/cds/archiveHeapWriter.hpp
index 961d2b52133f8..29ea50ba5fe86 100644
--- a/src/hotspot/share/cds/archiveHeapWriter.hpp
+++ b/src/hotspot/share/cds/archiveHeapWriter.hpp
@@ -111,11 +111,10 @@ class ArchiveHeapWriter : AllStatic {
public:
static const intptr_t NOCOOPS_REQUESTED_BASE = 0x10000000;
- // The minimum region size of all collectors that are supported by CDS in
- // ArchiveHeapLoader::can_map() mode. Currently only G1 is supported. G1's region size
- // depends on -Xmx, but can never be smaller than 1 * M.
- // (TODO: Perhaps change to 256K to be compatible with Shenandoah)
- static constexpr int MIN_GC_REGION_ALIGNMENT = 1 * M;
+ // The minimum region size of all collectors that are supported by CDS.
+ // G1 heap region size can never be smaller than 1M.
+ // Shenandoah heap region size can never be smaller than 256K.
+ static constexpr int MIN_GC_REGION_ALIGNMENT = 256 * K;
private:
class EmbeddedOopRelocator;
diff --git a/src/hotspot/share/cds/archiveUtils.hpp b/src/hotspot/share/cds/archiveUtils.hpp
index 2e361ab0c4650..5a78bc26ee627 100644
--- a/src/hotspot/share/cds/archiveUtils.hpp
+++ b/src/hotspot/share/cds/archiveUtils.hpp
@@ -277,7 +277,6 @@ class HeapRootSegments {
memset(this, 0, sizeof(*this));
}
HeapRootSegments(size_t base_offset, int roots_count, int max_size_in_bytes, int max_size_in_elems) {
- assert(is_power_of_2(max_size_in_bytes), "must be");
memset(this, 0, sizeof(*this));
_base_offset = base_offset;
_count = (roots_count + max_size_in_elems - 1) / max_size_in_elems;
diff --git a/src/hotspot/share/cds/cdsConfig.cpp b/src/hotspot/share/cds/cdsConfig.cpp
index a0a562eca21a0..5915424c4fe87 100644
--- a/src/hotspot/share/cds/cdsConfig.cpp
+++ b/src/hotspot/share/cds/cdsConfig.cpp
@@ -236,7 +236,7 @@ void CDSConfig::init_shared_archive_paths() {
}
void CDSConfig::check_internal_module_property(const char* key, const char* value) {
- if (Arguments::is_internal_module_property(key)) {
+ if (Arguments::is_internal_module_property(key) && !Arguments::is_module_path_property(key)) {
stop_using_optimized_module_handling();
log_info(cds)("optimized module handling: disabled due to incompatible property: %s=%s", key, value);
}
diff --git a/src/hotspot/share/cds/classListParser.cpp b/src/hotspot/share/cds/classListParser.cpp
index f8d24295a12e5..694a179d7ee6c 100644
--- a/src/hotspot/share/cds/classListParser.cpp
+++ b/src/hotspot/share/cds/classListParser.cpp
@@ -508,7 +508,9 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS
THROW_NULL(vmSymbols::java_lang_ClassNotFoundException());
}
- InstanceKlass* k = UnregisteredClasses::load_class(class_name, _source, CHECK_NULL);
+ ResourceMark rm;
+ char * source_path = os::strdup_check_oom(ClassLoader::uri_to_path(_source));
+ InstanceKlass* k = UnregisteredClasses::load_class(class_name, source_path, CHECK_NULL);
if (k->local_interfaces()->length() != _interfaces->length()) {
print_specified_interfaces();
print_actual_interfaces(k);
diff --git a/src/hotspot/share/cds/classListWriter.cpp b/src/hotspot/share/cds/classListWriter.cpp
index 78cd092445b70..1b9f589f1c5e5 100644
--- a/src/hotspot/share/cds/classListWriter.cpp
+++ b/src/hotspot/share/cds/classListWriter.cpp
@@ -174,6 +174,8 @@ void ClassListWriter::write_to_stream(const InstanceKlass* k, outputStream* stre
}
}
+ // NB: the string following "source: " is not really a proper file name, but rather
+ // a truncated URI referring to a file. It must be decoded after reading.
#ifdef _WINDOWS
// "file:/C:/dir/foo.jar" -> "C:/dir/foo.jar"
stream->print(" source: %s", cfs->source() + 6);
diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp
index b86118c686886..715fce5f3fc86 100644
--- a/src/hotspot/share/cds/filemap.cpp
+++ b/src/hotspot/share/cds/filemap.cpp
@@ -581,7 +581,7 @@ int FileMapInfo::get_module_shared_path_index(Symbol* location) {
// skip_uri_protocol was also called during dump time -- see ClassLoaderExt::process_module_table()
ResourceMark rm;
- const char* file = ClassLoader::skip_uri_protocol(location->as_C_string());
+ const char* file = ClassLoader::uri_to_path(location->as_C_string());
for (int i = ClassLoaderExt::app_module_paths_start_index(); i < get_number_of_shared_paths(); i++) {
SharedClassPathEntry* ent = shared_path(i);
if (!ent->is_non_existent()) {
@@ -781,12 +781,12 @@ bool FileMapInfo::check_paths(int shared_path_start_idx, int num_paths, Growable
assert(strlen(rp_array->at(i)) > (size_t)runtime_prefix_len, "sanity");
const char* runtime_path = rp_array->at(i) + runtime_prefix_len;
if (!os::same_files(dumptime_path, runtime_path)) {
- return true;
+ return false;
}
i++;
j++;
}
- return false;
+ return true;
}
bool FileMapInfo::validate_boot_class_paths() {
@@ -810,7 +810,7 @@ bool FileMapInfo::validate_boot_class_paths() {
char* rp = skip_first_path_entry(runtime_boot_path);
assert(shared_path(0)->is_modules_image(), "first shared_path must be the modules image");
int dp_len = header()->app_class_paths_start_index() - 1; // ignore the first path to the module image
- bool mismatch = false;
+ bool match = true;
bool relaxed_check = !header()->has_platform_or_app_classes();
if (dp_len == 0 && rp == nullptr) {
@@ -823,7 +823,7 @@ bool FileMapInfo::validate_boot_class_paths() {
if (check_paths_existence(rp)) {
// If a path exists in the runtime boot paths, it is considered a mismatch
// since there's no boot path specified during dump time.
- mismatch = true;
+ match = false;
}
}
} else if (dp_len > 0 && rp != nullptr) {
@@ -840,16 +840,16 @@ bool FileMapInfo::validate_boot_class_paths() {
// check the full runtime boot path, must match with dump time
num = rp_len;
}
- mismatch = check_paths(1, num, rp_array, 0, 0);
+ match = check_paths(1, num, rp_array, 0, 0);
} else {
// create_path_array() ignores non-existing paths. Although the dump time and runtime boot classpath lengths
// are the same initially, after the call to create_path_array(), the runtime boot classpath length could become
// shorter. We consider boot classpath mismatch in this case.
- mismatch = true;
+ match = false;
}
}
- if (mismatch) {
+ if (!match) {
// The paths are different
return classpath_failure("[BOOT classpath mismatch, actual =", runtime_boot_path);
}
@@ -860,7 +860,7 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
const char *appcp = Arguments::get_appclasspath();
assert(appcp != nullptr, "null app classpath");
int rp_len = num_paths(appcp);
- bool mismatch = false;
+ bool match = false;
if (rp_len < shared_app_paths_len) {
return classpath_failure("Run time APP classpath is shorter than the one at dump time: ", appcp);
}
@@ -889,8 +889,8 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
// run 2: -cp x.jar:NE4:b.jar -> x.jar:b.jar -> mismatched
int j = header()->app_class_paths_start_index();
- mismatch = check_paths(j, shared_app_paths_len, rp_array, 0, 0);
- if (mismatch) {
+ match = check_paths(j, shared_app_paths_len, rp_array, 0, 0);
+ if (!match) {
// To facilitate app deployment, we allow the JAR files to be moved *together* to
// a different location, as long as they are still stored under the same directory
// structure. E.g., the following is OK.
@@ -901,10 +901,10 @@ bool FileMapInfo::validate_app_class_paths(int shared_app_paths_len) {
if (dumptime_prefix_len != 0 || runtime_prefix_len != 0) {
log_info(class, path)("LCP length for app classpath (dumptime: %u, runtime: %u)",
dumptime_prefix_len, runtime_prefix_len);
- mismatch = check_paths(j, shared_app_paths_len, rp_array,
+ match = check_paths(j, shared_app_paths_len, rp_array,
dumptime_prefix_len, runtime_prefix_len);
}
- if (mismatch) {
+ if (!match) {
return classpath_failure("[APP classpath mismatch, actual: -Djava.class.path=", appcp);
}
}
@@ -926,15 +926,35 @@ void FileMapInfo::log_paths(const char* msg, int start_idx, int end_idx) {
}
}
+void FileMapInfo::extract_module_paths(const char* runtime_path, GrowableArray* module_paths) {
+ GrowableArray* path_array = create_path_array(runtime_path);
+ int num_paths = path_array->length();
+ for (int i = 0; i < num_paths; i++) {
+ const char* name = path_array->at(i);
+ ClassLoaderExt::extract_jar_files_from_path(name, module_paths);
+ }
+ // module paths are stored in sorted order in the CDS archive.
+ module_paths->sort(ClassLoaderExt::compare_module_path_by_name);
+}
+
bool FileMapInfo::check_module_paths() {
- const char* rp = Arguments::get_property("jdk.module.path");
- int num_paths = CDSConfig::num_archives(rp);
- if (num_paths != header()->num_module_paths()) {
+ const char* runtime_path = Arguments::get_property("jdk.module.path");
+ int archived_num_module_paths = header()->num_module_paths();
+ if (runtime_path == nullptr && archived_num_module_paths == 0) {
+ return true;
+ }
+ if ((runtime_path == nullptr && archived_num_module_paths > 0) ||
+ (runtime_path != nullptr && archived_num_module_paths == 0)) {
return false;
}
ResourceMark rm;
- GrowableArray* rp_array = create_path_array(rp);
- return check_paths(header()->app_module_paths_start_index(), num_paths, rp_array, 0, 0);
+ GrowableArray* module_paths = new GrowableArray(3);
+ extract_module_paths(runtime_path, module_paths);
+ int num_paths = module_paths->length();
+ if (num_paths != archived_num_module_paths) {
+ return false;
+ }
+ return check_paths(header()->app_module_paths_start_index(), num_paths, module_paths, 0, 0);
}
bool FileMapInfo::validate_shared_path_table() {
@@ -944,6 +964,16 @@ bool FileMapInfo::validate_shared_path_table() {
// Load the shared path table info from the archive header
_shared_path_table = header()->shared_path_table();
+
+ bool matched_module_paths = true;
+ if (CDSConfig::is_dumping_dynamic_archive() || header()->has_full_module_graph()) {
+ matched_module_paths = check_module_paths();
+ }
+ if (header()->has_full_module_graph() && !matched_module_paths) {
+ CDSConfig::stop_using_optimized_module_handling();
+ log_info(cds)("optimized module handling: disabled because of mismatched module paths");
+ }
+
if (CDSConfig::is_dumping_dynamic_archive()) {
// Only support dynamic dumping with the usage of the default CDS archive
// or a simple base archive.
@@ -959,7 +989,7 @@ bool FileMapInfo::validate_shared_path_table() {
"Dynamic archiving is disabled because base layer archive has appended boot classpath");
}
if (header()->num_module_paths() > 0) {
- if (!check_module_paths()) {
+ if (!matched_module_paths) {
CDSConfig::disable_dumping_dynamic_archive();
log_warning(cds)(
"Dynamic archiving is disabled because base layer archive has a different module path");
@@ -1581,39 +1611,38 @@ static size_t write_bitmap(const CHeapBitMap* map, char* output, size_t offset)
return offset + size_in_bytes;
}
-// The start of the archived heap has many primitive arrays (String
-// bodies) that are not marked by the oop/ptr maps. So we must have
-// lots of leading zeros.
-size_t FileMapInfo::remove_bitmap_leading_zeros(CHeapBitMap* map) {
- size_t old_zeros = map->find_first_set_bit(0);
+// The sorting code groups the objects with non-null oop/ptrs together.
+// Relevant bitmaps then have lots of leading and trailing zeros, which
+// we do not have to store.
+size_t FileMapInfo::remove_bitmap_zeros(CHeapBitMap* map) {
+ BitMap::idx_t first_set = map->find_first_set_bit(0);
+ BitMap::idx_t last_set = map->find_last_set_bit(0);
size_t old_size = map->size();
// Slice and resize bitmap
- map->truncate(old_zeros, map->size());
+ map->truncate(first_set, last_set + 1);
- DEBUG_ONLY(
- size_t new_zeros = map->find_first_set_bit(0);
- assert(new_zeros == 0, "Should have removed leading zeros");
- )
+ assert(map->at(0), "First bit should be set");
+ assert(map->at(map->size() - 1), "Last bit should be set");
assert(map->size() <= old_size, "sanity");
- return old_zeros;
+
+ return first_set;
}
char* FileMapInfo::write_bitmap_region(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
size_t &size_in_bytes) {
- size_t removed_rw_zeros = remove_bitmap_leading_zeros(rw_ptrmap);
- size_t removed_ro_zeros = remove_bitmap_leading_zeros(ro_ptrmap);
- header()->set_rw_ptrmap_start_pos(removed_rw_zeros);
- header()->set_ro_ptrmap_start_pos(removed_ro_zeros);
+ size_t removed_rw_leading_zeros = remove_bitmap_zeros(rw_ptrmap);
+ size_t removed_ro_leading_zeros = remove_bitmap_zeros(ro_ptrmap);
+ header()->set_rw_ptrmap_start_pos(removed_rw_leading_zeros);
+ header()->set_ro_ptrmap_start_pos(removed_ro_leading_zeros);
size_in_bytes = rw_ptrmap->size_in_bytes() + ro_ptrmap->size_in_bytes();
if (heap_info->is_used()) {
- // Remove leading zeros
- size_t removed_oop_zeros = remove_bitmap_leading_zeros(heap_info->oopmap());
- size_t removed_ptr_zeros = remove_bitmap_leading_zeros(heap_info->ptrmap());
-
- header()->set_heap_oopmap_start_pos(removed_oop_zeros);
- header()->set_heap_ptrmap_start_pos(removed_ptr_zeros);
+ // Remove leading and trailing zeros
+ size_t removed_oop_leading_zeros = remove_bitmap_zeros(heap_info->oopmap());
+ size_t removed_ptr_leading_zeros = remove_bitmap_zeros(heap_info->ptrmap());
+ header()->set_heap_oopmap_start_pos(removed_oop_leading_zeros);
+ header()->set_heap_ptrmap_start_pos(removed_ptr_leading_zeros);
size_in_bytes += heap_info->oopmap()->size_in_bytes();
size_in_bytes += heap_info->ptrmap()->size_in_bytes();
@@ -1716,10 +1745,10 @@ void FileMapInfo::close() {
*/
static char* map_memory(int fd, const char* file_name, size_t file_offset,
char *addr, size_t bytes, bool read_only,
- bool allow_exec, MEMFLAGS flags = mtNone) {
+ bool allow_exec, MemTag mem_tag = mtNone) {
char* mem = os::map_memory(fd, file_name, file_offset, addr, bytes,
AlwaysPreTouch ? false : read_only,
- allow_exec, flags);
+ allow_exec, mem_tag);
if (mem != nullptr && AlwaysPreTouch) {
os::pretouch_memory(mem, mem + bytes);
}
@@ -2178,7 +2207,7 @@ bool FileMapInfo::map_heap_region_impl() {
_mapped_heap_memregion = MemRegion(start, word_size);
- // Map the archived heap data. No need to call MemTracker::record_virtual_memory_type()
+ // Map the archived heap data. No need to call MemTracker::record_virtual_memory_tag()
// for mapped region as it is part of the reserved java heap, which is already recorded.
char* addr = (char*)_mapped_heap_memregion.start();
char* base;
diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp
index aa728b9d4949f..6650f52440881 100644
--- a/src/hotspot/share/cds/filemap.hpp
+++ b/src/hotspot/share/cds/filemap.hpp
@@ -271,6 +271,7 @@ class FileMapHeader: private CDSFileMapHeaderBase {
bool compressed_oops() const { return _compressed_oops; }
bool compressed_class_pointers() const { return _compressed_class_ptrs; }
HeapRootSegments heap_root_segments() const { return _heap_root_segments; }
+ bool has_full_module_graph() const { return _has_full_module_graph; }
size_t heap_oopmap_start_pos() const { return _heap_oopmap_start_pos; }
size_t heap_ptrmap_start_pos() const { return _heap_ptrmap_start_pos; }
size_t rw_ptrmap_start_pos() const { return _rw_ptrmap_start_pos; }
@@ -445,7 +446,7 @@ class FileMapInfo : public CHeapObj<mtInternal> {
void write_header();
void write_region(int region, char* base, size_t size,
bool read_only, bool allow_exec);
- size_t remove_bitmap_leading_zeros(CHeapBitMap* map);
+ size_t remove_bitmap_zeros(CHeapBitMap* map);
char* write_bitmap_region(CHeapBitMap* rw_ptrmap, CHeapBitMap* ro_ptrmap, ArchiveHeapInfo* heap_info,
size_t &size_in_bytes);
size_t write_heap_region(ArchiveHeapInfo* heap_info);
@@ -554,6 +555,7 @@ class FileMapInfo : public CHeapObj {
GrowableArray<const char*>* rp_array,
unsigned int dumptime_prefix_len,
unsigned int runtime_prefix_len) NOT_CDS_RETURN_(false);
+ void extract_module_paths(const char* runtime_path, GrowableArray<const char*>* module_paths);
bool validate_boot_class_paths() NOT_CDS_RETURN_(false);
bool validate_app_class_paths(int shared_app_paths_len) NOT_CDS_RETURN_(false);
bool map_heap_region_impl() NOT_CDS_JAVA_HEAP_RETURN_(false);
diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp
index 2bf75a5ba6512..81aa7ac94dc21 100644
--- a/src/hotspot/share/cds/heapShared.cpp
+++ b/src/hotspot/share/cds/heapShared.cpp
@@ -33,6 +33,7 @@
#include "cds/heapShared.hpp"
#include "cds/metaspaceShared.hpp"
#include "classfile/classLoaderData.hpp"
+#include "classfile/classLoaderExt.hpp"
#include "classfile/javaClasses.inline.hpp"
#include "classfile/modules.hpp"
#include "classfile/stringTable.hpp"
@@ -55,6 +56,7 @@
#include "oops/oop.inline.hpp"
#include "oops/typeArrayOop.inline.hpp"
#include "prims/jvmtiExport.hpp"
+#include "runtime/arguments.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/javaCalls.hpp"
@@ -134,8 +136,7 @@ static ArchivableStaticFieldInfo fmg_archive_subgraph_entry_fields[] = {
KlassSubGraphInfo* HeapShared::_default_subgraph_info;
GrowableArrayCHeap<OopHandle, mtClassShared>* HeapShared::_pending_roots = nullptr;
GrowableArrayCHeap<OopHandle, mtClassShared>* HeapShared::_root_segments;
-int HeapShared::_root_segment_max_size_shift;
-int HeapShared::_root_segment_max_size_mask;
+int HeapShared::_root_segment_max_size_elems;
OopHandle HeapShared::_scratch_basic_type_mirrors[T_VOID+1];
MetaspaceObjToOopHandleTable* HeapShared::_scratch_java_mirror_table = nullptr;
MetaspaceObjToOopHandleTable* HeapShared::_scratch_references_table = nullptr;
@@ -242,15 +243,29 @@ objArrayOop HeapShared::root_segment(int segment_idx) {
return segment;
}
+void HeapShared::get_segment_indexes(int idx, int& seg_idx, int& int_idx) {
+ assert(_root_segment_max_size_elems > 0, "sanity");
+
+ // Try to avoid divisions for the common case.
+ if (idx < _root_segment_max_size_elems) {
+ seg_idx = 0;
+ int_idx = idx;
+ } else {
+ seg_idx = idx / _root_segment_max_size_elems;
+ int_idx = idx % _root_segment_max_size_elems;
+ }
+
+ assert(idx == seg_idx * _root_segment_max_size_elems + int_idx,
+ "sanity: %d index maps to %d segment and %d internal", idx, seg_idx, int_idx);
+}
+
// Returns an objArray that contains all the roots of the archived objects
oop HeapShared::get_root(int index, bool clear) {
- assert(_root_segment_max_size_shift > 0, "sanity");
- assert(_root_segment_max_size_mask > 0, "sanity");
assert(index >= 0, "sanity");
assert(!CDSConfig::is_dumping_heap() && CDSConfig::is_using_archive(), "runtime only");
assert(!_root_segments->is_empty(), "must have loaded shared heap");
- int seg_idx = index >> _root_segment_max_size_shift;
- int int_idx = index & _root_segment_max_size_mask;
+ int seg_idx, int_idx;
+ get_segment_indexes(index, seg_idx, int_idx);
oop result = root_segment(seg_idx)->obj_at(int_idx);
if (clear) {
clear_root(index);
@@ -262,10 +277,8 @@ void HeapShared::clear_root(int index) {
assert(index >= 0, "sanity");
assert(CDSConfig::is_using_archive(), "must be");
if (ArchiveHeapLoader::is_in_use()) {
- assert(_root_segment_max_size_shift > 0, "sanity");
- assert(_root_segment_max_size_mask > 0, "sanity");
- int seg_idx = index >> _root_segment_max_size_shift;
- int int_idx = index & _root_segment_max_size_mask;
+ int seg_idx, int_idx;
+ get_segment_indexes(index, seg_idx, int_idx);
if (log_is_enabled(Debug, cds, heap)) {
oop old = root_segment(seg_idx)->obj_at(int_idx);
log_debug(cds, heap)("Clearing root %d: was " PTR_FORMAT, index, p2i(old));
@@ -471,11 +484,13 @@ void HeapShared::archive_objects(ArchiveHeapInfo *heap_info) {
// Cache for recording where the archived objects are copied to
create_archived_object_cache();
- log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
- UseCompressedOops ? p2i(CompressedOops::begin()) :
- p2i((address)G1CollectedHeap::heap()->reserved().start()),
- UseCompressedOops ? p2i(CompressedOops::end()) :
- p2i((address)G1CollectedHeap::heap()->reserved().end()));
+ if (UseCompressedOops || UseG1GC) {
+ log_info(cds)("Heap range = [" PTR_FORMAT " - " PTR_FORMAT "]",
+ UseCompressedOops ? p2i(CompressedOops::begin()) :
+ p2i((address)G1CollectedHeap::heap()->reserved().start()),
+ UseCompressedOops ? p2i(CompressedOops::end()) :
+ p2i((address)G1CollectedHeap::heap()->reserved().end()));
+ }
copy_objects();
CDSHeapVerifier::verify();
@@ -783,10 +798,8 @@ void HeapShared::add_root_segment(objArrayOop segment_oop) {
_root_segments->push(OopHandle(Universe::vm_global(), segment_oop));
}
-void HeapShared::init_root_segment_sizes(int max_size) {
- assert(is_power_of_2(max_size), "must be");
- _root_segment_max_size_shift = log2i_exact(max_size);
- _root_segment_max_size_mask = max_size - 1;
+void HeapShared::init_root_segment_sizes(int max_size_elems) {
+ _root_segment_max_size_elems = max_size_elems;
}
void HeapShared::serialize_tables(SerializeClosure* soc) {
@@ -873,6 +886,17 @@ void HeapShared::initialize_from_archived_subgraph(JavaThread* current, Klass* k
return; // nothing to do
}
+ if (k->name()->equals("jdk/internal/module/ArchivedModuleGraph") &&
+ !CDSConfig::is_using_optimized_module_handling() &&
+ // archive was created with --module-path
+ ClassLoaderExt::num_module_paths() > 0) {
+ // ArchivedModuleGraph was created with a --module-path that's different from the runtime --module-path.
+ // Thus, it might contain references to modules that do not exist at runtime. We cannot use it.
+ log_info(cds, heap)("Skip initializing ArchivedModuleGraph subgraph: is_using_optimized_module_handling=%s num_module_paths=%d",
+ BOOL_TO_STR(CDSConfig::is_using_optimized_module_handling()), ClassLoaderExt::num_module_paths());
+ return;
+ }
+
ExceptionMark em(THREAD);
const ArchivedKlassSubGraphInfoRecord* record =
resolve_or_init_classes_for_subgraph_of(k, /*do_init=*/true, THREAD);
@@ -1121,6 +1145,13 @@ bool HeapShared::archive_reachable_objects_from(int level,
// these objects that are referenced (directly or indirectly) by static fields.
ResourceMark rm;
log_error(cds, heap)("Cannot archive object of class %s", orig_obj->klass()->external_name());
+ if (log_is_enabled(Trace, cds, heap)) {
+ WalkOopAndArchiveClosure* walker = WalkOopAndArchiveClosure::current();
+ if (walker != nullptr) {
+ LogStream ls(Log(cds, heap)::trace());
+ CDSHeapVerifier::trace_to_root(&ls, walker->referencing_obj());
+ }
+ }
MetaspaceShared::unrecoverable_writing_error();
}
diff --git a/src/hotspot/share/cds/heapShared.hpp b/src/hotspot/share/cds/heapShared.hpp
index 9bb85db0fe9d2..01d664945ee74 100644
--- a/src/hotspot/share/cds/heapShared.hpp
+++ b/src/hotspot/share/cds/heapShared.hpp
@@ -143,13 +143,13 @@ class HeapShared: AllStatic {
friend class VerifySharedOopClosure;
public:
- // Can this VM write a heap region into the CDS archive? Currently only G1+compressed{oops,cp}
+ // Can this VM write a heap region into the CDS archive? Currently only {G1|Parallel|Serial}+compressed_cp
static bool can_write() {
CDS_JAVA_HEAP_ONLY(
if (_disable_writing) {
return false;
}
- return (UseG1GC && UseCompressedClassPointers);
+ return (UseG1GC || UseParallelGC || UseSerialGC) && UseCompressedClassPointers;
)
NOT_CDS_JAVA_HEAP(return false;)
}
@@ -291,8 +291,7 @@ class HeapShared: AllStatic {
static GrowableArrayCHeap<OopHandle, mtClassShared>* _pending_roots;
static GrowableArrayCHeap<OopHandle, mtClassShared>* _root_segments;
- static int _root_segment_max_size_shift;
- static int _root_segment_max_size_mask;
+ static int _root_segment_max_size_elems;
static OopHandle _scratch_basic_type_mirrors[T_VOID+1];
static MetaspaceObjToOopHandleTable* _scratch_java_mirror_table;
static MetaspaceObjToOopHandleTable* _scratch_references_table;
@@ -407,6 +406,8 @@ class HeapShared: AllStatic {
// Run-time only
static void clear_root(int index);
+ static void get_segment_indexes(int index, int& segment_index, int& internal_index);
+
static void setup_test_class(const char* test_class_name) PRODUCT_RETURN;
#endif // INCLUDE_CDS_JAVA_HEAP
@@ -425,7 +426,7 @@ class HeapShared: AllStatic {
static void init_for_dumping(TRAPS) NOT_CDS_JAVA_HEAP_RETURN;
static void write_subgraph_info_table() NOT_CDS_JAVA_HEAP_RETURN;
static void add_root_segment(objArrayOop segment_oop) NOT_CDS_JAVA_HEAP_RETURN;
- static void init_root_segment_sizes(int max_size) NOT_CDS_JAVA_HEAP_RETURN;
+ static void init_root_segment_sizes(int max_size_elems) NOT_CDS_JAVA_HEAP_RETURN;
static void serialize_tables(SerializeClosure* soc) NOT_CDS_JAVA_HEAP_RETURN;
#ifndef PRODUCT
diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp
index 4d978a7ad880f..6f646e162ecac 100644
--- a/src/hotspot/share/cds/metaspaceShared.cpp
+++ b/src/hotspot/share/cds/metaspaceShared.cpp
@@ -77,6 +77,7 @@
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/safepointVerifiers.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -300,6 +301,7 @@ void MetaspaceShared::post_initialize(TRAPS) {
}
ClassLoaderExt::init_paths_start_index(info->app_class_paths_start_index());
ClassLoaderExt::init_app_module_paths_start_index(info->app_module_paths_start_index());
+ ClassLoaderExt::init_num_module_paths(info->header()->num_module_paths());
}
}
}
@@ -791,9 +793,22 @@ void MetaspaceShared::preload_and_dump_impl(StaticArchiveBuilder& builder, TRAPS
// Do this at the very end, when no Java code will be executed. Otherwise
// some new strings may be added to the intern table.
StringTable::allocate_shared_strings_array(CHECK);
+ } else {
+ log_info(cds)("Not dumping heap, reset CDSConfig::_is_using_optimized_module_handling");
+ CDSConfig::stop_using_optimized_module_handling();
}
#endif
+ // Dummy call to load classes used at CDS runtime
+ JavaValue result(T_OBJECT);
+ Handle path_string = java_lang_String::create_from_str("dummy.jar", CHECK);
+ JavaCalls::call_static(&result,
+ vmClasses::jdk_internal_loader_ClassLoaders_klass(),
+ vmSymbols::toFileURL_name(),
+ vmSymbols::toFileURL_signature(),
+ path_string,
+ CHECK);
+
VM_PopulateDumpSharedSpace op(builder);
VMThread::execute(&op);
@@ -1299,7 +1314,7 @@ char* MetaspaceShared::reserve_address_space_for_archives(FileMapInfo* static_ma
assert(base_address == nullptr ||
(address)archive_space_rs.base() == base_address, "Sanity");
// Register archive space with NMT.
- MemTracker::record_virtual_memory_type(archive_space_rs.base(), mtClassShared);
+ MemTracker::record_virtual_memory_tag(archive_space_rs.base(), mtClassShared);
return archive_space_rs.base();
}
return nullptr;
@@ -1361,8 +1376,8 @@ char* MetaspaceShared::reserve_address_space_for_archives(FileMapInfo* static_ma
return nullptr;
}
// NMT: fix up the space tags
- MemTracker::record_virtual_memory_type(archive_space_rs.base(), mtClassShared);
- MemTracker::record_virtual_memory_type(class_space_rs.base(), mtClass);
+ MemTracker::record_virtual_memory_tag(archive_space_rs.base(), mtClassShared);
+ MemTracker::record_virtual_memory_tag(class_space_rs.base(), mtClass);
} else {
if (use_archive_base_addr && base_address != nullptr) {
total_space_rs = ReservedSpace(total_range_size, base_address_alignment,
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 9caf89628ccfd..155ce032400e8 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -1616,7 +1616,10 @@ void ciEnv::dump_replay_data_helper(outputStream* out) {
for (int i = 0; i < objects->length(); i++) {
objects->at(i)->dump_replay_data(out);
}
- dump_compile_data(out);
+
+ if (this->task() != nullptr) {
+ dump_compile_data(out);
+ }
out->flush();
}
diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp
index 94b405cdbfacd..a74a812c6a23b 100644
--- a/src/hotspot/share/ci/ciMethod.cpp
+++ b/src/hotspot/share/ci/ciMethod.cpp
@@ -1249,7 +1249,6 @@ bool ciMethod::has_jsrs () const { FETCH_FLAG_FROM_VM(has_jsrs);
bool ciMethod::is_getter () const { FETCH_FLAG_FROM_VM(is_getter); }
bool ciMethod::is_setter () const { FETCH_FLAG_FROM_VM(is_setter); }
bool ciMethod::is_accessor () const { FETCH_FLAG_FROM_VM(is_accessor); }
-bool ciMethod::is_initializer () const { FETCH_FLAG_FROM_VM(is_initializer); }
bool ciMethod::is_empty () const { FETCH_FLAG_FROM_VM(is_empty_method); }
bool ciMethod::is_boxing_method() const {
diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp
index 5cb63204d0b72..cc524930192cd 100644
--- a/src/hotspot/share/ci/ciMethod.hpp
+++ b/src/hotspot/share/ci/ciMethod.hpp
@@ -352,7 +352,6 @@ class ciMethod : public ciMetadata {
bool is_getter () const;
bool is_setter () const;
bool is_accessor () const;
- bool is_initializer () const;
bool is_empty () const;
bool can_be_statically_bound() const { return _can_be_statically_bound; }
bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index 60fed287df594..c8e95149b7c1a 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -1150,30 +1150,40 @@ static void parse_annotations(const ConstantPool* const cp,
if (AnnotationCollector::_unknown == id) continue;
coll->set_annotation(id);
if (AnnotationCollector::_java_lang_Deprecated == id) {
- assert(count <= 2, "change this if more element-value pairs are added to the @Deprecated annotation");
- // @Deprecated can specify forRemoval=true
+ // @Deprecated can specify forRemoval=true, which we need
+ // to record for JFR to use. If the annotation is not well-formed
+ // then we may not be able to determine that.
const u1* offset = abase + member_off;
- for (int i = 0; i < count; ++i) {
+ // There are only 2 members in @Deprecated.
+ int n_members = MIN2(count, 2);
+ for (int i = 0; i < n_members; ++i) {
int member_index = Bytes::get_Java_u2((address)offset);
offset += 2;
member = check_symbol_at(cp, member_index);
- if (member == vmSymbols::since()) {
- assert(*((address)offset) == s_tag_val, "invariant");
+ if (member == vmSymbols::since() &&
+ (*((address)offset) == s_tag_val)) {
+ // Found `since` first so skip over it
offset += 3;
- continue;
}
- if (member == vmSymbols::for_removal()) {
- assert(*((address)offset) == b_tag_val, "invariant");
+ else if (member == vmSymbols::for_removal() &&
+ (*((address)offset) == b_tag_val)) {
const u2 boolean_value_index = Bytes::get_Java_u2((address)offset + 1);
- if (cp->int_at(boolean_value_index) == 1) {
+ // No guarantee the entry is valid so check it refers to an int in the CP.
+ if (cp->is_within_bounds(boolean_value_index) &&
+ cp->tag_at(boolean_value_index).is_int() &&
+ cp->int_at(boolean_value_index) == 1) {
// forRemoval == true
coll->set_annotation(AnnotationCollector::_java_lang_Deprecated_for_removal);
}
+ break; // no need to check further
+ }
+ else {
+ // This @Deprecated annotation is malformed so we don't try to
+ // determine whether forRemoval is set.
break;
}
-
}
- continue;
+ continue; // proceed to next annotation
}
if (AnnotationCollector::_jdk_internal_vm_annotation_Contended == id) {
@@ -1194,11 +1204,21 @@ static void parse_annotations(const ConstantPool* const cp,
&& s_tag_val == *(abase + tag_off)
&& member == vmSymbols::value_name()) {
group_index = Bytes::get_Java_u2((address)abase + s_con_off);
- if (cp->symbol_at(group_index)->utf8_length() == 0) {
- group_index = 0; // default contended group
+ // No guarantee the group_index is valid so check it refers to a
+ // symbol in the CP.
+ if (cp->is_within_bounds(group_index) &&
+ cp->tag_at(group_index).is_utf8()) {
+ // Seems valid, so check for empty string and reset
+ if (cp->symbol_at(group_index)->utf8_length() == 0) {
+ group_index = 0; // default contended group
+ }
+ } else {
+ // Not valid so use the default
+ group_index = 0;
}
}
coll->set_contended_group(group_index);
+ continue; // proceed to next annotation
}
}
}
diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp
index e410824e3001c..9a68e2640443f 100644
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@@ -81,6 +81,9 @@
#include "utilities/ostream.hpp"
#include "utilities/utf8.hpp"
+#include <ctype.h>
+#include <stdlib.h>
+
// Entry point in java.dll for path canonicalization
typedef int (*canonicalize_fn_t)(const char *orig, char *out, int len);
@@ -579,6 +582,8 @@ void ClassLoader::setup_module_search_path(JavaThread* current, const char* path
new_entry = create_class_path_entry(current, path, &st,
false /*is_boot_append */, false /* from_class_path_attr */);
if (new_entry != nullptr) {
+ // ClassLoaderExt::process_module_table() filters out non-jar entries before calling this function.
+ assert(new_entry->is_jar_file(), "module path entry %s is not a jar file", new_entry->name());
add_to_module_path_entries(path, new_entry);
}
}
@@ -834,7 +839,8 @@ bool ClassLoader::add_to_app_classpath_entries(JavaThread* current,
ClassPathEntry* e = _app_classpath_entries;
if (check_for_duplicates) {
while (e != nullptr) {
- if (strcmp(e->name(), entry->name()) == 0) {
+ if (strcmp(e->name(), entry->name()) == 0 &&
+ e->from_class_path_attr() == entry->from_class_path_attr()) {
// entry already exists
return false;
}
@@ -1208,7 +1214,7 @@ InstanceKlass* ClassLoader::load_class(Symbol* name, PackageEntry* pkg_entry, bo
}
#if INCLUDE_CDS
-char* ClassLoader::skip_uri_protocol(char* source) {
+static const char* skip_uri_protocol(const char* source) {
if (strncmp(source, "file:", 5) == 0) {
// file: protocol path could start with file:/ or file:///
// locate the char after all the forward slashes
@@ -1227,6 +1233,47 @@ char* ClassLoader::skip_uri_protocol(char* source) {
return source;
}
+static char decode_percent_encoded(const char *str, size_t& index) {
+ if (str[index] == '%'
+ && isxdigit(str[index + 1])
+ && isxdigit(str[index + 2])) {
+ char hex[3];
+ hex[0] = str[index + 1];
+ hex[1] = str[index + 2];
+ hex[2] = '\0';
+ index += 2;
+ return (char) strtol(hex, NULL, 16);
+ }
+ return str[index];
+}
+
+char* ClassLoader::uri_to_path(const char* uri) {
+ const size_t len = strlen(uri) + 1;
+ char* path = NEW_RESOURCE_ARRAY(char, len);
+
+ uri = skip_uri_protocol(uri);
+
+ if (strncmp(uri, "//", 2) == 0) {
+ // Skip the empty "authority" part
+ uri += 2;
+ }
+
+#ifdef _WINDOWS
+ if (uri[0] == '/') {
+ // Absolute path name on Windows does not begin with a slash
+ uri += 1;
+ }
+#endif
+
+ size_t path_index = 0;
+ for (size_t i = 0; i < strlen(uri); ++i) {
+ char decoded = decode_percent_encoded(uri, i);
+ path[path_index++] = decoded;
+ }
+ path[path_index] = '\0';
+ return path;
+}
+
// Record the shared classpath index and loader type for classes loaded
// by the builtin loaders at dump time.
void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik,
@@ -1260,7 +1307,7 @@ void ClassLoader::record_result(JavaThread* current, InstanceKlass* ik,
// Save the path from the file: protocol or the module name from the jrt: protocol
// if no protocol prefix is found, path is the same as stream->source(). This path
// must be valid since the class has been successfully parsed.
- char* path = skip_uri_protocol(src);
+ const char* path = ClassLoader::uri_to_path(src);
assert(path != nullptr, "sanity");
for (int i = 0; i < FileMapInfo::get_number_of_shared_paths(); i++) {
SharedClassPathEntry* ent = FileMapInfo::shared_path(i);
diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp
index af625082ddabf..e44059b724769 100644
--- a/src/hotspot/share/classfile/classLoader.hpp
+++ b/src/hotspot/share/classfile/classLoader.hpp
@@ -382,7 +382,7 @@ class ClassLoader: AllStatic {
// entries during shared classpath setup time.
static int num_module_path_entries();
static void exit_with_path_failure(const char* error, const char* message);
- static char* skip_uri_protocol(char* source);
+ static char* uri_to_path(const char* uri);
static void record_result(JavaThread* current, InstanceKlass* ik,
const ClassFileStream* stream, bool redefined);
#endif
diff --git a/src/hotspot/share/classfile/classLoaderExt.cpp b/src/hotspot/share/classfile/classLoaderExt.cpp
index 3cd7dd7cd3ba6..16981669deb3a 100644
--- a/src/hotspot/share/classfile/classLoaderExt.cpp
+++ b/src/hotspot/share/classfile/classLoaderExt.cpp
@@ -55,6 +55,7 @@
jshort ClassLoaderExt::_app_class_paths_start_index = ClassLoaderExt::max_classpath_index;
jshort ClassLoaderExt::_app_module_paths_start_index = ClassLoaderExt::max_classpath_index;
jshort ClassLoaderExt::_max_used_path_index = 0;
+int ClassLoaderExt::_num_module_paths = 0;
bool ClassLoaderExt::_has_app_classes = false;
bool ClassLoaderExt::_has_platform_classes = false;
bool ClassLoaderExt::_has_non_jar_in_classpath = false;
@@ -89,23 +90,25 @@ void ClassLoaderExt::setup_app_search_path(JavaThread* current) {
os::free(app_class_path);
}
+int ClassLoaderExt::compare_module_path_by_name(const char** p1, const char** p2) {
+ return strcmp(*p1, *p2);
+}
+
void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable* met) {
ResourceMark rm(current);
- GrowableArray<char*>* module_paths = new GrowableArray<char*>(5);
+ GrowableArray<const char*>* module_paths = new GrowableArray<const char*>(5);
class ModulePathsGatherer : public ModuleClosure {
JavaThread* _current;
- GrowableArray<char*>* _module_paths;
+ GrowableArray<const char*>* _module_paths;
public:
- ModulePathsGatherer(JavaThread* current, GrowableArray<char*>* module_paths) :
+ ModulePathsGatherer(JavaThread* current, GrowableArray<const char*>* module_paths) :
_current(current), _module_paths(module_paths) {}
void do_module(ModuleEntry* m) {
- char* path = m->location()->as_C_string();
- if (strncmp(path, "file:", 5) == 0) {
- path = ClassLoader::skip_uri_protocol(path);
- char* path_copy = NEW_RESOURCE_ARRAY(char, strlen(path) + 1);
- strcpy(path_copy, path);
- _module_paths->append(path_copy);
+ char* uri = m->location()->as_C_string();
+ if (strncmp(uri, "file:", 5) == 0) {
+ char* path = ClassLoader::uri_to_path(uri);
+ extract_jar_files_from_path(path, _module_paths);
}
}
};
@@ -116,6 +119,10 @@ void ClassLoaderExt::process_module_table(JavaThread* current, ModuleEntryTable*
met->modules_do(&gatherer);
}
+ // Sort the module paths before storing into CDS archive for simpler
+ // checking at runtime.
+ module_paths->sort(compare_module_path_by_name);
+
for (int i = 0; i < module_paths->length(); i++) {
ClassLoader::setup_module_search_path(current, module_paths->at(i));
}
@@ -131,6 +138,38 @@ void ClassLoaderExt::setup_module_paths(JavaThread* current) {
process_module_table(current, met);
}
+bool ClassLoaderExt::has_jar_suffix(const char* filename) {
+ // In jdk.internal.module.ModulePath.readModule(), it checks for the ".jar" suffix.
+ // Performing the same check here.
+ const char* dot = strrchr(filename, '.');
+ if (dot != nullptr && strcmp(dot + 1, "jar") == 0) {
+ return true;
+ }
+ return false;
+}
+
+void ClassLoaderExt::extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths) {
+ DIR* dirp = os::opendir(path);
+ if (dirp == nullptr && errno == ENOTDIR && has_jar_suffix(path)) {
+ module_paths->append(path);
+ } else {
+ if (dirp != nullptr) {
+ struct dirent* dentry;
+ while ((dentry = os::readdir(dirp)) != nullptr) {
+ const char* file_name = dentry->d_name;
+ if (has_jar_suffix(file_name)) {
+ size_t full_name_len = strlen(path) + strlen(file_name) + strlen(os::file_separator()) + 1;
+ char* full_name = NEW_RESOURCE_ARRAY(char, full_name_len);
+ int n = os::snprintf(full_name, full_name_len, "%s%s%s", path, os::file_separator(), file_name);
+ assert((size_t)n == full_name_len - 1, "Unexpected number of characters in string");
+ module_paths->append(full_name);
+ }
+ }
+ os::closedir(dirp);
+ }
+ }
+}
+
char* ClassLoaderExt::read_manifest(JavaThread* current, ClassPathEntry* entry,
jint *manifest_size, bool clean_text) {
const char* name = "META-INF/MANIFEST.MF";
diff --git a/src/hotspot/share/classfile/classLoaderExt.hpp b/src/hotspot/share/classfile/classLoaderExt.hpp
index b76ce3ff33a32..c3c0b00d55e43 100644
--- a/src/hotspot/share/classfile/classLoaderExt.hpp
+++ b/src/hotspot/share/classfile/classLoaderExt.hpp
@@ -53,12 +53,15 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static jshort _app_module_paths_start_index;
// the largest path index being used during CDS dump time
static jshort _max_used_path_index;
+ // number of module paths
+ static int _num_module_paths;
static bool _has_app_classes;
static bool _has_platform_classes;
static bool _has_non_jar_in_classpath;
static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size, bool clean_text);
+ static bool has_jar_suffix(const char* filename);
public:
static void process_jar_manifest(JavaThread* current, ClassPathEntry* entry);
@@ -68,6 +71,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static void setup_search_paths(JavaThread* current);
static void setup_module_paths(JavaThread* current);
+ static void extract_jar_files_from_path(const char* path, GrowableArray<const char*>* module_paths);
+ static int compare_module_path_by_name(const char** p1, const char** p2);
static char* read_manifest(JavaThread* current, ClassPathEntry* entry, jint *manifest_size) {
// Remove all the new-line continuations (which wrap long lines at 72 characters, see
@@ -87,6 +92,8 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
static jshort max_used_path_index() { return _max_used_path_index; }
+ static int num_module_paths() { return _num_module_paths; }
+
static void set_max_used_path_index(jshort used_index) {
_max_used_path_index = used_index;
}
@@ -99,6 +106,10 @@ class ClassLoaderExt: public ClassLoader { // AllStatic
_app_module_paths_start_index = module_start;
}
+ static void init_num_module_paths(int num_module_paths) {
+ _num_module_paths = num_module_paths;
+ }
+
static bool is_boot_classpath(int classpath_index) {
return classpath_index < _app_class_paths_start_index;
}
diff --git a/src/hotspot/share/classfile/fieldLayoutBuilder.hpp b/src/hotspot/share/classfile/fieldLayoutBuilder.hpp
index cda64788acffa..9b0d80b2a5583 100644
--- a/src/hotspot/share/classfile/fieldLayoutBuilder.hpp
+++ b/src/hotspot/share/classfile/fieldLayoutBuilder.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -101,17 +101,13 @@ class LayoutRawBlock : public ResourceObj {
// sort fields in decreasing order.
// Note: with line types, the comparison should include alignment constraint if sizes are equals
static int compare_size_inverted(LayoutRawBlock** x, LayoutRawBlock** y) {
-#ifdef _WINDOWS
- // qsort() on Windows reverse the order of fields with the same size
- // the extension of the comparison function below preserves this order
int diff = (*y)->size() - (*x)->size();
+ // qsort() may reverse the order of fields with the same size.
+ // Extending the comparison with field_index ensures a stable sort.
if (diff == 0) {
diff = (*x)->field_index() - (*y)->field_index();
}
return diff;
-#else
- return (*y)->size() - (*x)->size();
-#endif // _WINDOWS
}
};
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index b6ef682ae0965..0ad36cd21dbf3 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -3052,9 +3052,10 @@ void java_lang_ClassFrameInfo::serialize_offsets(SerializeClosure* f) {
static int get_flags(const methodHandle& m) {
int flags = (jushort)( m->access_flags().as_short() & JVM_RECOGNIZED_METHOD_MODIFIERS );
- if (m->is_initializer()) {
+ if (m->is_object_initializer()) {
flags |= java_lang_invoke_MemberName::MN_IS_CONSTRUCTOR;
} else {
+ // Note: Static initializers can be here. Record them as plain methods.
flags |= java_lang_invoke_MemberName::MN_IS_METHOD;
}
if (m->caller_sensitive()) {
diff --git a/src/hotspot/share/classfile/systemDictionary.cpp b/src/hotspot/share/classfile/systemDictionary.cpp
index b9a559cf9779f..7b307a0b8a37c 100644
--- a/src/hotspot/share/classfile/systemDictionary.cpp
+++ b/src/hotspot/share/classfile/systemDictionary.cpp
@@ -1069,7 +1069,7 @@ bool SystemDictionary::check_shared_class_super_type(InstanceKlass* klass, Insta
}
Klass *found = resolve_with_circularity_detection(klass->name(), super_type->name(),
- class_loader, protection_domain, is_superclass, CHECK_0);
+ class_loader, protection_domain, is_superclass, CHECK_false);
if (found == super_type) {
return true;
} else {
@@ -1088,16 +1088,21 @@ bool SystemDictionary::check_shared_class_super_types(InstanceKlass* ik, Handle
// If unexpected superclass or interfaces are found, we cannot
// load from the shared archive.
- if (ik->super() != nullptr &&
- !check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()),
- class_loader, protection_domain, true, THREAD)) {
- return false;
+ if (ik->super() != nullptr) {
+ bool check_super = check_shared_class_super_type(ik, InstanceKlass::cast(ik->super()),
+ class_loader, protection_domain, true,
+ CHECK_false);
+ if (!check_super) {
+ return false;
+ }
}
Array<InstanceKlass*>* interfaces = ik->local_interfaces();
int num_interfaces = interfaces->length();
for (int index = 0; index < num_interfaces; index++) {
- if (!check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false, THREAD)) {
+ bool check_interface = check_shared_class_super_type(ik, interfaces->at(index), class_loader, protection_domain, false,
+ CHECK_false);
+ if (!check_interface) {
return false;
}
}
@@ -1149,10 +1154,13 @@ InstanceKlass* SystemDictionary::load_shared_class(InstanceKlass* ik,
Symbol* class_name = ik->name();
if (!is_shared_class_visible(class_name, ik, pkg_entry, class_loader)) {
+ ik->set_shared_loading_failed();
return nullptr;
}
- if (!check_shared_class_super_types(ik, class_loader, protection_domain, THREAD)) {
+ bool check = check_shared_class_super_types(ik, class_loader, protection_domain, CHECK_NULL);
+ if (!check) {
+ ik->set_shared_loading_failed();
return nullptr;
}
diff --git a/src/hotspot/share/classfile/systemDictionary.hpp b/src/hotspot/share/classfile/systemDictionary.hpp
index ee50aa38dd0cf..04980291716c7 100644
--- a/src/hotspot/share/classfile/systemDictionary.hpp
+++ b/src/hotspot/share/classfile/systemDictionary.hpp
@@ -293,13 +293,6 @@ class SystemDictionary : AllStatic {
const char* message);
static const char* find_nest_host_error(const constantPoolHandle& pool, int which);
-protected:
- static InstanceKlass* _well_known_klasses[];
-
-private:
- // table of box klasses (int_klass, etc.)
- static InstanceKlass* _box_klasses[T_VOID+1];
-
static OopHandle _java_system_loader;
static OopHandle _java_platform_loader;
diff --git a/src/hotspot/share/classfile/verifier.cpp b/src/hotspot/share/classfile/verifier.cpp
index a66fbf645f55f..375570cf19691 100644
--- a/src/hotspot/share/classfile/verifier.cpp
+++ b/src/hotspot/share/classfile/verifier.cpp
@@ -32,6 +32,7 @@
#include "classfile/stackMapTableFormat.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
+#include "classfile/systemDictionaryShared.hpp"
#include "classfile/verifier.hpp"
#include "classfile/vmClasses.hpp"
#include "classfile/vmSymbols.hpp"
@@ -212,6 +213,11 @@ bool Verifier::verify(InstanceKlass* klass, bool should_verify_class, TRAPS) {
exception_name == vmSymbols::java_lang_ClassFormatError())) {
log_info(verification)("Fail over class verification to old verifier for: %s", klass->external_name());
log_info(class, init)("Fail over class verification to old verifier for: %s", klass->external_name());
+ // Exclude any classes that fail over during dynamic dumping
+ if (CDSConfig::is_dumping_dynamic_archive()) {
+ SystemDictionaryShared::warn_excluded(klass, "Failed over class verification while dynamic dumping");
+ SystemDictionaryShared::set_excluded(klass);
+ }
message_buffer = NEW_RESOURCE_ARRAY(char, message_buffer_len);
exception_message = message_buffer;
exception_name = inference_verify(
diff --git a/src/hotspot/share/classfile/vmClasses.cpp b/src/hotspot/share/classfile/vmClasses.cpp
index 0b9b437c67b78..b62d699dfe20e 100644
--- a/src/hotspot/share/classfile/vmClasses.cpp
+++ b/src/hotspot/share/classfile/vmClasses.cpp
@@ -45,14 +45,6 @@ InstanceKlass* vmClasses::_klasses[static_cast<int>(vmClassID::LIMIT)]
= { nullptr /*, nullptr...*/ };
InstanceKlass* vmClasses::_box_klasses[T_VOID+1] = { nullptr /*, nullptr...*/ };
-
-// CDS: scan and relocate all classes referenced by _klasses[].
-void vmClasses::metaspace_pointers_do(MetaspaceClosure* it) {
- for (auto id : EnumRange<vmClassID>{}) {
- it->push(klass_addr_at(id));
- }
-}
-
bool vmClasses::is_loaded(InstanceKlass* klass) {
return klass != nullptr && klass->is_loaded();
}
@@ -205,8 +197,6 @@ void vmClasses::resolve_all(TRAPS) {
_box_klasses[T_SHORT] = vmClasses::Short_klass();
_box_klasses[T_INT] = vmClasses::Integer_klass();
_box_klasses[T_LONG] = vmClasses::Long_klass();
- //_box_klasses[T_OBJECT] = vmClasses::object_klass();
- //_box_klasses[T_ARRAY] = vmClasses::object_klass();
#ifdef ASSERT
if (CDSConfig::is_using_archive()) {
diff --git a/src/hotspot/share/classfile/vmClasses.hpp b/src/hotspot/share/classfile/vmClasses.hpp
index f2b8c5666eeb1..4fa078c50cd80 100644
--- a/src/hotspot/share/classfile/vmClasses.hpp
+++ b/src/hotspot/share/classfile/vmClasses.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,6 @@
class ClassLoaderData;
class InstanceKlass;
-class MetaspaceClosure;
class vmClasses : AllStatic {
friend class VMStructs;
@@ -95,7 +94,6 @@ class vmClasses : AllStatic {
return &_klasses[as_int(id)];
}
- static void metaspace_pointers_do(MetaspaceClosure* it);
static void resolve_all(TRAPS);
static BasicType box_klass_type(Klass* k); // inverse of box_klass
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index b470eb9b8380d..5e352e42efbc1 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -90,6 +90,7 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
@@ -141,6 +142,7 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
@@ -288,6 +290,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dtanh:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dpow:
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 4b772c171d5a6..9bb8b2179ae01 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -135,7 +135,7 @@ class methodHandle;
do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \
do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \
do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \
- do_name(round_name, "round") \
+ do_name(round_name, "round") do_name(tanh_name,"tanh") \
\
do_name(addExact_name,"addExact") \
do_name(decrementExact_name,"decrementExact") \
@@ -161,6 +161,7 @@ class methodHandle;
do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \
do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \
do_intrinsic(_datan2, java_lang_Math, atan2_name, double2_double_signature, F_S) \
+ do_intrinsic(_dtanh, java_lang_Math, tanh_name, double_double_signature, F_S) \
do_intrinsic(_dsqrt, java_lang_Math, sqrt_name, double_double_signature, F_S) \
do_intrinsic(_dlog, java_lang_Math, log_name, double_double_signature, F_S) \
do_intrinsic(_dlog10, java_lang_Math, log10_name, double_double_signature, F_S) \
@@ -468,6 +469,8 @@ class methodHandle;
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
do_intrinsic(_Reference_refersTo0, java_lang_ref_Reference, refersTo0_name, object_boolean_signature, F_RN) \
do_intrinsic(_PhantomReference_refersTo0, java_lang_ref_PhantomReference, refersTo0_name, object_boolean_signature, F_RN) \
+ do_intrinsic(_Reference_clear0, java_lang_ref_Reference, clear0_name, void_method_signature, F_RN) \
+ do_intrinsic(_PhantomReference_clear0, java_lang_ref_PhantomReference, clear0_name, void_method_signature, F_RN) \
\
/* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
@@ -1007,6 +1010,15 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
+ do_intrinsic(_VectorWrapShuffleIndexes, jdk_internal_vm_vector_VectorSupport, vector_wrap_shuffle_indexes_name, \
+ vector_wrap_shuffle_indexes_sig, F_S) \
+ do_signature(vector_wrap_shuffle_indexes_sig, "(Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
+ "ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
+ do_name(vector_wrap_shuffle_indexes_name, "wrapShuffleIndexes") \
+ \
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
@@ -1128,6 +1140,18 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_rearrange_name, "rearrangeOp") \
\
+ do_intrinsic(_VectorSelectFrom, jdk_internal_vm_vector_VectorSupport, vector_select_from_name, vector_select_from_sig, F_S) \
+ do_signature(vector_select_from_sig, "(Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "Ljava/lang/Class;" \
+ "I" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
+ "Ljdk/internal/vm/vector/VectorSupport$VectorSelectFromOp;)" \
+ "Ljdk/internal/vm/vector/VectorSupport$Vector;") \
+ do_name(vector_select_from_name, "selectFromOp") \
+ \
do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
do_signature(vector_extract_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
index a65ab86fa0a8d..3de6d81f10607 100644
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -426,6 +426,7 @@ class SerializeClosure;
template(cs_name, "cs") \
template(get_name, "get") \
template(refersTo0_name, "refersTo0") \
+ template(clear0_name, "clear0") \
template(put_name, "put") \
template(type_name, "type") \
template(findNative_name, "findNative") \
diff --git a/src/hotspot/share/code/codeBlob.cpp b/src/hotspot/share/code/codeBlob.cpp
index 81c4d001078cb..23f621ffec832 100644
--- a/src/hotspot/share/code/codeBlob.cpp
+++ b/src/hotspot/share/code/codeBlob.cpp
@@ -41,7 +41,7 @@
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaFrameAnchor.hpp"
-#include "runtime/jniHandles.hpp"
+#include "runtime/jniHandles.inline.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -623,7 +623,7 @@ UpcallStub* UpcallStub::create(const char* name, CodeBuffer* cb, jobject receive
// Track memory usage statistic after releasing CodeCache_lock
MemoryService::track_code_cache_memory_usage();
- trace_new_stub(blob, "UpcallStub");
+ trace_new_stub(blob, "UpcallStub - ", name);
return blob;
}
@@ -772,6 +772,10 @@ void UpcallStub::verify() {
void UpcallStub::print_on(outputStream* st) const {
RuntimeBlob::print_on(st);
print_value_on(st);
+ st->print_cr("Frame data offset: %d", (int) _frame_data_offset);
+ oop recv = JNIHandles::resolve(_receiver);
+ st->print("Receiver MH=");
+ recv->print_on(st);
Disassembler::decode((RuntimeBlob*)this, st);
}
diff --git a/src/hotspot/share/code/compiledIC.cpp b/src/hotspot/share/code/compiledIC.cpp
index 079c8199b1870..684aee509ee53 100644
--- a/src/hotspot/share/code/compiledIC.cpp
+++ b/src/hotspot/share/code/compiledIC.cpp
@@ -83,6 +83,7 @@ void CompiledICData::initialize(CallInfo* call_info, Klass* receiver_klass) {
_speculated_klass = (uintptr_t)receiver_klass;
}
if (call_info->call_kind() == CallInfo::itable_call) {
+ assert(call_info->resolved_method() != nullptr, "virtual or interface method must be found");
_itable_defc_klass = call_info->resolved_method()->method_holder();
_itable_refc_klass = call_info->resolved_klass();
}
@@ -238,6 +239,7 @@ void CompiledIC::set_to_megamorphic(CallInfo* call_info) {
return;
}
#ifdef ASSERT
+ assert(call_info->resolved_method() != nullptr, "virtual or interface method must be found");
int index = call_info->resolved_method()->itable_index();
assert(index == itable_index, "CallInfo pre-computes this");
InstanceKlass* k = call_info->resolved_method()->method_holder();
@@ -254,6 +256,7 @@ void CompiledIC::set_to_megamorphic(CallInfo* call_info) {
}
}
+ assert(call_info->selected_method() != nullptr, "virtual or interface method must be found");
log_trace(inlinecache)("IC@" INTPTR_FORMAT ": to megamorphic %s entry: " INTPTR_FORMAT,
p2i(_call->instruction_address()), call_info->selected_method()->print_value_string(), p2i(entry));
@@ -290,7 +293,7 @@ bool CompiledIC::is_monomorphic() const {
}
bool CompiledIC::is_megamorphic() const {
- return VtableStubs::entry_point(destination()) != nullptr;;
+ return VtableStubs::entry_point(destination()) != nullptr;
}
bool CompiledIC::is_speculated_klass(Klass* receiver_klass) {
diff --git a/src/hotspot/share/code/dependencyContext.cpp b/src/hotspot/share/code/dependencyContext.cpp
index d7ce8e92acf37..0e6b99d172dcb 100644
--- a/src/hotspot/share/code/dependencyContext.cpp
+++ b/src/hotspot/share/code/dependencyContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -227,6 +227,10 @@ void DependencyContext::remove_and_mark_for_deoptimization_all_dependents(Deopti
}
#ifndef PRODUCT
+bool DependencyContext::is_empty() {
+ return dependencies() == nullptr;
+}
+
void DependencyContext::print_dependent_nmethods(bool verbose) {
int idx = 0;
for (nmethodBucket* b = dependencies_not_unloading(); b != nullptr; b = b->next_not_unloading()) {
diff --git a/src/hotspot/share/code/dependencyContext.hpp b/src/hotspot/share/code/dependencyContext.hpp
index e8d2ac41d0d1d..13b845cb59dde 100644
--- a/src/hotspot/share/code/dependencyContext.hpp
+++ b/src/hotspot/share/code/dependencyContext.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -124,6 +124,7 @@ class DependencyContext : public StackObj {
#ifndef PRODUCT
void print_dependent_nmethods(bool verbose);
+ bool is_empty();
#endif //PRODUCT
bool is_dependent_nmethod(nmethod* nm);
};
diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp
index ee0c73254f180..7b091d8ade50c 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.cpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.cpp
@@ -497,11 +497,6 @@ bool CompilerConfig::check_args_consistency(bool status) {
"Invalid NonNMethodCodeHeapSize=%dK. Must be at least %uK.\n", NonNMethodCodeHeapSize/K,
min_code_cache_size/K);
status = false;
- } else if (InlineCacheBufferSize > NonNMethodCodeHeapSize / 2) {
- jio_fprintf(defaultStream::error_stream(),
- "Invalid InlineCacheBufferSize=" SIZE_FORMAT "K. Must be less than or equal to " SIZE_FORMAT "K.\n",
- InlineCacheBufferSize/K, NonNMethodCodeHeapSize/2/K);
- status = false;
}
#ifdef _LP64
diff --git a/src/hotspot/share/compiler/oopMap.inline.hpp b/src/hotspot/share/compiler/oopMap.inline.hpp
index f2a3b3ba834df..05ef53f823142 100644
--- a/src/hotspot/share/compiler/oopMap.inline.hpp
+++ b/src/hotspot/share/compiler/oopMap.inline.hpp
@@ -66,12 +66,10 @@ void OopMapDo<OopFnT, DerivedOopFnT, ValueFilterT>::iterate_oops_do(const frame
continue;
#ifndef COMPILER2
- COMPILER1_PRESENT(ShouldNotReachHere();)
#if INCLUDE_JVMCI
- if (UseJVMCICompiler) {
- ShouldNotReachHere();
- }
+ if (!EnableJVMCI)
#endif
+ ShouldNotReachHere();
#endif // !COMPILER2
address loc = fr->oopmapreg_to_location(omv.reg(), reg_map);
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
index 13b993546cde4..edbf0e902392b 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
@@ -24,49 +24,32 @@
#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
+#include "code/vmreg.inline.hpp"
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "opto/arraycopynode.hpp"
+#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
+#include "opto/machnode.hpp"
#include "opto/macro.hpp"
+#include "opto/memnode.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
+#include "opto/runtime.hpp"
#include "opto/type.hpp"
+#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
-const TypeFunc *G1BarrierSetC2::write_ref_field_pre_entry_Type() {
- const Type **fields = TypeTuple::fields(2);
- fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
- fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
-
- return TypeFunc::make(domain, range);
-}
-
-const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() {
- const Type **fields = TypeTuple::fields(2);
- fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
- fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
- const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
-
- // create result type (range)
- fields = TypeTuple::fields(0);
- const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
-
- return TypeFunc::make(domain, range);
-}
-
-#define __ ideal.
/*
* Determine if the G1 pre-barrier can be removed. The pre-barrier is
* required by SATB to make sure all objects live at the start of the
@@ -84,8 +67,6 @@ const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() {
* The compiler needs to determine that the object in which a field is about
* to be written is newly allocated, and that no prior store to the same field
* has happened since the allocation.
- *
- * Returns true if the pre-barrier can be removed
*/
bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
PhaseValues* phase,
@@ -97,34 +78,28 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
AllocateNode* alloc = AllocateNode::Ideal_allocation(base);
if (offset == Type::OffsetBot) {
- return false; // cannot unalias unless there are precise offsets
+ return false; // Cannot unalias unless there are precise offsets.
}
-
if (alloc == nullptr) {
- return false; // No allocation found
+ return false; // No allocation found.
}
intptr_t size_in_bytes = type2aelembytes(bt);
-
- Node* mem = kit->memory(adr_idx); // start searching here...
+ Node* mem = kit->memory(adr_idx); // Start searching here.
for (int cnt = 0; cnt < 50; cnt++) {
-
if (mem->is_Store()) {
-
Node* st_adr = mem->in(MemNode::Address);
intptr_t st_offset = 0;
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
if (st_base == nullptr) {
- break; // inscrutable pointer
+ break; // Inscrutable pointer.
}
-
- // Break we have found a store with same base and offset as ours so break
if (st_base == base && st_offset == offset) {
+ // We have found a store with same base and offset as ours.
break;
}
-
if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
if (st_offset >= offset + size_in_bytes ||
@@ -136,20 +111,18 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
// in the same sequence of RawMem effects. We sometimes initialize
// a whole 'tile' of array elements with a single jint or jlong.)
mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
+ continue; // Advance through independent store memory.
}
}
-
if (st_base != base
&& MemNode::detect_ptr_independence(base, alloc, st_base,
AllocateNode::Ideal_allocation(st_base),
phase)) {
- // Success: The bases are provably independent.
+ // Success: the bases are provably independent.
mem = mem->in(MemNode::Memory);
- continue; // advance through independent store memory
+ continue; // Advance through independent store memory.
}
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
-
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
@@ -157,7 +130,7 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
// The alloc variable is guaranteed to not be null here from earlier check.
if (alloc == st_alloc) {
// Check that the initialization is storing null so that no previous store
- // has been moved up and directly write a reference
+ // has been moved up and directly write a reference.
Node* captured_store = st_init->find_captured_store(offset,
type2aelembytes(T_OBJECT),
phase);
@@ -166,164 +139,55 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
}
}
}
-
// Unless there is an explicit 'continue', we must bail out here,
// because 'mem' is an inscrutable memory state (e.g., a call).
break;
}
-
return false;
}
-// G1 pre/post barriers
-void G1BarrierSetC2::pre_barrier(GraphKit* kit,
- bool do_load,
- Node* ctl,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const {
- // Some sanity checks
- // Note: val is unused in this routine.
-
- if (do_load) {
- // We need to generate the load of the previous value
- assert(obj != nullptr, "must have a base");
- assert(adr != nullptr, "where are loading from?");
- assert(pre_val == nullptr, "loaded already?");
- assert(val_type != nullptr, "need a type");
-
- if (use_ReduceInitialCardMarks()
- && g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) {
- return;
- }
-
- } else {
- // In this case both val_type and alias_idx are unused.
- assert(pre_val != nullptr, "must be loaded already");
- // Nothing to be done if pre_val is null.
- if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
- assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
- }
- assert(bt == T_OBJECT, "or we shouldn't be here");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- Node* no_base = __ top();
- Node* zero = __ ConI(0);
- Node* zeroX = __ ConX(0);
-
- float likely = PROB_LIKELY(0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
-
- // Offsets into the thread
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
-
- // Now the actual pointers into the thread
- Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
- Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
- Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
-
- // Now some of the values
- Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-
- // if (!marking)
- __ if_then(marking, BoolTest::ne, zero, unlikely); {
- BasicType index_bt = TypeX_X->basic_type();
- assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
-
- if (do_load) {
- // load original value
- pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx, false, MemNode::unordered, LoadNode::Pinned);
- }
-
- // if (pre_val != nullptr)
- __ if_then(pre_val, BoolTest::ne, kit->null()); {
- Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // is the queue for this thread full?
- __ if_then(index, BoolTest::ne, zeroX, likely); {
-
- // decrement the index
- Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
-
- // Now get the buffer location we will log the previous value into and store it
- Node *log_addr = __ AddP(no_base, buffer, next_index);
- __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
- // update the index
- __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
-
- } __ else_(); {
-
- // logging buffer is full, call the runtime
- const TypeFunc *tf = write_ref_field_pre_entry_Type();
- __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), "write_ref_field_pre_entry", pre_val, tls);
- } __ end_if(); // (!index)
- } __ end_if(); // (pre_val != nullptr)
- } __ end_if(); // (!marking)
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
-}
-
/*
- * G1 similar to any GC with a Young Generation requires a way to keep track of
- * references from Old Generation to Young Generation to make sure all live
+ * G1, similar to any GC with a Young Generation, requires a way to keep track
+ * of references from Old Generation to Young Generation to make sure all live
* objects are found. G1 also requires to keep track of object references
* between different regions to enable evacuation of old regions, which is done
- * as part of mixed collections. References are tracked in remembered sets and
- * is continuously updated as reference are written to with the help of the
- * post-barrier.
+ * as part of mixed collections. References are tracked in remembered sets,
+ * which are continuously updated as references are written to with the help of
+ * the post-barrier.
*
- * To reduce the number of updates to the remembered set the post-barrier
- * filters updates to fields in objects located in the Young Generation,
- * the same region as the reference, when the null is being written or
- * if the card is already marked as dirty by an earlier write.
+ * To reduce the number of updates to the remembered set, the post-barrier
+ * filters out updates to fields in objects located in the Young Generation, the
+ * same region as the reference, when null is being written, or if the card is
+ * already marked as dirty by an earlier write.
*
* Under certain circumstances it is possible to avoid generating the
- * post-barrier completely if it is possible during compile time to prove
- * the object is newly allocated and that no safepoint exists between the
- * allocation and the store.
- *
- * In the case of slow allocation the allocation code must handle the barrier
- * as part of the allocation in the case the allocated object is not located
- * in the nursery; this would happen for humongous objects.
+ * post-barrier completely, if it is possible during compile time to prove the
+ * object is newly allocated and that no safepoint exists between the allocation
+ * and the store. This can be seen as a compile-time version of the
+ * above-mentioned Young Generation filter.
*
- * Returns true if the post barrier can be removed
+ * In the case of a slow allocation, the allocation code must handle the barrier
+ * as part of the allocation if the allocated object is not located in the
+ * nursery; this would happen for humongous objects.
*/
bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
- PhaseValues* phase, Node* store,
+ PhaseValues* phase, Node* store_ctrl,
Node* adr) const {
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base);
if (offset == Type::OffsetBot) {
- return false; // cannot unalias unless there are precise offsets
+ return false; // Cannot unalias unless there are precise offsets.
}
-
if (alloc == nullptr) {
- return false; // No allocation found
+ return false; // No allocation found.
}
- // Start search from Store node
- Node* mem = store->in(MemNode::Control);
+ Node* mem = store_ctrl; // Start search from Store node.
if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
-
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
-
// Make sure we are looking at the same allocation
if (alloc == st_alloc) {
return true;
@@ -333,725 +197,367 @@ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
return false;
}
-//
-// Update the card table and add card address to the queue
-//
-void G1BarrierSetC2::g1_mark_card(GraphKit* kit,
- IdealKit& ideal,
- Node* card_adr,
- Node* oop_store,
- uint oop_alias_idx,
- Node* index,
- Node* index_adr,
- Node* buffer,
- const TypeFunc* tf) const {
- Node* zero = __ ConI(0);
- Node* zeroX = __ ConX(0);
- Node* no_base = __ top();
- BasicType card_bt = T_BYTE;
- // Smash zero into card. MUST BE ORDERED WRT TO STORE
- __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
-
- // Now do the queue work
- __ if_then(index, BoolTest::ne, zeroX); {
-
- Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
- Node* log_addr = __ AddP(no_base, buffer, next_index);
-
- // Order, see storeCM.
- __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
- __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
-
- } __ else_(); {
- __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), "write_ref_field_post_entry", card_adr, __ thread());
- } __ end_if();
-
+Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
+ DecoratorSet decorators = access.decorators();
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+ bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
+ // If we are reading the value of the referent field of a Reference object, we
+ // need to record the referent in an SATB log buffer using the pre-barrier
+ // mechanism. Also we need to add a memory barrier to prevent commoning reads
+ // from this field across safepoints, since GC can change its value.
+ bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive);
+ if (access.is_oop() && need_read_barrier) {
+ access.set_barrier_data(G1C2BarrierPre);
+ }
+ return CardTableBarrierSetC2::load_at_resolved(access, val_type);
}
-void G1BarrierSetC2::post_barrier(GraphKit* kit,
- Node* ctl,
- Node* oop_store,
- Node* obj,
- Node* adr,
- uint alias_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const {
- // If we are writing a null then we need no post barrier
+void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
+ eliminate_gc_barrier_data(node);
+}
- if (val != nullptr && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
- // Must be null
- const Type* t = val->bottom_type();
- assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be null");
- // No post barrier if writing null
- return;
+void G1BarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
+ if (node->is_LoadStore()) {
+ LoadStoreNode* loadstore = node->as_LoadStore();
+ loadstore->set_barrier_data(0);
+ } else if (node->is_Mem()) {
+ MemNode* mem = node->as_Mem();
+ mem->set_barrier_data(0);
}
+}
- if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) {
- // We can skip marks on a freshly-allocated object in Eden.
- // Keep this code in sync with CardTableBarrierSet::on_slowpath_allocation_exit.
- // That routine informs GC to take appropriate compensating steps,
- // upon a slow-path allocation, so as to make this card-mark
- // elision safe.
+static void refine_barrier_by_new_val_type(const Node* n) {
+ if (n->Opcode() != Op_StoreP &&
+ n->Opcode() != Op_StoreN) {
return;
}
-
- if (use_ReduceInitialCardMarks()
- && g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) {
+ MemNode* store = n->as_Mem();
+ const Node* newval = n->in(MemNode::ValueIn);
+ assert(newval != nullptr, "");
+ const Type* newval_bottom = newval->bottom_type();
+ TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr();
+ uint8_t barrier_data = store->barrier_data();
+ if (!newval_bottom->isa_oopptr() &&
+ !newval_bottom->isa_narrowoop() &&
+ newval_type != TypePtr::Null) {
+ // newval is neither an OOP nor null, so there is no barrier to refine.
+ assert(barrier_data == 0, "non-OOP stores should have no barrier data");
return;
}
-
- if (!use_precise) {
- // All card marks for a (non-array) instance are in one place:
- adr = obj;
+ if (barrier_data == 0) {
+ // No barrier to refine.
+ return;
}
- // (Else it's an array (or unknown), and we want more precise card marks.)
- assert(adr != nullptr, "");
-
- IdealKit ideal(kit, true);
-
- Node* tls = __ thread(); // ThreadLocalStorage
-
- Node* no_base = __ top();
- float likely = PROB_LIKELY_MAG(3);
- float unlikely = PROB_UNLIKELY_MAG(3);
- Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
- Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val());
- Node* zeroX = __ ConX(0);
-
- const TypeFunc *tf = write_ref_field_post_entry_Type();
-
- // Offsets into the thread
- const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
- const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
-
- // Pointers into the thread
-
- Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
- Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
-
- // Now some values
- // Use ctrl to avoid hoisting these values past a safepoint, which could
- // potentially reset these fields in the JavaThread.
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
- Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
- // Convert the store obj pointer to an int prior to doing math on it
- // Must use ctrl to prevent "integerized oop" existing across safepoint
- Node* cast = __ CastPX(__ ctrl(), adr);
-
- // Divide pointer by card size
- Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) );
-
- // Combine card table base and card offset
- Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );
-
- // If we know the value being stored does it cross regions?
-
- if (val != nullptr) {
- // Does the store cause us to cross regions?
-
- // Should be able to do an unsigned compare of region_size instead of
- // and extra shift. Do we have an unsigned compare??
- // Node* region_size = __ ConI(1 << G1HeapRegion::LogOfHRGrainBytes);
- Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)));
-
- // if (xor_res == 0) same region so skip
- __ if_then(xor_res, BoolTest::ne, zeroX, likely); {
-
- // No barrier if we are storing a null.
- __ if_then(val, BoolTest::ne, kit->null(), likely); {
-
- // Ok must mark the card if not already dirty
-
- // load the original value of the card
- Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
-
- __ if_then(card_val, BoolTest::ne, young_card, unlikely); {
- kit->sync_kit(ideal);
- kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
- __ sync_kit(kit);
-
- Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
- __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
- g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
- } __ end_if();
- } __ end_if();
- } __ end_if();
- } __ end_if();
- } else {
- // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
- // We don't need a barrier here if the destination is a newly allocated object
- // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
- // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
- assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
- Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
- __ if_then(card_val, BoolTest::ne, young_card); {
- g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
- } __ end_if();
+ if (newval_type == TypePtr::Null) {
+ // Simply elide post-barrier if writing null.
+ barrier_data &= ~G1C2BarrierPost;
+ barrier_data &= ~G1C2BarrierPostNotNull;
+ } else if (((barrier_data & G1C2BarrierPost) != 0) &&
+ newval_type == TypePtr::NotNull) {
+ // If the post-barrier has not been elided yet (e.g. due to newval being
+ // freshly allocated), mark it as not-null (simplifies barrier tests and
+ // compressed OOPs logic).
+ barrier_data |= G1C2BarrierPostNotNull;
}
-
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
+ store->set_barrier_data(barrier_data);
+ return;
}
-// Helper that guards and inserts a pre-barrier.
-void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
- Node* pre_val, bool need_mem_bar) const {
- // We could be accessing the referent field of a reference object. If so, when G1
- // is enabled, we need to log the value in the referent field in an SATB buffer.
- // This routine performs some compile time filters and generates suitable
- // runtime filters that guard the pre-barrier code.
- // Also add memory barrier for non volatile load from the referent field
- // to prevent commoning of loads across safepoint.
-
- // Some compile time checks.
-
- // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
- const TypeX* otype = offset->find_intptr_t_type();
- if (otype != nullptr && otype->is_con() &&
- otype->get_con() != java_lang_ref_Reference::referent_offset()) {
- // Constant offset but not the reference_offset so just return
- return;
- }
-
- // We only need to generate the runtime guards for instances.
- const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
- if (btype != nullptr) {
- if (btype->isa_aryptr()) {
- // Array type so nothing to do
- return;
+// Refine (not really expand) G1 barriers by looking at the new value type
+// (whether it is necessarily null or necessarily non-null).
+bool G1BarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
+ ResourceMark rm;
+ VectorSet visited;
+ Node_List worklist;
+ worklist.push(C->root());
+ while (worklist.size() > 0) {
+ Node* n = worklist.pop();
+ if (visited.test_set(n->_idx)) {
+ continue;
}
-
- const TypeInstPtr* itype = btype->isa_instptr();
- if (itype != nullptr) {
- // Can the klass of base_oop be statically determined to be
- // _not_ a sub-class of Reference and _not_ Object?
- ciKlass* klass = itype->instance_klass();
- if (klass->is_loaded() &&
- !klass->is_subtype_of(kit->env()->Reference_klass()) &&
- !kit->env()->Object_klass()->is_subtype_of(klass)) {
- return;
+ refine_barrier_by_new_val_type(n);
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != nullptr) {
+ worklist.push(in);
}
}
}
+ return false;
+}
- // The compile time filters did not reject base_oop/offset so
- // we need to generate the following runtime filters
- //
- // if (offset == java_lang_ref_Reference::_reference_offset) {
- // if (instance_of(base, java.lang.ref.Reference)) {
- // pre_barrier(_, pre_val, ...);
+uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const {
+ // These Ideal node counts are extracted from the pre-matching Ideal graph
+ // generated when compiling the following method with early barrier expansion:
+ // static void write(MyObject obj1, Object o) {
+ // obj1.o1 = o;
// }
- // }
-
- float likely = PROB_LIKELY( 0.999);
- float unlikely = PROB_UNLIKELY(0.999);
-
- IdealKit ideal(kit);
-
- Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset());
-
- __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
- // Update graphKit memory and control from IdealKit.
- kit->sync_kit(ideal);
-
- Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
- Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
-
- // Update IdealKit memory and control from graphKit.
- __ sync_kit(kit);
-
- Node* one = __ ConI(1);
- // is_instof == 0 if base_oop == nullptr
- __ if_then(is_instof, BoolTest::eq, one, unlikely); {
-
- // Update graphKit from IdeakKit.
- kit->sync_kit(ideal);
-
- // Use the pre-barrier to record the value in the referent field
- pre_barrier(kit, false /* do_load */,
- __ ctrl(),
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- pre_val /* pre_val */,
- T_OBJECT);
- if (need_mem_bar) {
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- }
- // Update IdealKit from graphKit.
- __ sync_kit(kit);
-
- } __ end_if(); // _ref_type != ref_none
- } __ end_if(); // offset == referent_offset
+ uint8_t barrier_data = MemNode::barrier_data(node);
+ uint nodes = 0;
+ if ((barrier_data & G1C2BarrierPre) != 0) {
+ nodes += 50;
+ }
+ if ((barrier_data & G1C2BarrierPost) != 0) {
+ nodes += 60;
+ }
+ return nodes;
+}
- // Final sync IdealKit and GraphKit.
- kit->final_sync(ideal);
+bool G1BarrierSetC2::can_initialize_object(const StoreNode* store) const {
+ assert(store->Opcode() == Op_StoreP || store->Opcode() == Op_StoreN, "OOP store expected");
+ // It is OK to move the store across the object initialization boundary only
+ // if it does not have any barrier, or if it has barriers that can be safely
+ // elided (because of the compensation steps taken on the allocation slow path
+ // when ReduceInitialCardMarks is enabled).
+ return (MemNode::barrier_data(store) == 0) || use_ReduceInitialCardMarks();
}
-#undef __
+void G1BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
+ if (ac->is_clone_inst() && !use_ReduceInitialCardMarks()) {
+ clone_in_runtime(phase, ac, G1BarrierSetRuntime::clone_addr(), "G1BarrierSetRuntime::clone");
+ return;
+ }
+ BarrierSetC2::clone_at_expansion(phase, ac);
+}
-Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
+Node* G1BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
DecoratorSet decorators = access.decorators();
- Node* adr = access.addr().node();
- Node* obj = access.base();
-
- bool anonymous = (decorators & C2_UNSAFE_ACCESS) != 0;
- bool mismatched = (decorators & C2_MISMATCHED) != 0;
- bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+ bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
bool in_heap = (decorators & IN_HEAP) != 0;
- bool in_native = (decorators & IN_NATIVE) != 0;
- bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
- bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
- bool is_unordered = (decorators & MO_UNORDERED) != 0;
+ bool tightly_coupled_alloc = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0;
+ bool need_store_barrier = !(tightly_coupled_alloc && use_ReduceInitialCardMarks()) && (in_heap || anonymous);
bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
- bool is_mixed = !in_heap && !in_native;
- bool need_cpu_mem_bar = !is_unordered || mismatched || is_mixed;
-
- Node* top = Compile::current()->top();
- Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : top;
-
- // If we are reading the value of the referent field of a Reference
- // object (either by using Unsafe directly or through reflection)
- // then, if G1 is enabled, we need to record the referent in an
- // SATB log buffer using the pre-barrier mechanism.
- // Also we need to add memory barrier to prevent commoning reads
- // from this field across safepoint since GC can change its value.
- bool need_read_barrier = (((on_weak || on_phantom) && !no_keepalive) ||
- (in_heap && unknown && offset != top && obj != top));
+ if (access.is_oop() && need_store_barrier) {
+ access.set_barrier_data(get_store_barrier(access));
+ if (tightly_coupled_alloc) {
+ assert(!use_ReduceInitialCardMarks(),
+ "post-barriers are only needed for tightly-coupled initialization stores when ReduceInitialCardMarks is disabled");
+ // Pre-barriers are unnecessary for tightly-coupled initialization stores.
+ access.set_barrier_data(access.barrier_data() & ~G1C2BarrierPre);
+ }
+ }
+ if (no_keepalive) {
+ // No keep-alive means no need for the pre-barrier.
+ access.set_barrier_data(access.barrier_data() & ~G1C2BarrierPre);
+ }
+ return BarrierSetC2::store_at_resolved(access, val);
+}
- if (!access.is_oop() || !need_read_barrier) {
- return CardTableBarrierSetC2::load_at_resolved(access, val_type);
+Node* G1BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
+}
- assert(access.is_parse_access(), "entry not supported at optimization time");
+Node* G1BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
+ }
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
+}
- C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
- GraphKit* kit = parse_access.kit();
- Node* load;
+Node* G1BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
+ GraphKit* kit = access.kit();
+ if (!access.is_oop()) {
+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
+ }
+ access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost);
+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
+}
- Node* control = kit->control();
- const TypePtr* adr_type = access.addr().type();
- MemNode::MemOrd mo = access.mem_node_mo();
- bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
- bool unaligned = (decorators & C2_UNALIGNED) != 0;
- bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
- // Pinned control dependency is the strictest. So it's ok to substitute it for any other.
- load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
- LoadNode::Pinned, requires_atomic_access, unaligned, mismatched, unsafe,
- access.barrier_data());
+class G1BarrierSetC2State : public BarrierSetC2State {
+private:
+ GrowableArray<G1BarrierStubC2*>* _stubs;
+public:
+ G1BarrierSetC2State(Arena* arena)
+ : BarrierSetC2State(arena),
+ _stubs(new (arena) GrowableArray<G1BarrierStubC2*>(arena, 8, 0, nullptr)) {}
- if (on_weak || on_phantom) {
- // Use the pre-barrier to record the value in the referent field
- pre_barrier(kit, false /* do_load */,
- kit->control(),
- nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */,
- load /* pre_val */, T_OBJECT);
- // Add memory barrier to prevent commoning reads from this field
- // across safepoint since GC can change its value.
- kit->insert_mem_bar(Op_MemBarCPUOrder);
- } else if (unknown) {
- // We do not require a mem bar inside pre_barrier if need_mem_bar
- // is set: the barriers would be emitted by us.
- insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar);
+ GrowableArray<G1BarrierStubC2*>* stubs() {
+ return _stubs;
}
- return load;
-}
-
-bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {
- if (CardTableBarrierSetC2::is_gc_barrier_node(node)) {
- return true;
+ bool needs_liveness_data(const MachNode* mach) const {
+ return G1PreBarrierStubC2::needs_barrier(mach) ||
+ G1PostBarrierStubC2::needs_barrier(mach);
}
- if (node->Opcode() != Op_CallLeaf) {
- return false;
- }
- CallLeafNode *call = node->as_CallLeaf();
- if (call->_name == nullptr) {
+
+ bool needs_livein_data() const {
return false;
}
+};
- return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0;
+static G1BarrierSetC2State* barrier_set_state() {
+ return reinterpret_cast<G1BarrierSetC2State*>(Compile::current()->barrier_set_state());
}
-bool G1BarrierSetC2::is_g1_pre_val_load(Node* n) {
- if (n->is_Load() && n->as_Load()->has_pinned_control_dependency()) {
- // Make sure the only users of it are: CmpP, StoreP, and a call to write_ref_field_pre_entry
+G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}
- // Skip possible decode
- if (n->outcnt() == 1 && n->unique_out()->is_DecodeN()) {
- n = n->unique_out();
- }
+G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
- if (n->outcnt() == 3) {
- int found = 0;
- for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
- Node* use = iter.get();
- if (use->is_Cmp() || use->is_Store()) {
- ++found;
- } else if (use->is_CallLeaf()) {
- CallLeafNode* call = use->as_CallLeaf();
- if (strcmp(call->_name, "write_ref_field_pre_entry") == 0) {
- ++found;
- }
- }
- }
- if (found == 3) {
- return true;
- }
- }
+bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPre) != 0;
+}
+
+G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) {
+ G1PreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PreBarrierStubC2(node);
+ if (!Compile::current()->output()->in_scratch_emit_size()) {
+ barrier_set_state()->stubs()->append(stub);
}
- return false;
+ return stub;
}
-bool G1BarrierSetC2::is_gc_pre_barrier_node(Node *node) const {
- return is_g1_pre_val_load(node);
+void G1PreBarrierStubC2::initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1, Register tmp2) {
+ _obj = obj;
+ _pre_val = pre_val;
+ _thread = thread;
+ _tmp1 = tmp1;
+ _tmp2 = tmp2;
}
-void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
- if (is_g1_pre_val_load(node)) {
- macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type()));
- } else {
- assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
- assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
- // It could be only one user, URShift node, in Object.clone() intrinsic
- // but the new allocation is passed to arraycopy stub and it could not
- // be scalar replaced. So we don't check the case.
+Register G1PreBarrierStubC2::obj() const {
+ return _obj;
+}
- // An other case of only one user (Xor) is when the value check for null
- // in G1 post barrier is folded after CCP so the code which used URShift
- // is removed.
+Register G1PreBarrierStubC2::pre_val() const {
+ return _pre_val;
+}
- // Take Region node before eliminating post barrier since it also
- // eliminates CastP2X node when it has only one user.
- Node* this_region = node->in(0);
- assert(this_region != nullptr, "");
+Register G1PreBarrierStubC2::thread() const {
+ return _thread;
+}
- // Remove G1 post barrier.
+Register G1PreBarrierStubC2::tmp1() const {
+ return _tmp1;
+}
+
+Register G1PreBarrierStubC2::tmp2() const {
+ return _tmp2;
+}
- // Search for CastP2X->Xor->URShift->Cmp path which
- // checks if the store done to a different from the value's region.
- // And replace Cmp with #0 (false) to collapse G1 post barrier.
- Node* xorx = node->find_out_with(Op_XorX);
- if (xorx != nullptr) {
- Node* shift = xorx->unique_out();
- Node* cmpx = shift->unique_out();
- assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
- cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
- "missing region check in G1 post barrier");
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
+void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) {
+ G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ bs->generate_c2_pre_barrier_stub(&masm, this);
+}
- // Remove G1 pre barrier.
+G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
- // Search "if (marking != 0)" check and set it to "false".
- // There is no G1 pre barrier if previous stored value is null
- // (for example, after initialization).
- if (this_region->is_Region() && this_region->req() == 3) {
- int ind = 1;
- if (!this_region->in(ind)->is_IfFalse()) {
- ind = 2;
- }
- if (this_region->in(ind)->is_IfFalse() &&
- this_region->in(ind)->in(0)->Opcode() == Op_If) {
- Node* bol = this_region->in(ind)->in(0)->in(1);
- assert(bol->is_Bool(), "");
- cmpx = bol->in(1);
- if (bol->as_Bool()->_test._test == BoolTest::ne &&
- cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
- cmpx->in(1)->is_Load()) {
- Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
- if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
- adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
- adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
- }
- }
- }
- }
- } else {
- assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
- // This is a G1 post barrier emitted by the Object.clone() intrinsic.
- // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
- // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
- Node* shift = node->find_out_with(Op_URShiftX);
- assert(shift != nullptr, "missing G1 post barrier");
- Node* addp = shift->unique_out();
- Node* load = addp->find_out_with(Op_LoadB);
- assert(load != nullptr, "missing G1 post barrier");
- Node* cmpx = load->unique_out();
- assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
- cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
- "missing card value check in G1 post barrier");
- macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
- // There is no G1 pre barrier in this case
- }
- // Now CastP2X can be removed since it is used only on dead path
- // which currently still alive until igvn optimize it.
- assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, "");
- macro->replace_node(node, macro->top());
- }
+bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) {
+ return (node->barrier_data() & G1C2BarrierPost) != 0;
}
-Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const {
- if (!use_ReduceInitialCardMarks() &&
- c != nullptr && c->is_Region() && c->req() == 3) {
- for (uint i = 1; i < c->req(); i++) {
- if (c->in(i) != nullptr && c->in(i)->is_Region() &&
- c->in(i)->req() == 3) {
- Node* r = c->in(i);
- for (uint j = 1; j < r->req(); j++) {
- if (r->in(j) != nullptr && r->in(j)->is_Proj() &&
- r->in(j)->in(0) != nullptr &&
- r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
- r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)) {
- Node* call = r->in(j)->in(0);
- c = c->in(i == 1 ? 2 : 1);
- if (c != nullptr && c->Opcode() != Op_Parm) {
- c = c->in(0);
- if (c != nullptr) {
- c = c->in(0);
- assert(call->in(0) == nullptr ||
- call->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0)->in(0) == nullptr ||
- call->in(0)->in(0)->in(0)->in(0)->in(0) == nullptr ||
- c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
- return c;
- }
- }
- }
- }
- }
- }
+G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) {
+ G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node);
+ if (!Compile::current()->output()->in_scratch_emit_size()) {
+ barrier_set_state()->stubs()->append(stub);
}
- return c;
+ return stub;
}
-#ifdef ASSERT
-bool G1BarrierSetC2::has_cas_in_use_chain(Node *n) const {
- Unique_Node_List visited;
- Node_List worklist;
- worklist.push(n);
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) {
+ _thread = thread;
+ _tmp1 = tmp1;
+ _tmp2 = tmp2;
+ _tmp3 = tmp3;
+}
- if (x->is_LoadStore()) {
- int op = x->Opcode();
- if (op == Op_CompareAndExchangeP || op == Op_CompareAndExchangeN ||
- op == Op_CompareAndSwapP || op == Op_CompareAndSwapN ||
- op == Op_WeakCompareAndSwapP || op == Op_WeakCompareAndSwapN) {
- return true;
- }
- }
- if (!x->is_CFG()) {
- for (SimpleDUIterator iter(x); iter.has_next(); iter.next()) {
- Node* use = iter.get();
- worklist.push(use);
- }
- }
- }
- return false;
+Register G1PostBarrierStubC2::thread() const {
+ return _thread;
}
-void G1BarrierSetC2::verify_pre_load(Node* marking_if, Unique_Node_List& loads /*output*/) const {
- assert(loads.size() == 0, "Loads list should be empty");
- Node* pre_val_if = marking_if->find_out_with(Op_IfTrue)->find_out_with(Op_If);
- if (pre_val_if != nullptr) {
- Unique_Node_List visited;
- Node_List worklist;
- Node* pre_val = pre_val_if->in(1)->in(1)->in(1);
+Register G1PostBarrierStubC2::tmp1() const {
+ return _tmp1;
+}
- worklist.push(pre_val);
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+Register G1PostBarrierStubC2::tmp2() const {
+ return _tmp2;
+}
- if (has_cas_in_use_chain(x)) {
- loads.clear();
- return;
- }
+Register G1PostBarrierStubC2::tmp3() const {
+ return _tmp3;
+}
- if (x->is_Con()) {
- continue;
- }
- if (x->is_EncodeP() || x->is_DecodeN()) {
- worklist.push(x->in(1));
- continue;
- }
- if (x->is_Load() || x->is_LoadStore()) {
- assert(x->in(0) != nullptr, "Pre-val load has to have a control");
- loads.push(x);
- continue;
- }
- if (x->is_Phi()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- continue;
- }
- assert(false, "Pre-val anomaly");
- }
- }
+void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) {
+ G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+ bs->generate_c2_post_barrier_stub(&masm, this);
}
-void G1BarrierSetC2::verify_no_safepoints(Compile* compile, Node* marking_check_if, const Unique_Node_List& loads) const {
- if (loads.size() == 0) {
- return;
- }
+void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const {
+ return new (comp_arena) G1BarrierSetC2State(comp_arena);
+}
- if (loads.size() == 1) { // Handle the typical situation when there a single pre-value load
- // that is dominated by the marking_check_if, that's true when the
- // barrier itself does the pre-val load.
- Node *pre_val = loads.at(0);
- if (pre_val->in(0)->in(0) == marking_check_if) { // IfTrue->If
- return;
- }
+int G1BarrierSetC2::get_store_barrier(C2Access& access) const {
+ if (!access.is_parse_access()) {
+ // Only support for eliding barriers at parse time for now.
+ return G1C2BarrierPre | G1C2BarrierPost;
}
-
- // All other cases are when pre-value loads dominate the marking check.
- Unique_Node_List controls;
- for (uint i = 0; i < loads.size(); i++) {
- Node *c = loads.at(i)->in(0);
- controls.push(c);
+ GraphKit* kit = (static_cast<C2ParseAccess&>(access)).kit();
+ Node* ctl = kit->control();
+ Node* adr = access.addr().node();
+ uint adr_idx = kit->C->get_alias_index(access.addr().type());
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
+
+ bool can_remove_pre_barrier = g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, access.type(), adr_idx);
+
+ // We can skip marks on a freshly-allocated object in Eden. Keep this code in
+ // sync with CardTableBarrierSet::on_slowpath_allocation_exit. That routine
+ // informs GC to take appropriate compensating steps, upon a slow-path
+ // allocation, so as to make this card-mark elision safe.
+ // The post-barrier can also be removed if null is written. This case is
+ // handled by G1BarrierSetC2::expand_barriers, which runs at the end of C2's
+ // platform-independent optimizations to exploit stronger type information.
+ bool can_remove_post_barrier = use_ReduceInitialCardMarks() &&
+ ((access.base() == kit->just_allocated_object(ctl)) ||
+ g1_can_remove_post_barrier(kit, &kit->gvn(), ctl, adr));
+
+ int barriers = 0;
+ if (!can_remove_pre_barrier) {
+ barriers |= G1C2BarrierPre;
+ }
+ if (!can_remove_post_barrier) {
+ barriers |= G1C2BarrierPost;
}
- Unique_Node_List visited;
- Unique_Node_List safepoints;
- Node_List worklist;
- uint found = 0;
+ return barriers;
+}
- worklist.push(marking_check_if);
- while (worklist.size() > 0 && found < controls.size()) {
- Node* x = worklist.pop();
- if (x == nullptr || x == compile->top()) continue;
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
+void G1BarrierSetC2::late_barrier_analysis() const {
+ compute_liveness_at_stubs();
+}
- if (controls.member(x)) {
- found++;
- }
- if (x->is_Region()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- } else {
- if (!x->is_SafePoint()) {
- worklist.push(x->in(0));
- } else {
- safepoints.push(x);
- }
+void G1BarrierSetC2::emit_stubs(CodeBuffer& cb) const {
+ MacroAssembler masm(&cb);
+ GrowableArray<G1BarrierStubC2*>* const stubs = barrier_set_state()->stubs();
+ for (int i = 0; i < stubs->length(); i++) {
+ // Make sure there is enough space in the code buffer
+ if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
}
+ stubs->at(i)->emit_code(masm);
}
- assert(found == controls.size(), "Pre-barrier structure anomaly or possible safepoint");
+ masm.flush();
}
-void G1BarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
- if (phase != BarrierSetC2::BeforeCodeGen) {
- return;
+#ifndef PRODUCT
+void G1BarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
+ if ((mach->barrier_data() & G1C2BarrierPre) != 0) {
+ st->print("pre ");
}
- // Verify G1 pre-barriers
- const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
-
- Unique_Node_List visited;
- Node_List worklist;
- // We're going to walk control flow backwards starting from the Root
- worklist.push(compile->root());
- while (worklist.size() > 0) {
- Node* x = worklist.pop();
- if (x == nullptr || x == compile->top()) continue;
- if (visited.member(x)) {
- continue;
- } else {
- visited.push(x);
- }
-
- if (x->is_Region()) {
- for (uint i = 1; i < x->req(); i++) {
- worklist.push(x->in(i));
- }
- } else {
- worklist.push(x->in(0));
- // We are looking for the pattern:
- // /->ThreadLocal
- // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
- // \->ConI(0)
- // We want to verify that the If and the LoadB have the same control
- // See GraphKit::g1_write_barrier_pre()
- if (x->is_If()) {
- IfNode *iff = x->as_If();
- if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
- CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
- if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
- && cmp->in(1)->is_Load()) {
- LoadNode* load = cmp->in(1)->as_Load();
- if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
- && load->in(2)->in(3)->is_Con()
- && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {
-
- Node* if_ctrl = iff->in(0);
- Node* load_ctrl = load->in(0);
-
- if (if_ctrl != load_ctrl) {
- // Skip possible CProj->NeverBranch in infinite loops
- if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
- && if_ctrl->in(0)->is_NeverBranch()) {
- if_ctrl = if_ctrl->in(0)->in(0);
- }
- }
- assert(load_ctrl != nullptr && if_ctrl == load_ctrl, "controls must match");
-
- Unique_Node_List loads;
- verify_pre_load(iff, loads);
- verify_no_safepoints(compile, iff, loads);
- }
- }
- }
- }
- }
+ if ((mach->barrier_data() & G1C2BarrierPost) != 0) {
+ st->print("post ");
}
-}
-#endif
-
-bool G1BarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const {
- if (opcode == Op_StoreP) {
- Node* adr = n->in(MemNode::Address);
- const Type* adr_type = gvn->type(adr);
- // Pointer stores in G1 barriers looks like unsafe access.
- // Ignore such stores to be able scalar replace non-escaping
- // allocations.
- if (adr_type->isa_rawptr() && adr->is_AddP()) {
- Node* base = conn_graph->get_addp_base(adr);
- if (base->Opcode() == Op_LoadP &&
- base->in(MemNode::Address)->is_AddP()) {
- adr = base->in(MemNode::Address);
- Node* tls = conn_graph->get_addp_base(adr);
- if (tls->Opcode() == Op_ThreadLocal) {
- int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
- const int buf_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
- if (offs == buf_offset) {
- return true; // G1 pre barrier previous oop value store.
- }
- if (offs == in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())) {
- return true; // G1 post barrier card address store.
- }
- }
- }
- }
+ if ((mach->barrier_data() & G1C2BarrierPostNotNull) != 0) {
+ st->print("notnull ");
}
- return false;
}
+#endif // !PRODUCT
diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
index c445a87d2e46d..dc333d8c33174 100644
--- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
+++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp
@@ -31,29 +31,62 @@ class PhaseTransform;
class Type;
class TypeFunc;
-class G1BarrierSetC2: public CardTableBarrierSetC2 {
+const int G1C2BarrierPre = 1;
+const int G1C2BarrierPost = 2;
+const int G1C2BarrierPostNotNull = 4;
+
+class G1BarrierStubC2 : public BarrierStubC2 {
+public:
+ G1BarrierStubC2(const MachNode* node);
+ virtual void emit_code(MacroAssembler& masm) = 0;
+};
+
+class G1PreBarrierStubC2 : public G1BarrierStubC2 {
+private:
+ Register _obj;
+ Register _pre_val;
+ Register _thread;
+ Register _tmp1;
+ Register _tmp2;
+
+protected:
+ G1PreBarrierStubC2(const MachNode* node);
+
+public:
+ static bool needs_barrier(const MachNode* node);
+ static G1PreBarrierStubC2* create(const MachNode* node);
+ void initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1 = noreg, Register tmp2 = noreg);
+ Register obj() const;
+ Register pre_val() const;
+ Register thread() const;
+ Register tmp1() const;
+ Register tmp2() const;
+ virtual void emit_code(MacroAssembler& masm);
+};
+
+class G1PostBarrierStubC2 : public G1BarrierStubC2 {
+private:
+ Register _thread;
+ Register _tmp1;
+ Register _tmp2;
+ Register _tmp3;
+
protected:
- virtual void pre_barrier(GraphKit* kit,
- bool do_load,
- Node* ctl,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- const TypeOopPtr* val_type,
- Node* pre_val,
- BasicType bt) const;
-
- virtual void post_barrier(GraphKit* kit,
- Node* ctl,
- Node* store,
- Node* obj,
- Node* adr,
- uint adr_idx,
- Node* val,
- BasicType bt,
- bool use_precise) const;
+ G1PostBarrierStubC2(const MachNode* node);
+public:
+ static bool needs_barrier(const MachNode* node);
+ static G1PostBarrierStubC2* create(const MachNode* node);
+ void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg);
+ Register thread() const;
+ Register tmp1() const;
+ Register tmp2() const;
+ Register tmp3() const;
+ virtual void emit_code(MacroAssembler& masm);
+};
+
+class G1BarrierSetC2: public CardTableBarrierSetC2 {
+protected:
bool g1_can_remove_pre_barrier(GraphKit* kit,
PhaseValues* phase,
Node* adr,
@@ -64,44 +97,31 @@ class G1BarrierSetC2: public CardTableBarrierSetC2 {
PhaseValues* phase, Node* store,
Node* adr) const;
- void g1_mark_card(GraphKit* kit,
- IdealKit& ideal,
- Node* card_adr,
- Node* oop_store,
- uint oop_alias_idx,
- Node* index,
- Node* index_adr,
- Node* buffer,
- const TypeFunc* tf) const;
-
- // Helper for unsafe accesses, that may or may not be on the referent field.
- // Generates the guards that check whether the result of
- // Unsafe.getReference should be recorded in an SATB log buffer.
- void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const;
-
- static const TypeFunc* write_ref_field_pre_entry_Type();
- static const TypeFunc* write_ref_field_post_entry_Type();
+ int get_store_barrier(C2Access& access) const;
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
+ virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;
+ virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const;
+ virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
+ Node* new_val, const Type* value_type) const;
+ virtual Node* atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const;
-#ifdef ASSERT
- bool has_cas_in_use_chain(Node* x) const;
- void verify_pre_load(Node* marking_check_if, Unique_Node_List& loads /*output*/) const;
- void verify_no_safepoints(Compile* compile, Node* marking_load, const Unique_Node_List& loads) const;
-#endif
-
- static bool is_g1_pre_val_load(Node* n);
public:
- virtual bool is_gc_pre_barrier_node(Node* node) const;
- virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
- virtual Node* step_over_gc_barrier(Node* c) const;
-
-#ifdef ASSERT
- virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
+ virtual void eliminate_gc_barrier_data(Node* node) const;
+ virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
+ virtual uint estimated_barrier_size(const Node* node) const;
+ virtual bool can_initialize_object(const StoreNode* store) const;
+ virtual void clone_at_expansion(PhaseMacroExpand* phase,
+ ArrayCopyNode* ac) const;
+ virtual void* create_barrier_state(Arena* comp_arena) const;
+ virtual void emit_stubs(CodeBuffer& cb) const;
+ virtual void late_barrier_analysis() const;
+
+#ifndef PRODUCT
+ virtual void dump_barrier_data(const MachNode* mach, outputStream* st) const;
#endif
-
- virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const;
};
#endif // SHARE_GC_G1_C2_G1BARRIERSETC2_HPP
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
index a0fce437807f4..2e247f46c93d8 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp
@@ -61,3 +61,11 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTa
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr);
JRT_END
+
+JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size))
+ HeapAccess<>::clone(src, dst, size);
+JRT_END
+
+address G1BarrierSetRuntime::clone_addr() {
+ return reinterpret_cast<address>(clone);
+}
diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
index 366679f032ba9..f98e94096e72d 100644
--- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
+++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp
@@ -35,6 +35,8 @@ class oopDesc;
class JavaThread;
class G1BarrierSetRuntime: public AllStatic {
+private:
+ static void clone(oopDesc* src, oopDesc* dst, size_t size);
public:
using CardValue = G1CardTable::CardValue;
@@ -46,6 +48,8 @@ class G1BarrierSetRuntime: public AllStatic {
// C2 slow-path runtime calls.
static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread);
static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread);
+
+ static address clone_addr();
};
#endif // SHARE_GC_G1_G1BARRIERSETRUNTIME_HPP
diff --git a/src/hotspot/share/gc/g1/g1BatchedTask.hpp b/src/hotspot/share/gc/g1/g1BatchedTask.hpp
index aa16f4ddfd48d..020fda634e4b8 100644
--- a/src/hotspot/share/gc/g1/g1BatchedTask.hpp
+++ b/src/hotspot/share/gc/g1/g1BatchedTask.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,7 @@
#include "gc/shared/workerThread.hpp"
#include "memory/allocation.hpp"
-template <typename E, MEMFLAGS F>
+template <typename E, MemTag MT>
class GrowableArrayCHeap;
// G1AbstractSubTask represents a task to be performed either within a
diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.cpp b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
index d315497268f99..ec90fd377503d 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
@@ -26,7 +26,7 @@
#include "gc/g1/g1Analytics.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSet.hpp"
-#include "gc/g1/g1CollectionSetCandidates.hpp"
+#include "gc/g1/g1CollectionSetCandidates.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
@@ -346,20 +346,16 @@ void G1CollectionSet::finalize_old_part(double time_remaining_ms) {
G1CollectionCandidateRegionList pinned_retained_regions;
if (collector_state()->in_mixed_phase()) {
- time_remaining_ms = _policy->select_candidates_from_marking(&candidates()->marking_regions(),
- time_remaining_ms,
- &initial_old_regions,
- &_optional_old_regions,
- &pinned_marking_regions);
+ time_remaining_ms = select_candidates_from_marking(time_remaining_ms,
+ &initial_old_regions,
+ &pinned_marking_regions);
} else {
log_debug(gc, ergo, cset)("Do not add marking candidates to collection set due to pause type.");
}
- _policy->select_candidates_from_retained(&candidates()->retained_regions(),
- time_remaining_ms,
- &initial_old_regions,
- &_optional_old_regions,
- &pinned_retained_regions);
+ select_candidates_from_retained(time_remaining_ms,
+ &initial_old_regions,
+ &pinned_retained_regions);
// Move initially selected old regions to collection set directly.
move_candidates_to_collection_set(&initial_old_regions);
@@ -394,6 +390,215 @@ void G1CollectionSet::move_candidates_to_collection_set(G1CollectionCandidateReg
candidates()->remove(regions);
}
+static void print_finish_message(const char* reason, bool from_marking) {
+ log_debug(gc, ergo, cset)("Finish adding %s candidates to collection set (%s).",
+ from_marking ? "marking" : "retained", reason);
+}
+
+double G1CollectionSet::select_candidates_from_marking(double time_remaining_ms,
+ G1CollectionCandidateRegionList* initial_old_regions,
+ G1CollectionCandidateRegionList* pinned_old_regions) {
+ uint num_expensive_regions = 0;
+
+ uint num_initial_regions_selected = 0;
+ uint num_optional_regions_selected = 0;
+ uint num_pinned_regions = 0;
+
+ double predicted_initial_time_ms = 0.0;
+ double predicted_optional_time_ms = 0.0;
+
+ double optional_threshold_ms = time_remaining_ms * _policy->optional_prediction_fraction();
+
+ const uint min_old_cset_length = _policy->calc_min_old_cset_length(candidates()->last_marking_candidates_length());
+ const uint max_old_cset_length = MAX2(min_old_cset_length, _policy->calc_max_old_cset_length());
+ const uint max_optional_regions = max_old_cset_length - min_old_cset_length;
+ bool check_time_remaining = _policy->use_adaptive_young_list_length();
+
+ G1CollectionCandidateList* marking_list = &candidates()->marking_regions();
+ assert(marking_list != nullptr, "must be");
+
+ log_debug(gc, ergo, cset)("Start adding marking candidates to collection set. "
+ "Min %u regions, max %u regions, available %u regions"
+ "time remaining %1.2fms, optional threshold %1.2fms",
+ min_old_cset_length, max_old_cset_length, marking_list->length(), time_remaining_ms, optional_threshold_ms);
+
+ G1CollectionCandidateListIterator iter = marking_list->begin();
+ for (; iter != marking_list->end(); ++iter) {
+ if (num_initial_regions_selected + num_optional_regions_selected >= max_old_cset_length) {
+ // Added maximum number of old regions to the CSet.
+ print_finish_message("Maximum number of regions reached", true);
+ break;
+ }
+ G1HeapRegion* hr = (*iter)->_r;
+ // Skip evacuating pinned marking regions because we are not getting any free
+ // space from them (and we expect to get free space from marking candidates).
+ // Also prepare to move them to retained regions to be evacuated optionally later
+ // to not impact the mixed phase too much.
+ if (hr->has_pinned_objects()) {
+ num_pinned_regions++;
+ (*iter)->update_num_unreclaimed();
+ log_trace(gc, ergo, cset)("Marking candidate %u can not be reclaimed currently. Skipping.", hr->hrm_index());
+ pinned_old_regions->append(hr);
+ continue;
+ }
+ double predicted_time_ms = _policy->predict_region_total_time_ms(hr, false);
+ time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0);
+ // Add regions to old set until we reach the minimum amount
+ if (initial_old_regions->length() < min_old_cset_length) {
+ initial_old_regions->append(hr);
+ num_initial_regions_selected++;
+ predicted_initial_time_ms += predicted_time_ms;
+ // Record the number of regions added with no time remaining
+ if (time_remaining_ms == 0.0) {
+ num_expensive_regions++;
+ }
+ } else if (!check_time_remaining) {
+ // In the non-auto-tuning case, we'll finish adding regions
+ // to the CSet if we reach the minimum.
+ print_finish_message("Region amount reached min", true);
+ break;
+ } else {
+ // Keep adding regions to old set until we reach the optional threshold
+ if (time_remaining_ms > optional_threshold_ms) {
+ predicted_initial_time_ms += predicted_time_ms;
+ initial_old_regions->append(hr);
+ num_initial_regions_selected++;
+ } else if (time_remaining_ms > 0) {
+ // Keep adding optional regions until time is up.
+ assert(_optional_old_regions.length() < max_optional_regions, "Should not be possible.");
+ predicted_optional_time_ms += predicted_time_ms;
+ _optional_old_regions.append(hr);
+ num_optional_regions_selected++;
+ } else {
+ print_finish_message("Predicted time too high", true);
+ break;
+ }
+ }
+ }
+ if (iter == marking_list->end()) {
+ log_debug(gc, ergo, cset)("Marking candidates exhausted.");
+ }
+
+ if (num_expensive_regions > 0) {
+ log_debug(gc, ergo, cset)("Added %u marking candidates to collection set although the predicted time was too high.",
+ num_expensive_regions);
+ }
+
+ log_debug(gc, ergo, cset)("Finish adding marking candidates to collection set. Initial: %u, optional: %u, pinned: %u, "
+ "predicted initial time: %1.2fms, predicted optional time: %1.2fms, time remaining: %1.2fms",
+ num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions,
+ predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms);
+
+ assert(initial_old_regions->length() == num_initial_regions_selected, "must be");
+ assert(_optional_old_regions.length() == num_optional_regions_selected, "must be");
+ return time_remaining_ms;
+}
+
+void G1CollectionSet::select_candidates_from_retained(double time_remaining_ms,
+ G1CollectionCandidateRegionList* initial_old_regions,
+ G1CollectionCandidateRegionList* pinned_old_regions) {
+ uint num_initial_regions_selected = 0;
+ uint num_optional_regions_selected = 0;
+ uint num_expensive_regions_selected = 0;
+ uint num_pinned_regions = 0;
+
+ double predicted_initial_time_ms = 0.0;
+ double predicted_optional_time_ms = 0.0;
+
+ uint const min_regions = _policy->min_retained_old_cset_length();
+ // We want to make sure that on the one hand we process the retained regions asap,
+ // but on the other hand do not take too many of them as optional regions.
+ // So we split the time budget into budget we will unconditionally take into the
+ // initial old regions, and budget for taking optional regions from the retained
+ // list.
+ double optional_time_remaining_ms = _policy->max_time_for_retaining();
+ time_remaining_ms = MIN2(time_remaining_ms, optional_time_remaining_ms);
+
+ G1CollectionCandidateList* retained_list = &candidates()->retained_regions();
+
+ log_debug(gc, ergo, cset)("Start adding retained candidates to collection set. "
+ "Min %u regions, available %u, "
+ "time remaining %1.2fms, optional remaining %1.2fms",
+ min_regions, retained_list->length(), time_remaining_ms, optional_time_remaining_ms);
+
+ for (G1CollectionSetCandidateInfo* ci : *retained_list) {
+ G1HeapRegion* r = ci->_r;
+ double predicted_time_ms = _policy->predict_region_total_time_ms(r, collector_state()->in_young_only_phase());
+ bool fits_in_remaining_time = predicted_time_ms <= time_remaining_ms;
+ // If we can't reclaim that region ignore it for now.
+ if (r->has_pinned_objects()) {
+ num_pinned_regions++;
+ if (ci->update_num_unreclaimed()) {
+ log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Skipping.", r->hrm_index());
+ } else {
+ log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Dropping.", r->hrm_index());
+ pinned_old_regions->append(r);
+ }
+ continue;
+ }
+
+ if (fits_in_remaining_time || (num_expensive_regions_selected < min_regions)) {
+ predicted_initial_time_ms += predicted_time_ms;
+ if (!fits_in_remaining_time) {
+ num_expensive_regions_selected++;
+ }
+ initial_old_regions->append(r);
+ num_initial_regions_selected++;
+ } else if (predicted_time_ms <= optional_time_remaining_ms) {
+ predicted_optional_time_ms += predicted_time_ms;
+ _optional_old_regions.append(r);
+ num_optional_regions_selected++;
+ } else {
+ // Fits neither initial nor optional time limit. Exit.
+ break;
+ }
+ time_remaining_ms = MAX2(0.0, time_remaining_ms - predicted_time_ms);
+ optional_time_remaining_ms = MAX2(0.0, optional_time_remaining_ms - predicted_time_ms);
+ }
+
+ uint num_regions_selected = num_initial_regions_selected + num_optional_regions_selected;
+ if (num_regions_selected == retained_list->length()) {
+ log_debug(gc, ergo, cset)("Retained candidates exhausted.");
+ }
+ if (num_expensive_regions_selected > 0) {
+ log_debug(gc, ergo, cset)("Added %u retained candidates to collection set although the predicted time was too high.",
+ num_expensive_regions_selected);
+ }
+
+ log_debug(gc, ergo, cset)("Finish adding retained candidates to collection set. Initial: %u, optional: %u, pinned: %u, "
+ "predicted initial time: %1.2fms, predicted optional time: %1.2fms, "
+ "time remaining: %1.2fms optional time remaining %1.2fms",
+ num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions,
+ predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms, optional_time_remaining_ms);
+}
+
+void G1CollectionSet::select_candidates_from_optional_regions(double time_remaining_ms,
+ G1CollectionCandidateRegionList* selected_regions) {
+ assert(optional_region_length() > 0,
+ "Should only be called when there are optional regions");
+
+ double total_prediction_ms = 0.0;
+
+ for (G1HeapRegion* r : _optional_old_regions) {
+ double prediction_ms = _policy->predict_region_total_time_ms(r, false);
+
+ if (prediction_ms > time_remaining_ms) {
+ log_debug(gc, ergo, cset)("Prediction %.3fms for region %u does not fit remaining time: %.3fms.",
+ prediction_ms, r->hrm_index(), time_remaining_ms);
+ break;
+ }
+ // This region will be included in the next optional evacuation.
+
+ total_prediction_ms += prediction_ms;
+ time_remaining_ms -= prediction_ms;
+
+ selected_regions->append(r);
+ }
+
+ log_debug(gc, ergo, cset)("Prepared %u regions out of %u for optional evacuation. Total predicted time: %.3fms",
+ selected_regions->length(), _optional_old_regions.length(), total_prediction_ms);
+}
+
void G1CollectionSet::prepare_optional_regions(G1CollectionCandidateRegionList* regions){
uint cur_index = 0;
for (G1HeapRegion* r : *regions) {
@@ -441,9 +646,8 @@ bool G1CollectionSet::finalize_optional_for_evacuation(double remaining_pause_ti
update_incremental_marker();
G1CollectionCandidateRegionList selected_regions;
- _policy->calculate_optional_collection_set_regions(&_optional_old_regions,
- remaining_pause_time,
- &selected_regions);
+ select_candidates_from_optional_regions(remaining_pause_time,
+ &selected_regions);
move_candidates_to_collection_set(&selected_regions);
diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.hpp b/src/hotspot/share/gc/g1/g1CollectionSet.hpp
index e569d3ee966c3..5280ba7d0fd6c 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp
@@ -196,6 +196,22 @@ class G1CollectionSet {
// and retained collection set candidates.
void finalize_old_part(double time_remaining_ms);
+ // Calculate and fill in the initial, optional and pinned old gen candidate regions from
+ // the given candidate list and the remaining time.
+ // Returns the remaining time.
+ double select_candidates_from_marking(double time_remaining_ms,
+ G1CollectionCandidateRegionList* initial_old_regions,
+ G1CollectionCandidateRegionList* pinned_old_regions);
+
+ void select_candidates_from_retained(double time_remaining_ms,
+ G1CollectionCandidateRegionList* initial_old_regions,
+ G1CollectionCandidateRegionList* pinned_old_regions);
+
+ // Calculate the number of optional regions from the given collection set candidates,
+ // the remaining time and the maximum number of these regions.
+ void select_candidates_from_optional_regions(double time_remaining_ms,
+ G1CollectionCandidateRegionList* selected);
+
// Iterate the part of the collection set given by the offset and length applying the given
// G1HeapRegionClosure. The worker_id will determine where in the part to start the iteration
// to allow for more efficient parallel iteration.
diff --git a/src/hotspot/share/gc/g1/g1HeapRegionPrinter.hpp b/src/hotspot/share/gc/g1/g1HeapRegionPrinter.hpp
index d7b1a6da92c17..577a8552091f6 100644
--- a/src/hotspot/share/gc/g1/g1HeapRegionPrinter.hpp
+++ b/src/hotspot/share/gc/g1/g1HeapRegionPrinter.hpp
@@ -35,8 +35,8 @@ class G1HeapRegionPrinter : public AllStatic {
// Print an action event.
static void print(const char* action, G1HeapRegion* hr) {
- log_trace(gc, region)("G1HR %s(%s) [" PTR_FORMAT ", " PTR_FORMAT ", " PTR_FORMAT "]",
- action, hr->get_type_str(), p2i(hr->bottom()), p2i(hr->top()), p2i(hr->end()));
+ log_trace(gc, region)("G1HR %4u %s(%s) [" PTR_FORMAT ", " PTR_FORMAT ", " PTR_FORMAT "]",
+ hr->hrm_index(), action, hr->get_type_str(), p2i(hr->bottom()), p2i(hr->top()), p2i(hr->end()));
}
public:
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
index 81748d277cff9..b2706d7a9463c 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,22 +29,22 @@
#include "runtime/vmOperations.hpp"
#include "utilities/globalCounter.inline.hpp"
-G1MonotonicArena::Segment::Segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS flag) :
+G1MonotonicArena::Segment::Segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag) :
_slot_size(slot_size),
_num_slots(num_slots),
_next(next),
_next_allocate(0),
- _mem_flag(flag) {
+ _mem_tag(mem_tag) {
_bottom = ((char*) this) + header_size();
}
G1MonotonicArena::Segment* G1MonotonicArena::Segment::create_segment(uint slot_size,
uint num_slots,
Segment* next,
- MEMFLAGS mem_flag) {
+ MemTag mem_tag) {
size_t block_size = size_in_bytes(slot_size, num_slots);
- char* alloc_block = NEW_C_HEAP_ARRAY(char, block_size, mem_flag);
- return new (alloc_block) Segment(slot_size, num_slots, next, mem_flag);
+ char* alloc_block = NEW_C_HEAP_ARRAY(char, block_size, mem_tag);
+ return new (alloc_block) Segment(slot_size, num_slots, next, mem_tag);
}
void G1MonotonicArena::Segment::delete_segment(Segment* segment) {
@@ -54,7 +54,7 @@ void G1MonotonicArena::Segment::delete_segment(Segment* segment) {
GlobalCounter::write_synchronize();
}
segment->~Segment();
- FREE_C_HEAP_ARRAY(_mem_flag, segment);
+ FREE_C_HEAP_ARRAY(_mem_tag, segment);
}
void G1MonotonicArena::SegmentFreeList::bulk_add(Segment& first,
@@ -108,7 +108,7 @@ G1MonotonicArena::Segment* G1MonotonicArena::new_segment(Segment* const prev) {
uint prev_num_slots = (prev != nullptr) ? prev->num_slots() : 0;
uint num_slots = _alloc_options->next_num_slots(prev_num_slots);
- next = Segment::create_segment(slot_size(), num_slots, prev, _alloc_options->mem_flag());
+ next = Segment::create_segment(slot_size(), num_slots, prev, _alloc_options->mem_tag());
} else {
assert(slot_size() == next->slot_size() ,
"Mismatch %d != %d", slot_size(), next->slot_size());
diff --git a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
index bf46e4a33513a..b51f3e37db180 100644
--- a/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
+++ b/src/hotspot/share/gc/g1/g1MonotonicArena.hpp
@@ -27,7 +27,7 @@
#define SHARE_GC_G1_G1MONOTONICARENA_HPP
#include "gc/shared/freeListAllocator.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@@ -120,7 +120,7 @@ class G1MonotonicArena::Segment {
// to _num_slots (can be larger because we atomically increment this value and
// check only afterwards if the allocation has been successful).
uint volatile _next_allocate;
- const MEMFLAGS _mem_flag;
+ const MemTag _mem_tag;
char* _bottom; // Actual data.
// Do not add class member variables beyond this point
@@ -136,7 +136,7 @@ class G1MonotonicArena::Segment {
NONCOPYABLE(Segment);
- Segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS flag);
+ Segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag);
~Segment() = default;
public:
Segment* volatile* next_addr() { return &_next; }
@@ -173,7 +173,7 @@ class G1MonotonicArena::Segment {
return header_size() + payload_size(slot_size, num_slots);
}
- static Segment* create_segment(uint slot_size, uint num_slots, Segment* next, MEMFLAGS mem_flag);
+ static Segment* create_segment(uint slot_size, uint num_slots, Segment* next, MemTag mem_tag);
static void delete_segment(Segment* segment);
// Copies the contents of this segment into the destination.
@@ -222,7 +222,7 @@ class G1MonotonicArena::SegmentFreeList {
class G1MonotonicArena::AllocOptions {
protected:
- const MEMFLAGS _mem_flag;
+ const MemTag _mem_tag;
const uint _slot_size;
const uint _initial_num_slots;
// Defines a limit to the number of slots in the segment
@@ -230,8 +230,8 @@ class G1MonotonicArena::AllocOptions {
const uint _slot_alignment;
public:
- AllocOptions(MEMFLAGS mem_flag, uint slot_size, uint initial_num_slots, uint max_num_slots, uint alignment) :
- _mem_flag(mem_flag),
+ AllocOptions(MemTag mem_tag, uint slot_size, uint initial_num_slots, uint max_num_slots, uint alignment) :
+ _mem_tag(mem_tag),
_slot_size(align_up(slot_size, alignment)),
_initial_num_slots(initial_num_slots),
_max_num_slots(max_num_slots),
@@ -250,7 +250,7 @@ class G1MonotonicArena::AllocOptions {
uint slot_alignment() const { return _slot_alignment; }
- MEMFLAGS mem_flag() const {return _mem_flag; }
+ MemTag mem_tag() const {return _mem_tag; }
};
#endif //SHARE_GC_G1_MONOTONICARENA_HPP
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
index 1cfd6fca08a6f..3f7fefd8a07a6 100644
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
@@ -87,8 +87,6 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
// Indicates whether in the last generation (old) there is no more space
// available for allocation.
bool _old_gen_is_full;
- // Size (in elements) of a partial objArray task chunk.
- size_t _partial_objarray_chunk_size;
PartialArrayStateAllocator* _partial_array_state_allocator;
PartialArrayTaskStepper _partial_array_stepper;
StringDedup::Requests _string_dedup_requests;
diff --git a/src/hotspot/share/gc/g1/g1Policy.cpp b/src/hotspot/share/gc/g1/g1Policy.cpp
index e7e57c962c734..1b71901f0fe05 100644
--- a/src/hotspot/share/gc/g1/g1Policy.cpp
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp
@@ -53,7 +53,7 @@
#include "gc/shared/gcTraceTime.inline.hpp"
G1Policy::G1Policy(STWGCTimer* gc_timer) :
- _predictor(G1ConfidencePercent / 100.0),
+ _predictor((100 - G1ConfidencePercent) / 100.0),
_analytics(new G1Analytics(&_predictor)),
_remset_tracker(),
_mmu_tracker(new G1MMUTracker(GCPauseIntervalMillis / 1000.0, MaxGCPauseMillis / 1000.0)),
@@ -1467,219 +1467,6 @@ uint G1Policy::calc_max_old_cset_length() const {
return (uint)ceil(result);
}
-static void print_finish_message(const char* reason, bool from_marking) {
- log_debug(gc, ergo, cset)("Finish adding %s candidates to collection set (%s).",
- from_marking ? "marking" : "retained", reason);
-}
-
-double G1Policy::select_candidates_from_marking(G1CollectionCandidateList* marking_list,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* initial_old_regions,
- G1CollectionCandidateRegionList* optional_old_regions,
- G1CollectionCandidateRegionList* pinned_old_regions) {
- assert(marking_list != nullptr, "must be");
-
- uint num_expensive_regions = 0;
-
- uint num_initial_regions_selected = 0;
- uint num_optional_regions_selected = 0;
- uint num_pinned_regions = 0;
-
- double predicted_initial_time_ms = 0.0;
- double predicted_optional_time_ms = 0.0;
-
- double optional_threshold_ms = time_remaining_ms * optional_prediction_fraction();
-
- const uint min_old_cset_length = calc_min_old_cset_length(candidates()->last_marking_candidates_length());
- const uint max_old_cset_length = MAX2(min_old_cset_length, calc_max_old_cset_length());
- const uint max_optional_regions = max_old_cset_length - min_old_cset_length;
- bool check_time_remaining = use_adaptive_young_list_length();
-
- log_debug(gc, ergo, cset)("Start adding marking candidates to collection set. "
- "Min %u regions, max %u regions, available %u regions"
- "time remaining %1.2fms, optional threshold %1.2fms",
- min_old_cset_length, max_old_cset_length, marking_list->length(), time_remaining_ms, optional_threshold_ms);
-
- G1CollectionCandidateListIterator iter = marking_list->begin();
- for (; iter != marking_list->end(); ++iter) {
- if (num_initial_regions_selected + num_optional_regions_selected >= max_old_cset_length) {
- // Added maximum number of old regions to the CSet.
- print_finish_message("Maximum number of regions reached", true);
- break;
- }
- G1HeapRegion* hr = (*iter)->_r;
- // Skip evacuating pinned marking regions because we are not getting any free
- // space from them (and we expect to get free space from marking candidates).
- // Also prepare to move them to retained regions to be evacuated optionally later
- // to not impact the mixed phase too much.
- if (hr->has_pinned_objects()) {
- num_pinned_regions++;
- (*iter)->update_num_unreclaimed();
- log_trace(gc, ergo, cset)("Marking candidate %u can not be reclaimed currently. Skipping.", hr->hrm_index());
- pinned_old_regions->append(hr);
- continue;
- }
- double predicted_time_ms = predict_region_total_time_ms(hr, false);
- time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0);
- // Add regions to old set until we reach the minimum amount
- if (initial_old_regions->length() < min_old_cset_length) {
- initial_old_regions->append(hr);
- num_initial_regions_selected++;
- predicted_initial_time_ms += predicted_time_ms;
- // Record the number of regions added with no time remaining
- if (time_remaining_ms == 0.0) {
- num_expensive_regions++;
- }
- } else if (!check_time_remaining) {
- // In the non-auto-tuning case, we'll finish adding regions
- // to the CSet if we reach the minimum.
- print_finish_message("Region amount reached min", true);
- break;
- } else {
- // Keep adding regions to old set until we reach the optional threshold
- if (time_remaining_ms > optional_threshold_ms) {
- predicted_initial_time_ms += predicted_time_ms;
- initial_old_regions->append(hr);
- num_initial_regions_selected++;
- } else if (time_remaining_ms > 0) {
- // Keep adding optional regions until time is up.
- assert(optional_old_regions->length() < max_optional_regions, "Should not be possible.");
- predicted_optional_time_ms += predicted_time_ms;
- optional_old_regions->append(hr);
- num_optional_regions_selected++;
- } else {
- print_finish_message("Predicted time too high", true);
- break;
- }
- }
- }
- if (iter == marking_list->end()) {
- log_debug(gc, ergo, cset)("Marking candidates exhausted.");
- }
-
- if (num_expensive_regions > 0) {
- log_debug(gc, ergo, cset)("Added %u marking candidates to collection set although the predicted time was too high.",
- num_expensive_regions);
- }
-
- log_debug(gc, ergo, cset)("Finish adding marking candidates to collection set. Initial: %u, optional: %u, pinned: %u, "
- "predicted initial time: %1.2fms, predicted optional time: %1.2fms, time remaining: %1.2fms",
- num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions,
- predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms);
-
- assert(initial_old_regions->length() == num_initial_regions_selected, "must be");
- assert(optional_old_regions->length() == num_optional_regions_selected, "must be");
- return time_remaining_ms;
-}
-
-void G1Policy::select_candidates_from_retained(G1CollectionCandidateList* retained_list,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* initial_old_regions,
- G1CollectionCandidateRegionList* optional_old_regions,
- G1CollectionCandidateRegionList* pinned_old_regions) {
-
- uint const min_regions = min_retained_old_cset_length();
-
- uint num_initial_regions_selected = 0;
- uint num_optional_regions_selected = 0;
- uint num_expensive_regions_selected = 0;
- uint num_pinned_regions = 0;
-
- double predicted_initial_time_ms = 0.0;
- double predicted_optional_time_ms = 0.0;
-
- // We want to make sure that on the one hand we process the retained regions asap,
- // but on the other hand do not take too many of them as optional regions.
- // So we split the time budget into budget we will unconditionally take into the
- // initial old regions, and budget for taking optional regions from the retained
- // list.
- double optional_time_remaining_ms = max_time_for_retaining();
- time_remaining_ms = MIN2(time_remaining_ms, optional_time_remaining_ms);
-
- log_debug(gc, ergo, cset)("Start adding retained candidates to collection set. "
- "Min %u regions, available %u, "
- "time remaining %1.2fms, optional remaining %1.2fms",
- min_regions, retained_list->length(), time_remaining_ms, optional_time_remaining_ms);
-
- for (G1CollectionSetCandidateInfo* ci : *retained_list) {
- G1HeapRegion* r = ci->_r;
- double predicted_time_ms = predict_region_total_time_ms(r, collector_state()->in_young_only_phase());
- bool fits_in_remaining_time = predicted_time_ms <= time_remaining_ms;
- // If we can't reclaim that region ignore it for now.
- if (r->has_pinned_objects()) {
- num_pinned_regions++;
- if (ci->update_num_unreclaimed()) {
- log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Skipping.", r->hrm_index());
- } else {
- log_trace(gc, ergo, cset)("Retained candidate %u can not be reclaimed currently. Dropping.", r->hrm_index());
- pinned_old_regions->append(r);
- }
- continue;
- }
-
- if (fits_in_remaining_time || (num_expensive_regions_selected < min_regions)) {
- predicted_initial_time_ms += predicted_time_ms;
- if (!fits_in_remaining_time) {
- num_expensive_regions_selected++;
- }
- initial_old_regions->append(r);
- num_initial_regions_selected++;
- } else if (predicted_time_ms <= optional_time_remaining_ms) {
- predicted_optional_time_ms += predicted_time_ms;
- optional_old_regions->append(r);
- num_optional_regions_selected++;
- } else {
- // Fits neither initial nor optional time limit. Exit.
- break;
- }
- time_remaining_ms = MAX2(0.0, time_remaining_ms - predicted_time_ms);
- optional_time_remaining_ms = MAX2(0.0, optional_time_remaining_ms - predicted_time_ms);
- }
-
- uint num_regions_selected = num_initial_regions_selected + num_optional_regions_selected;
- if (num_regions_selected == retained_list->length()) {
- log_debug(gc, ergo, cset)("Retained candidates exhausted.");
- }
- if (num_expensive_regions_selected > 0) {
- log_debug(gc, ergo, cset)("Added %u retained candidates to collection set although the predicted time was too high.",
- num_expensive_regions_selected);
- }
-
- log_debug(gc, ergo, cset)("Finish adding retained candidates to collection set. Initial: %u, optional: %u, pinned: %u, "
- "predicted initial time: %1.2fms, predicted optional time: %1.2fms, "
- "time remaining: %1.2fms optional time remaining %1.2fms",
- num_initial_regions_selected, num_optional_regions_selected, num_pinned_regions,
- predicted_initial_time_ms, predicted_optional_time_ms, time_remaining_ms, optional_time_remaining_ms);
-}
-
-void G1Policy::calculate_optional_collection_set_regions(G1CollectionCandidateRegionList* optional_regions,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* selected_regions) {
- assert(_collection_set->optional_region_length() > 0,
- "Should only be called when there are optional regions");
-
- double total_prediction_ms = 0.0;
-
- for (G1HeapRegion* r : *optional_regions) {
- double prediction_ms = predict_region_total_time_ms(r, false);
-
- if (prediction_ms > time_remaining_ms) {
- log_debug(gc, ergo, cset)("Prediction %.3fms for region %u does not fit remaining time: %.3fms.",
- prediction_ms, r->hrm_index(), time_remaining_ms);
- break;
- }
- // This region will be included in the next optional evacuation.
-
- total_prediction_ms += prediction_ms;
- time_remaining_ms -= prediction_ms;
-
- selected_regions->append(r);
- }
-
- log_debug(gc, ergo, cset)("Prepared %u regions out of %u for optional evacuation. Total predicted time: %.3fms",
- selected_regions->length(), optional_regions->length(), total_prediction_ms);
-}
-
void G1Policy::transfer_survivors_to_cset(const G1SurvivorRegions* survivors) {
start_adding_survivor_regions();
diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp
index 98d444084678c..9a6ffb570be70 100644
--- a/src/hotspot/share/gc/g1/g1Policy.hpp
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp
@@ -335,27 +335,7 @@ class G1Policy: public CHeapObj {
// Amount of allowed waste in bytes in the collection set.
size_t allowed_waste_in_collection_set() const;
- // Calculate and fill in the initial, optional and pinned old gen candidate regions from
- // the given candidate list and the remaining time.
- // Returns the remaining time.
- double select_candidates_from_marking(G1CollectionCandidateList* marking_list,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* initial_old_regions,
- G1CollectionCandidateRegionList* optional_old_regions,
- G1CollectionCandidateRegionList* pinned_old_regions);
-
- void select_candidates_from_retained(G1CollectionCandidateList* retained_list,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* initial_old_regions,
- G1CollectionCandidateRegionList* optional_old_regions,
- G1CollectionCandidateRegionList* pinned_old_regions);
-
- // Calculate the number of optional regions from the given collection set candidates,
- // the remaining time and the maximum number of these regions and return the number
- // of actually selected regions in num_optional_regions.
- void calculate_optional_collection_set_regions(G1CollectionCandidateRegionList* optional_old_regions,
- double time_remaining_ms,
- G1CollectionCandidateRegionList* selected);
+
private:
@@ -423,12 +403,12 @@ class G1Policy: public CHeapObj {
size_t desired_survivor_size(uint max_regions) const;
+public:
// Fraction used when predicting how many optional regions to include in
// the CSet. This fraction of the available time is used for optional regions,
// the rest is used to add old regions to the normal CSet.
double optional_prediction_fraction() const { return 0.2; }
-public:
// Fraction used when evacuating the optional regions. This fraction of the
// remaining time is used to choose what regions to include in the evacuation.
double optional_evacuation_fraction() const { return 0.75; }
diff --git a/src/hotspot/share/gc/g1/g1Predictions.hpp b/src/hotspot/share/gc/g1/g1Predictions.hpp
index 510f296a9f3ac..ae2a8f418802a 100644
--- a/src/hotspot/share/gc/g1/g1Predictions.hpp
+++ b/src/hotspot/share/gc/g1/g1Predictions.hpp
@@ -29,8 +29,9 @@
// Utility class containing various helper methods for prediction.
class G1Predictions {
- private:
- double _sigma;
+private:
+ // Scale factor indicating to which degree stddev should be taken into account in predictions.
+ double _stddev_scale;
// This function is used to estimate the stddev of sample sets. There is some
// special consideration of small sample sets: the actual stddev for them is
@@ -46,16 +47,14 @@ class G1Predictions {
}
return estimate;
}
- public:
- G1Predictions(double sigma) : _sigma(sigma) {
- assert(sigma >= 0.0, "Confidence must be larger than or equal to zero");
- }
- // Confidence factor.
- double sigma() const { return _sigma; }
+public:
+ G1Predictions(double stddev_scale) : _stddev_scale(stddev_scale) {
+ assert(stddev_scale >= 0.0, "must be");
+ }
double predict(TruncatedSeq const* seq) const {
- return seq->davg() + _sigma * stddev_estimate(seq);
+ return seq->davg() + _stddev_scale * stddev_estimate(seq);
}
double predict_in_unit_interval(TruncatedSeq const* seq) const {
diff --git a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
index 5f903960cce8c..4403b4c8dd981 100644
--- a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
+++ b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,15 +40,15 @@ G1RegionToSpaceMapper::G1RegionToSpaceMapper(ReservedSpace rs,
size_t page_size,
size_t region_granularity,
size_t commit_factor,
- MEMFLAGS type) :
+ MemTag mem_tag) :
_listener(nullptr),
_storage(rs, used_size, page_size),
_region_commit_map(rs.size() * commit_factor / region_granularity, mtGC),
- _memory_type(type) {
+ _memory_tag(mem_tag) {
guarantee(is_power_of_2(page_size), "must be");
guarantee(is_power_of_2(region_granularity), "must be");
- MemTracker::record_virtual_memory_type((address)rs.base(), type);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mem_tag);
}
// Used to manually signal a mapper to handle a set of regions as committed.
@@ -72,8 +72,8 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper {
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
- MEMFLAGS type) :
- G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, type),
+ MemTag mem_tag) :
+ G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, mem_tag),
_pages_per_region(alloc_granularity / (page_size * commit_factor)) {
guarantee(alloc_granularity >= page_size, "allocation granularity smaller than commit granularity");
@@ -97,7 +97,7 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper {
const size_t start_page = (size_t)start_idx * _pages_per_region;
const size_t size_in_pages = num_regions * _pages_per_region;
bool zero_filled = _storage.commit(start_page, size_in_pages);
- if (_memory_type == mtJavaHeap) {
+ if (_memory_tag == mtJavaHeap) {
for (uint region_index = start_idx; region_index < start_idx + num_regions; region_index++ ) {
void* address = _storage.page_start(region_index * _pages_per_region);
size_t size_in_bytes = _storage.page_size() * _pages_per_region;
@@ -150,7 +150,7 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
}
void numa_request_on_node(size_t page_idx) {
- if (_memory_type == mtJavaHeap) {
+ if (_memory_tag == mtJavaHeap) {
uint region = (uint)(page_idx * _regions_per_page);
void* address = _storage.page_start(page_idx);
size_t size_in_bytes = _storage.page_size();
@@ -164,8 +164,8 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
- MEMFLAGS type) :
- G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, type),
+ MemTag mem_tag) :
+ G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, commit_factor, mem_tag),
_regions_per_page((page_size * commit_factor) / alloc_granularity),
_lock(Mutex::service-3, "G1Mapper_lock") {
@@ -263,10 +263,10 @@ G1RegionToSpaceMapper* G1RegionToSpaceMapper::create_mapper(ReservedSpace rs,
size_t page_size,
size_t region_granularity,
size_t commit_factor,
- MEMFLAGS type) {
+ MemTag mem_tag) {
if (region_granularity >= (page_size * commit_factor)) {
- return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
+ return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, mem_tag);
} else {
- return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
+ return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, mem_tag);
}
}
diff --git a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
index 02498b394b39f..5ef0f8ec5ab51 100644
--- a/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
+++ b/src/hotspot/share/gc/g1/g1RegionToSpaceMapper.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -52,9 +52,9 @@ class G1RegionToSpaceMapper : public CHeapObj {
// Mapping management
CHeapBitMap _region_commit_map;
- MEMFLAGS _memory_type;
+ MemTag _memory_tag;
- G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, size_t commit_factor, MEMFLAGS type);
+ G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, size_t commit_factor, MemTag mem_tag);
void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled);
public:
@@ -85,7 +85,7 @@ class G1RegionToSpaceMapper : public CHeapObj {
size_t page_size,
size_t region_granularity,
size_t byte_translation_factor,
- MEMFLAGS type);
+ MemTag mem_tag);
};
#endif // SHARE_GC_G1_G1REGIONTOSPACEMAPPER_HPP
diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp
index f5f65cf1c48aa..bb5ac5036fe47 100644
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp
@@ -967,6 +967,10 @@ class G1MergeHeapRootsTask : public WorkerTask {
_merged[G1GCPhaseTimes::MergeRSCards] += increment;
}
+ void dec_remset_cards(size_t decrement) {
+ _merged[G1GCPhaseTimes::MergeRSCards] -= decrement;
+ }
+
size_t merged(uint i) const { return _merged[i]; }
};
@@ -1091,6 +1095,11 @@ class G1MergeHeapRootsTask : public WorkerTask {
G1MergeCardSetStats stats() {
_merge_card_set_cache.flush();
+ // Compensation for the dummy cards that were initially pushed into the
+ // card cache.
+ // We do not need to compensate for the other counters because the dummy
+ // card mark will never update another counter because it is initially "dirty".
+ _stats.dec_remset_cards(G1MergeCardSetCache::CacheSize);
return _stats;
}
};
diff --git a/src/hotspot/share/gc/g1/g1_globals.hpp b/src/hotspot/share/gc/g1/g1_globals.hpp
index c8016ddc0ddf5..ed02ba2dc5cad 100644
--- a/src/hotspot/share/gc/g1/g1_globals.hpp
+++ b/src/hotspot/share/gc/g1/g1_globals.hpp
@@ -111,7 +111,8 @@
range(1, max_intx) \
\
product(uint, G1ConfidencePercent, 50, \
- "Confidence level for MMU/pause predictions") \
+ "Confidence level for MMU/pause predictions. A higher value " \
+ "means that G1 will use less safety margin for its predictions.") \
range(1, 100) \
\
product(uintx, G1SummarizeRSetStatsPeriod, 0, DIAGNOSTIC, \
diff --git a/src/hotspot/share/gc/parallel/objectStartArray.cpp b/src/hotspot/share/gc/parallel/objectStartArray.cpp
index b1fc956a54a21..ef9de7abfd771 100644
--- a/src/hotspot/share/gc/parallel/objectStartArray.cpp
+++ b/src/hotspot/share/gc/parallel/objectStartArray.cpp
@@ -51,7 +51,7 @@ void ObjectStartArray::initialize(MemRegion reserved_region) {
if (!backing_store.is_reserved()) {
vm_exit_during_initialization("Could not reserve space for ObjectStartArray");
}
- MemTracker::record_virtual_memory_type(backing_store.base(), mtGC);
+ MemTracker::record_virtual_memory_tag(backing_store.base(), mtGC);
// We do not commit any memory initially
_virtual_space.initialize(backing_store);
diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
index 658c3ef106fa0..46a178500e576 100644
--- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
+++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
@@ -51,7 +51,7 @@ ParMarkBitMap::initialize(MemRegion covered_region)
os::trace_page_sizes("Mark Bitmap", raw_bytes, raw_bytes,
rs.base(), rs.size(), used_page_sz);
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
_virtual_space = new PSVirtualSpace(rs, page_sz);
if (_virtual_space != nullptr && _virtual_space->expand_by(_reserved_byte_size)) {
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index 4bff8f8a7d06a..1ab7b2af7ed74 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -235,7 +235,7 @@ ParallelCompactData::create_vspace(size_t count, size_t element_size)
os::trace_page_sizes("Parallel Compact Data", raw_bytes, raw_bytes, rs.base(),
rs.size(), page_sz);
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
PSVirtualSpace* vspace = new PSVirtualSpace(rs, page_sz);
if (vspace != nullptr) {
diff --git a/src/hotspot/share/gc/parallel/psScavenge.hpp b/src/hotspot/share/gc/parallel/psScavenge.hpp
index 99d0487760b15..55abdfd3cf38e 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.hpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.hpp
@@ -34,9 +34,7 @@
#include "oops/oop.hpp"
#include "utilities/stack.hpp"
-class ReferenceProcessor;
class ParallelScavengeHeap;
-class ParallelScavengeTracer;
class PSIsAliveClosure;
class STWGCTimer;
diff --git a/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp b/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
index 59b7f130df30b..31f18652c63d6 100644
--- a/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
+++ b/src/hotspot/share/gc/serial/serialBlockOffsetTable.cpp
@@ -42,7 +42,7 @@ SerialBlockOffsetTable::SerialBlockOffsetTable(MemRegion reserved,
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
- MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)rs.base(), mtGC);
if (!_vs.initialize(rs, 0)) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
index 59e0245204441..643a7936b9b17 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
@@ -109,6 +109,10 @@ Label* BarrierStubC2::continuation() {
return &_continuation;
}
+uint8_t BarrierStubC2::barrier_data() const {
+ return _node->barrier_data();
+}
+
void BarrierStubC2::preserve(Register r) {
const VMReg vm_reg = r->as_VMReg();
assert(vm_reg->is_Register(), "r must be a general-purpose register");
diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
index c1485c069c83c..00fbf1f2c9f8b 100644
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
@@ -254,6 +254,8 @@ class BarrierStubC2 : public ArenaObj {
Label* entry();
// Return point from the stub (typically end of barrier).
Label* continuation();
+ // High-level, GC-specific barrier flags.
+ uint8_t barrier_data() const;
// Preserve the value in reg across runtime calls in this barrier.
void preserve(Register reg);
@@ -340,6 +342,8 @@ class BarrierSetC2: public CHeapObj {
// Estimated size of the node barrier in number of C2 Ideal nodes.
// This is used to guide heuristics in C2, e.g. whether to unroll a loop.
virtual uint estimated_barrier_size(const Node* node) const { return 0; }
+ // Whether the given store can be used to initialize a newly allocated object.
+ virtual bool can_initialize_object(const StoreNode* store) const { return true; }
enum CompilePhase {
BeforeOptimize,
diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
index 87bb9f3cd5170..11b742156a831 100644
--- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp
@@ -125,39 +125,10 @@ void CardTableBarrierSetC2::post_barrier(GraphKit* kit,
kit->final_sync(ideal);
}
-void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
- BarrierSetC2::clone(kit, src, dst, size, is_array);
- const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
-
- // If necessary, emit some card marks afterwards. (Non-arrays only.)
- bool card_mark = !is_array && !use_ReduceInitialCardMarks();
- if (card_mark) {
- assert(!is_array, "");
- // Put in store barrier for any and all oops we are sticking
- // into this object. (We could avoid this if we could prove
- // that the object type contains no oop fields at all.)
- Node* no_particular_value = nullptr;
- Node* no_particular_field = nullptr;
- int raw_adr_idx = Compile::AliasIdxRaw;
- post_barrier(kit, kit->control(),
- kit->memory(raw_adr_type),
- dst,
- no_particular_field,
- raw_adr_idx,
- no_particular_value,
- T_OBJECT,
- false);
- }
-}
-
bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const {
return ReduceInitialCardMarks;
}
-bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const {
- return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM;
-}
-
void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
Node *shift = node->unique_out();
diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
index 9512f09ff8a6d..3bbf14892d3ef 100644
--- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
+++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp
@@ -42,8 +42,6 @@ class CardTableBarrierSetC2: public ModRefBarrierSetC2 {
Node* byte_map_base_node(GraphKit* kit) const;
public:
- virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
- virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const;
diff --git a/src/hotspot/share/gc/shared/cardTable.cpp b/src/hotspot/share/gc/shared/cardTable.cpp
index 95f7058f4e4ee..acd4bda6e1071 100644
--- a/src/hotspot/share/gc/shared/cardTable.cpp
+++ b/src/hotspot/share/gc/shared/cardTable.cpp
@@ -84,7 +84,7 @@ void CardTable::initialize(void* region0_start, void* region1_start) {
MAX2(_page_size, os::vm_allocation_granularity());
ReservedSpace heap_rs(_byte_map_size, rs_align, _page_size);
- MemTracker::record_virtual_memory_type((address)heap_rs.base(), mtGC);
+ MemTracker::record_virtual_memory_tag((address)heap_rs.base(), mtGC);
os::trace_page_sizes("Card Table", num_bytes, num_bytes,
heap_rs.base(), heap_rs.size(), _page_size);
diff --git a/src/hotspot/share/gc/shared/gcArguments.cpp b/src/hotspot/share/gc/shared/gcArguments.cpp
index 9736c0f7fdcab..2522925746be1 100644
--- a/src/hotspot/share/gc/shared/gcArguments.cpp
+++ b/src/hotspot/share/gc/shared/gcArguments.cpp
@@ -30,6 +30,7 @@
#include "runtime/arguments.hpp"
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
+#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
size_t HeapAlignment = 0;
@@ -166,6 +167,13 @@ void GCArguments::initialize_heap_flags_and_sizes() {
FLAG_SET_ERGO(MinHeapDeltaBytes, align_up(MinHeapDeltaBytes, SpaceAlignment));
+ if (checked_cast<uint>(ObjectAlignmentInBytes) > GCCardSizeInBytes) {
+ err_msg message("ObjectAlignmentInBytes %u is larger than GCCardSizeInBytes %u",
+ ObjectAlignmentInBytes, GCCardSizeInBytes);
+ vm_exit_during_initialization("Invalid combination of GCCardSizeInBytes and ObjectAlignmentInBytes",
+ message);
+ }
+
DEBUG_ONLY(assert_flags();)
}
diff --git a/src/hotspot/share/gc/shared/oopStorage.cpp b/src/hotspot/share/gc/shared/oopStorage.cpp
index 7117b86b26403..2373d6b1d93a8 100644
--- a/src/hotspot/share/gc/shared/oopStorage.cpp
+++ b/src/hotspot/share/gc/shared/oopStorage.cpp
@@ -127,10 +127,10 @@ OopStorage::ActiveArray::~ActiveArray() {
}
OopStorage::ActiveArray* OopStorage::ActiveArray::create(size_t size,
- MEMFLAGS memflags,
+ MemTag mem_tag,
AllocFailType alloc_fail) {
size_t size_in_bytes = blocks_offset() + sizeof(Block*) * size;
- void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, memflags, CURRENT_PC, alloc_fail);
+ void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, mem_tag, CURRENT_PC, alloc_fail);
if (mem == nullptr) return nullptr;
return new (mem) ActiveArray(size);
}
@@ -300,7 +300,12 @@ void OopStorage::Block::set_active_index(size_t index) {
size_t OopStorage::Block::active_index_safe(const Block* block) {
STATIC_ASSERT(sizeof(intptr_t) == sizeof(block->_active_index));
- return SafeFetchN((intptr_t*)&block->_active_index, 0);
+ // Be careful, because block could be a false positive from block_for_ptr.
+ assert(block != nullptr, "precondition");
+ uintptr_t block_addr = reinterpret_cast<uintptr_t>(block);
+ uintptr_t index_loc = block_addr + offset_of(Block, _active_index);
+ static_assert(sizeof(size_t) == sizeof(intptr_t), "assumption");
+ return static_cast<size_t>(SafeFetchN(reinterpret_cast<const intptr_t*>(index_loc), 0));
}
unsigned OopStorage::Block::get_index(const oop* ptr) const {
@@ -343,7 +348,7 @@ OopStorage::Block* OopStorage::Block::new_block(const OopStorage* owner) {
// _data must be first member: aligning block => aligning _data.
STATIC_ASSERT(_data_pos == 0);
size_t size_needed = allocation_size();
- void* memory = NEW_C_HEAP_ARRAY_RETURN_NULL(char, size_needed, owner->memflags());
+ void* memory = NEW_C_HEAP_ARRAY_RETURN_NULL(char, size_needed, owner->mem_tag());
if (memory == nullptr) {
return nullptr;
}
@@ -366,21 +371,23 @@ void OopStorage::Block::delete_block(const Block& block) {
OopStorage::Block*
OopStorage::Block::block_for_ptr(const OopStorage* owner, const oop* ptr) {
STATIC_ASSERT(_data_pos == 0);
- // Const-ness of ptr is not related to const-ness of containing block.
+ assert(ptr != nullptr, "precondition");
// Blocks are allocated section-aligned, so get the containing section.
- oop* section_start = align_down(const_cast<oop*>(ptr), block_alignment);
+ uintptr_t section_start = align_down(reinterpret_cast<uintptr_t>(ptr), block_alignment);
// Start with a guess that the containing section is the last section,
// so the block starts section_count-1 sections earlier.
- oop* section = section_start - (section_size * (section_count - 1));
+ size_t section_size_in_bytes = sizeof(oop) * section_size;
+ uintptr_t section = section_start - (section_size_in_bytes * (section_count - 1));
// Walk up through the potential block start positions, looking for
// the owner in the expected location. If we're below the actual block
// start position, the value at the owner position will be some oop
// (possibly null), which can never match the owner.
intptr_t owner_addr = reinterpret_cast<intptr_t>(owner);
- for (unsigned i = 0; i < section_count; ++i, section += section_size) {
- Block* candidate = reinterpret_cast<Block*>(section);
- if (SafeFetchN(&candidate->_owner_address, 0) == owner_addr) {
- return candidate;
+ for (unsigned i = 0; i < section_count; ++i, section += section_size_in_bytes) {
+ uintptr_t owner_loc = section + offset_of(Block, _owner_address);
+ static_assert(sizeof(OopStorage*) == sizeof(intptr_t), "assumption");
+ if (SafeFetchN(reinterpret_cast<intptr_t*>(owner_loc), 0) == owner_addr) {
+ return reinterpret_cast<Block*>(section);
}
}
return nullptr;
@@ -575,7 +582,7 @@ bool OopStorage::expand_active_array() {
log_debug(oopstorage, blocks)("%s: expand active array " SIZE_FORMAT,
name(), new_size);
ActiveArray* new_array = ActiveArray::create(new_size,
- memflags(),
+ mem_tag(),
AllocFailStrategy::RETURN_NULL);
if (new_array == nullptr) return false;
new_array->copy_from(old_array);
@@ -643,8 +650,7 @@ class OopStorage::WithActiveArray : public StackObj {
}
};
-OopStorage::Block* OopStorage::find_block_or_null(const oop* ptr) const {
- assert(ptr != nullptr, "precondition");
+OopStorage::Block* OopStorage::block_for_ptr(const oop* ptr) const {
return Block::block_for_ptr(this, ptr);
}
@@ -771,7 +777,7 @@ static inline void check_release_entry(const oop* entry) {
void OopStorage::release(const oop* ptr) {
check_release_entry(ptr);
- Block* block = find_block_or_null(ptr);
+ Block* block = block_for_ptr(ptr);
assert(block != nullptr, "%s: invalid release " PTR_FORMAT, name(), p2i(ptr));
log_trace(oopstorage, ref)("%s: releasing " PTR_FORMAT, name(), p2i(ptr));
block->release_entries(block->bitmask_for_entry(ptr), this);
@@ -782,7 +788,7 @@ void OopStorage::release(const oop* const* ptrs, size_t size) {
size_t i = 0;
while (i < size) {
check_release_entry(ptrs[i]);
- Block* block = find_block_or_null(ptrs[i]);
+ Block* block = block_for_ptr(ptrs[i]);
assert(block != nullptr, "%s: invalid release " PTR_FORMAT, name(), p2i(ptrs[i]));
size_t count = 0;
uintx releasing = 0;
@@ -805,8 +811,8 @@ void OopStorage::release(const oop* const* ptrs, size_t size) {
}
}
-OopStorage* OopStorage::create(const char* name, MEMFLAGS memflags) {
- return new (memflags) OopStorage(name, memflags);
+OopStorage* OopStorage::create(const char* name, MemTag mem_tag) {
+ return new (mem_tag) OopStorage(name, mem_tag);
}
const size_t initial_active_array_size = 8;
@@ -819,9 +825,9 @@ static Mutex* make_oopstorage_mutex(const char* storage_name,
return new PaddedMutex(rank, name);
}
-OopStorage::OopStorage(const char* name, MEMFLAGS memflags) :
+OopStorage::OopStorage(const char* name, MemTag mem_tag) :
_name(os::strdup(name)),
- _active_array(ActiveArray::create(initial_active_array_size, memflags)),
+ _active_array(ActiveArray::create(initial_active_array_size, mem_tag)),
_allocation_list(),
_deferred_updates(nullptr),
_allocation_mutex(make_oopstorage_mutex(name, "alloc", Mutex::oopstorage)),
@@ -829,7 +835,7 @@ OopStorage::OopStorage(const char* name, MEMFLAGS memflags) :
_num_dead_callback(nullptr),
_allocation_count(0),
_concurrent_iteration_count(0),
- _memflags(memflags),
+ _mem_tag(mem_tag),
_needs_cleanup(false)
{
_active_array->increment_refcount();
@@ -989,7 +995,8 @@ bool OopStorage::delete_empty_blocks() {
}
OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const {
- const Block* block = find_block_or_null(ptr);
+ if (ptr == nullptr) return INVALID_ENTRY;
+ const Block* block = block_for_ptr(ptr);
if (block != nullptr) {
// Prevent block deletion and _active_array modification.
MutexLocker ml(_allocation_mutex, Mutex::_no_safepoint_check_flag);
@@ -1030,7 +1037,7 @@ size_t OopStorage::total_memory_usage() const {
return total_size;
}
-MEMFLAGS OopStorage::memflags() const { return _memflags; }
+MemTag OopStorage::mem_tag() const { return _mem_tag; }
// Parallel iteration support
@@ -1135,6 +1142,26 @@ void OopStorage::BasicParState::report_num_dead() const {
const char* OopStorage::name() const { return _name; }
+bool OopStorage::print_containing(const oop* addr, outputStream* st) {
+ if (addr != nullptr) {
+ Block* block = block_for_ptr(addr);
+ if (block != nullptr && block->print_containing(addr, st)) {
+ st->print(" in oop storage \"%s\"", name());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool OopStorage::Block::print_containing(const oop* addr, outputStream* st) {
+ if (contains(addr)) {
+ st->print(PTR_FORMAT " is a pointer %u/%zu into block %zu",
+ p2i(addr), get_index(addr), ARRAY_SIZE(_data), _active_index);
+ return true;
+ }
+ return false;
+}
+
#ifndef PRODUCT
void OopStorage::print_on(outputStream* st) const {
diff --git a/src/hotspot/share/gc/shared/oopStorage.hpp b/src/hotspot/share/gc/shared/oopStorage.hpp
index dfc0f83fc1912..34c980a058659 100644
--- a/src/hotspot/share/gc/shared/oopStorage.hpp
+++ b/src/hotspot/share/gc/shared/oopStorage.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,7 +74,7 @@ class outputStream;
class OopStorage : public CHeapObjBase {
public:
- static OopStorage* create(const char* name, MEMFLAGS memflags);
+ static OopStorage* create(const char* name, MemTag mem_tag);
~OopStorage();
// These count and usage accessors are racy unless at a safepoint.
@@ -89,8 +89,8 @@ class OopStorage : public CHeapObjBase {
// bookkeeping overhead, including this storage object.
size_t total_memory_usage() const;
- // The memory type for allocations.
- MEMFLAGS memflags() const;
+ // The memory tag for allocations.
+ MemTag mem_tag() const;
enum EntryStatus {
INVALID_ENTRY,
@@ -213,6 +213,7 @@ class OopStorage : public CHeapObjBase {
// Debugging and logging support.
const char* name() const;
void print_on(outputStream* st) const PRODUCT_RETURN;
+ bool print_containing(const oop* addr, outputStream* st);
// Provides access to storage internals, for unit testing.
// Declare, but not define, the public class OopStorage::TestAccess.
@@ -273,21 +274,21 @@ class OopStorage : public CHeapObjBase {
// mutable because this gets set even for const iteration.
mutable int _concurrent_iteration_count;
- // The memory type for allocations.
- MEMFLAGS _memflags;
+ // The memory tag for allocations.
+ MemTag _mem_tag;
// Flag indicating this storage object is a candidate for empty block deletion.
volatile bool _needs_cleanup;
// Clients construct via "create" factory function.
- OopStorage(const char* name, MEMFLAGS memflags);
+ OopStorage(const char* name, MemTag mem_tag);
NONCOPYABLE(OopStorage);
bool try_add_block();
Block* block_for_allocation();
void log_block_transition(Block* block, const char* new_state) const;
- Block* find_block_or_null(const oop* ptr) const;
+ Block* block_for_ptr(const oop* ptr) const;
void delete_empty_block(const Block& block);
bool reduce_deferred_updates();
void record_needs_cleanup();
diff --git a/src/hotspot/share/gc/shared/oopStorage.inline.hpp b/src/hotspot/share/gc/shared/oopStorage.inline.hpp
index e1e815acd094e..da0926a20b6e2 100644
--- a/src/hotspot/share/gc/shared/oopStorage.inline.hpp
+++ b/src/hotspot/share/gc/shared/oopStorage.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,7 +60,7 @@ class OopStorage::ActiveArray {
public:
static ActiveArray* create(size_t size,
- MEMFLAGS memflags = mtGC,
+ MemTag mem_tag = mtGC,
AllocFailType alloc_fail = AllocFailStrategy::EXIT_OOM);
static void destroy(ActiveArray* ba);
@@ -184,7 +184,10 @@ class OopStorage::Block /* No base class, to avoid messing up alignment. */ {
void set_active_index(size_t index);
static size_t active_index_safe(const Block* block); // Returns 0 if access fails.
- // Returns null if ptr is not in a block or not allocated in that block.
+ // Return block of owner containing ptr, if ptr is a valid entry of owner.
+ // If ptr is not a valid entry of owner then returns either null or a "false
+ // positive" pointer; see allocation_status.
+ // precondition: ptr != nullptr
static Block* block_for_ptr(const OopStorage* owner, const oop* ptr);
oop* allocate();
@@ -196,6 +199,8 @@ class OopStorage::Block /* No base class, to avoid messing up alignment. */ {
template<typename F> bool iterate(F f);
template<typename F> bool iterate(F f) const;
+
+ bool print_containing(const oop* addr, outputStream* st);
}; // class Block
inline OopStorage::Block* OopStorage::AllocationList::head() {
diff --git a/src/hotspot/share/gc/shared/oopStorageSet.cpp b/src/hotspot/share/gc/shared/oopStorageSet.cpp
index e119e570759a4..e3a9fccbad3dc 100644
--- a/src/hotspot/share/gc/shared/oopStorageSet.cpp
+++ b/src/hotspot/share/gc/shared/oopStorageSet.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,18 +31,18 @@
OopStorage* OopStorageSet::_storages[all_count] = {};
-OopStorage* OopStorageSet::create_strong(const char* name, MEMFLAGS memflags) {
+OopStorage* OopStorageSet::create_strong(const char* name, MemTag mem_tag) {
static uint registered_strong = 0;
assert(registered_strong < strong_count, "More registered strong storages than slots");
- OopStorage* storage = OopStorage::create(name, memflags);
+ OopStorage* storage = OopStorage::create(name, mem_tag);
_storages[strong_start + registered_strong++] = storage;
return storage;
}
-OopStorage* OopStorageSet::create_weak(const char* name, MEMFLAGS memflags) {
+OopStorage* OopStorageSet::create_weak(const char* name, MemTag mem_tag) {
static uint registered_weak = 0;
assert(registered_weak < weak_count, "More registered strong storages than slots");
- OopStorage* storage = OopStorage::create(name, memflags);
+ OopStorage* storage = OopStorage::create(name, mem_tag);
_storages[weak_start + registered_weak++] = storage;
return storage;
}
@@ -82,6 +82,25 @@ template OopStorage* OopStorageSet::get_storage(StrongId);
template OopStorage* OopStorageSet::get_storage(WeakId);
template OopStorage* OopStorageSet::get_storage(Id);
+bool OopStorageSet::print_containing(const void* addr, outputStream* st) {
+ if (addr != nullptr) {
+ const void* aligned_addr = align_down(addr, alignof(oop));
+ for (OopStorage* storage : Range<Id>()) {
+ // Check for null for extra safety: might get here while handling error
+ // before storage initialization.
+ if ((storage != nullptr) && storage->print_containing((oop*) aligned_addr, st)) {
+ if (aligned_addr != addr) {
+ st->print_cr(" (unaligned)");
+ } else {
+ st->cr();
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
#ifdef ASSERT
void OopStorageSet::verify_initialized(uint index) {
diff --git a/src/hotspot/share/gc/shared/oopStorageSet.hpp b/src/hotspot/share/gc/shared/oopStorageSet.hpp
index 26e0e9f5a7775..867172c41ad74 100644
--- a/src/hotspot/share/gc/shared/oopStorageSet.hpp
+++ b/src/hotspot/share/gc/shared/oopStorageSet.hpp
@@ -25,7 +25,8 @@
#ifndef SHARE_GC_SHARED_OOPSTORAGESET_HPP
#define SHARE_GC_SHARED_OOPSTORAGESET_HPP
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
+#include "oops/oop.hpp"
#include "utilities/debug.hpp"
#include "utilities/enumIterator.hpp"
#include "utilities/globalDefinitions.hpp"
@@ -79,8 +80,8 @@ class OopStorageSet : public AllStatic {
static OopStorage* storage(WeakId id) { return get_storage(id); }
static OopStorage* storage(Id id) { return get_storage(id); }
- static OopStorage* create_strong(const char* name, MEMFLAGS memflags);
- static OopStorage* create_weak(const char* name, MEMFLAGS memflags);
+ static OopStorage* create_strong(const char* name, MemTag mem_tag);
+ static OopStorage* create_weak(const char* name, MemTag mem_tag);
// Support iteration over the storage objects.
template<typename StorageId> class Range;
@@ -89,6 +90,8 @@ class OopStorageSet : public AllStatic {
template<typename Closure>
static void strong_oops_do(Closure* cl);
+ // Debugging: print location info, if in storage.
+ static bool print_containing(const void* addr, outputStream* st);
};
ENUMERATOR_VALUE_RANGE(OopStorageSet::StrongId,
diff --git a/src/hotspot/share/gc/shared/partialArrayState.cpp b/src/hotspot/share/gc/shared/partialArrayState.cpp
index fd23a32022208..48ef974ecfa53 100644
--- a/src/hotspot/share/gc/shared/partialArrayState.cpp
+++ b/src/hotspot/share/gc/shared/partialArrayState.cpp
@@ -26,7 +26,7 @@
#include "gc/shared/partialArrayState.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/arena.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "oops/oopsHierarchy.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
diff --git a/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp b/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
index aab2f5d312399..ab85c293941df 100644
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupProcessor.cpp
@@ -35,7 +35,7 @@
#include "gc/shared/stringdedup/stringDedupTable.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
-#include "nmt/memflags.hpp"
+#include "nmt/memTag.hpp"
#include "oops/access.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/cpuTimeCounters.hpp"
diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp
index f4a3731583bc9..efbc1882fbed8 100644
--- a/src/hotspot/share/gc/shared/taskqueue.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.hpp
@@ -116,8 +116,8 @@ void TaskQueueStats::reset() {
// TaskQueueSuper collects functionality common to all GenericTaskQueue instances.
-template <unsigned int N, MEMFLAGS F>
-class TaskQueueSuper: public CHeapObj<F> {
+template <unsigned int N, MemTag MT>
+class TaskQueueSuper: public CHeapObj<MT> {
protected:
// Internal type for indexing the queue; also used for the tag.
typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
@@ -324,39 +324,39 @@ class TaskQueueSuper: public CHeapObj {
// practice of parallel programming (PPoPP 2013), 69-80
//
-template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
-class GenericTaskQueue: public TaskQueueSuper<N, F> {
+template <class E, MemTag MT, unsigned int N = TASKQUEUE_SIZE>
+class GenericTaskQueue: public TaskQueueSuper<N, MT> {
protected:
- typedef typename TaskQueueSuper<N, F>::Age Age;
- typedef typename TaskQueueSuper<N, F>::idx_t idx_t;
+ typedef typename TaskQueueSuper<N, MT>::Age Age;
+ typedef typename TaskQueueSuper<N, MT>::idx_t idx_t;
- using TaskQueueSuper<N, F>::MOD_N_MASK;
+ using TaskQueueSuper<N, MT>::MOD_N_MASK;
- using TaskQueueSuper<N, F>::bottom_relaxed;
- using TaskQueueSuper<N, F>::bottom_acquire;
+ using TaskQueueSuper<N, MT>::bottom_relaxed;
+ using TaskQueueSuper<N, MT>::bottom_acquire;
- using TaskQueueSuper<N, F>::set_bottom_relaxed;
- using TaskQueueSuper<N, F>::release_set_bottom;
+ using TaskQueueSuper<N, MT>::set_bottom_relaxed;
+ using TaskQueueSuper<N, MT>::release_set_bottom;
- using TaskQueueSuper<N, F>::age_relaxed;
- using TaskQueueSuper<N, F>::set_age_relaxed;
- using TaskQueueSuper<N, F>::cmpxchg_age;
- using TaskQueueSuper<N, F>::age_top_relaxed;
+ using TaskQueueSuper<N, MT>::age_relaxed;
+ using TaskQueueSuper<N, MT>::set_age_relaxed;
+ using TaskQueueSuper<N, MT>::cmpxchg_age;
+ using TaskQueueSuper<N, MT>::age_top_relaxed;
- using TaskQueueSuper<N, F>::increment_index;
- using TaskQueueSuper<N, F>::decrement_index;
- using TaskQueueSuper<N, F>::dirty_size;
- using TaskQueueSuper<N, F>::clean_size;
- using TaskQueueSuper<N, F>::assert_not_underflow;
+ using TaskQueueSuper<N, MT>::increment_index;
+ using TaskQueueSuper<N, MT>::decrement_index;
+ using TaskQueueSuper<N, MT>::dirty_size;
+ using TaskQueueSuper<N, MT>::clean_size;
+ using TaskQueueSuper<N, MT>::assert_not_underflow;
public:
- typedef typename TaskQueueSuper<N, F>::PopResult PopResult;
+ typedef typename TaskQueueSuper<N, MT>::PopResult PopResult;
- using TaskQueueSuper<N, F>::max_elems;
- using TaskQueueSuper<N, F>::size;
+ using TaskQueueSuper<N, MT>::max_elems;
+ using TaskQueueSuper<N, MT>::size;
#if TASKQUEUE_STATS
- using TaskQueueSuper<N, F>::stats;
+ using TaskQueueSuper<N, MT>::stats;
#endif
private:
@@ -428,12 +428,12 @@ class GenericTaskQueue: public TaskQueueSuper {
// Note that size() is not hidden--it returns the number of elements in the
// TaskQueue, and does not include the size of the overflow stack. This
// simplifies replacement of GenericTaskQueues with OverflowTaskQueues.
-template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
-class OverflowTaskQueue: public GenericTaskQueue<E, F, N>
+template <class E, MemTag MT, unsigned int N = TASKQUEUE_SIZE>
+class OverflowTaskQueue: public GenericTaskQueue<E, MT, N>
{
public:
- typedef Stack<E, F> overflow_t;
- typedef GenericTaskQueue<E, F, N> taskqueue_t;
+ typedef Stack<E, MT> overflow_t;
+ typedef GenericTaskQueue<E, MT, N> taskqueue_t;
TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)
@@ -467,11 +467,11 @@ class TaskQueueSetSuper {
virtual uint tasks() const = 0;
};
-template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper {
+template <MemTag MT> class TaskQueueSetSuperImpl: public CHeapObj<MT>, public TaskQueueSetSuper {
};
-template<class T, MEMFLAGS F>
-class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> {
+template<class T, MemTag MT>
+class GenericTaskQueueSet: public TaskQueueSetSuperImpl<MT> {
public:
typedef typename T::element_type E;
typedef typename T::PopResult PopResult;
@@ -518,29 +518,29 @@ class GenericTaskQueueSet: public TaskQueueSetSuperImpl {
#endif // TASKQUEUE_STATS
};
-template<class T, MEMFLAGS F> void
-GenericTaskQueueSet<T, F>::register_queue(uint i, T* q) {
+template<class T, MemTag MT> void
+GenericTaskQueueSet<T, MT>::register_queue(uint i, T* q) {
assert(i < _n, "index out of range.");
_queues[i] = q;
}
-template<class T, MEMFLAGS F> T*
-GenericTaskQueueSet<T, F>::queue(uint i) {
+template<class T, MemTag MT> T*
+GenericTaskQueueSet<T, MT>::queue(uint i) {
assert(i < _n, "index out of range.");
return _queues[i];
}
#ifdef ASSERT
-template<class T, MEMFLAGS F>
-void GenericTaskQueueSet<T, F>::assert_empty() const {
+template<class T, MemTag MT>
+void GenericTaskQueueSet<T, MT>::assert_empty() const {
for (uint j = 0; j < _n; j++) {
_queues[j]->assert_empty();
}
}
#endif // ASSERT
-template<class T, MEMFLAGS F>
-uint GenericTaskQueueSet<T, F>::tasks() const {
+template<class T, MemTag MT>
+uint GenericTaskQueueSet<T, MT>::tasks() const {
uint n = 0;
for (uint j = 0; j < _n; j++) {
n += _queues[j]->size();
diff --git a/src/hotspot/share/gc/shared/taskqueue.inline.hpp b/src/hotspot/share/gc/shared/taskqueue.inline.hpp
index f937ce8a2e993..8e65cfd704fda 100644
--- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,30 +38,30 @@
#include "utilities/ostream.hpp"
#include "utilities/stack.inline.hpp"
-template<class T, MEMFLAGS F>
-inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(uint n) : _n(n) {
+template<class T, MemTag MT>
+inline GenericTaskQueueSet<T, MT>::GenericTaskQueueSet(uint n) : _n(n) {
typedef T* GenericTaskQueuePtr;
- _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
+ _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, MT);
for (uint i = 0; i < n; i++) {
_queues[i] = nullptr;
}
}
-template<class T, MEMFLAGS F>
-inline GenericTaskQueueSet<T, F>::~GenericTaskQueueSet() {
+template<class T, MemTag MT>
+inline GenericTaskQueueSet<T, MT>::~GenericTaskQueueSet() {
FREE_C_HEAP_ARRAY(T*, _queues);
}
#if TASKQUEUE_STATS
-template<class T, MEMFLAGS F>
-void GenericTaskQueueSet<T, F>::print_taskqueue_stats_hdr(outputStream* const st, const char* label) {
+template<class T, MemTag MT>