Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport] 8280872: Reorder code cache segments to improve code density #665

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1182,12 +1182,12 @@ class HandlerImpl {
static int emit_deopt_handler(CodeBuffer& cbuf);

static uint size_exception_handler() {
return MacroAssembler::far_branch_size();
return MacroAssembler::far_codestub_branch_size();
}

static uint size_deopt_handler() {
// count one adr and one far branch instruction
return 4 * NativeInstruction::instruction_size;
return NativeInstruction::instruction_size + MacroAssembler::far_codestub_branch_size();
}
};

Expand Down Expand Up @@ -2184,7 +2184,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
__ adr(lr, __ pc());
__ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
assert(__ offset() - offset == (int) size_deopt_handler(), "overflow");
__ end_a_stub();
return offset;
}
Expand Down
10 changes: 8 additions & 2 deletions src/hotspot/cpu/aarch64/icBuffer_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,15 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached
address start = __ pc();
Label l;
__ ldr(rscratch2, l);
__ far_jump(ExternalAddress(entry_point));
__ align(wordSize);
int jump_code_size = __ far_jump(ExternalAddress(entry_point));
// IC stub code size is not expected to vary depending on target address.
// We use NOPs to make the [ldr + far_jump + nops + int64] stub size equal to ic_stub_code_size.
for (int size = NativeInstruction::instruction_size + jump_code_size + 8;
size < ic_stub_code_size(); size += NativeInstruction::instruction_size) {
__ nop();
}
__ bind(l);
assert((uintptr_t)__ pc() % wordSize == 0, "");
__ emit_int64((int64_t)cached_value);
// Only need to invalidate the 1st two instructions - not the whole ic stub
ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
Expand Down
25 changes: 20 additions & 5 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,14 +400,27 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp,
}
}

// Decides whether a branch to addr must be emitted as a far branch.
// The code cache size figures are the ones quoted for each case.
static inline bool target_needs_far_branch(address addr) {
  if (MacroAssembler::far_branches()) {
    // codecache size > 240M: every target needs a far branch.
    // codecache size 128M..240M: only targets outside the non-nmethod
    // segment need a far branch.
    return MacroAssembler::codestub_branch_needs_far_jump() ||
           !CodeCache::is_non_nmethod(addr);
  }
  // codecache size <= 128M: no far branch is needed.
  return false;
}

void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
if (target_needs_far_branch(entry.target())) {
uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
Expand All @@ -418,14 +431,15 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
}
}

void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
int MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
assert(ReservedCodeCacheSize < 4*G, "branch out of range");
assert(CodeCache::find_blob(entry.target()) != NULL,
"destination of far call not found in code cache");
if (far_branches()) {
address start = pc();
if (target_needs_far_branch(entry.target())) {
uint64_t offset;
// We can use ADRP here because we know that the total size of
// the code cache cannot exceed 2Gb.
// the code cache cannot exceed 2Gb (ADRP limit is 4GB).
adrp(tmp, entry, offset);
add(tmp, tmp, offset);
if (cbuf) cbuf->set_insts_mark();
Expand All @@ -434,6 +448,7 @@ void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
if (cbuf) cbuf->set_insts_mark();
b(entry);
}
return pc() - start;
}

void MacroAssembler::reserved_stack_check() {
Expand Down
11 changes: 8 additions & 3 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1062,13 +1062,18 @@ class MacroAssembler: public Assembler {
return ReservedCodeCacheSize > branch_range || UseAOT;
}

// Returns true when the largest distance to the non-nmethod section exceeds
// branch_range, i.e. branches into that section require a far jump.
static bool codestub_branch_needs_far_jump() {
  const size_t max_distance = CodeCache::max_distance_to_non_nmethod();
  return max_distance > branch_range;
}

// Jumps that can reach anywhere in the code cache.
// Trashes tmp.
void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
int far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

static int far_branch_size() {
if (far_branches()) {
static int far_codestub_branch_size() {
if (codestub_branch_needs_far_jump()) {
return 3 * 4; // adrp, add, br
} else {
return 4;
Expand Down
30 changes: 24 additions & 6 deletions src/hotspot/share/code/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,20 @@ void CodeCache::initialize_heaps() {
const size_t alignment = MAX2(page_size(false, 8), (size_t) os::vm_allocation_granularity());
non_nmethod_size = align_up(non_nmethod_size, alignment);
profiled_size = align_down(profiled_size, alignment);
non_profiled_size = align_down(non_profiled_size, alignment);

// Reserve one continuous chunk of memory for CodeHeaps and split it into
// parts for the individual heaps. The memory layout looks like this:
// ---------- high -----------
// Non-profiled nmethods
// Profiled nmethods
// Non-nmethods
// Profiled nmethods
// ---------- low ------------
ReservedCodeSpace rs = reserve_heap_memory(cache_size);
ReservedSpace non_method_space = rs.first_part(non_nmethod_size);
ReservedSpace rest = rs.last_part(non_nmethod_size);
ReservedSpace profiled_space = rest.first_part(profiled_size);
ReservedSpace non_profiled_space = rest.last_part(profiled_size);
ReservedSpace profiled_space = rs.first_part(profiled_size);
ReservedSpace rest = rs.last_part(profiled_size);
ReservedSpace non_method_space = rest.first_part(non_nmethod_size);
ReservedSpace non_profiled_space = rest.last_part(non_nmethod_size);

// Non-nmethods (stubs, adapters, ...)
add_heap(non_method_space, "CodeHeap 'non-nmethods'", CodeBlobType::NonNMethod);
Expand Down Expand Up @@ -1043,6 +1044,24 @@ size_t CodeCache::max_capacity() {
return max_cap;
}

// Returns true if addr is contained in the code heap that
// get_code_heap() reports for CodeBlobType::NonNMethod.
bool CodeCache::is_non_nmethod(address addr) {
  return get_code_heap(CodeBlobType::NonNMethod)->contains(addr);
}

size_t CodeCache::max_distance_to_non_nmethod() {
if (!SegmentedCodeCache) {
return ReservedCodeCacheSize;
} else {
CodeHeap* blob = get_code_heap(CodeBlobType::NonNMethod);
// the max distance is minimized by placing the NonNMethod segment
// in between MethodProfiled and MethodNonProfiled segments
size_t dist1 = (size_t)blob->high() - (size_t)_low_bound;
size_t dist2 = (size_t)_high_bound - (size_t)blob->low();
return dist1 > dist2 ? dist1 : dist2;
}
}

/**
* Returns the reverse free ratio. E.g., if 25% (1/4) of the code heap
* is free, reverse_free_ratio() returns 4.
Expand All @@ -1052,7 +1071,6 @@ double CodeCache::reverse_free_ratio(int code_blob_type) {
if (heap == NULL) {
return 0;
}

double unallocated_capacity = MAX2((double)heap->unallocated_capacity(), 1.0); // Avoid division by 0;
double max_capacity = (double)heap->max_capacity();
double result = max_capacity / unallocated_capacity;
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/code/codeCache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ class CodeCache : AllStatic {
static bool needs_cache_clean() { return _needs_cache_clean; }
static void set_needs_cache_clean(bool v) { _needs_cache_clean = v; }

static size_t max_distance_to_non_nmethod();
static bool is_non_nmethod(address addr);

static void clear_inline_caches(); // clear all inline caches
static void cleanup_inline_caches(); // clean unloaded/zombie nmethods from inline caches
static void do_unloading_nmethod_caches(bool class_unloading_occurred); // clean all nmethod caches for unloading, including inline caches
Expand Down
137 changes: 137 additions & 0 deletions test/hotspot/jtreg/compiler/c2/aarch64/TestFarJump.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright (c) 2022, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.aarch64;

import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;

/*
* @test
* @bug 8280872
* @summary Far call to runtime stub should be generated with single instruction for CodeHeap up to 250MB
* @library /test/lib /
*
* @requires vm.flagless
* @requires os.arch=="aarch64"
* @requires vm.debug == false
* @requires vm.compiler2.enabled
*
* @run driver compiler.c2.aarch64.TestFarJump
*/
public class TestFarJump {

    // Marker argument handed to the forked VM so that main() can tell the
    // forked VM apart from the driver VM (which is started without arguments).
    private static final String FORKED_VM_ARG = "forked";

    // Runs of lower-case hex digits and spaces: candidates for raw instruction
    // words in the binary form of the assembly output.
    private static final Pattern HEX_RUN = Pattern.compile("[0-9a-f ]*");

    // ADRP instruction encoding:
    //   |31 30 29 28|27 26 25 24|23 22 21 20|19 18 17 16|15 14 13 12|11 10 09 08|07 06 05 04|03 02 01 00|
    //   | 1|immlo| 1  0  0  0  0|                immhi                                   |     Rd       |
    // Only bit 31 and bits 28..24 identify the instruction; immlo (bits 30..29)
    // is masked out.
    static boolean isADRP(int encoding) {
        final int mask = 0b1001_1111;
        final int val  = 0b1001_0000;
        return ((encoding >> 24) & mask) == val;
    }

    // Looking for adrp instruction in binary/text assembly output:
    //   0x0000ffff7ff1b7d0: c8ff ffd0 | 0801 1091 | 0001 1fd6
    //   0x0000ffff6bf20ee0: adrp x8, 0x0000ffff6bef1000
    // Returns false for lines without an "address: " prefix.
    static boolean containsADRP(String input) {
        int index = input.indexOf(": ");
        if (index == -1) {
            return false;
        }
        input = input.substring(index + 1);
        // Text form: the mnemonic is spelled out.
        if (input.contains("adrp")) {
            return true;
        }
        // Binary form: decode every group of exactly 8 hex digits.
        Matcher matcher = HEX_RUN.matcher(input);
        while (matcher.find()) {
            String match = matcher.group().replace(" ", "");
            if (match.length() != 8) {
                continue;
            }
            // The dump lists the bytes in memory order; reverse them to get
            // the instruction encoding.
            int dump = (int) Long.parseLong(match, 16);
            int encoding = Integer.reverseBytes(dump);
            if (isADRP(encoding)) {
                return true;
            }
        }
        return false;
    }

    // Forks a VM that compiles main() with the requested code cache size and
    // checks the two lines following "[Exception Handler]" in its assembly
    // output: with the big code heap an ADRP is expected, otherwise it must
    // be absent.
    static void runVM(boolean bigCodeHeap) throws Exception {
        String className = TestFarJump.class.getName();
        String[] procArgs = {
            "-XX:-Inline",
            "-Xcomp",
            "-Xbatch",
            "-XX:+TieredCompilation",
            "-XX:+SegmentedCodeCache",
            "-XX:CompileOnly=" + className + "::main",
            "-XX:ReservedCodeCacheSize=" + (bigCodeHeap ? "256M" : "200M"),
            "-XX:+UnlockDiagnosticVMOptions",
            "-XX:+PrintAssembly",
            className,
            // Without a program argument the forked VM would take the
            // "main VM" branch of main() and try to fork again.
            FORKED_VM_ARG};

        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(procArgs);
        OutputAnalyzer output = new OutputAnalyzer(pb.start());
        List<String> lines = output.asLines();

        ListIterator<String> itr = lines.listIterator();
        while (itr.hasNext()) {
            String line = itr.next();
            if (!line.contains("[Exception Handler]")) {
                continue;
            }
            // Two more lines are inspected; fail with a clear message instead
            // of a NoSuchElementException when the output ends early.
            if (!itr.hasNext()) {
                throw new RuntimeException("Assembly output: exception Handler is truncated");
            }
            String next1 = itr.next();
            if (!itr.hasNext()) {
                throw new RuntimeException("Assembly output: exception Handler is truncated");
            }
            String next2 = itr.next();
            System.out.println(line);
            System.out.println(next1);
            System.out.println(next2);
            boolean containsADRP = containsADRP(next1) || containsADRP(next2);
            if (bigCodeHeap && !containsADRP) {
                throw new RuntimeException("ADRP instruction is expected on far jump");
            }
            if (!bigCodeHeap && containsADRP) {
                throw new RuntimeException("for CodeHeap < 250MB the far jump is expected to be encoded with a single branch instruction");
            }
            return;
        }
        throw new RuntimeException("Assembly output: exception Handler is not found");
    }

    public static void main(String[] args) throws Exception {
        if (args.length == 0) {
            // Main VM: fork VM with options
            runVM(true);
            runVM(false);
            return;
        }
        // We are in a forked VM. Just exit
        System.out.println("Ok");
    }
}

Loading